diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1e38937 --- /dev/null +++ b/.gitignore @@ -0,0 +1,34 @@ + +*.pro.user +*.pro.user* +.qmake.stash +Makefile + +cpp-projects/_build/bin/**/*.dll +cpp-projects/_build/bin/**/*.exe +cpp-projects/_build/bin/**/*.lib +cpp-projects/_build/bin/**/*.pdb +cpp-projects/_build/bin/**/*.ilk +cpp-projects/_build/bin/**/*.pdb +cpp-projects/_build/bin/**/*.exp +cpp-projects/_build/bin/**/*logs.html +cpp-projects/_build/bin/**/logs +cpp-projects/_build/bin/**/k4a_*.html +cpp-projects/_build/bin/**/imgui.ini +cpp-projects/_build/bin/**/*.onnx + +cpp-projects/_build/bin/base +cpp-projects/_build/bin/demos +cpp-projects/_build/temp +cpp-projects/_thirdparty + +.qtc_clangd + +## bin +cpp-projects/**/Makefile +cpp-projects/**/Makefile* +cpp-projects/**/*.pch +cpp-projects/**/*.rc + + + diff --git a/CREDITS.md b/CREDITS.md new file mode 100644 index 0000000..92d52bf --- /dev/null +++ b/CREDITS.md @@ -0,0 +1,91 @@ +**Credits** +======= + +Authors +------- + +* Florian Lance + + +Contributors +------- + +* Florian Lance + +Thirdparty +---------- + +* **stb**: https://github.com/nothings/stb + +ALTERNATIVE A - MIT License + +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +------------------------------------------ + +* **Assimp**: https://github.com/assimp/assimp + +Open Asset Import Library (assimp) + +Copyright (c) 2006-2020, assimp team +All rights reserved. + +Redistribution and use of this software in source and binary forms, +with or without modification, are permitted provided that the +following conditions are met: + + +1. Redistributions of source code must retain the above + copyright notice, this list of conditions and the + following disclaimer. + +2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the + following disclaimer in the documentation and/or other + materials provided with the distribution. + +3. Neither the name of the assimp team, nor the names of its + contributors may be used to endorse or promote products + derived from this software without specific prior + written permission of the assimp team. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +------------------------------------------ + + + +catch +signals +turbojpg +fastpfor +opencv +kinect2 +kinect4 +boost + +qt \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..890339f --- /dev/null +++ b/LICENSE @@ -0,0 +1,24 @@ + + +Toolset +MIT License + +Copyright (c) 2018 Florian Lance + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..53d71f3 --- /dev/null +++ b/README.md @@ -0,0 +1,2 @@ +# toolset +Geometry library, OpenGL engine and various utilities diff --git a/cpp-projects/3d-engine/3d-engine.pro b/cpp-projects/3d-engine/3d-engine.pro new file mode 100644 index 0000000..20104f3 --- /dev/null +++ b/cpp-projects/3d-engine/3d-engine.pro @@ -0,0 +1,190 @@ + +# /******************************************************************************* +# ** Toolset-3d-engine ** +# ** MIT License ** +# ** Copyright (c) [2018] [Florian Lance] ** +# ** ** +# ** Permission is hereby granted, free of charge, to any person obtaining a ** +# ** copy of this software and associated documentation files (the "Software"), ** +# ** to deal in the Software without restriction, including without limitation ** +# ** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +# ** and/or sell copies of the Software, and to permit persons to whom the ** +# ** Software is furnished to do so, subject to the following conditions: ** +# ** ** +# ** The above copyright notice and this permission notice shall be included in ** +# ** all copies or substantial portions of the Software. ** +# ** ** +# ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +# ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +# ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +# ** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +# ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +# ** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +# ** DEALINGS IN THE SOFTWARE. ** +# ** ** +# ********************************************************************************/ + +####################################### repo +TOOLBOX_REPOSITORY_DIR = $$PWD"/../.." 
+ +####################################### PRI +# defines compiling options +include(../ts-settings.pri) +# defines projects paths and variables +include(../ts-projects.pri) +# defines thirdparty includes and libs +include(../ts-thirdparty.pri) + +####################################### TARGET +equals(CFG, "debug"){ + TARGET = 3d-engined +} +equals(CFG, "release"){ + TARGET = 3d-engine +} + +####################################### TEMPLATE +TEMPLATE = lib +CONFIG += staticlib +CONFIG -= console + +####################################### BUILD FILES +OBJECTS_DIR = $$3D_ENGINE_OBJ +DESTDIR = $$3D_ENGINE_DEST + +####################################### CONFIG +CONFIG -= qt + +####################################### INCLUDES +INCLUDEPATH += \ + # tool + $$BASE_INCLUDES\ + $$OPENGL_UTILITY_INCLUDES\ + # local + $$3D_ENGINE_INCLUDES"/imgui"\ + # thirdparty + $$GLEW_INCLUDES\ + $$SFML_INCLUDES\ + $$ASSIMP_INCLUDES\ + $$KINECT4_INCLUDES \ + +####################################### LIBRARIES + +PRE_TARGETDEPS += \ + # tool + $$BASE_LIB_DEP \ + $$OPENGL_UTILITY_LIB_DEP \ + +LIBS +=\ + # tool + $$BASE_LIB\ + $$OPENGL_UTILITY_LIB \ + # thirdparty + $$WINDOWS_LIBS \ + $$GLEW_LIBS \ + $$SFML_LIBS \ + $$ASSIMP_LIBS\ + $$KINECT4_LIBS \ + +####################################### PROJECT FILES + +HEADERS += \ + # ui + imgui-tb/imgui_k4_calibrator_drawer.hpp \ + imgui-tb/imgui_k4_direct_drawer.hpp \ + immediate_ui.hpp \ + # resources + resources.hpp \ + # engine + engine/drawers_manager.hpp \ + engine/managers.hpp \ + engine/models_manager.hpp \ + engine/shaders_manager.hpp \ + engine/textures_manager.hpp \ + # windows + windows/base_sfml_gl_window.hpp \ + # imgui + imgui/imgui.h \ + imgui/imconfig.h \ + imgui/imgui_internal.h \ + imgui/imstb_rectpack.h \ + imgui/imstb_textedit.h \ + imgui/imstb_truetype.h \ + imgui/misc/cpp/imgui_stdlib.h \ + # imgui-extra + imgui/extra/imgui_markdown.h \ + imgui/extra/implot/implot.h \ + imgui/extra/implot/implot_internal.h \ + 
imgui/extra/imgui-node-editor/crude_json.h \ + imgui/extra/imgui-node-editor/imgui_bezier_math.h \ + imgui/extra/imgui-node-editor/imgui_bezier_math.inl \ + imgui/extra/imgui-node-editor/imgui_canvas.h \ + imgui/extra/imgui-node-editor/imgui_extra_math.h \ + imgui/extra/imgui-node-editor/imgui_extra_math.inl \ + imgui/extra/imgui-node-editor/imgui_node_editor.h \ + imgui/extra/imgui-node-editor/imgui_node_editor_internal.h \ + imgui/extra/imgui-node-editor/imgui_node_editor_internal.inl \ + imgui/extra/ImGuiFileDialog.h \ + imgui/extra/ImGuiFileDialogConfig.h \ + # imgui-sfml + imgui-sfml/imgui-SFML.h \ + imgui-sfml/imgui-SFML_export.h \ + # imgui-tb + imgui-tb/imgui_convert.hpp \ + imgui-tb/imgui_fbo_ui_drawer.hpp \ + imgui-tb/imgui_helper.hpp \ + imgui-tb/imgui_k4_cloud_drawer.hpp \ + imgui-tb/imgui_k4_clouds_scene_drawer.hpp \ + imgui-tb/imgui_k4_device_drawer.hpp \ + imgui-tb/imgui_k4_player_drawer.hpp \ + imgui-tb/imgui_k4_recorder_drawer.hpp \ + imgui-tb/imgui_k4_ui_drawer.hpp \ + imgui-tb/imgui_logs.hpp \ + imgui-tb/imgui_texture_ui_drawer.hpp \ + imgui-tb/imgui_types.hpp \ + imgui-tb/imgui_ui_drawer.hpp \ + + +SOURCES += \ + # resources + imgui-tb/imgui_k4_calibrator_drawer.cpp \ + imgui-tb/imgui_k4_direct_drawer.cpp \ + resources/resources.cpp \ + # engine + engine/drawers_manager.cpp \ + engine/models_manager.cpp \ + engine/shaders_manager.cpp \ + engine/textures_manager.cpp \ + # windows + windows/base_sfml_gl_window.cpp \ + # imgui + imgui/imgui.cpp \ + imgui/imgui_draw.cpp \ + imgui/imgui_widgets.cpp \ + imgui/imgui_tables.cpp \ + imgui/misc/cpp/imgui_stdlib.cpp \ + # imgui-extra + imgui/extra/implot/implot.cpp \ + imgui/extra/implot/implot_items.cpp \ + imgui/extra/imgui-node-editor/crude_json.cpp \ + imgui/extra/imgui-node-editor/imgui_canvas.cpp \ + imgui/extra/imgui-node-editor/imgui_node_editor.cpp \ + imgui/extra/imgui-node-editor/imgui_node_editor_api.cpp \ + imgui/extra/ImGuiFileDialog.cpp \ + # imgui-sfml + imgui-sfml/imgui-SFML.cpp \ + # 
imgui-tb + imgui-tb/imgui_fbo_ui_drawer.cpp \ + imgui-tb/imgui_k4_cloud_drawer.cpp \ + imgui-tb/imgui_k4_clouds_scene_drawer.cpp \ + imgui-tb/imgui_k4_device_drawer.cpp \ + imgui-tb/imgui_k4_player_drawer.cpp \ + imgui-tb/imgui_k4_recorder_drawer.cpp \ + imgui-tb/imgui_k4_ui_drawer.cpp \ + imgui-tb/imgui_logs.cpp \ + imgui-tb/imgui_texture_ui_drawer.cpp \ + imgui-tb/imgui_ui_drawer.cpp \ + + + + diff --git a/cpp-projects/3d-engine/LICENSE b/cpp-projects/3d-engine/LICENSE new file mode 100644 index 0000000..c685ea9 --- /dev/null +++ b/cpp-projects/3d-engine/LICENSE @@ -0,0 +1,24 @@ + + +Toolbox-3d-engine +MIT License + +Copyright (c) 2018 Florian Lance + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/cpp-projects/3d-engine/engine/drawers_manager.cpp b/cpp-projects/3d-engine/engine/drawers_manager.cpp
new file mode 100644
index 0000000..9afaff2
--- /dev/null
+++ b/cpp-projects/3d-engine/engine/drawers_manager.cpp
@@ -0,0 +1,94 @@
+
+#include "drawers_manager.hpp"
+
+// std
+// NOTE(review): the include target below and several template argument lists
+// (e.g. `std::unique_ptr instance`) are missing in this listing; restore them
+// from the upstream file before building.
+#include
+
+// base
+#include "utility/logger.hpp"
+
+
+using namespace tool;
+using namespace tool::graphics;
+
+
+namespace{
+// Holder for the lazily created DrawersManager singleton.
+// Kept in an unnamed namespace: the models, shaders and textures manager
+// sources each define their own `Global` with a different member type, and
+// identically named classes with external linkage must not differ between
+// translation units (one-definition rule).
+struct Global{
+    static inline std::unique_ptr instance = nullptr;
+};
+}
+
+// Returns the process-wide DrawersManager, creating it on the first call.
+auto DrawersManager::get_instance() -> DrawersManager*{
+    if(Global::instance == nullptr){
+        Global::instance = std::make_unique();
+    }
+    return Global::instance.get();
+}
+
+// Registers `drawer` under `alias` and stores `scaleHint` on it.
+// Logs an error and leaves the manager unchanged when `drawer` is null or
+// when `alias` is already registered.
+auto DrawersManager::add_drawer(const Alias &alias, std::shared_ptr drawer, float scaleHint) -> void{
+
+    if(drawer == nullptr){
+        Logger::error(std::format("[DrawersManager] Drawer with alias: {} is null.\n",alias));
+        return;
+    }
+
+    if(drawers.has_alias(alias)){
+        Logger::error(std::format("[DrawersManager] Drawer alias already used: {}.\n",alias));
+        return;
+    }
+
+    drawer->scaleHint = scaleHint;
+    drawers.add_element(alias, std::move(drawer));
+}
+
+// Returns a weak handle to the drawer registered under `alias`, or an empty
+// handle when get_element() yields null.
+auto DrawersManager::get_drawer(AliasV alias) const noexcept -> std::weak_ptr{
+    if(auto drawer = drawers.get_element(alias); drawer != nullptr){
+        return drawer;
+    }
+    return {};
+}
+
+// Returns the alias registered for `id`; logs an error and returns an empty
+// view when the id is unknown.
+// NOTE(review): declared noexcept but formats and logs on the error path;
+// confirm that path cannot throw.
+auto DrawersManager::get_alias(Id id) const noexcept -> DrawersManager::AliasV{
+    if(drawers.has_id(id)){
+        return drawers.get_alias(id).value();
+    }
+    Logger::error(std::format("[DrawersManager] No drawer with id {}.\n",id));
+    return ""sv;
+}
+
+// Returns the id registered for `alias`; logs an error and returns 0 when the
+// alias is unknown.
+// NOTE(review): 0 may also be a valid id, so callers cannot tell the two
+// cases apart from the return value alone; confirm against IdAliasMap.
+auto DrawersManager::get_id(DrawersManager::AliasV alias) const -> size_t{
+    if(drawers.has_alias(alias)){
+        return drawers.get_id(alias).value();
+    }
+    Logger::error(std::format("[DrawersManager] Drawer with alias {} not found.\n",alias));
+    return 0;
+}
+
+// Draws the drawer registered under `alias` with `shader`; does nothing when
+// no drawer is found for that alias.
+auto DrawersManager::draw(AliasV alias, gl::ShaderProgram *shader) -> void{
+    if(auto drawer = drawers.get_element_ptr(alias); drawer){
+        drawer->draw(shader);
+    }
+}
+
+// Builds a new alias map holding the drawers of the requested aliases.
+// Aliases for which get_drawer() returns an empty handle are skipped.
+auto DrawersManager::sub(std::vector aliases) const -> IdAliasMapSharedPtr{
+    IdAliasMapSharedPtr subDrawers;
+    for(std::string_view alias : aliases){
+        if(auto drawer = get_drawer(alias).lock(); drawer != nullptr){
+            subDrawers.add_element(alias, std::move(drawer));
+        }
+    }
+    return subDrawers;
+}
+
+// Forwards to the alias map's own debug() output.
+auto DrawersManager::debug() -> void {
+    drawers.debug();
+//    for(size_t ii = 0; ii < drawers.count(); ++ii){
+//        Logger::message(std::format("drawer {}:{}\n", ii, drawers.has_id(ii)));
+//    }
+
+//    for(size_t ii = 0; ii < drawers.count(); ++ii){
+//        Logger::message(std::format("drawer {}:{}\n", ii, drawers.get_alias(ii).value()));
+//    }
+}
+
+
+
+
diff --git a/cpp-projects/3d-engine/engine/drawers_manager.hpp b/cpp-projects/3d-engine/engine/drawers_manager.hpp
new file mode 100644
index 0000000..80c14ef
--- /dev/null
+++ b/cpp-projects/3d-engine/engine/drawers_manager.hpp
@@ -0,0 +1,73 @@
+
+/*******************************************************************************
+** Toolset-3d-engine **
+** MIT License **
+** Copyright (c) [2018] [Florian Lance] **
+** **
+** Permission is hereby granted, free of charge, to any person obtaining a **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation **
+** the rights to use, copy, modify, merge, publish, distribute, sublicense, **
+** and/or sell copies of the Software, and to permit persons to whom the **
+** Software is furnished to do so, subject to the following conditions: **
+** **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software. **
+** **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER **
+** DEALINGS IN THE SOFTWARE. **
+** **
+********************************************************************************/
+
+#pragma once
+
+// base
+#include "utility/id_alias_map.hpp"
+
+// opengl-utility
+#include "opengl/draw/drawer.hpp"
+
+namespace tool::graphics {
+
+// Strong wrapper around the scale hint, so the forwarding add_drawer overload
+// below can be told apart from the one taking an already built drawer.
+struct Scale{
+    float v;
+};
+
+// Registry of drawers addressed by alias or by numeric id.
+// NOTE(review): template parameter/argument lists look stripped in this
+// listing (`template` with no parameters, `std::shared_ptr drawer`, ...);
+// restore them from the upstream file before building.
+class DrawersManager{
+
+    using Alias = std::string;
+    using AliasV = std::string_view;
+    using Id = size_t;
+
+public:
+
+    // Returns the shared manager instance (created on first call).
+    static auto get_instance() -> DrawersManager*;
+
+    // Builds a drawer from `args` with std::make_shared and registers it under
+    // `alias` with scale hint `scaleHint.v`.
+    template
+    auto add_drawer(const Alias &alias, Scale scaleHint, A&&... args){
+        add_drawer(alias, std::make_shared(std::forward(args)...), scaleHint.v);
+    }
+    // Registers an existing drawer; scale hint defaults to 1.
+    auto add_drawer(const Alias &alias, std::shared_ptr drawer, float scaleHint = 1.f) -> void;
+
+    // Lookups by alias or id.
+    auto get_drawer(AliasV alias) const noexcept -> std::weak_ptr;
+    auto get_alias(Id id) const noexcept -> AliasV;
+    auto get_id(AliasV alias) const -> size_t;
+    // Number of entries reported by the underlying alias map.
+    inline auto count() const noexcept -> size_t{return drawers.count();}
+
+    // Draws the drawer registered under `alias`; `shader` may be null.
+    auto draw(AliasV alias, gl::ShaderProgram *shader = nullptr) -> void;
+
+    // Returns a new alias map restricted to the given aliases.
+    auto sub(std::vector aliases) const -> IdAliasMapSharedPtr;
+
+    auto debug() -> void;
+private:
+
+    // alias/id -> drawer storage
+    IdAliasMapSharedPtr drawers;
+};
+}
+
+
diff --git a/cpp-projects/3d-engine/engine/managers.hpp b/cpp-projects/3d-engine/engine/managers.hpp
new file mode 100644
index 0000000..7448218
--- /dev/null
+++ b/cpp-projects/3d-engine/engine/managers.hpp
@@ -0,0 +1,52 @@
+
+/*******************************************************************************
+** Toolset-3d-engine **
+** MIT License **
+** Copyright (c) [2018] [Florian Lance] **
+** **
+** Permission is hereby granted, free of charge, to any person obtaining a **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation **
+** the rights to use, copy, modify, merge, publish, distribute, sublicense, **
+** and/or sell copies of the Software, and to permit persons to whom the **
+** Software is furnished to do so, subject to the following conditions: **
+** **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software. **
+** **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER **
+** DEALINGS IN THE SOFTWARE. **
+** **
+********************************************************************************/
+
+#pragma once
+
+// local
+#include "engine/models_manager.hpp"
+#include "engine/shaders_manager.hpp"
+#include "engine/textures_manager.hpp"
+#include "engine/drawers_manager.hpp"
+
+namespace tool::graphics {
+
+// Static access points to the four engine managers.
+// Every pointer stays null until initialize() has been called.
+struct Managers{
+
+    // Caches each manager's get_instance() result in the pointers below.
+    static auto initialize() -> void{
+
+        shaders = ShadersManager::get_instance();
+        textures = TexturesManager::get_instance();
+        models = ModelsManager::get_instance();
+        drawers = DrawersManager::get_instance();
+    }
+
+    static inline ShadersManager *shaders = nullptr;
+    static inline TexturesManager *textures = nullptr;
+    static inline ModelsManager *models = nullptr;
+    static inline DrawersManager *drawers = nullptr;
+};
+}
diff --git a/cpp-projects/3d-engine/engine/models_manager.cpp b/cpp-projects/3d-engine/engine/models_manager.cpp
new file mode 100644
index 0000000..053752e
--- /dev/null
+++ b/cpp-projects/3d-engine/engine/models_manager.cpp
@@ -0,0 +1,138 @@
+
+#include "models_manager.hpp"
+
+// std
+// NOTE(review): the three include targets below and several template argument
+// lists (e.g. `std::unique_ptr instance`) are missing in this listing; restore
+// them from the upstream file before building.
+#include
+#include
+#include
+
+// base
+#include "files/assimp_loader.hpp"
+#include "utility/logger.hpp"
+
+using namespace tool;
+using namespace tool::graphics;
+
+namespace{
+// Holder for the lazily created ModelsManager singleton.
+// Kept in an unnamed namespace: the drawers, shaders and textures manager
+// sources each define their own `Global` with a different member type, and
+// identically named classes with external linkage must not differ between
+// translation units (one-definition rule).
+struct Global{
+    static inline std::unique_ptr instance = nullptr;
+};
+}
+
+// Returns the process-wide ModelsManager, creating it on the first call.
+auto ModelsManager::get_instance() -> ModelsManager*{
+    if(Global::instance == nullptr){
+        Global::instance = std::make_unique();
+    }
+    return Global::instance.get();
+}
+
+
+// Loads a batch of (alias, path) pairs, one std::async task per model.
+// Pairs whose alias or path already appeared earlier in the same batch are
+// skipped. Unless `forceReload` is set, pairs whose path is already recorded
+// in aliasPerPath are skipped with an error log.
+// Always returns true.
+auto ModelsManager::add(std::vector> &&aliasPaths, bool forceReload) -> bool{
+
+    // preprocess aliases/paths
+    std::vector> newV;
+    newV.reserve(aliasPaths.size());
+
+    std::set aliases;
+    std::set paths;
+    for(auto &ap : aliasPaths){
+
+        if(aliases.count(ap.first) != 0){ // check for duplicates aliases
+            continue;
+        }
+        if(paths.count(ap.second) != 0){ // check for duplicates paths
+            continue;
+        }
+        // remember both halves so later duplicates in this batch are rejected
+        // (the path used to be inserted into `aliases`, leaving `paths` empty)
+        aliases.insert(ap.first);
+        paths.insert(ap.second);
+
+        if(!forceReload){
+            if(aliasPerPath.count(ap.second) != 0){
+                Logger::error(std::format("[ModelsManager] Model with alias {} and path {} already loaded with alias {}, loading canceled.\n",
+                    ap.first, ap.second, aliasPerPath[ap.second]));
+                continue;
+            }
+        }
+
+        newV.emplace_back(std::move(ap));
+    }
+
+    // do asynch loading
+    s_umap>> m;
+    for(const auto &ap : newV){
+        // keyed by path, like the lookups above and the single-model overload
+        // (this used to store path-per-alias, so the lookups never matched)
+        aliasPerPath[ap.second] = ap.first;
+        m[ap.first] = std::async(std::launch::async, files::AiLoader::load_model, ap.second, false);
+    }
+
+    // add results to map (get() waits for the matching load to finish)
+    for(auto &futureModel : m){
+        models.add_element(futureModel.first, futureModel.second.get());
+    }
+
+    return true;
+}
+
+// Loads one model synchronously and registers it under `alias`.
+// Returns false when `path` is already recorded in aliasPerPath or when
+// load_model() returns null, true otherwise.
+// NOTE(review): when `alias` already exists only an error is logged and
+// loading proceeds; whether add_element() really replaces the previous entry
+// is not visible here.
+auto ModelsManager::add(const std::string &alias, const std::string &path) -> bool{
+
+    if(aliasPerPath.count(path) != 0){
+        Logger::error(std::format(
+            "[ModelsManager] Model with alias {} and path {} already loaded with alias {}, loading canceled.\n",
+            alias, path, aliasPerPath[path])
+        );
+        return false;
+    }
+
+    if(models.has_alias(alias)){
+        Logger::error(std::format(
+            "[ModelsManager] Model alias {} already used, it will be replaced by newly loaded model.\n",
+            alias)
+        );
+
+    }
+    if(auto model = files::AiLoader::load_model(path); model != nullptr){
+        models.add_element(alias, std::move(model));
+        aliasPerPath[path] = alias;
+        return true;
+    }
+
+    return false;
+
+}
+
+// Returns a weak handle to the model registered under `alias`, or an empty
+// handle when none is found.
+auto ModelsManager::get(AliasV alias) const -> std::weak_ptr {
+    if(auto model = models.get_element(alias); model){
+        return model;
+    }
+    return {};
+}
+
+// Number of animations of the model registered under `alias`; 0 when the
+// model is not found.
+auto ModelsManager::get_animations_count(AliasV alias) const -> size_t{
+    if(auto model = get_ptr(alias); model != nullptr){
+        return model->animations.size();
+    }
+    return 0;
+}
+
+// Copy of animation `id` of the model registered under `alias`; a
+// default-constructed Animation when the model or the index is invalid.
+auto ModelsManager::get_animation(AliasV alias, size_t id) const -> Animation{
+    if(auto model = get_ptr(alias); model != nullptr){
+        if(id < model->animations.size()){
+            return model->animations[id];
+        }
+    }
+    return {};
+}
+
+// Name of animation `id` of the model registered under `alias`; an empty
+// view when the model or the index is invalid. The view refers to the name
+// stored in the model.
+auto ModelsManager::get_animation_name(AliasV alias, size_t id) const -> std::string_view{
+    if(auto model = get_ptr(alias); model != nullptr){
+        if(id < model->animations.size()){
+            return model->animations[id].name;
+        }
+    }
+    return "";
+}
+
+
+// Raw, non-owning pointer to the model registered under `alias`, as returned
+// by the alias map.
+auto ModelsManager::get_ptr(AliasV alias) const -> graphics::Model*{
+    return models.get_element_ptr(alias);
+}
+
+
+
diff --git a/cpp-projects/3d-engine/engine/models_manager.hpp b/cpp-projects/3d-engine/engine/models_manager.hpp
new file mode 100644
index 0000000..fe39472
--- /dev/null
+++ b/cpp-projects/3d-engine/engine/models_manager.hpp
@@ -0,0 +1,68 @@
+
+/*******************************************************************************
+** Toolset-3d-engine **
+** MIT License **
+** Copyright (c) [2018] [Florian Lance] **
+** **
+** Permission is hereby granted, free of charge, to any person obtaining a **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation **
+** the rights to use, copy, modify, merge, publish, distribute, sublicense, **
+** and/or sell copies of the Software, and to
permit persons to whom the **
+** Software is furnished to do so, subject to the following conditions: **
+** **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software. **
+** **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER **
+** DEALINGS IN THE SOFTWARE. **
+** **
+********************************************************************************/
+
+#pragma once
+
+// base
+#include "utility/id_alias_map.hpp"
+
+// opengl-utility
+#include "graphics/model.hpp"
+
+namespace tool::graphics {
+
+// Registry of loaded models addressed by alias, plus a record of which file
+// paths have already been loaded.
+// NOTE(review): template argument lists look stripped in this listing
+// (`std::vector>`, `std::weak_ptr`, `s_umap`, ...); restore them from the
+// upstream file before building.
+class ModelsManager{
+
+    using Alias = std::string;
+    using Path = std::string;
+    using AliasV = std::string_view;
+    using PathV = std::string_view;
+
+public:
+
+    // Returns the shared manager instance (created on first call).
+    static auto get_instance() -> ModelsManager*;
+
+    // Batch load: one asynchronous load per accepted (alias, path) pair.
+    auto add(std::vector> &&aliasPaths, bool forceReload = false) -> bool;
+    // Single synchronous load; false when the path was already loaded or the
+    // loader returned null.
+    auto add(const std::string &alias, const std::string &path) -> bool;
+
+    // Lookups by alias; each returns an empty/zero value when nothing matches.
+    auto get(AliasV alias) const -> std::weak_ptr;
+    auto get_animations_count(AliasV alias) const -> size_t;
+    auto get_animation(AliasV alias, size_t id) const -> Animation;
+    auto get_animation_name(AliasV alias, size_t id) const -> std::string_view;
+
+    // Number of entries reported by the underlying alias map.
+    inline auto count() const noexcept -> size_t {return models.count();}
+
+private:
+
+    // Non-owning pointer to the model registered under `alias`.
+    auto get_ptr(AliasV alias) const -> graphics::Model*;
+
+    // file path -> alias it was loaded under
+    s_umap aliasPerPath;
+    // alias/id -> model storage
+    IdAliasMapSharedPtr models;
+};
+}
+
+
+
diff --git a/cpp-projects/3d-engine/engine/shaders_manager.cpp b/cpp-projects/3d-engine/engine/shaders_manager.cpp
new file mode 100644
index 0000000..c274cd5
---
/dev/null
+++ b/cpp-projects/3d-engine/engine/shaders_manager.cpp
@@ -0,0 +1,123 @@
+
+#include "shaders_manager.hpp"
+
+// base
+#include "utility/logger.hpp"
+#include "utility/format.hpp"
+#include "utility/string.hpp"
+
+using namespace tool::graphics;
+using namespace tool::gl;
+
+// Calls clean() on every stored shader program.
+ShadersManager::~ShadersManager(){
+    for(auto &shader : shaders){
+        shader.second->clean();
+    }
+}
+
+namespace{
+// Holder for the lazily created ShadersManager singleton.
+// Kept in an unnamed namespace: the drawers, models and textures manager
+// sources each define their own `Global` with a different member type, and
+// identically named classes with external linkage must not differ between
+// translation units (one-definition rule).
+// NOTE(review): template argument lists look stripped in this listing
+// (e.g. `std::unique_ptr instance`); restore them before building.
+struct Global{
+    static inline std::unique_ptr instance = nullptr;
+};
+}
+
+// Returns the process-wide ShadersManager, creating it on the first call.
+auto ShadersManager::get_instance() -> ShadersManager*{
+    if(Global::instance == nullptr){
+        Global::instance = std::make_unique();
+    }
+    return Global::instance.get();
+}
+
+// Builds a shader program from the files in `paths` and stores it under
+// `alias`, replacing (with a warning) any program already stored there.
+// Returns false and stores nothing when load_from_files() fails.
+auto ShadersManager::load_shader(const Alias &alias, const std::vector &paths) -> bool{
+
+    if(shaders.count(alias) != 0){
+        Logger::warning(fmt("[ShadersM] Shader alias {} already exists, shader replaced.\n", alias));
+    }
+
+    auto shader = std::make_shared();
+    if(!shader->load_from_files(paths)){
+        Logger::error(std::format("[ShadersM] Cannot generate ShaderProgram from paths:\n{}", String::join(paths, "\n")));
+        return false;
+    }
+
+    shaders[alias] = shader;
+    return true;
+}
+
+// Stores an already built program under `alias`, replacing (with a warning)
+// any program already stored there.
+auto ShadersManager::add_shader(const Alias &alias, ShaderProgram&& shader) -> void{
+
+    if(shaders.count(alias) != 0){
+        Logger::warning(fmt("[ShadersM] Shader alias {} already exists, shader replaced.\n", alias));
+    }
+
+    shaders[alias] = std::make_shared(std::move(shader));
+}
+
+// Rebuilds the program stored under `alias` from the file paths it was
+// loaded with and returns the new program.
+// Returns null when the alias is unknown or the reload fails. In the failure
+// case the old program has already been cleaned and the map entry points to
+// the new, unloaded program.
+auto ShadersManager::reload_shader(AliasV alias) -> std::shared_ptr{
+
+    if(shaders.contains(alias)){
+        auto paths = shaders[alias]->shaders_file_paths();
+        shaders[alias]->clean();
+        shaders[alias] = std::make_shared();
+        Logger::message(std::format("[ShadersM] Reload ShaderProgram with alias {}:\n", alias));
+        if(!shaders[alias]->load_from_files(std::move(paths))){
+            Logger::error(std::format("[ShadersM] Cannot reload ShaderProgram with alias {}:\n", alias));
+            return nullptr;
+        }
+        return shaders[alias];
+    }
+    Logger::error(std::format("[ShadersM] Cannot find ShaderProgram with alias {}:\n", alias));
+    return nullptr;
+}
+
+// Finds the alias whose stored program is `shaderToBeReloaded` and reloads
+// it; logs an error and returns null when the pointer is not managed here.
+auto ShadersManager::reload_shader(ShaderProgram *shaderToBeReloaded) -> std::shared_ptr{
+
+    for(auto &shader : shaders){
+        if(shader.second.get() == shaderToBeReloaded){
+            return reload_shader(shader.first);
+        }
+    }
+    Logger::error("[ShadersM] ShaderProgram pointer not found in manager.\n");
+    return nullptr;
+}
+
+// Reloads every stored program; individual failures are only logged.
+auto ShadersManager::reload_all_shaders() -> void{
+    for(auto &shader : shaders){
+        reload_shader(shader.first);
+    }
+}
+
+// Returns a weak handle to the program stored under `alias`, or an empty
+// handle when the alias is unknown.
+auto ShadersManager::get_shader(AliasV alias) const noexcept -> std::weak_ptr{
+
+    if(auto shader = shaders.find(alias); shader != shaders.end()){
+        return shader->second;
+    }
+    return {};
+}
+
+// Returns a non-owning pointer to the program stored under `alias`, or null
+// when the alias is unknown. Uses a single find() instead of count() + [].
+auto ShadersManager::get_ptr(AliasV alias) -> ShaderProgram *{
+    if(auto shader = shaders.find(alias); shader != shaders.end()){
+        return shader->second.get();
+    }
+    return nullptr;
+}
+
+// Returns the aliases of all stored programs, in map iteration order.
+auto ShadersManager::get_all_aliases() const noexcept -> std::vector{
+    std::vector aliases;
+    aliases.reserve(shaders.size());
+    for(const auto &shader : shaders){
+        aliases.push_back(shader.first);
+    }
+    return aliases;
+}
+
+// Forwards to ShaderProgram::unbind().
+auto ShadersManager::unbind() -> void{
+    ShaderProgram::unbind();
+}
+
+// Logs one "shader [alias] [id]" line per stored program.
+auto ShadersManager::debug() -> void{
+    for(const auto &shader : shaders){
+        Logger::message(std::format("shader [{}] [{}]\n", shader.first, shader.second->id()));
+    }
+}
+
+
diff --git a/cpp-projects/3d-engine/engine/shaders_manager.hpp b/cpp-projects/3d-engine/engine/shaders_manager.hpp
new file mode 100644
index 0000000..50fa522
--- /dev/null
+++ b/cpp-projects/3d-engine/engine/shaders_manager.hpp
@@ -0,0 +1,70 @@
+
+/*******************************************************************************
+** Toolset-3d-engine **
+** MIT License **
+** Copyright (c) [2018] [Florian Lance] **
+** **
+** Permission is hereby granted, free of charge, to any person obtaining a **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation
**
+** the rights to use, copy, modify, merge, publish, distribute, sublicense, **
+** and/or sell copies of the Software, and to permit persons to whom the **
+** Software is furnished to do so, subject to the following conditions: **
+** **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software. **
+** **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER **
+** DEALINGS IN THE SOFTWARE. **
+** **
+********************************************************************************/
+
+#pragma once
+
+// base
+#include "utility/string_unordered_map.hpp"
+
+// opengl-utility
+#include "opengl/shader/shader.hpp"
+
+
+namespace tool::graphics {
+
+// Registry of shader programs addressed by alias.
+// NOTE(review): template argument lists look stripped in this listing
+// (`std::vector &paths`, `std::shared_ptr`, `s_umap>`, ...); restore them
+// from the upstream file before building.
+class ShadersManager{
+
+    using Alias = std::string;
+    using AliasV = std::string_view;
+    using Path = std::string;
+    using PathV = std::string_view;
+
+public:
+
+    // Cleans every stored program.
+    ~ShadersManager();
+
+    // Returns the shared manager instance (created on first call).
+    static auto get_instance() -> ShadersManager*;
+
+    // Loading / reloading; the reload functions return null on failure.
+    auto load_shader(const Alias &alias, const std::vector &paths) -> bool;
+    auto reload_shader(AliasV alias) -> std::shared_ptr;
+    auto reload_shader(gl::ShaderProgram *shader) -> std::shared_ptr;
+    auto reload_all_shaders() -> void;
+    auto add_shader(const Alias &alias, gl::ShaderProgram&& shader) -> void;
+
+    // Lookups by alias; empty handle / null when the alias is unknown.
+    auto get_shader(AliasV alias) const noexcept -> std::weak_ptr;
+    auto get_ptr(AliasV alias) -> gl::ShaderProgram*;
+    auto get_all_aliases() const noexcept -> std::vector;
+
+    // Forwards to gl::ShaderProgram::unbind().
+    auto unbind() -> void;
+
+    auto debug() -> void;
+
+private:
+
+    // alias -> shader program storage
+    s_umap> shaders;
+};
+
+}
diff --git
a/cpp-projects/3d-engine/engine/textures_manager.cpp b/cpp-projects/3d-engine/engine/textures_manager.cpp new file mode 100644 index 0000000..752ce9d --- /dev/null +++ b/cpp-projects/3d-engine/engine/textures_manager.cpp @@ -0,0 +1,194 @@ + +#include "textures_manager.hpp" + +using namespace tool; +using namespace tool::graphics; + +struct Global{ + static inline std::unique_ptr instance = nullptr; +}; + +auto TexturesManager::get_instance() -> TexturesManager*{ + if(Global::instance == nullptr){ + Global::instance = std::make_unique(); + } + return Global::instance.get(); +} + +bool TexturesManager::load_textures_from_directory(const Path &directoryPath, std::vector infos){ + + for(const auto &info : infos){ + + // check alias + if(textures.count(info.alias) != 0){ + std::cerr << "[TexturesM] Texture alias already used: " << info.alias << "\n"; + continue; + } + + // check if texture has already been loaded + const std::string textureFilePath = {directoryPath + "/" + info.fileName}; + if(aliasPerPath.count(textureFilePath) != 0){ + + // find previously loaded texture + textures[info.alias] = textures[aliasPerPath[textureFilePath]]; + std::cout << "[TexturesM] Texture file " << textureFilePath << " has already been loaded with alias" << info.alias << "\n"; + continue; + } + + // load texture + auto texture = std::make_shared(); + if(!texture->load_2d_image_file_data(textureFilePath, info.flip, info.targetNbChannels)){ + std::cerr << "[TexturesM] Cannot load texture file: " << textureFilePath << "\n"; + return false; + } + + // add to map + texturesAliases.push_back(info.alias); + aliasPerPath[textureFilePath] = info.alias; + textures[info.alias] = texture; + } + + return true; +} + +bool TexturesManager::load_cube_map(const Path &basePath, const std::array &extensions, const Alias &alias, bool flip){ + + // check alias + if(cubeMaps.count(alias) != 0){ + std::cerr << "[TexturesM] CubeMap alias already used: " << alias << "\n"; + return false; + } + + // check if 
textures have already been loaded + std::array texturesPath; + for(size_t ii = 0; ii < extensions.size(); ++ii){ + texturesPath[ii] = {basePath + extensions[ii]}; + if(aliasPerPath.count(texturesPath[ii].v) != 0){ + std::cerr << "[TexturesM] Cubemap texture file already loaded: " << texturesPath[ii].v << "\n"; + continue; + } + } + + // load cube map + auto cubeMap = std::make_shared(); + if(!cubeMap->load_2d_images_files(texturesPath, flip)){ + std::cerr << "[TexturesM] Cannot load cubemap files. \n"; + return false; + } + + // add to map + for(const auto &path : texturesPath){ + aliasPerPath[path.v] = alias; + } + cubeMapsAliases.push_back(alias); + cubeMaps[alias] = cubeMap; + + return true; +} + +std::weak_ptr TexturesManager::get_texture(const Alias &textureAlias){ + if(textures.count({textureAlias}) != 0){ + return textures[{textureAlias}]; + } + return {}; +} + +Texture2D *TexturesManager::get_texture_ptr(const Alias &textureAlias){ + if(textures.count({textureAlias}) != 0){ + return textures[{textureAlias}].get(); + } + return nullptr; +} + +TextureInfo TexturesManager::get_texture_info(const Alias &textureAlias, TextureOptions options){ + return TextureInfo{get_texture_ptr(textureAlias),options}; +} + +bool TexturesManager::generate_texture2d_tbo(const Alias &tboAlias, const Alias &textureAlias, TextureOptions options){ + + if(textures.count(textureAlias) == 0){ + std::cerr << "[TextureM] Texture alias " << textureAlias << " doesn't exist.\n"; + return false; + } + + if(texturesTBOs.count(tboAlias) != 0){ + std::cerr << "[TexturesM] TBO alias already used: " << tboAlias << "\n"; + return false; + } + + tool::gl::Texture2D tbo; + tbo.load_texture(textures[textureAlias].get(), {options}); + tboAliases.push_back(tboAlias); + texturesTBOs[tboAlias] = std::move(tbo); + + return true; +} + +bool TexturesManager::generate_projected_texture2d_tbo(const Alias &tboAlias, const Alias &textureAlias){ + + if(textures.count(textureAlias) == 0){ + std::cerr << 
"[TextureM] Texture alias " << textureAlias << " doesn't exist.\n"; + return false; + } + + if(texturesTBOs.count(tboAlias) != 0){ + std::cerr << "[TexturesM] TBO alias already used: " << tboAlias << "\n"; + return false; + } + + tool::gl::Texture2D tbo; + tbo.load_projected_texture(textures[textureAlias].get()); + tboAliases.push_back(tboAlias); + texturesTBOs[tboAlias] = std::move(tbo); + + return true; +} + +bool TexturesManager::generate_cubemap_tbo(const Alias &tboAlias, const Alias &cubemapAlias){ + + if(cubeMaps.count(cubemapAlias) == 0){ + std::cerr << "[TextureM] Cubemap alias " << cubemapAlias << " doesn't exist.\n"; + return false; + } + + if(cubeMapsTBOs.count(tboAlias) != 0){ + std::cerr << "[TexturesM] TBO alias already used: " << tboAlias << "\n"; + return false; + } + + tool::gl::CubeMap tbo; + tbo.load_textures(&cubeMaps[cubemapAlias]->textures); + tboAliases.push_back(tboAlias); + cubeMapsTBOs[tboAlias] = std::move(tbo); + + return true; +} + +gl::TextureName TexturesManager::texture_id(const TexturesManager::Alias &tboAlias){ + if(auto tbo = texture_tbo(tboAlias); tbo != nullptr){ + return tbo->id(); + } + return 0; +} + +gl::TBO *TexturesManager::cube_map_tbo(const TexturesManager::Alias &tboAlias){ + if(cubeMapsTBOs.count(tboAlias) != 0){ + return &cubeMapsTBOs[tboAlias]; + } + return nullptr; +} + +gl::TextureName TexturesManager::cube_map_id(const TexturesManager::Alias &tboAlias){ + if(auto tbo = cube_map_tbo(tboAlias); tbo != nullptr){ + return tbo->id(); + } + return 0; +} + +gl::TBO *TexturesManager::texture_tbo(const Alias &tboAlias){ + if(texturesTBOs.count(tboAlias) != 0){ + return &texturesTBOs[tboAlias]; + } + return nullptr; +} + diff --git a/cpp-projects/3d-engine/engine/textures_manager.hpp b/cpp-projects/3d-engine/engine/textures_manager.hpp new file mode 100644 index 0000000..2a55e32 --- /dev/null +++ b/cpp-projects/3d-engine/engine/textures_manager.hpp @@ -0,0 +1,94 @@ + 
+/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// opengl-utlity +#include "opengl/gl_texture.hpp" + +namespace tool::graphics { + +struct TextureLoadingInfo{ + std::string alias; + std::string fileName; + bool flip = true; + int targetNbChannels = 4; +}; + +class TexturesManager{ + +public: + + using Alias = std::string; + using Path = std::string; + + static auto get_instance() -> TexturesManager*; + + bool load_textures_from_directory(const Path &directoryPath, std::vector infos); + bool load_cube_map(const Path &basePath, const std::array &extensions, const Alias &alias, bool flip = true); + + std::weak_ptr get_texture(const Alias &textureAlias); + Texture2D* get_texture_ptr(const Alias &textureAlias); + TextureInfo get_texture_info(const Alias &textureAlias, TextureOptions options = {}); + + bool generate_texture2d_tbo(const Alias &tboAlias, const Alias &textureAlias, TextureOptions options = {}); + bool generate_projected_texture2d_tbo(const Alias &tboAlias, const Alias &textureAlias); + bool generate_cubemap_tbo(const Alias &tboAlias, const Alias &cubemapAlias); + + gl::TBO *texture_tbo(const Alias &tboAlias); + gl::TextureName texture_id(const Alias &tboAlias); + + gl::TBO *cube_map_tbo(const Alias &tboAlias); + gl::TextureName cube_map_id(const Alias &tboAlias); + +// constexpr size_t textures_count() const noexcept {return texturesAliases.size();} +// constexpr size_t cube_maps_count() const noexcept {return cubeMapsAliases.size();} +// constexpr size_t tbo_count() const noexcept {return tboAliases.size();} + +// gl::TextureName get_cube_maps_tbo(){ + +// } + + + + + +private: + + std::vector texturesAliases; + std::vector cubeMapsAliases; + std::vector tboAliases; + + std::unordered_map aliasPerPath; + + std::unordered_map> textures; + std::unordered_map> cubeMaps; + std::unordered_map texturesTBOs; + std::unordered_map cubeMapsTBOs; + +}; + +} diff --git 
a/cpp-projects/3d-engine/imgui-sfml/imgui-SFML.cpp b/cpp-projects/3d-engine/imgui-sfml/imgui-SFML.cpp new file mode 100644 index 0000000..aadeafb --- /dev/null +++ b/cpp-projects/3d-engine/imgui-sfml/imgui-SFML.cpp @@ -0,0 +1,938 @@ +#include "imgui-SFML.h" +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include // abs +#include // offsetof, NULL +#include // memcpy + +#ifdef ANDROID +#ifdef USE_JNI + +#include +#include +#include + +static bool s_wantTextInput = false; + +int openKeyboardIME() { + ANativeActivity* activity = sf::getNativeActivity(); + JavaVM* vm = activity->vm; + JNIEnv* env = activity->env; + JavaVMAttachArgs attachargs; + attachargs.version = JNI_VERSION_1_6; + attachargs.name = "NativeThread"; + attachargs.group = NULL; + jint res = vm->AttachCurrentThread(&env, &attachargs); + if (res == JNI_ERR) return EXIT_FAILURE; + + jclass natact = env->FindClass("android/app/NativeActivity"); + jclass context = env->FindClass("android/content/Context"); + + jfieldID fid = env->GetStaticFieldID(context, "INPUT_METHOD_SERVICE", + "Ljava/lang/String;"); + jobject svcstr = env->GetStaticObjectField(context, fid); + + jmethodID getss = env->GetMethodID( + natact, "getSystemService", "(Ljava/lang/String;)Ljava/lang/Object;"); + jobject imm_obj = env->CallObjectMethod(activity->clazz, getss, svcstr); + + jclass imm_cls = env->GetObjectClass(imm_obj); + jmethodID toggleSoftInput = + env->GetMethodID(imm_cls, "toggleSoftInput", "(II)V"); + + env->CallVoidMethod(imm_obj, toggleSoftInput, 2, 0); + + env->DeleteLocalRef(imm_obj); + env->DeleteLocalRef(imm_cls); + env->DeleteLocalRef(svcstr); + env->DeleteLocalRef(context); + env->DeleteLocalRef(natact); + + vm->DetachCurrentThread(); + + return EXIT_SUCCESS; +} + +int closeKeyboardIME() { + ANativeActivity* activity = sf::getNativeActivity(); + JavaVM* vm = activity->vm; + JNIEnv* env = activity->env; + JavaVMAttachArgs 
attachargs; + attachargs.version = JNI_VERSION_1_6; + attachargs.name = "NativeThread"; + attachargs.group = NULL; + jint res = vm->AttachCurrentThread(&env, &attachargs); + if (res == JNI_ERR) return EXIT_FAILURE; + + jclass natact = env->FindClass("android/app/NativeActivity"); + jclass context = env->FindClass("android/content/Context"); + + jfieldID fid = env->GetStaticFieldID(context, "INPUT_METHOD_SERVICE", + "Ljava/lang/String;"); + jobject svcstr = env->GetStaticObjectField(context, fid); + + jmethodID getss = env->GetMethodID( + natact, "getSystemService", "(Ljava/lang/String;)Ljava/lang/Object;"); + jobject imm_obj = env->CallObjectMethod(activity->clazz, getss, svcstr); + + jclass imm_cls = env->GetObjectClass(imm_obj); + jmethodID toggleSoftInput = + env->GetMethodID(imm_cls, "toggleSoftInput", "(II)V"); + + env->CallVoidMethod(imm_obj, toggleSoftInput, 1, 0); + + env->DeleteLocalRef(imm_obj); + env->DeleteLocalRef(imm_cls); + env->DeleteLocalRef(svcstr); + env->DeleteLocalRef(context); + env->DeleteLocalRef(natact); + + vm->DetachCurrentThread(); + + return EXIT_SUCCESS; +} + +#endif +#endif + +#if __cplusplus >= 201103L // C++11 and above +static_assert(sizeof(GLuint) <= sizeof(ImTextureID), + "ImTextureID is not large enough to fit GLuint."); +#endif + +namespace { +// data +static bool s_windowHasFocus = false; +static bool s_mousePressed[3] = {false, false, false}; +static bool s_touchDown[3] = {false, false, false}; +static bool s_mouseMoved = false; +static sf::Vector2i s_touchPos; +static sf::Texture* s_fontTexture = + NULL; // owning pointer to internal font atlas which is used if user + // doesn't set custom sf::Texture. 
+ +static const unsigned int NULL_JOYSTICK_ID = sf::Joystick::Count; +static unsigned int s_joystickId = NULL_JOYSTICK_ID; + +static const unsigned int NULL_JOYSTICK_BUTTON = sf::Joystick::ButtonCount; +static unsigned int s_joystickMapping[ImGuiNavInput_COUNT]; + +struct StickInfo { + sf::Joystick::Axis xAxis; + sf::Joystick::Axis yAxis; + + bool xInverted; + bool yInverted; + + float threshold; +}; + +StickInfo s_dPadInfo; +StickInfo s_lStickInfo; + +// various helper functions +ImColor toImColor(sf::Color c); +ImVec2 getTopLeftAbsolute(const sf::FloatRect& rect); +ImVec2 getDownRightAbsolute(const sf::FloatRect& rect); + +ImTextureID convertGLTextureHandleToImTextureID(GLuint glTextureHandle); +GLuint convertImTextureIDToGLTextureHandle(ImTextureID textureID); + +void RenderDrawLists( + ImDrawData* draw_data); // rendering callback function prototype + +// Implementation of ImageButton overload +bool imageButtonImpl(const sf::Texture& texture, + const sf::FloatRect& textureRect, const sf::Vector2f& size, + const int framePadding, const sf::Color& bgColor, + const sf::Color& tintColor); + +// Default mapping is XInput gamepad mapping +void initDefaultJoystickMapping(); + +// Returns first id of connected joystick +unsigned int getConnectedJoystickId(); + +void updateJoystickActionState(ImGuiIO& io, ImGuiNavInput_ action); +void updateJoystickDPadState(ImGuiIO& io); +void updateJoystickLStickState(ImGuiIO& io); + +// clipboard functions +void setClipboardText(void* userData, const char* text); +const char* getClipboadText(void* userData); +std::string s_clipboardText; + +// mouse cursors +void loadMouseCursor(ImGuiMouseCursor imguiCursorType, + sf::Cursor::Type sfmlCursorType); +void updateMouseCursor(sf::Window& window); + +sf::Cursor* s_mouseCursors[ImGuiMouseCursor_COUNT]; +bool s_mouseCursorLoaded[ImGuiMouseCursor_COUNT]; + +} // namespace + +namespace ImGui { +namespace SFML { + +void Init(sf::RenderWindow& window, bool loadDefaultFont) { + Init(window, 
window, loadDefaultFont); +} + +void Init(sf::Window& window, sf::RenderTarget& target, bool loadDefaultFont) { + Init(window, static_cast(target.getSize()), loadDefaultFont); +} + +void Init(sf::Window& window, const sf::Vector2f& displaySize, bool loadDefaultFont) { +#if __cplusplus < 201103L // runtime assert when using earlier than C++11 as no + // static_assert support + assert( + sizeof(GLuint) <= + sizeof(ImTextureID)); // ImTextureID is not large enough to fit GLuint. +#endif + + ImGui::CreateContext(); + ImGuiIO& io = ImGui::GetIO(); + + // tell ImGui which features we support + io.BackendFlags |= ImGuiBackendFlags_HasGamepad; + io.BackendFlags |= ImGuiBackendFlags_HasMouseCursors; + io.BackendFlags |= ImGuiBackendFlags_HasSetMousePos; + io.BackendPlatformName = "imgui_impl_sfml"; + + // init keyboard mapping + io.KeyMap[ImGuiKey_Tab] = sf::Keyboard::Tab; + io.KeyMap[ImGuiKey_LeftArrow] = sf::Keyboard::Left; + io.KeyMap[ImGuiKey_RightArrow] = sf::Keyboard::Right; + io.KeyMap[ImGuiKey_UpArrow] = sf::Keyboard::Up; + io.KeyMap[ImGuiKey_DownArrow] = sf::Keyboard::Down; + io.KeyMap[ImGuiKey_PageUp] = sf::Keyboard::PageUp; + io.KeyMap[ImGuiKey_PageDown] = sf::Keyboard::PageDown; + io.KeyMap[ImGuiKey_Home] = sf::Keyboard::Home; + io.KeyMap[ImGuiKey_End] = sf::Keyboard::End; + io.KeyMap[ImGuiKey_Insert] = sf::Keyboard::Insert; +#ifdef ANDROID + io.KeyMap[ImGuiKey_Backspace] = sf::Keyboard::Delete; +#else + io.KeyMap[ImGuiKey_Delete] = sf::Keyboard::Delete; + io.KeyMap[ImGuiKey_Backspace] = sf::Keyboard::BackSpace; +#endif + io.KeyMap[ImGuiKey_Space] = sf::Keyboard::Space; + io.KeyMap[ImGuiKey_Enter] = sf::Keyboard::Return; + io.KeyMap[ImGuiKey_Escape] = sf::Keyboard::Escape; + io.KeyMap[ImGuiKey_A] = sf::Keyboard::A; + io.KeyMap[ImGuiKey_C] = sf::Keyboard::C; + io.KeyMap[ImGuiKey_V] = sf::Keyboard::V; + io.KeyMap[ImGuiKey_X] = sf::Keyboard::X; + io.KeyMap[ImGuiKey_Y] = sf::Keyboard::Y; + io.KeyMap[ImGuiKey_Z] = sf::Keyboard::Z; + + s_joystickId = 
getConnectedJoystickId(); + + for (unsigned int i = 0; i < ImGuiNavInput_COUNT; i++) { + s_joystickMapping[i] = NULL_JOYSTICK_BUTTON; + } + + initDefaultJoystickMapping(); + + // init rendering + io.DisplaySize = ImVec2(displaySize.x, displaySize.y); + + // clipboard + io.SetClipboardTextFn = setClipboardText; + io.GetClipboardTextFn = getClipboadText; + + // load mouse cursors + for (int i = 0; i < ImGuiMouseCursor_COUNT; ++i) { + s_mouseCursorLoaded[i] = false; + } + + loadMouseCursor(ImGuiMouseCursor_Arrow, sf::Cursor::Arrow); + loadMouseCursor(ImGuiMouseCursor_TextInput, sf::Cursor::Text); + loadMouseCursor(ImGuiMouseCursor_ResizeAll, sf::Cursor::SizeAll); + loadMouseCursor(ImGuiMouseCursor_ResizeNS, sf::Cursor::SizeVertical); + loadMouseCursor(ImGuiMouseCursor_ResizeEW, sf::Cursor::SizeHorizontal); + loadMouseCursor(ImGuiMouseCursor_ResizeNESW, + sf::Cursor::SizeBottomLeftTopRight); + loadMouseCursor(ImGuiMouseCursor_ResizeNWSE, + sf::Cursor::SizeTopLeftBottomRight); + loadMouseCursor(ImGuiMouseCursor_Hand, sf::Cursor::Hand); + + if (s_fontTexture) { // delete previously created texture + delete s_fontTexture; + } + s_fontTexture = new sf::Texture; + + if (loadDefaultFont) { + // this will load default font automatically + // No need to call AddDefaultFont + UpdateFontTexture(); + } + + s_windowHasFocus = window.hasFocus(); +} + +void ProcessEvent(const sf::Event& event) { + if (s_windowHasFocus) { + ImGuiIO& io = ImGui::GetIO(); + + switch (event.type) { + case sf::Event::MouseMoved: + s_mouseMoved = true; + break; + case sf::Event::MouseButtonPressed: // fall-through + case sf::Event::MouseButtonReleased: { + int button = event.mouseButton.button; + if (event.type == sf::Event::MouseButtonPressed && + button >= 0 && button < 3) { + s_mousePressed[event.mouseButton.button] = true; + } + } break; + case sf::Event::TouchBegan: // fall-through + case sf::Event::TouchEnded: { + s_mouseMoved = false; + int button = event.touch.finger; + if (event.type == 
sf::Event::TouchBegan && button >= 0 && + button < 3) { + s_touchDown[event.touch.finger] = true; + } + } break; + case sf::Event::MouseWheelScrolled: + if (event.mouseWheelScroll.wheel == sf::Mouse::VerticalWheel || + (event.mouseWheelScroll.wheel == sf::Mouse::HorizontalWheel && + io.KeyShift)) { + io.MouseWheel += event.mouseWheelScroll.delta; + } else if (event.mouseWheelScroll.wheel == sf::Mouse::HorizontalWheel) { + io.MouseWheelH += event.mouseWheelScroll.delta; + } + break; + case sf::Event::KeyPressed: // fall-through + case sf::Event::KeyReleased: + io.KeysDown[event.key.code] = + (event.type == sf::Event::KeyPressed); + break; + case sf::Event::TextEntered: + // Don't handle the event for unprintable characters + if (event.text.unicode < ' ' || event.text.unicode == 127) { + break; + } + io.AddInputCharacter(event.text.unicode); + break; + case sf::Event::JoystickConnected: + if (s_joystickId == NULL_JOYSTICK_ID) { + s_joystickId = event.joystickConnect.joystickId; + } + break; + case sf::Event::JoystickDisconnected: + if (s_joystickId == + event.joystickConnect + .joystickId) { // used gamepad was disconnected + s_joystickId = getConnectedJoystickId(); + } + break; + default: + break; + } + } + + switch (event.type) { + case sf::Event::LostFocus: + s_windowHasFocus = false; + break; + case sf::Event::GainedFocus: + s_windowHasFocus = true; + break; + default: + break; + } +} + +void Update(sf::RenderWindow& window, sf::Time dt) { + Update(window, window, dt); +} + +void Update(sf::Window& window, sf::RenderTarget& target, sf::Time dt) { + // Update OS/hardware mouse cursor if imgui isn't drawing a software cursor + updateMouseCursor(window); + + if (!s_mouseMoved) { + if (sf::Touch::isDown(0)) + s_touchPos = sf::Touch::getPosition(0, window); + + Update(s_touchPos, static_cast(target.getSize()), dt); + } else { + Update(sf::Mouse::getPosition(window), + static_cast(target.getSize()), dt); + } + + if (ImGui::GetIO().MouseDrawCursor) { + // Hide OS mouse 
cursor if imgui is drawing it + window.setMouseCursorVisible(false); + } +} + +void Update(const sf::Vector2i& mousePos, const sf::Vector2f& displaySize, + sf::Time dt) { + ImGuiIO& io = ImGui::GetIO(); + io.DisplaySize = ImVec2(displaySize.x, displaySize.y); + + io.DeltaTime = dt.asSeconds(); + + if (s_windowHasFocus) { + if (io.WantSetMousePos) { + sf::Vector2i newMousePos(static_cast(io.MousePos.x), + static_cast(io.MousePos.y)); + sf::Mouse::setPosition(newMousePos); + } else { + io.MousePos = ImVec2(static_cast(mousePos.x), + static_cast(mousePos.y)); + } + for (unsigned int i = 0; i < 3; i++) { + io.MouseDown[i] = s_touchDown[i] || sf::Touch::isDown(i) || + s_mousePressed[i] || + sf::Mouse::isButtonPressed((sf::Mouse::Button)i); + s_mousePressed[i] = false; + s_touchDown[i] = false; + } + } + + // Update Ctrl, Shift, Alt, Super state + io.KeyCtrl = io.KeysDown[sf::Keyboard::LControl] || + io.KeysDown[sf::Keyboard::RControl]; + io.KeyAlt = + io.KeysDown[sf::Keyboard::LAlt] || io.KeysDown[sf::Keyboard::RAlt]; + io.KeyShift = + io.KeysDown[sf::Keyboard::LShift] || io.KeysDown[sf::Keyboard::RShift]; + io.KeySuper = io.KeysDown[sf::Keyboard::LSystem] || + io.KeysDown[sf::Keyboard::RSystem]; + +#ifdef ANDROID +#ifdef USE_JNI + if (io.WantTextInput && !s_wantTextInput) { + openKeyboardIME(); + s_wantTextInput = true; + } + + if (!io.WantTextInput && s_wantTextInput) { + closeKeyboardIME(); + s_wantTextInput = false; + } +#endif +#endif + + assert(io.Fonts->Fonts.Size > 0); // You forgot to create and set up font + // atlas (see createFontTexture) + + // gamepad navigation + if ((io.ConfigFlags & ImGuiConfigFlags_NavEnableGamepad) && + s_joystickId != NULL_JOYSTICK_ID) { + updateJoystickActionState(io, ImGuiNavInput_Activate); + updateJoystickActionState(io, ImGuiNavInput_Cancel); + updateJoystickActionState(io, ImGuiNavInput_Input); + updateJoystickActionState(io, ImGuiNavInput_Menu); + + updateJoystickActionState(io, ImGuiNavInput_FocusPrev); + 
updateJoystickActionState(io, ImGuiNavInput_FocusNext); + + updateJoystickActionState(io, ImGuiNavInput_TweakSlow); + updateJoystickActionState(io, ImGuiNavInput_TweakFast); + + updateJoystickDPadState(io); + updateJoystickLStickState(io); + } + + ImGui::NewFrame(); +} + +void Render(sf::RenderTarget& target) { + target.resetGLStates(); + ImGui::Render(); + RenderDrawLists(ImGui::GetDrawData()); +} + +void Render() { + ImGui::Render(); + RenderDrawLists(ImGui::GetDrawData()); +} + +void Shutdown() { + ImGui::GetIO().Fonts->TexID = (ImTextureID)NULL; + + if (s_fontTexture) { // if internal texture was created, we delete it + delete s_fontTexture; + s_fontTexture = NULL; + } + + for (int i = 0; i < ImGuiMouseCursor_COUNT; ++i) { + if (s_mouseCursorLoaded[i]) { + delete s_mouseCursors[i]; + s_mouseCursors[i] = NULL; + + s_mouseCursorLoaded[i] = false; + } + } + + ImGui::DestroyContext(); +} + +void UpdateFontTexture() { + ImGuiIO& io = ImGui::GetIO(); + unsigned char* pixels; + int width, height; + + io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); + + sf::Texture& texture = *s_fontTexture; + texture.create(width, height); + texture.update(pixels); + + io.Fonts->TexID = + convertGLTextureHandleToImTextureID(texture.getNativeHandle()); +} + +sf::Texture& GetFontTexture() { return *s_fontTexture; } + +void SetActiveJoystickId(unsigned int joystickId) { + assert(joystickId < sf::Joystick::Count); + s_joystickId = joystickId; +} + +void SetJoytickDPadThreshold(float threshold) { + assert(threshold >= 0.f && threshold <= 100.f); + s_dPadInfo.threshold = threshold; +} + +void SetJoytickLStickThreshold(float threshold) { + assert(threshold >= 0.f && threshold <= 100.f); + s_lStickInfo.threshold = threshold; +} + +void SetJoystickMapping(int action, unsigned int joystickButton) { + assert(action < ImGuiNavInput_COUNT); + assert(joystickButton < sf::Joystick::ButtonCount); + s_joystickMapping[action] = joystickButton; +} + +void SetDPadXAxis(sf::Joystick::Axis 
dPadXAxis, bool inverted) { + s_dPadInfo.xAxis = dPadXAxis; + s_dPadInfo.xInverted = inverted; +} + +void SetDPadYAxis(sf::Joystick::Axis dPadYAxis, bool inverted) { + s_dPadInfo.yAxis = dPadYAxis; + s_dPadInfo.yInverted = inverted; +} + +void SetLStickXAxis(sf::Joystick::Axis lStickXAxis, bool inverted) { + s_lStickInfo.xAxis = lStickXAxis; + s_lStickInfo.xInverted = inverted; +} + +void SetLStickYAxis(sf::Joystick::Axis lStickYAxis, bool inverted) { + s_lStickInfo.yAxis = lStickYAxis; + s_lStickInfo.yInverted = inverted; +} + +} // end of namespace SFML + +/////////////// Image Overloads + +void Image(const sf::Texture& texture, const sf::Color& tintColor, + const sf::Color& borderColor) { + Image(texture, static_cast(texture.getSize()), tintColor, + borderColor); +} + +void Image(const sf::Texture& texture, const sf::Vector2f& size, + const sf::Color& tintColor, const sf::Color& borderColor) { + ImTextureID textureID = + convertGLTextureHandleToImTextureID(texture.getNativeHandle()); + + ImGui::Image(textureID, ImVec2(size.x,size.y), ImVec2(0, 0), ImVec2(1, 1), toImColor(tintColor), + toImColor(borderColor)); +} + +void Image(const sf::Texture& texture, const sf::FloatRect& textureRect, + const sf::Color& tintColor, const sf::Color& borderColor) { + Image( + texture, + sf::Vector2f(std::abs(textureRect.width), std::abs(textureRect.height)), + textureRect, tintColor, borderColor); +} + +void Image(const sf::Texture& texture, const sf::Vector2f& size, + const sf::FloatRect& textureRect, const sf::Color& tintColor, + const sf::Color& borderColor) { + sf::Vector2f textureSize = static_cast(texture.getSize()); + ImVec2 uv0(textureRect.left / textureSize.x, + textureRect.top / textureSize.y); + ImVec2 uv1((textureRect.left + textureRect.width) / textureSize.x, + (textureRect.top + textureRect.height) / textureSize.y); + + ImTextureID textureID = + convertGLTextureHandleToImTextureID(texture.getNativeHandle()); + ImGui::Image(textureID, ImVec2(size.x, size.y), uv0, 
uv1, toImColor(tintColor), + toImColor(borderColor)); +} + +void Image(const sf::Sprite& sprite, const sf::Color& tintColor, + const sf::Color& borderColor) { + sf::FloatRect bounds = sprite.getGlobalBounds(); + Image(sprite, sf::Vector2f(bounds.width, bounds.height), tintColor, + borderColor); +} + +void Image(const sf::Sprite& sprite, const sf::Vector2f& size, + const sf::Color& tintColor, const sf::Color& borderColor) { + const sf::Texture* texturePtr = sprite.getTexture(); + // sprite without texture cannot be drawn + if (!texturePtr) { + return; + } + + Image(*texturePtr, size, + static_cast(sprite.getTextureRect()), tintColor, + borderColor); +} + +/////////////// Image Button Overloads + +bool ImageButton(const sf::Texture& texture, const int framePadding, + const sf::Color& bgColor, const sf::Color& tintColor) { + return ImageButton(texture, static_cast(texture.getSize()), + framePadding, bgColor, tintColor); +} + +bool ImageButton(const sf::Texture& texture, const sf::Vector2f& size, + const int framePadding, const sf::Color& bgColor, + const sf::Color& tintColor) { + sf::Vector2f textureSize = static_cast(texture.getSize()); + return ::imageButtonImpl( + texture, sf::FloatRect(0.f, 0.f, textureSize.x, textureSize.y), size, + framePadding, bgColor, tintColor); +} + +bool ImageButton(const sf::Sprite& sprite, const int framePadding, + const sf::Color& bgColor, const sf::Color& tintColor) { + sf::FloatRect spriteSize = sprite.getGlobalBounds(); + return ImageButton(sprite, + sf::Vector2f(spriteSize.width, spriteSize.height), + framePadding, bgColor, tintColor); +} + +bool ImageButton(const sf::Sprite& sprite, const sf::Vector2f& size, + const int framePadding, const sf::Color& bgColor, + const sf::Color& tintColor) { + const sf::Texture* texturePtr = sprite.getTexture(); + if (!texturePtr) { + return false; + } + return ::imageButtonImpl( + *texturePtr, static_cast(sprite.getTextureRect()), size, + framePadding, bgColor, tintColor); +} + +/////////////// 
Draw_list Overloads + +void DrawLine(const sf::Vector2f& a, const sf::Vector2f& b, + const sf::Color& color, float thickness) { + ImDrawList* draw_list = ImGui::GetWindowDrawList(); + ImVec2 pos = ImGui::GetCursorScreenPos(); + draw_list->AddLine(ImVec2(a.x + pos.x, a.y + pos.y), ImVec2(b.x + pos.x, b.y + pos.y), ColorConvertFloat4ToU32(toImColor(color)), + thickness); +} + +void DrawRect(const sf::FloatRect& rect, const sf::Color& color, float rounding, + int rounding_corners, float thickness) { + ImDrawList* draw_list = ImGui::GetWindowDrawList(); + draw_list->AddRect(getTopLeftAbsolute(rect), getDownRightAbsolute(rect), + ColorConvertFloat4ToU32(toImColor(color)), rounding, + rounding_corners, thickness); +} + +void DrawRectFilled(const sf::FloatRect& rect, const sf::Color& color, + float rounding, int rounding_corners) { + ImDrawList* draw_list = ImGui::GetWindowDrawList(); + draw_list->AddRectFilled( + getTopLeftAbsolute(rect), getDownRightAbsolute(rect), + ColorConvertFloat4ToU32(toImColor(color)), rounding, rounding_corners); +} + +} // end of namespace ImGui + +namespace { +ImColor toImColor(sf::Color c ) { + return ImColor(static_cast(c.r), static_cast(c.g), static_cast(c.b), static_cast(c.a)); +} +ImVec2 getTopLeftAbsolute(const sf::FloatRect& rect) { + ImVec2 pos = ImGui::GetCursorScreenPos(); + return ImVec2(rect.left + pos.x, rect.top + pos.y); +} +ImVec2 getDownRightAbsolute(const sf::FloatRect& rect) { + ImVec2 pos = ImGui::GetCursorScreenPos(); + return ImVec2(rect.left + rect.width + pos.x, + rect.top + rect.height + pos.y); +} + +ImTextureID convertGLTextureHandleToImTextureID(GLuint glTextureHandle) { + ImTextureID textureID = (ImTextureID)NULL; + std::memcpy(&textureID, &glTextureHandle, sizeof(GLuint)); + return textureID; +} +GLuint convertImTextureIDToGLTextureHandle(ImTextureID textureID) { + GLuint glTextureHandle; + std::memcpy(&glTextureHandle, &textureID, sizeof(GLuint)); + return glTextureHandle; +} + +// Rendering callback +void 
RenderDrawLists(ImDrawData* draw_data) { + ImGui::GetDrawData(); + if (draw_data->CmdListsCount == 0) { + return; + } + + ImGuiIO& io = ImGui::GetIO(); + assert(io.Fonts->TexID != + (ImTextureID)NULL); // You forgot to create and set font texture + + // scale stuff (needed for proper handling of window resize) + int fb_width = + static_cast(io.DisplaySize.x * io.DisplayFramebufferScale.x); + int fb_height = + static_cast(io.DisplaySize.y * io.DisplayFramebufferScale.y); + if (fb_width == 0 || fb_height == 0) { + return; + } + draw_data->ScaleClipRects(io.DisplayFramebufferScale); + +#ifdef GL_VERSION_ES_CL_1_1 + GLint last_program, last_texture, last_array_buffer, + last_element_array_buffer; + glGetIntegerv(GL_TEXTURE_BINDING_2D, &last_texture); + glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &last_array_buffer); + glGetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING, &last_element_array_buffer); +#else + glPushAttrib(GL_ENABLE_BIT | GL_COLOR_BUFFER_BIT | GL_TRANSFORM_BIT); +#endif + + glEnable(GL_BLEND); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + glEnable(GL_SCISSOR_TEST); + glEnable(GL_TEXTURE_2D); + glDisable(GL_LIGHTING); + glEnableClientState(GL_VERTEX_ARRAY); + glEnableClientState(GL_COLOR_ARRAY); + glEnableClientState(GL_TEXTURE_COORD_ARRAY); + + glViewport(0, 0, (GLsizei)fb_width, (GLsizei)fb_height); + + glMatrixMode(GL_TEXTURE); + glLoadIdentity(); + + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + +#ifdef GL_VERSION_ES_CL_1_1 + glOrthof(0.0f, io.DisplaySize.x, io.DisplaySize.y, 0.0f, -1.0f, +1.0f); +#else + glOrtho(0.0f, io.DisplaySize.x, io.DisplaySize.y, 0.0f, -1.0f, +1.0f); +#endif + + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + + for (int n = 0; n < draw_data->CmdListsCount; ++n) { + const ImDrawList* cmd_list = draw_data->CmdLists[n]; + const unsigned char* vtx_buffer = + (const unsigned char*)&cmd_list->VtxBuffer.front(); + const ImDrawIdx* idx_buffer = &cmd_list->IdxBuffer.front(); + 
+ glVertexPointer(2, GL_FLOAT, sizeof(ImDrawVert), + (void*)(vtx_buffer + offsetof(ImDrawVert, pos))); + glTexCoordPointer(2, GL_FLOAT, sizeof(ImDrawVert), + (void*)(vtx_buffer + offsetof(ImDrawVert, uv))); + glColorPointer(4, GL_UNSIGNED_BYTE, sizeof(ImDrawVert), + (void*)(vtx_buffer + offsetof(ImDrawVert, col))); + + for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.size(); ++cmd_i) { + const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; + if (pcmd->UserCallback) { + pcmd->UserCallback(cmd_list, pcmd); + } else { + GLuint textureHandle = + convertImTextureIDToGLTextureHandle(pcmd->TextureId); + glBindTexture(GL_TEXTURE_2D, textureHandle); + glScissor((int)pcmd->ClipRect.x, + (int)(fb_height - pcmd->ClipRect.w), + (int)(pcmd->ClipRect.z - pcmd->ClipRect.x), + (int)(pcmd->ClipRect.w - pcmd->ClipRect.y)); + glDrawElements(GL_TRIANGLES, (GLsizei)pcmd->ElemCount, + GL_UNSIGNED_SHORT, idx_buffer); + } + idx_buffer += pcmd->ElemCount; + } + } +#ifdef GL_VERSION_ES_CL_1_1 + glBindTexture(GL_TEXTURE_2D, last_texture); + glBindBuffer(GL_ARRAY_BUFFER, last_array_buffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, last_element_array_buffer); + glDisable(GL_SCISSOR_TEST); +#else + glPopAttrib(); +#endif +} + +bool imageButtonImpl(const sf::Texture& texture, + const sf::FloatRect& textureRect, const sf::Vector2f& size, + const int framePadding, const sf::Color& bgColor, + const sf::Color& tintColor) { + sf::Vector2f textureSize = static_cast(texture.getSize()); + + ImVec2 uv0(textureRect.left / textureSize.x, + textureRect.top / textureSize.y); + ImVec2 uv1((textureRect.left + textureRect.width) / textureSize.x, + (textureRect.top + textureRect.height) / textureSize.y); + + ImTextureID textureID = + convertGLTextureHandleToImTextureID(texture.getNativeHandle()); + return ImGui::ImageButton(textureID, ImVec2(size.x,size.y), uv0, uv1, framePadding, toImColor(bgColor), + toImColor(tintColor)); +} + +unsigned int getConnectedJoystickId() { + for (unsigned int i = 0; i < (unsigned 
int)sf::Joystick::Count; ++i) { + if (sf::Joystick::isConnected(i)) return i; + } + + return NULL_JOYSTICK_ID; +} + +void initDefaultJoystickMapping() { + ImGui::SFML::SetJoystickMapping(ImGuiNavInput_Activate, 0); + ImGui::SFML::SetJoystickMapping(ImGuiNavInput_Cancel, 1); + ImGui::SFML::SetJoystickMapping(ImGuiNavInput_Input, 3); + ImGui::SFML::SetJoystickMapping(ImGuiNavInput_Menu, 2); + ImGui::SFML::SetJoystickMapping(ImGuiNavInput_FocusPrev, 4); + ImGui::SFML::SetJoystickMapping(ImGuiNavInput_FocusNext, 5); + ImGui::SFML::SetJoystickMapping(ImGuiNavInput_TweakSlow, 4); + ImGui::SFML::SetJoystickMapping(ImGuiNavInput_TweakFast, 5); + + ImGui::SFML::SetDPadXAxis(sf::Joystick::PovX); + // D-pad Y axis is inverted on Windows +#ifdef _WIN32 + ImGui::SFML::SetDPadYAxis(sf::Joystick::PovY, true); +#else + ImGui::SFML::SetDPadYAxis(sf::Joystick::PovY); +#endif + + ImGui::SFML::SetLStickXAxis(sf::Joystick::X); + ImGui::SFML::SetLStickYAxis(sf::Joystick::Y); + + ImGui::SFML::SetJoytickDPadThreshold(5.f); + ImGui::SFML::SetJoytickLStickThreshold(5.f); +} + +void updateJoystickActionState(ImGuiIO& io, ImGuiNavInput_ action) { + bool isPressed = + sf::Joystick::isButtonPressed(s_joystickId, s_joystickMapping[action]); + io.NavInputs[action] = isPressed ? 1.0f : 0.0f; +} + +void updateJoystickDPadState(ImGuiIO& io) { + float dpadXPos = + sf::Joystick::getAxisPosition(s_joystickId, s_dPadInfo.xAxis); + if (s_dPadInfo.xInverted) dpadXPos = -dpadXPos; + + float dpadYPos = + sf::Joystick::getAxisPosition(s_joystickId, s_dPadInfo.yAxis); + if (s_dPadInfo.yInverted) dpadYPos = -dpadYPos; + + io.NavInputs[ImGuiNavInput_DpadLeft] = + dpadXPos < -s_dPadInfo.threshold ? 1.0f : 0.0f; + io.NavInputs[ImGuiNavInput_DpadRight] = + dpadXPos > s_dPadInfo.threshold ? 1.0f : 0.0f; + + io.NavInputs[ImGuiNavInput_DpadUp] = + dpadYPos < -s_dPadInfo.threshold ? 1.0f : 0.0f; + io.NavInputs[ImGuiNavInput_DpadDown] = + dpadYPos > s_dPadInfo.threshold ? 
1.0f : 0.0f; +} + +void updateJoystickLStickState(ImGuiIO& io) { + float lStickXPos = + sf::Joystick::getAxisPosition(s_joystickId, s_lStickInfo.xAxis); + if (s_lStickInfo.xInverted) lStickXPos = -lStickXPos; + + float lStickYPos = + sf::Joystick::getAxisPosition(s_joystickId, s_lStickInfo.yAxis); + if (s_lStickInfo.yInverted) lStickYPos = -lStickYPos; + + if (lStickXPos < -s_lStickInfo.threshold) { + io.NavInputs[ImGuiNavInput_LStickLeft] = std::abs(lStickXPos / 100.f); + } + + if (lStickXPos > s_lStickInfo.threshold) { + io.NavInputs[ImGuiNavInput_LStickRight] = lStickXPos / 100.f; + } + + if (lStickYPos < -s_lStickInfo.threshold) { + io.NavInputs[ImGuiNavInput_LStickUp] = std::abs(lStickYPos / 100.f); + } + + if (lStickYPos > s_lStickInfo.threshold) { + io.NavInputs[ImGuiNavInput_LStickDown] = lStickYPos / 100.f; + } +} + +void setClipboardText(void* /*userData*/, const char* text) { + sf::Clipboard::setString(sf::String::fromUtf8(text, text + std::strlen(text))); +} + +const char* getClipboadText(void* /*userData*/) { + std::basic_string tmp = sf::Clipboard::getString().toUtf8(); + s_clipboardText = std::string(tmp.begin(), tmp.end()); + return s_clipboardText.c_str(); +} + +void loadMouseCursor(ImGuiMouseCursor imguiCursorType, + sf::Cursor::Type sfmlCursorType) { + s_mouseCursors[imguiCursorType] = new sf::Cursor(); + s_mouseCursorLoaded[imguiCursorType] = + s_mouseCursors[imguiCursorType]->loadFromSystem(sfmlCursorType); +} + +void updateMouseCursor(sf::Window& window) { + ImGuiIO& io = ImGui::GetIO(); + if ((io.ConfigFlags & ImGuiConfigFlags_NoMouseCursorChange) == 0) { + ImGuiMouseCursor cursor = ImGui::GetMouseCursor(); + if (io.MouseDrawCursor || cursor == ImGuiMouseCursor_None) { + window.setMouseCursorVisible(false); + } else { + window.setMouseCursorVisible(true); + + sf::Cursor& c = s_mouseCursorLoaded[cursor] + ? 
*s_mouseCursors[cursor] + : *s_mouseCursors[ImGuiMouseCursor_Arrow]; + window.setMouseCursor(c); + } + } +} + +} // end of anonymous namespace diff --git a/cpp-projects/3d-engine/imgui-sfml/imgui-SFML.h b/cpp-projects/3d-engine/imgui-sfml/imgui-SFML.h new file mode 100644 index 0000000..9300ed5 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-sfml/imgui-SFML.h @@ -0,0 +1,99 @@ +#ifndef IMGUI_SFML_H +#define IMGUI_SFML_H + +#include +#include +#include +#include +#include + +#include "imgui-SFML_export.h" + +namespace sf +{ + class Event; + class RenderTarget; + class RenderWindow; + class Sprite; + class Texture; + class Window; +} + +namespace ImGui +{ + namespace SFML + { + IMGUI_SFML_API void Init(sf::RenderWindow& window, bool loadDefaultFont = true); + IMGUI_SFML_API void Init(sf::Window& window, sf::RenderTarget& target, bool loadDefaultFont = true); + IMGUI_SFML_API void Init(sf::Window& window, const sf::Vector2f& displaySize, bool loadDefaultFont = true); + + IMGUI_SFML_API void ProcessEvent(const sf::Event& event); + + IMGUI_SFML_API void Update(sf::RenderWindow& window, sf::Time dt); + IMGUI_SFML_API void Update(sf::Window& window, sf::RenderTarget& target, sf::Time dt); + IMGUI_SFML_API void Update(const sf::Vector2i& mousePos, const sf::Vector2f& displaySize, sf::Time dt); + + IMGUI_SFML_API void Render(sf::RenderTarget& target); + IMGUI_SFML_API void Render(); + + IMGUI_SFML_API void Shutdown(); + + IMGUI_SFML_API void UpdateFontTexture(); + IMGUI_SFML_API sf::Texture& GetFontTexture(); + + // joystick functions + IMGUI_SFML_API void SetActiveJoystickId(unsigned int joystickId); + IMGUI_SFML_API void SetJoytickDPadThreshold(float threshold); + IMGUI_SFML_API void SetJoytickLStickThreshold(float threshold); + + IMGUI_SFML_API void SetJoystickMapping(int action, unsigned int joystickButton); + IMGUI_SFML_API void SetDPadXAxis(sf::Joystick::Axis dPadXAxis, bool inverted = false); + IMGUI_SFML_API void SetDPadYAxis(sf::Joystick::Axis dPadYAxis, bool 
inverted = false); + IMGUI_SFML_API void SetLStickXAxis(sf::Joystick::Axis lStickXAxis, bool inverted = false); + IMGUI_SFML_API void SetLStickYAxis(sf::Joystick::Axis lStickYAxis, bool inverted = false); + } + + // custom ImGui widgets for SFML stuff + + // Image overloads + IMGUI_SFML_API void Image(const sf::Texture& texture, + const sf::Color& tintColor = sf::Color::White, + const sf::Color& borderColor = sf::Color::Transparent); + IMGUI_SFML_API void Image(const sf::Texture& texture, const sf::Vector2f& size, + const sf::Color& tintColor = sf::Color::White, + const sf::Color& borderColor = sf::Color::Transparent); + IMGUI_SFML_API void Image(const sf::Texture& texture, const sf::FloatRect& textureRect, + const sf::Color& tintColor = sf::Color::White, + const sf::Color& borderColor = sf::Color::Transparent); + IMGUI_SFML_API void Image(const sf::Texture& texture, const sf::Vector2f& size, const sf::FloatRect& textureRect, + const sf::Color& tintColor = sf::Color::White, + const sf::Color& borderColor = sf::Color::Transparent); + + IMGUI_SFML_API void Image(const sf::Sprite& sprite, + const sf::Color& tintColor = sf::Color::White, + const sf::Color& borderColor = sf::Color::Transparent); + IMGUI_SFML_API void Image(const sf::Sprite& sprite, const sf::Vector2f& size, + const sf::Color& tintColor = sf::Color::White, + const sf::Color& borderColor = sf::Color::Transparent); + + // ImageButton overloads + IMGUI_SFML_API bool ImageButton(const sf::Texture& texture, const int framePadding = -1, + const sf::Color& bgColor = sf::Color::Transparent, + const sf::Color& tintColor = sf::Color::White); + IMGUI_SFML_API bool ImageButton(const sf::Texture& texture, const sf::Vector2f& size, const int framePadding = -1, + const sf::Color& bgColor = sf::Color::Transparent, const sf::Color& tintColor = sf::Color::White); + + IMGUI_SFML_API bool ImageButton(const sf::Sprite& sprite, const int framePadding = -1, + const sf::Color& bgColor = sf::Color::Transparent, + const 
sf::Color& tintColor = sf::Color::White); + IMGUI_SFML_API bool ImageButton(const sf::Sprite& sprite, const sf::Vector2f& size, const int framePadding = -1, + const sf::Color& bgColor = sf::Color::Transparent, + const sf::Color& tintColor = sf::Color::White); + + // Draw_list overloads. All positions are in relative coordinates (relative to top-left of the current window) + IMGUI_SFML_API void DrawLine(const sf::Vector2f& a, const sf::Vector2f& b, const sf::Color& col, float thickness = 1.0f); + IMGUI_SFML_API void DrawRect(const sf::FloatRect& rect, const sf::Color& color, float rounding = 0.0f, int rounding_corners = 0x0F, float thickness = 1.0f); + IMGUI_SFML_API void DrawRectFilled(const sf::FloatRect& rect, const sf::Color& color, float rounding = 0.0f, int rounding_corners = 0x0F); +} + +#endif //# IMGUI_SFML_H diff --git a/cpp-projects/3d-engine/imgui-sfml/imgui-SFML_export.h b/cpp-projects/3d-engine/imgui-sfml/imgui-SFML_export.h new file mode 100644 index 0000000..cf5e4c9 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-sfml/imgui-SFML_export.h @@ -0,0 +1,25 @@ +#ifndef IMGUI_SFML_EXPORT_H +#define IMGUI_SFML_EXPORT_H + +#if IMGUI_SFML_SHARED_LIB + #if _WIN32 + #ifdef IMGUI_SFML_EXPORTS + #define IMGUI_SFML_API __declspec(dllexport) + #define IMGUI_API __declspec(dllexport) + #else + #define IMGUI_SFML_API __declspec(dllimport) + #define IMGUI_API __declspec(dllexport) + #endif + #elif __GNUC__ >= 4 + #define IMGUI_SFML_API __attribute__ ((visibility("default"))) + #define IMGUI_API __attribute__ ((visibility("default"))) + #else + #define IMGUI_SFML_API + #define IMGUI_API + #endif +#else + #define IMGUI_SFML_API + #define IMGUI_API +#endif + +#endif \ No newline at end of file diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_convert.hpp b/cpp-projects/3d-engine/imgui-tb/imgui_convert.hpp new file mode 100644 index 0000000..cec356c --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_convert.hpp @@ -0,0 +1,59 @@ + + + 
+/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// imgui +#include "imgui/imgui.h" + +// local +#include "geometry/point2.hpp" +#include "geometry/point4.hpp" + +namespace tool { + +[[maybe_unused]] static inline auto to_iv2(geo::Pt2 p) -> ImVec2{ + return ImVec2(static_cast(p.x()),static_cast(p.y())); +} +[[maybe_unused]] static inline auto to_iv2(geo::Pt2f p) -> ImVec2{ + return ImVec2(p.x(),p.y()); +} +[[maybe_unused]] static inline auto to_iv4(geo::Pt4f p) -> ImVec4{ + return ImVec4(p.x(),p.y(),p.z(),p.w()); +} +[[maybe_unused]] static inline auto to_pt2(ImVec2 v) -> geo::Pt2f{ + return {v.x,v.y}; +} +[[maybe_unused]] static inline auto to_pt4(ImVec4 v) -> geo::Pt4f{ + return {v.x,v.y,v.z,v.w}; +} + + +} + + diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_fbo_ui_drawer.cpp b/cpp-projects/3d-engine/imgui-tb/imgui_fbo_ui_drawer.cpp new file mode 100644 index 0000000..f8204c2 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_fbo_ui_drawer.cpp @@ -0,0 +1,182 @@ + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. 
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + + +#include "imgui_fbo_ui_drawer.hpp" + +// imgui +#include "imgui/imgui.h" + +// local +#include "imgui-tb/imgui_helper.hpp" + +using namespace tool::graphics; + + +ImguiFboUiDrawer::ImguiFboUiDrawer() : m_camera(&m_screen, {0,0,0}, {0,0,1}){ + m_camera.set_fov(60.); +} + +auto ImguiFboUiDrawer::init() -> void{ + m_fbo.clean(); + m_fbo.generate(); +} + +auto ImguiFboUiDrawer::resize(const geo::Pt2 &size) -> void{ + + if(m_texture.width() == size.x() && m_texture.height() == size.y()){ + return; + } + m_screenUpdated = true; + + m_screen.resize(size.x(), size.y()); + m_camera.update_projection(); + + m_texture.clean(); + m_texture.init_image_8ui(size.x(),size.y(), 3); + + TextureOptions options; + options.minFilter = TextureMinFilter::linear; + options.magFilter = TextureMagFilter::linear; + m_texture.set_texture_options(options); + + m_depthTexture.clean(); + m_depthTexture.generate(); + m_depthTexture.bind(); + m_depthTexture.set_data_storage(size.x(), size.y()); + + m_fbo.attach_colors_textures({ + &m_texture + }); + + m_fbo.attach_depth_buffer(m_depthTexture); + + m_fbo.set_draw_buffers({ + tool::gl::FrameBufferAttachment::color0, + }); +} + +auto ImguiFboUiDrawer::update_viewport() -> void{ + + if(m_texture.id() == 0){ + return; + } + + glGetIntegerv(GL_VIEWPORT, m_viewport); + glViewport(0,0, m_texture.width(), m_texture.height()); +} + +auto 
ImguiFboUiDrawer::set_gl_states(geo::Pt4f color) -> void{ + glClearColor(color.x(),color.y(),color.z(),color.w()); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + glEnable(GL_DEPTH_TEST); +} + +auto ImguiFboUiDrawer::draw() -> void{ + + auto size = content_region_size_available(); + float scale = std::min(1.f*size.y() / m_texture.height(), 1.f*size.x() / m_texture.width()); + ImVec2 sizeI = ImVec2(m_texture.width() * scale,m_texture.height() * scale); + + if(m_texture.id() == 0){ + ImGui::Text("Texture not initialized."); + }else{ + if(invertTexture){ + ImGui::Image(m_texture.id(), sizeI, ImVec2(0,1), ImVec2(1,0)); + }else{ + ImGui::Image(m_texture.id(), sizeI, ImVec2(0,0), ImVec2(1,1)); + } + } + + check_inputs(); +} + +auto ImguiFboUiDrawer::restore_viewport() -> void{ + // restore + gl::FBO::unbind(); + glViewport( + static_cast(m_viewport[0]), + static_cast(m_viewport[1]), + static_cast(m_viewport[2]), + static_cast(m_viewport[3]) + ); +} + +auto ImguiFboUiDrawer::check_inputs() -> void{ + + ImGuiIO& io = ImGui::GetIO(); + if(ImGui::IsItemHovered()){ + + const double xoffset = io.MouseDelta.x; + const double yoffset = -io.MouseDelta.y; + const double wheel = io.MouseWheel; + + if(ImGui::IsMouseDown(0)){ + m_camera.set_direction(rotationSpeed*xoffset,rotationSpeed*yoffset,0.); + m_cameraUpdated = true; + } + if(ImGui::IsMouseDown(1)){ + m_camera.set_direction(0.,0.,rotationSpeed*xoffset); + m_cameraUpdated = true; + } + if(ImGui::IsMouseDown(2)){ + m_camera.move_up(translateSpeed*yoffset); + m_camera.move_right(translateSpeed*xoffset); + m_cameraUpdated = true; + } + if(io.MouseWheel != 0.f){ + m_camera.move_front(scrollSpeed*wheel); + m_cameraUpdated = true; + } + + // up key + if(ImGui::IsKeyDown(73)){ + m_camera.move_front(movingSpeed); + m_cameraUpdated = true; + } + // down key + if(ImGui::IsKeyDown(74)){ + m_camera.move_back(movingSpeed); + m_cameraUpdated = true; + } + // left key + if(ImGui::IsKeyDown(71)){ + m_camera.move_left(movingSpeed); + 
m_cameraUpdated = true; + } + // right key + if(ImGui::IsKeyDown(72)){ + m_camera.move_right(movingSpeed); + m_cameraUpdated = true; + } + // R key + if(ImGui::IsKeyPressed(17, false)){ + m_camera.reset_init_values(); + m_cameraUpdated = true; + } + } +} + diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_fbo_ui_drawer.hpp b/cpp-projects/3d-engine/imgui-tb/imgui_fbo_ui_drawer.hpp new file mode 100644 index 0000000..8ae8144 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_fbo_ui_drawer.hpp @@ -0,0 +1,80 @@ + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// base +#include "graphics/camera.hpp" + +// opengl-utility +#include "opengl/buffer/framebuffer_object.hpp" +#include "opengl/gl_texture.hpp" + +namespace tool::graphics { + + +class ImguiFboUiDrawer{ + +public: + + ImguiFboUiDrawer(); + + auto init() -> void; + auto resize(const geo::Pt2 &size) -> void; + + inline auto bind() -> void {m_fbo.bind();} + inline auto unbind() -> void {m_fbo.unbind();} + auto draw() -> void; + auto set_gl_states(geo::Pt4f color = {0.0f, 0.0f, 0.0f, 1.0f}) -> void; + auto reset_states() noexcept-> void {m_cameraUpdated=false;m_screenUpdated=false;} + auto update_viewport() -> void; + + inline auto camera() -> graphics::Camera* {return &m_camera;} + auto texture_id() const noexcept -> gl::TextureName {return m_texture.id();} + auto is_camera_updated() const noexcept -> bool{return m_cameraUpdated;} + auto is_screen_updated() const noexcept -> bool{return m_screenUpdated;} + + double rotationSpeed = 0.05; + float scrollSpeed = 0.1f; + float movingSpeed = 0.05f; + float translateSpeed = 0.01f; + bool invertTexture = true; + +private: + + auto restore_viewport() -> void; + auto check_inputs() -> void; + + GLint m_viewport[4]; + gl::FBO m_fbo; + gl::Texture2D m_texture; + gl::RBO m_depthTexture; + graphics::Camera m_camera; + graphics::Screen m_screen; + bool m_cameraUpdated = false; + bool m_screenUpdated = false; +}; +} diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_helper.hpp b/cpp-projects/3d-engine/imgui-tb/imgui_helper.hpp new file mode 100644 index 0000000..2036da1 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_helper.hpp @@ -0,0 +1,89 @@ + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this 
software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "imgui_convert.hpp" + +namespace tool{ + +/** + * @brief get current window position in screen space + */ +[[maybe_unused]] static inline auto window_screen_pos() -> tool::geo::Pt2f{ + return to_pt2(ImGui::GetWindowPos()); +} + +[[maybe_unused]] static inline auto window_size() -> geo::Pt2f{ + return to_pt2(ImGui::GetWindowSize()); +} + +[[maybe_unused]] static inline auto item_size() -> geo::Pt2f{ + return to_pt2(ImGui::GetItemRectSize()); +} + +[[maybe_unused]] static inline auto last_item_top_left_screen_pos() -> geo::Pt2f{ + return to_pt2(ImGui::GetItemRectMin()); +} + +[[maybe_unused]] static inline auto last_item_bottom_right_screen_pos() -> geo::Pt2f{ + return to_pt2(ImGui::GetItemRectMax()); +} + +[[maybe_unused]] static inline auto last_item_bottom_left_screen_pos() -> geo::Pt2f{ + return last_item_top_left_screen_pos() + geo::Pt2f{0, item_size().y()}; +} + +[[maybe_unused]] static inline auto last_item_top_screen_value() -> float{ + return last_item_top_left_screen_pos().y(); +} + +[[maybe_unused]] static inline auto last_item_bottom_screen_value() -> float{ + return last_item_bottom_right_screen_pos().y(); +} + +[[maybe_unused]] static inline auto last_item_left_screen_value() -> float{ + return last_item_top_left_screen_pos().x(); +} + +[[maybe_unused]] static inline auto last_item_right_screen_value() -> float{ + return last_item_bottom_right_screen_pos().x(); +} + +[[maybe_unused]] static inline auto content_region_size_available() -> geo::Pt2f{ + return to_pt2(ImGui::GetContentRegionAvail()); +} + +[[maybe_unused]] static inline auto cursor_window_position() -> geo::Pt2f{ + return to_pt2(ImGui::GetCursorPos()); +} + +[[maybe_unused]] static inline auto cursor_screen_position() -> geo::Pt2f{ + return to_pt2(ImGui::GetCursorScreenPos()); +} + +} diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_k4_calibrator_drawer.cpp 
b/cpp-projects/3d-engine/imgui-tb/imgui_k4_calibrator_drawer.cpp new file mode 100644 index 0000000..998dbf3 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_k4_calibrator_drawer.cpp @@ -0,0 +1,107 @@ + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#include "imgui_k4_calibrator_drawer.hpp" + +using namespace tool::graphics; + +auto K4CalibratorDrawer::initialize(size_t nbGrabbers) -> void{ + K4CloudsSceneDrawer::initialize(2*nbGrabbers); + + for(size_t ii = 0; ii < nbGrabbers; ++ii){ + auto &cdC = cloudsD[ii].display; + cdC.forceCloudColor = true; + auto &cdP = cloudsD[nbGrabbers + ii].display; + cdP.forceCloudColor = true; + } +} + +auto K4CalibratorDrawer::set_data(int sourceId, int modelId, std::vector* grabbersData) -> void{ + + auto allSelection = modelId == grabbersData->size(); + for(size_t ii = 0; ii < grabbersData->size(); ++ii){ + + auto grabberData = &(*grabbersData)[ii]; + + // calibration + update_from_colored_cloud_data(grabberData->id, grabberData->calibrationCloud); + + // processsed + update_from_colored_cloud_data(grabbersData->size() + grabberData->id, grabberData->processedCloud); + + auto visibilty = (ii == modelId) || (ii == sourceId) || allSelection; + + auto &cdC = cloudsD[ii].display; + cdC.cloudVisible = m_settings.displayCalibrationCloud && visibilty; + + auto &cdP = cloudsD[grabbersData->size() + ii].display; + cdP.cloudVisible = m_settings.displayProcessedCloud && visibilty; + } + m_redrawClouds = true; +} + +auto K4CalibratorDrawer::draw() -> void{ + + if(m_redrawClouds || has_to_redraw_clouds()){ + draw_clouds_to_fbo(); + } + m_redrawClouds = false; + + draw_all_clouds_drawers_in_one_tab(false, false, false, true, "Calibration clouds"); +} + +auto K4CalibratorDrawer::update_grabber_model(size_t idGrabber, const camera::K4Model &model) -> void{ + + auto tr = model.compute_full_transformation(); + cloudsD[idGrabber].model = tr; + cloudsD[(cloudsD.size()/2) + idGrabber].model = tr; + m_redrawClouds = true; +} + +auto K4CalibratorDrawer::update_grabber_cloud_display(size_t idGrabber, const camera::K4CloudDisplaySettings &cloudDisplay) -> void{ + + auto &cdC = cloudsD[idGrabber].display; + 
cdC.forceCloudColor = true; + cdC.cloudColor = cloudDisplay.cloudColor; + cdC.sizePoints = cloudDisplay.sizePoints; + cdC.useVoxels = cloudDisplay.useVoxels; + cdC.sizeVoxels = cloudDisplay.sizeVoxels; + + auto &cdP = cloudsD[(cloudsD.size()/2) + idGrabber].display; + cdP.cloudColor = cloudDisplay.cloudColor * 0.5f; + cdP.sizePoints = cloudDisplay.sizePoints * 2.f; + cdP.useVoxels = cloudDisplay.useVoxels; + cdP.sizeVoxels = cloudDisplay.sizeVoxels * 1.2f; + + m_redrawClouds = true; +} + +auto K4CalibratorDrawer::update_settings(const K4CalibratorDrawerSettings &settings) -> void{ + m_settings = settings; + m_redrawClouds = true; +} + + diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_k4_calibrator_drawer.hpp b/cpp-projects/3d-engine/imgui-tb/imgui_k4_calibrator_drawer.hpp new file mode 100644 index 0000000..06e06c9 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_k4_calibrator_drawer.hpp @@ -0,0 +1,52 @@ + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// base +#include "camera/kinect4/k4_calibrator_settings.hpp" + +// local +#include "imgui_k4_clouds_scene_drawer.hpp" + +namespace tool::graphics { + +struct K4CalibratorDrawer : public K4CloudsSceneDrawer{ + + auto initialize(size_t nbGrabbers) -> void; + auto set_data(int sourceId, int modelId, std::vector* grabbersData) -> void; + auto draw() -> void; + + auto update_grabber_model(size_t idGrabber, const camera::K4Model &model) -> void; + auto update_grabber_cloud_display(size_t idGrabber, const camera::K4CloudDisplaySettings &cloudDisplay) -> void; + auto update_settings(const K4CalibratorDrawerSettings &settings) -> void; + +private: + + bool m_redrawClouds = false; + K4CalibratorDrawerSettings m_settings; +}; +} diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_k4_cloud_drawer.cpp b/cpp-projects/3d-engine/imgui-tb/imgui_k4_cloud_drawer.cpp new file mode 100644 index 0000000..6a7be03 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_k4_cloud_drawer.cpp @@ -0,0 +1,123 @@ + +#include "imgui_k4_cloud_drawer.hpp" + +using namespace tool::graphics; + +auto K4CloudDrawer::initialize() -> void { + + std::vector reset4(4* 100*100, 50); + std::vector reset3(3* 100*100, 50); + colorT.init_or_update_8ui(100,100,4, reset4.data()); + depthT.init_or_update_8ui(100,100,3, reset3.data()); + infraT.init_or_update_8ui(100,100,3, reset3.data()); + + colorD.init(&colorT); + depthD.init(&depthT); + infraD.init(&infraT); + + spD.init(0.05f, 20, 20, {}); +} + +auto K4CloudDrawer::reset() -> void{ + + // reset frame + lastFrameId = -1; + lastFrame = nullptr; + + // reset bodies + 
nbBodies = 0; + jointsModels.clear(); + + std::vector reset4(4* 100*100, 50); + std::vector reset3(3* 100*100, 50); + colorT.init_or_update_8ui(100,100,4, reset4.data()); + depthT.init_or_update_8ui(100,100,3, reset3.data()); + infraT.init_or_update_8ui(100,100,3, reset3.data()); + + // clean cloud drawer + cpD.clean(); +} + +auto K4CloudDrawer::init_from_frame(std::shared_ptr frame) -> bool { + + if(lastFrameId == frame->idCapture){ + return false; + } + + if(!frame->imageColorData.empty()){ + colorT.init_or_update_8ui( + static_cast(frame->colorWidth), + static_cast(frame->colorHeight), 4, reinterpret_cast(frame->imageColorData.data())); + } + if(!frame->imageDepthData.empty()){ + depthT.init_or_update_8ui( + static_cast(frame->depthWidth), + static_cast(frame->depthHeight), 3, reinterpret_cast(frame->imageDepthData.data())); + } + if(!frame->imageInfraData.empty()){ + infraT.init_or_update_8ui( + static_cast(frame->infraWidth), + static_cast(frame->infraHeight), 3, reinterpret_cast(frame->imageInfraData.data())); + } + + if(frame->cloud.is_valid()){ + cpD.init( + frame->cloud.size(), + frame->cloud.vertices.get_data(), + frame->cloud.colors.get_data(), + !frame->cloud.normals.empty() ? 
frame->cloud.normals.get_data() : nullptr + ); + } + + nbBodies = frame->bodies.size(); + if(jointsModels.size() < nbBodies){ + jointsModels.resize(nbBodies); + } + for(size_t ii = 0; ii < nbBodies; ++ii){ + for(size_t jj = 0; jj < frame->bodies[ii].skeleton.joints.size(); ++jj){ + const auto &j = frame->bodies[ii].skeleton.joints[jj]; + jointsModels[ii][jj] = std::make_tuple(j.good_confidence(), + geo::transform( + {{1.f,1.f,1.f}}, + euler_angles(j.orientation)*d180_PI, + j.position*0.001f + ) + ); + } + } + + // update last frame + lastFrameId = frame->idCapture; + lastFrame = frame; + + return true; + // if(k4M->parameters.captureAudio){ + // for(size_t idFrame = 0; idFrame < currentData->audioFramesCount; ++idFrame){ + // for(size_t idChannel = 0; idChannel < channelsData2.size(); ++idChannel){ + // // if(idChannel == 0){ + // // Logger::message(std::to_string(currentData->audioChannelsData[idChannel][idFrame]) + " "); + // // } + // channelsData2[idChannel].push_back(currentData->audioChannelsData[idChannel][idFrame]); + // } + // } + + // for(size_t idChannel = 0; idChannel < channelsData2.size(); ++idChannel){ + // if(channelsData2[idChannel].size() > 50000){ + // channelsData2[idChannel].erase(channelsData2[idChannel].begin(), channelsData2[idChannel].begin() + (channelsData2[idChannel].size() - 50000)); + // } + // } + // } +} + +auto K4CloudDrawer::init_from_colored_cloud_data(const geo::ColoredCloudData &cloudData) -> bool { + +// if(cloudData.size() == 0 || cloudData.vertices.empty() || cloudData.colors.empty()){ +// return false; +// } + cpD.init(cloudData.size(), cloudData.vertices.get_data(), cloudData.colors.get_data()); + + lastFrame = nullptr; + lastFrameId = -1; + + return true; +} diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_k4_cloud_drawer.hpp b/cpp-projects/3d-engine/imgui-tb/imgui_k4_cloud_drawer.hpp new file mode 100644 index 0000000..5e53690 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_k4_cloud_drawer.hpp @@ -0,0 +1,80 @@ 
+ + + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// base +#include "camera/kinect4/k4_frame.hpp" +#include "camera/kinect4/k4_device_settings.hpp" +#include "camera/kinect4/k4_display_settings.hpp" + + +// opengl +#include "opengl/gl_texture.hpp" +#include "opengl/draw/drawer.hpp" + +// local +#include "imgui_texture_ui_drawer.hpp" + +namespace tool::graphics { + +struct K4CloudDrawer{ + + // info + std::int32_t lastFrameId = -1; + std::shared_ptr lastFrame = nullptr; + + // drawers + gl::CloudPointsDrawer cpD; + ImGuiTextureUiDrawer colorD; + ImGuiTextureUiDrawer depthD; + ImGuiTextureUiDrawer infraD; + gl::SphereDrawer spD; + + // settings + geo::Mat4f model = geo::Mat4f::identity(); + camera::K4CloudDisplaySettings display; + + // joints sub models + size_t nbBodies = 0; + std::vector, camera::k4JointsCount>> jointsModels; + + // textures + gl::Texture2D colorT; + gl::Texture2D depthT; + gl::Texture2D infraT; + + // init + auto initialize() -> void; + auto reset() -> void; + // # from frame + auto init_from_frame(std::shared_ptr frame) -> bool; + // # from data + auto init_from_colored_cloud_data(const geo::ColoredCloudData &cloudData) -> bool; +}; +} diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_k4_clouds_scene_drawer.cpp b/cpp-projects/3d-engine/imgui-tb/imgui_k4_clouds_scene_drawer.cpp new file mode 100644 index 0000000..3231125 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_k4_clouds_scene_drawer.cpp @@ -0,0 +1,553 @@ + +/******************************************************************************* +** Toolset-k4-scaner-manager ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, 
merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "imgui_k4_clouds_scene_drawer.hpp" + +// base +#include "thirdparty/BinPack2D/binpack2d.hpp" +#include "utility/logger.hpp" + +// local +#include "imgui-tb/imgui_helper.hpp" +#include "imgui_ui_drawer.hpp" +#include "engine/shaders_manager.hpp" + +using namespace tool; +using namespace tool::graphics; +using namespace tool::camera; + +auto K4CloudsSceneDrawer::initialize(size_t nbDrawers) -> void { + + fboD.init(); + cloudsD.resize(nbDrawers); + for(auto &cloudD : cloudsD){ + cloudD.initialize(); + } + + gridD = std::make_unique(); +} + +auto K4CloudsSceneDrawer::reset() -> void{ + for(auto &cloudD : cloudsD){ + cloudD.reset(); + } +} + +auto K4CloudsSceneDrawer::update_from_frame(size_t idCloud, std::shared_ptr frame) -> bool { + if(idCloud < cloudsD.size()){ + return cloudsD[idCloud].init_from_frame(std::move(frame)); + } + return false; +} + +auto K4CloudsSceneDrawer::update_from_colored_cloud_data(size_t idCloud, const geo::ColoredCloudData &cloud) -> bool{ + if(idCloud < cloudsD.size()){ + return 
cloudsD[idCloud].init_from_colored_cloud_data(cloud); + } + return false; +} + +auto K4CloudsSceneDrawer::draw_clouds_to_fbo() -> void{ + draw_clouds_to_fbo(fboD); + fboD.reset_states(); + m_redrawClouds = false; +} + +auto K4CloudsSceneDrawer::draw_clouds_to_fbo(ImguiFboUiDrawer &fboD) -> void { + + if(fboD.texture_id() == 0){ + return; + } + + fboD.bind(); + fboD.update_viewport(); + fboD.set_gl_states(display.backgroundColor); + + auto solidShader = ShadersManager::get_instance()->get_ptr("solid"); + auto cloudShader = ShadersManager::get_instance()->get_ptr("cloud"); + auto voxelShader = ShadersManager::get_instance()->get_ptr("voxelCloud"); + + size_t idC = 0; + for(auto &cloudD : cloudsD){ + + if(display.drawOnlyCloudId != -1){ + if(idC++ != display.drawOnlyCloudId){ + continue; + } + } + + if(!cloudD.display.cloudVisible){ + continue; + } + + if(auto shader = cloudD.display.useVoxels ? voxelShader : cloudShader){ + shader->use(); + shader->set_uniform("view", fboD.camera()->view().conv()); + shader->set_uniform("projection", fboD.camera()->projection().conv()); + shader->set_uniform("model", cloudD.model); + shader->set_uniform("enable_unicolor", cloudD.display.forceCloudColor); + shader->set_uniform("unicolor", cloudD.display.cloudColor); + shader->set_uniform("factor_unicolor", cloudD.display.factorUnicolor); + + if(cloudD.display.useVoxels){ + shader->set_uniform("hSize", cloudD.display.sizeVoxels); + }else{ + shader->set_uniform("size_pt", cloudD.display.sizePoints); + shader->set_uniform("camera_position", fboD.camera()->position().conv()); + } + cloudD.cpD.draw(); + }else{ + Logger::error("[K4CloudsSceneDrawer] Shaders with aliases \"cloud\" and \"voxelCloud\" must be available in the shader manager.\n"); + break; + } + + if(auto shader = solidShader){ + + shader->use(); + shader->set_uniform("view", fboD.camera()->view().conv()); + shader->set_uniform("projection", fboD.camera()->projection().conv()); + shader->set_uniform("enable_unicolor", true); + 
+ for(size_t ii = 0; ii < cloudD.nbBodies; ++ii){ + shader->set_uniform("unicolor", geo::Pt4f{1.f,0.f,0.f, 1.f}); + for(size_t jj = 0; jj < cloudD.jointsModels[ii].size(); ++jj){ + const auto &jm = cloudD.jointsModels[ii][jj]; + if(std::get<0>(jm)){ + shader->set_uniform("model", cloudD.model * std::get<1>(jm)); + cloudD.spD.draw(); + } + } + } + + if(plane1D){ + auto pTr = plane1Tr * cloudD.model; +// std::cout << plane1Tr << " "; + shader->set_uniform("model", pTr); + shader->set_uniform("unicolor", geo::Pt4f{0.f,1.f,0.f, 1.f}); + plane1D->draw(shader); + } + +// auto id = geo::Mat4f::identity(); +// shader->set_uniform("model", id); +// gridD->draw(shader); + + }else{ + Logger::error("[K4CloudsSceneDrawer] Shaders with aliases \"solid\" must be available in the shader manager.\n"); + break; + } + } + + fboD.unbind(); +} + +auto K4CloudsSceneDrawer::draw_color_texture_imgui_child(size_t idCloud, const std::string &windowName, geo::Pt2f sizeWindow) -> void{ + + auto &cD = cloudsD[idCloud]; + + // draw + cD.colorD.draw_child(windowName, sizeWindow); + + // check mouse inputs + const auto &hp = cD.colorD.hoveringPixel; + for(size_t idB = 0; idB < cD.colorD.mouseButtonsPressed.size(); ++idB){ + if(cD.lastFrame != nullptr && cD.colorD.mouseButtonsPressed[idB]){ + if(hp.x() >= 0 && hp.x() < static_cast(cD.lastFrame->colorWidth) && + hp.y() >= 0 && hp.y() < static_cast(cD.lastFrame->colorHeight) && + !cD.lastFrame->imageColorData.empty()){ + mouse_pressed_color_signal(idCloud, idB, hp, cD.lastFrame->imageColorData[hp.y() * cD.lastFrame->colorWidth + hp.x()]); + } + } + } +} + +auto K4CloudsSceneDrawer::draw_depth_texture_imgui_child(size_t idCloud, const std::string &windowName, geo::Pt2f sizeWindow) -> void{ + + auto &cD = cloudsD[idCloud]; + + // draw + cD.depthD.draw_child(windowName, sizeWindow); + + // check mouse inputs + const auto &hp = cD.depthD.hoveringPixel; + for(size_t idB = 0; idB < cD.depthD.mouseButtonsPressed.size(); ++idB){ + if(cD.lastFrame != nullptr 
&& cD.depthD.mouseButtonsPressed[idB]){ + if(hp.x() >= 0 && hp.x() < static_cast(cD.lastFrame->depthWidth) && + hp.y() >= 0 && hp.y() < static_cast(cD.lastFrame->depthHeight) && + !cD.lastFrame->imageDepthData.empty()){ + mouse_pressed_depth_signal(idCloud, idB, hp, cD.lastFrame->imageDepthData[hp.y() * cD.lastFrame->depthWidth + hp.x()]); + } + } + } +} + +auto K4CloudsSceneDrawer::draw_infra_texture_imgui_child(size_t idCloud, const std::string &windowName, geo::Pt2f sizeWindow) -> void{ + + auto &cD = cloudsD[idCloud]; + + // draw + cD.infraD.draw_child(windowName, sizeWindow); + + // check mouse inputs + const auto &hp = cD.infraD.hoveringPixel; + for(size_t idB = 0; idB < cD.infraD.mouseButtonsPressed.size(); ++idB){ + if(cD.lastFrame != nullptr && cD.infraD.mouseButtonsPressed[idB]){ + if(hp.x() >= 0 && hp.x() < static_cast(cD.lastFrame->infraWidth) && + hp.y() >= 0 && hp.y() < static_cast(cD.lastFrame->infraHeight) && + !cD.lastFrame->imageInfraData.empty()){ + mouse_pressed_infra_signal(idCloud, idB, hp, cD.lastFrame->imageInfraData[hp.y() * cD.lastFrame->infraWidth + hp.x()]); + } + } + } +} + +auto K4CloudsSceneDrawer::draw_color_texture_imgui_at_position(size_t idCloud, const geo::Pt2f &screenPos, const geo::Pt2f &sizeTexture, std::optional text) -> void { + + auto &cD = cloudsD[idCloud]; + + // draw + cD.colorD.draw_at_position(screenPos, sizeTexture, std::move(text)); + + // check mouse inputs + const auto &hp = cD.colorD.hoveringPixel; + for(size_t idB = 0; idB < cD.colorD.mouseButtonsPressed.size(); ++idB){ + if(cD.lastFrame != nullptr && cD.colorD.mouseButtonsPressed[idB]){ + if(hp.x() >= 0 && hp.x() < static_cast(cD.lastFrame->colorWidth) && + hp.y() >= 0 && hp.y() < static_cast(cD.lastFrame->colorHeight) && + !cD.lastFrame->imageColorData.empty()){ + mouse_pressed_color_signal(idCloud, idB, hp, cD.lastFrame->imageColorData[hp.y() * cD.lastFrame->colorWidth + hp.x()]); + } + } + } +} + +auto 
K4CloudsSceneDrawer::draw_depth_texture_imgui_at_position(size_t idCloud, const geo::Pt2f &screenPos, const geo::Pt2f &sizeTexture, std::optional text) -> void { + + auto &cD = cloudsD[idCloud]; + + // draw + cD.depthD.draw_at_position(screenPos, sizeTexture, std::move(text)); + + // check mouse inputs + const auto &hp = cD.depthD.hoveringPixel; + for(size_t idB = 0; idB < cD.depthD.mouseButtonsPressed.size(); ++idB){ + if(cD.lastFrame != nullptr && cD.depthD.mouseButtonsPressed[idB]){ + if(hp.x() >= 0 && hp.x() < static_cast(cD.lastFrame->depthWidth) && + hp.y() >= 0 && hp.y() < static_cast(cD.lastFrame->depthHeight) && + !cD.lastFrame->imageDepthData.empty()){ + mouse_pressed_depth_signal(idCloud, idB, hp, cD.lastFrame->imageDepthData[hp.y() * cD.lastFrame->depthWidth + hp.x()]); + } + } + } +} + +auto K4CloudsSceneDrawer::draw_infra_texture_imgui_at_position(size_t idCloud, const geo::Pt2f &screenPos, const geo::Pt2f &sizeTexture, std::optional text) -> void { + + auto &cD = cloudsD[idCloud]; + + // draw + cD.infraD.draw_at_position(screenPos, sizeTexture, std::move(text)); + + // check mouse inputs + const auto &hp = cD.infraD.hoveringPixel; + for(size_t idB = 0; idB < cD.infraD.mouseButtonsPressed.size(); ++idB){ + if(cD.lastFrame != nullptr && cD.infraD.mouseButtonsPressed[idB]){ + if(hp.x() >= 0 && hp.x() < static_cast(cD.lastFrame->infraWidth) && + hp.y() >= 0 && hp.y() < static_cast(cD.lastFrame->infraHeight) && + !cD.lastFrame->imageInfraData.empty()){ + mouse_pressed_infra_signal(idCloud, idB, hp, cD.lastFrame->imageInfraData[hp.y() * cD.lastFrame->infraWidth + hp.x()]); + } + } + } +} + +auto K4CloudsSceneDrawer::draw_all_clouds_drawers_in_one_tab(bool drawColor, bool drawDepth, bool drawInfra, bool drawCloud, std::string_view cloudTabName) -> void{ + + + if(ImGuiUiDrawer::begin_tab_bar(&m_tabId, "Frames_all###direct_all_frames_tab_bar")){ + + std::vector textures; + + if(drawColor){ + 
if(ImGuiUiDrawer::begin_tab_item("Color###direct_all_color_tabitem")){ + + for(const auto &cloudD : cloudsD){ + textures.push_back(&cloudD.colorT); + } + + auto textPos = compute_textures_rectangles(content_region_size_available(), textures); + auto cp = cursor_screen_position(); + for(size_t ii = 0; ii < textPos.size(); ++ii){ + draw_color_texture_imgui_at_position( + ii, + std::get<0>(textPos[ii]) + cp, + std::get<1>(textPos[ii]), + std::format("Cam{}",ii) + ); + } + + ImGui::EndTabItem(); + } + } + if(drawDepth){ + if(ImGuiUiDrawer::begin_tab_item("Depth###direct_all_depth_tabitem")){ + + for(const auto &cloudD : cloudsD){ + textures.push_back(&cloudD.depthT); + } + + auto textPos = compute_textures_rectangles(content_region_size_available(), textures); + auto cp = cursor_screen_position(); + for(size_t ii = 0; ii < textPos.size(); ++ii){ + draw_depth_texture_imgui_at_position( + ii, + std::get<0>(textPos[ii]) + cp, + std::get<1>(textPos[ii]), + std::format("Cam{}",ii) + ); + } + ImGui::EndTabItem(); + } + } + if(drawInfra){ + if(ImGuiUiDrawer::begin_tab_item("Infra###direct_all_infra_tabitem")){ + + for(const auto &cloudD : cloudsD){ + textures.push_back(&cloudD.infraT); + } + + auto textPos = compute_textures_rectangles(content_region_size_available(), textures); + auto cp = cursor_screen_position(); + for(size_t ii = 0; ii < textPos.size(); ++ii){ + draw_infra_texture_imgui_at_position( + ii, + std::get<0>(textPos[ii]) + cp, + std::get<1>(textPos[ii]), + std::format("Cam{}",ii) + ); + } + + ImGui::EndTabItem(); + } + } + + if(drawCloud){ + if(ImGuiUiDrawer::begin_tab_item(std::format("{}###direct_all_cloud_tabitem",cloudTabName).c_str())){ + draw_fbo(content_region_size_available().conv()); + ImGui::EndTabItem(); + } + } + + ImGui::EndTabBar(); + } +} + +auto K4CloudsSceneDrawer::draw_cloud_drawer_tab(size_t idDrawer, bool focusWindow, std::string_view name, bool drawColor, bool drawDepth, bool drawInfra, bool drawCloud, std::optional> sizeW) -> void{ + + 
if(focusWindow){ + if(ImGuiUiDrawer::begin_tab_bar(&m_tabId, std::format("Frames###{}_frames_tab_bar", name).data())){ + + if(drawColor){ + if(ImGuiUiDrawer::begin_tab_item(std::format("Color###{}_focus_color_tabitem", name).data())){ + draw_color_texture_imgui_child(idDrawer, std::format("-###direct_focus_color_tabchild", name), content_region_size_available()); + ImGui::EndTabItem(); + } + } + if(drawDepth){ + if(ImGuiUiDrawer::begin_tab_item(std::format("Depth###{}_focus_depth_tabitem", name).data())){ + draw_depth_texture_imgui_child(idDrawer, std::format("-###direct_focus_depth_tabchild", name), content_region_size_available()); + ImGui::EndTabItem(); + } + } + if(drawInfra){ + if(ImGuiUiDrawer::begin_tab_item(std::format("Infra###{}_focus_infra_tabitem", name).data())){ + draw_infra_texture_imgui_child(idDrawer, std::format("-###direct_focus_infra_tabchild", name), content_region_size_available()); + ImGui::EndTabItem(); + } + } + if(drawCloud){ + if(ImGuiUiDrawer::begin_tab_item(std::format("Cloud###{}_focus_cloud_tabitem", name).data())){ + draw_fbo(content_region_size_available().conv()); + ImGui::EndTabItem(); + } + } + + ImGui::EndTabBar(); + } + }else{ + auto sizeW = content_region_size_available() * 0.46f; + if(drawColor){ + draw_color_texture_imgui_child(idDrawer, std::format("Color###{}_nofocus_color_tabchild", name), sizeW); + if(drawDepth){ + ImGui::SameLine(); + } + } + if(drawDepth){ + draw_depth_texture_imgui_child(idDrawer, std::format("Depth###{}_nofocus_depth_tabchild", name), sizeW); + } + if(drawInfra){ + draw_infra_texture_imgui_child(idDrawer, std::format("Infra###{}_nofocus_infra_tabchild", name), sizeW); + if(drawCloud){ + ImGui::SameLine(); + } + } + + if(drawCloud){ + if(ImGui::BeginChild(std::format("CloudWindow###{}_nofocus_cloud_window_child", name).data(), to_iv2(sizeW))){ + if(ImGuiUiDrawer::begin_tab_bar(&m_tabId, std::format("CloudTab###{}_cloud_tabbar", name).data())){ + 
if(ImGuiUiDrawer::begin_tab_item(std::format("Cloud###{}_cloud_tabitem", name).data())){ + draw_fbo(content_region_size_available().conv()); + ImGui::EndTabItem(); + } + ImGui::EndTabBar(); + } + } + ImGui::EndChild(); + } + } +} + +auto K4CloudsSceneDrawer::update_cloud_display_settings(size_t idCloud, const camera::K4CloudDisplaySettings &cloudDisplay) -> void{ + if(idCloud < cloudsD.size()){ + cloudsD[idCloud].display = cloudDisplay; + m_redrawClouds = true; + }else{ + Logger::error("invalid id cloud\n"); + } +} + +auto K4CloudsSceneDrawer::update_scene_display_settings(const camera::K4SceneDisplaySettings &sdS) -> void{ + display = sdS; + m_redrawClouds = true; +} + +auto K4CloudsSceneDrawer::update_model(size_t idCloud, const camera::K4Model &model) -> void{ + cloudsD[idCloud].model = model.compute_full_transformation(); + m_redrawClouds = true; +} + +auto K4CloudsSceneDrawer::compute_textures_rectangles(geo::Pt2f parentSize, const std::vector &textures) -> std::vector>{ + +// constexpr bool allow_flip = false; +// const auto runtime_flipping_mode = rectpack2D::flipping_option::DISABLED; +// using spaces_type = rectpack2D::empty_spaces; +// using rect_type = rectpack2D::output_rect_t; + +// std::vector rectangles; +// for(const auto &texture : textures){ +// auto sizeF = geo::Pt2f{1.f*texture->width(), 1.f*texture->height()}; +// rectangles.emplace_back(rectpack2D::rect_xywh(0,0,static_cast(sizeF.x()),static_cast(sizeF.y()))); +// } + +// auto report_successful = [](rect_type&) { +// return rectpack2D::callback_result::CONTINUE_PACKING; +// }; + +// auto report_unsuccessful = [](rect_type&) { +// return rectpack2D::callback_result::ABORT_PACKING; +// }; + +// const auto max_side = parentSize.x(); +// const auto discard_step = -4; +// const auto result_size = rectpack2D::find_best_packing_dont_sort( +// rectangles, +// rectpack2D::make_finder_input( +// max_side, +// discard_step, +// report_successful, +// report_unsuccessful, +// runtime_flipping_mode +// ) 
+// ); + +// std::vector> infos; +// for(const auto &r : rectangles){ +// infos.push_back({{1.f*r.x,1.f*r.y},{1.f*r.w, 1.f*r.h}}); +// } +// return infos; + + + int idTry = 0; + float factor = 1.f; + bool fit = false; + do { + if(idTry > 20){ + return {}; + } + + + BinPack2D::ContentAccumulator inputContent; + + for(const auto &texture : textures){ + auto sizeF = geo::Pt2f{texture->width() * factor, texture->height() * factor}; + + inputContent += BinPack2D::Content( + texture, BinPack2D::Coord(), BinPack2D::Size(static_cast(sizeF.x()), static_cast(sizeF.y())), false + ); + } + + + BinPack2D::CanvasArray canvasArray = BinPack2D::UniformCanvasArrayBuilder( + static_cast(parentSize.x()),static_cast(parentSize.y()),1 + ).Build(); + BinPack2D::ContentAccumulator remainder; + + if(canvasArray.Place( inputContent, remainder )){ + fit = true; + + // A place to store packed content. + BinPack2D::ContentAccumulator outputContent; + + // Read all placed content. + canvasArray.CollectContent( outputContent ); + + std::vector> infos; + + typedef BinPack2D::Content::Vector::iterator binpack2d_iterator; + for( binpack2d_iterator itor = outputContent.Get().begin(); itor != outputContent.Get().end(); itor++ ) { + const BinPack2D::Content &content = *itor; + infos.push_back({{static_cast(content.coord.x),static_cast(content.coord.y)}, + {static_cast(content.size.w),static_cast(content.size.h)}}); + } + + return infos; + + }else{ + ++idTry; + factor *= 0.9f; + } + }while(!fit); + + + return {}; +} + +auto K4CloudsSceneDrawer::draw_fbo(tool::geo::Pt2 size) -> void{ + fboD.bind(); + fboD.resize(size); + fboD.draw(); + fboD.unbind(); +} + diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_k4_clouds_scene_drawer.hpp b/cpp-projects/3d-engine/imgui-tb/imgui_k4_clouds_scene_drawer.hpp new file mode 100644 index 0000000..a876007 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_k4_clouds_scene_drawer.hpp @@ -0,0 +1,95 @@ 
+/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ +#pragma once + +// base +#include "thirdparty/sigslot/signal.hpp" + +// local +#include "camera/kinect4/k4_model.hpp" +#include "imgui_k4_cloud_drawer.hpp" +#include "imgui_fbo_ui_drawer.hpp" + + +namespace tool::graphics { + +struct K4CloudsSceneDrawer{ + + auto initialize(size_t nbDrawers) -> void; + auto reset() -> void; + + inline auto has_to_redraw_clouds() const noexcept -> bool{ + return fboD.is_camera_updated() || fboD.is_screen_updated() || m_redrawClouds; + } + + auto update_from_frame(size_t idCloud, std::shared_ptr frame) -> bool; + auto update_from_colored_cloud_data(size_t idCloud, const geo::ColoredCloudData &cloud) -> bool; + + auto draw_clouds_to_fbo() -> void; + auto draw_clouds_to_fbo(ImguiFboUiDrawer &fboD) -> void; + auto draw_fbo(tool::geo::Pt2 size) -> void; + + // imgui + // # child + auto draw_color_texture_imgui_child(size_t idCloud, const std::string &windowName, geo::Pt2f sizeWindow) -> void; + auto draw_depth_texture_imgui_child(size_t idCloud, const std::string &windowName, geo::Pt2f sizeWindow) -> void; + auto draw_infra_texture_imgui_child(size_t idCloud, const std::string &windowName, geo::Pt2f sizeWindow) -> void; + // # position + auto draw_color_texture_imgui_at_position(size_t idCloud, const geo::Pt2f &screenPos, const geo::Pt2f &sizeTexture, std::optional text = {}) -> void; + auto draw_depth_texture_imgui_at_position(size_t idCloud, const geo::Pt2f &screenPos, const geo::Pt2f &sizeTexture, std::optional text = {}) -> void; + auto draw_infra_texture_imgui_at_position(size_t idCloud, const geo::Pt2f &screenPos, const geo::Pt2f &sizeTexture, std::optional text = {}) -> void; + // # tab + auto draw_all_clouds_drawers_in_one_tab(bool drawColor=true, bool drawDepth=true, bool drawInfra=true, bool drawCloud=true, std::string_view cloudTabName = "Cloud"sv) -> void; + auto draw_cloud_drawer_tab(size_t idDrawer, bool focusWindow, 
std::string_view name, bool drawColor=true, bool drawDepth=true, bool drawInfra=true, bool drawCloud=true, std::optional> sizeW = std::nullopt) -> void; + + // settings + auto update_cloud_display_settings(size_t idCloud, const camera::K4CloudDisplaySettings &cloudDisplay) -> void; + auto update_scene_display_settings(const camera::K4SceneDisplaySettings &sdS) -> void; + auto update_model(size_t idCloud, const camera::K4Model &model) -> void; + + // signals + sigslot::signal, geo::Pt4> mouse_pressed_color_signal; + sigslot::signal, geo::Pt3> mouse_pressed_depth_signal; + sigslot::signal, geo::Pt3> mouse_pressed_infra_signal; + + // drawers + std::vector cloudsD; + std::unique_ptr gridD = nullptr; + ImguiFboUiDrawer fboD; + // settings + camera::K4SceneDisplaySettings display; + + // test + std::unique_ptr plane1D = nullptr; + geo::Matrix plane1Tr = geo::Mat4f::identity(); + +private: + + bool m_redrawClouds = false; + unsigned int m_tabId = 0; + + auto compute_textures_rectangles(geo::Pt2f parentSize, const std::vector &textures) -> std::vector>; +}; +} diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_k4_device_drawer.cpp b/cpp-projects/3d-engine/imgui-tb/imgui_k4_device_drawer.cpp new file mode 100644 index 0000000..200a04a --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_k4_device_drawer.cpp @@ -0,0 +1,91 @@ + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** 
+** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "imgui_k4_device_drawer.hpp" + +// base +#include "files/cloud_io.hpp" +#include "utility/string.hpp" +#include "utility/logger.hpp" + +// local +#include "imgui/imgui.h" + +using namespace tool::graphics; + +auto K4DeviceDrawer::initialize() -> void{ + K4CloudsSceneDrawer::initialize(1); +} + +auto K4DeviceDrawer::update() -> void{ + + locker.lock(); + if(lastFrame != nullptr){ + if(previousFrameId != lastFrame->idCapture){ + update_from_frame(0, lastFrame); + previousFrameId = lastFrame->idCapture; + redrawClouds = true; + } + } + locker.unlock(); + + if(redrawClouds || has_to_redraw_clouds()){ + draw_clouds_to_fbo(); + } + redrawClouds = false; +} + +auto K4DeviceDrawer::draw(bool focusWindow) -> void{ + draw_cloud_drawer_tab(0, focusWindow, "device"sv); +} + +auto K4DeviceDrawer::update_frame(std::shared_ptr frame) -> void{ + locker.lock(); + lastFrame = frame; + locker.unlock(); +} + +auto K4DeviceDrawer::save_cloud(const std::string &path) -> void{ + Logger::message("K4DeviceDrawer::save_cloud\n"); + + std::shared_ptr frame = nullptr; + locker.lock(); + frame = lastFrame; + locker.unlock(); + + if(frame != nullptr){ + Logger::message(std::format("save_cloud: {}\n", path)); + 
tool::files::CloudIO::save_cloud(path, frame->cloud); + auto c = frame->cloud; + for(size_t ii = 0; ii < c.size(); ++ii){ + c.vertices[ii] += c.normals[ii]*0.2f; + } + + auto nPath = path; + tool::String::replace_first(nPath, ".obj", "_.obj"); + tool::files::CloudIO::save_cloud(nPath, c); + } +} diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_k4_device_drawer.hpp b/cpp-projects/3d-engine/imgui-tb/imgui_k4_device_drawer.hpp new file mode 100644 index 0000000..87402db --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_k4_device_drawer.hpp @@ -0,0 +1,62 @@ + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// base +#include "camera/kinect4/k4_frame.hpp" +#include "camera/kinect4/k4_filters.hpp" + +// 3d-engine +#include "imgui_k4_clouds_scene_drawer.hpp" + +namespace tool::graphics { + +struct K4DeviceDrawer : public K4CloudsSceneDrawer{ + + auto initialize() -> void; + auto update() -> void; + auto draw(bool focusWindow) -> void; + + auto update_filters(const camera::K4Filters &filters) -> void{ +// if(!plane1D){ +// plane1D = std::make_unique(); +// } +// plane1Tr = geo::look_at(filters.p1Pos, filters.p1Pos + geo::normalize(filters.p1Rot), geo::Vec3f(1,0,0)); +// = geo::transform(geo::Pt3f{1.f,1.f,1.f}, filters.p1Rot, filters.p1Pos); + } + + auto update_frame(std::shared_ptr frame) -> void; + + auto save_cloud(const std::string &path) -> void; + +private: + std::mutex locker; + bool redrawClouds = false; + std::shared_ptr lastFrame = nullptr; + std::int32_t previousFrameId = 0; +}; +} diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_k4_direct_drawer.cpp b/cpp-projects/3d-engine/imgui-tb/imgui_k4_direct_drawer.cpp new file mode 100644 index 0000000..2a63084 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_k4_direct_drawer.cpp @@ -0,0 +1,115 @@ + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission 
// Implementation of K4DirectDrawer (declared in imgui_k4_direct_drawer.hpp).

#include "imgui_k4_direct_drawer.hpp"

// std
#include <format>
#include <memory>
#include <vector>

// local
#include "imgui_ui_drawer.hpp"

using namespace tool::graphics;

/// Initializes the scene drawer with one cloud slot per grabber.
auto K4DirectDrawer::initialize(size_t nbGrabbers) -> void{
    K4CloudsSceneDrawer::initialize(nbGrabbers);
}

/// Forwards the frame of grabber `idGrabber` to the scene drawer and requests
/// a redraw when update_from_frame reports that something was updated.
/// NOTE(review): template argument lost in the reviewed text;
/// camera::K4Frame is assumed — confirm.
auto K4DirectDrawer::set_frame(size_t idGrabber, std::shared_ptr<camera::K4Frame> frame) -> void{
    if(update_from_frame(idGrabber, frame)){
        m_redrawClouds = true;
    }
}

/// Redraws the clouds to the FBO when a redraw is pending.
/// When the "All" tab is not the opened one, only the cloud of the currently
/// opened per-grabber tab is left visible for the duration of the draw; the
/// user visibility flags are restored afterwards.
auto K4DirectDrawer::redraw_clouds_to_fbo() -> void{

    if(m_redrawClouds || has_to_redraw_clouds()){

        // store visibility (char instead of bool to avoid the std::vector<bool> proxy)
        std::vector<char> savedVisibility;
        savedVisibility.reserve(cloudsD.size());
        for(size_t ii = 0; ii < cloudsD.size(); ++ii){
            savedVisibility.push_back(cloudsD[ii].display.cloudVisible ? 1 : 0);
        }

        // display only current grabber id cloud if per tab
        if(!m_allTabOpened){
            for(size_t ii = 0; ii < cloudsD.size(); ++ii){
                if(cloudsD[ii].display.cloudVisible){
                    cloudsD[ii].display.cloudVisible = (ii == m_currentTabOpened);
                }
            }
        }
        draw_clouds_to_fbo();

        // restore visibility
        for(size_t ii = 0; ii < cloudsD.size(); ++ii){
            cloudsD[ii].display.cloudVisible = (savedVisibility[ii] == 1);
        }
    }
    m_redrawClouds = false;
}

/// Draws the tab bar with the "All" tab followed by one tab per grabber, and
/// records which tab is opened for the next redraw_clouds_to_fbo().
auto K4DirectDrawer::draw(bool focus) -> void{

    redraw_clouds_to_fbo();

    static ImGuiID tabId = 0;
    if (ImGuiUiDrawer::begin_tab_bar(&tabId, "###display_direct_tabbar")){

        // all
        if ((m_allTabOpened = ImGuiUiDrawer::begin_tab_item("All###display_direct_all_tabitem"))){
            draw_all_clouds_drawers_in_one_tab();
            ImGui::EndTabItem();
        }

        // per grabber
        for(size_t ii = 0; ii < cloudsD.size(); ++ii){
            if (ImGuiUiDrawer::begin_tab_item(std::format("[{}]###display_direct_per_grabber_tabitem_{}", ii, ii).c_str())){
                m_currentTabOpened = ii;
                draw_cloud_drawer_tab(ii, focus, "display_direct"sv);
                ImGui::EndTabItem();
            }
        }

        ImGui::EndTabBar();
    }
}

/// Draws the tab bar with only the "All" tab.
auto K4DirectDrawer::draw_only_clouds() -> void{

    redraw_clouds_to_fbo();

    static ImGuiID tabId = 0;
    if (ImGuiUiDrawer::begin_tab_bar(&tabId, "###display_direct_tabbar")){

        // all
        // Record the tab state like draw() does: otherwise m_allTabOpened keeps
        // its previous value (false by default) and redraw_clouds_to_fbo() hides
        // every cloud except m_currentTabOpened although the "All" view is shown.
        if ((m_allTabOpened = ImGuiUiDrawer::begin_tab_item("All###display_direct_all_tabitem"))){
            draw_all_clouds_drawers_in_one_tab();
            ImGui::EndTabItem();
        }

        ImGui::EndTabBar();
    }
}
charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
// ---- imgui_k4_direct_drawer.hpp : declaration of K4DirectDrawer ----

#pragma once

// local
#include "imgui_k4_clouds_scene_drawer.hpp"

namespace tool::graphics {

/// Clouds scene drawer displaying the frames received directly from the
/// grabbers: an "All" tab plus one tab per grabber.
struct K4DirectDrawer : public K4CloudsSceneDrawer{

    /// Initializes the scene drawer with one cloud slot per grabber.
    auto initialize(size_t nbGrabbers) -> void;
    /// Updates the cloud of grabber `idGrabber` from `frame`.
    /// NOTE(review): template argument lost in the reviewed text;
    /// camera::K4Frame is assumed — confirm.
    auto set_frame(size_t idGrabber, std::shared_ptr<camera::K4Frame> frame) -> void;

    /// Draws the "All" tab and the per-grabber tabs.
    auto draw(bool focus) -> void;
    /// Draws the "All" tab only.
    auto draw_only_clouds() -> void;

private:

    auto redraw_clouds_to_fbo() -> void;

    bool m_allTabOpened = false;    // state of the "All" tab, written by draw()
    bool m_redrawClouds = false;    // a redraw to the FBO is pending
    size_t m_currentTabOpened = 0;  // last opened per-grabber tab
};
}

// ---- cpp-projects/3d-engine/imgui-tb/imgui_k4_player_drawer.cpp : implementation of K4PlayerDrawer ----

#include "imgui_k4_player_drawer.hpp"

// std
#include <format>
#include <iostream>
#include <memory>
#include <vector>

// local
#include "imgui_ui_drawer.hpp"


using namespace tool::graphics;

/// Initializes one cloud slot per model, applies each model to its slot and
/// resets the current frames (one null entry per model).
/// NOTE(review): template argument lost in the reviewed text;
/// camera::K4Model is assumed — confirm.
auto K4PlayerDrawer::initialize(const std::vector<camera::K4Model> &models) -> void {

    K4CloudsSceneDrawer::initialize(models.size());
    for(size_t ii = 0; ii < models.size(); ++ii){
        update_model(ii, models[ii]);
        // NOTE(review): looks like a leftover debug print — consider routing it through the logger
        std::cout << models[ii].transformation << "\n";
    }

    // same result as resize + fill with nullptr, in one call
    m_currentFrames.assign(models.size(), nullptr);
}

/// Stores `frame` as the current frame of camera `idC` and requests a redraw
/// when update_from_frame reports that something was updated.
/// Ids outside the initialized range are ignored.
auto K4PlayerDrawer::set_frame(size_t idC, std::shared_ptr<camera::K4Frame> frame) -> void{

    if(idC >= m_currentFrames.size()){
        return;
    }

    m_currentFrames[idC] = frame;

    if(update_from_frame(idC, m_currentFrames[idC])){
        m_redrawClouds = true;
    }
}

/// Redraws the clouds to the FBO if needed, then draws the "All" tab and the
/// per-grabber tabs. A change of opened tab schedules a redraw for the next
/// call, since display.drawOnlyCloudId is changed by the tab that is opened.
auto K4PlayerDrawer::draw() -> void{

    if(m_redrawClouds || has_to_redraw_clouds()){
        draw_clouds_to_fbo();
    }
    m_redrawClouds = false;

    bool focusWindow = false;

    // draw data
    static ImGuiID tabId = 0;
    // the original reused the recorder id "###display_recorder_tabbar" here (copy-paste);
    // every other id of this function is prefixed with "display_player"
    if (ImGuiUiDrawer::begin_tab_bar(&tabId, "###display_player_tabbar")){

        // all
        bool allTabOpened = false;
        if ((allTabOpened = ImGuiUiDrawer::begin_tab_item("All###display_player_all_tabitem"))){
            display.drawOnlyCloudId = -1;
            draw_all_clouds_drawers_in_one_tab();
            ImGui::EndTabItem();
        }

        // per grabber
        int previousCloudIdDisplayed = display.drawOnlyCloudId;
        for(size_t ii = 0; ii < cloudsD.size(); ++ii){
            if (ImGuiUiDrawer::begin_tab_item(std::format("[{}]###display_player_per_grabber_tabitem_{}", ii, ii).c_str())){
                display.drawOnlyCloudId = static_cast<int>(ii); // explicit size_t -> int narrowing
                draw_cloud_drawer_tab(ii, focusWindow, "display_player"sv);
                ImGui::EndTabItem();
            }
        }
        ImGui::EndTabBar();

        if((allTabOpened != m_allTabOpened) || (previousCloudIdDisplayed != display.drawOnlyCloudId)){
            m_redrawClouds = true;
            m_allTabOpened = allTabOpened;
        }
    }

    // The original source keeps, commented out, a "Test merge" button and the
    // ImGuiFileDialog handlers for "Load recording", "Save recording" and
    // "Save cloud"; none of them is active.
}
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + + +// local +#include "imgui_k4_clouds_scene_drawer.hpp" + +namespace tool::graphics { + + +struct K4PlayerDrawer : public K4CloudsSceneDrawer{ + + auto initialize(const std::vector &models) -> void; + auto set_frame(size_t idC, std::shared_ptr frame) -> void; + auto draw() -> void; + +private: + + std::vector> m_currentFrames; + bool m_redrawClouds = false; + bool m_allTabOpened = false; +}; +} diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_k4_recorder_drawer.cpp b/cpp-projects/3d-engine/imgui-tb/imgui_k4_recorder_drawer.cpp new file mode 100644 index 0000000..ce5fbe6 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_k4_recorder_drawer.cpp @@ -0,0 +1,93 @@ + + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. 
// Implementation of K4RecorderDrawer (declared in imgui_k4_recorder_drawer.hpp).

#include "imgui_k4_recorder_drawer.hpp"

// std
#include <format>
#include <memory>

// local
#include "imgui_ui_drawer.hpp"


using namespace tool::graphics;

/// Initializes one cloud slot per grabber and resets the current frames
/// (one null entry per grabber).
auto K4RecorderDrawer::initialize(size_t nbGrabbers) -> void {
    K4CloudsSceneDrawer::initialize(nbGrabbers);
    // same result as resize + fill with nullptr, in one call
    m_currentFrames.assign(nbGrabbers, nullptr);
}

/// Stores `frame` as the current frame of camera `idC` and requests a redraw
/// when update_from_frame reports that something was updated.
/// Ids outside the initialized range are ignored.
/// NOTE(review): template argument lost in the reviewed text;
/// camera::K4Frame is assumed — confirm.
auto K4RecorderDrawer::set_frame(size_t idC, std::shared_ptr<camera::K4Frame> frame) -> void{

    if(idC >= m_currentFrames.size()){
        return;
    }

    m_currentFrames[idC] = frame;

    if(update_from_frame(idC, m_currentFrames[idC])){
        m_redrawClouds = true;
    }
}

/// Redraws the clouds to the FBO if needed, then draws the "All" tab and the
/// per-grabber tabs. A change of opened tab schedules a redraw for the next
/// call, since display.drawOnlyCloudId is changed by the tab that is opened.
auto K4RecorderDrawer::draw() -> void{

    if(m_redrawClouds || has_to_redraw_clouds()){
        draw_clouds_to_fbo();
    }
    m_redrawClouds = false;

    bool focusWindow = false;

    // draw data
    static ImGuiID tabId = 0;
    if (ImGuiUiDrawer::begin_tab_bar(&tabId, "###display_recorder_tabbar")){

        // all
        bool allTabOpened = false;
        if ((allTabOpened = ImGuiUiDrawer::begin_tab_item("All###display_recorder_all_tabitem"))){
            display.drawOnlyCloudId = -1;
            draw_all_clouds_drawers_in_one_tab();
            ImGui::EndTabItem();
        }

        // per grabber
        int previousCloudIdDisplayed = display.drawOnlyCloudId;
        for(size_t ii = 0; ii < cloudsD.size(); ++ii){
            if (ImGuiUiDrawer::begin_tab_item(std::format("[{}]###display_recorder_per_grabber_tabitem_{}", ii, ii).c_str())){
                display.drawOnlyCloudId = static_cast<int>(ii); // explicit size_t -> int narrowing
                draw_cloud_drawer_tab(ii, focusWindow, "display_recorder"sv);
                ImGui::EndTabItem();
            }
        }
        ImGui::EndTabBar();

        if((allTabOpened != m_allTabOpened) || (previousCloudIdDisplayed != display.drawOnlyCloudId)){
            m_redrawClouds = true;
            m_allTabOpened = allTabOpened;
        }
    }
}
** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "imgui_k4_clouds_scene_drawer.hpp" + +namespace tool::graphics { + +struct K4RecorderDrawer : public K4CloudsSceneDrawer{ + + auto initialize(size_t nbGrabbers) -> void; + auto set_frame(size_t idC, std::shared_ptr frame) -> void; + auto draw() -> void; + +private: + + std::vector> m_currentFrames; + bool m_redrawClouds = false; + bool m_allTabOpened = false; +}; + + +} diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_k4_ui_drawer.cpp b/cpp-projects/3d-engine/imgui-tb/imgui_k4_ui_drawer.cpp new file mode 100644 index 0000000..4f6f4fd --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_k4_ui_drawer.cpp @@ -0,0 +1,1320 @@ + + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "imgui_k4_ui_drawer.hpp" + +// std +#include + +// local +#include "imgui-tb/imgui_ui_drawer.hpp" +#include "imgui/extra/ImGuiFileDialog.h" + +#include "opengl/gl_texture.hpp" + +using namespace tool::graphics; + + +auto K4UIDrawer::draw_filters_tab_item(const std::string &tabItemName, camera::K4Mode mode, camera::K4Filters &filters, bool &autoUpdate) -> std::tuple{ + + if (!ImGuiUiDrawer::begin_tab_item(tabItemName.c_str())){ + return {false, false}; + } + + bool update = false; + + ImGuiUiDrawer::title2("PIXELS"); + { + int minMaxD[2] = {filters.minDepthValue, filters.maxDepthValue}; + auto range = (camera::range(mode)*1000.f).conv(); + if(minMaxD[0] < range.x()){ + minMaxD[0] = range.x(); + } + if(minMaxD[1] > range.y()){ + minMaxD[1] = range.y(); + } + +// ImGui::Text("Mask:"); +// ImGui::Indent(); +// { +// if(ImGui::Button("Fill")){ +// filters.depthMask.fill(false); +// update = true; +// } +// ImGui::SameLine(); +// if(ImGui::Button("Empty")){ +// filters.depthMask.fill(true); +// update = true; +// } +// ImGui::SameLine(); +// ImGui::Text("Pencil"); +// ImGui::SameLine(); +// ImGui::SetNextItemWidth(60.f); + +// if(ImGui::Combo("###settings_pencil_combo", &filters.idPencil, sizesPencilItems, IM_ARRAYSIZE(sizesPencilItems))){ +// update = true; +// } +// } +// ImGui::Unindent(); + + ImGui::Text("Depth (mm):"); + ImGui::Indent(); + if(ImGui::SliderInt2("###settings_depth_min_max_sliderint2", minMaxD, range.x(), range.y())){ + filters.minDepthValue = static_cast(minMaxD[0]); + filters.maxDepthValue = static_cast(minMaxD[1]); + update = true; + } + ImGui::Unindent(); + + int 
minMaxWidth[2] = {static_cast(filters.minWidth), static_cast(filters.maxWidth)}; + auto depthRes = camera::depth_resolution(mode); + if(minMaxWidth[1] > depthRes.x()){ + minMaxWidth[1] = depthRes.x(); + } + ImGui::Text("Width (pixels):"); + ImGui::Indent(); + if(ImGui::SliderInt2("###settings_width_min_max_sliderint2", minMaxWidth, 0, depthRes.x())){ + filters.minWidth = static_cast(minMaxWidth[0]); + filters.maxWidth = static_cast(minMaxWidth[1]); + update = true; + } + ImGui::Unindent(); + + int minMaxHeight[2] = {static_cast(filters.minHeight), static_cast(filters.maxHeight)}; + if(minMaxHeight[1] > depthRes.y()){ + minMaxHeight[1] = depthRes.y(); + } + + ImGui::Text("Height (pixels):"); + ImGui::Indent(); + if(ImGui::SliderInt2("###settings_height_min_max_sliderint2", minMaxHeight, 0, depthRes.y())){ + filters.minHeight = static_cast(minMaxHeight[0]); + filters.maxHeight = static_cast(minMaxHeight[1]); + update = true; + } + ImGui::Unindent(); + + + ImGui::Text("Plane1:"); + ImGui::Indent(); + + int mode = static_cast(filters.p1FMode); + if(ImGui::RadioButton("No filtering###mode_none_plane1", &mode,0)){ + update = true; + filters.p1FMode = camera::K4Filters::PlaneFilteringMode::None; + } + if(ImGui::RadioButton("Remove above###mode_above_plane1", &mode,1)){ + update = true; + filters.p1FMode = camera::K4Filters::PlaneFilteringMode::Above; + } + if(ImGui::RadioButton("Remove below###mode_below_plane1", &mode,2)){ + update = true; + filters.p1FMode = camera::K4Filters::PlaneFilteringMode::Below; + } + + ImGui::Text("Rotation (euler angles):"); + auto rotPtr = filters.p1Rot.array.data(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("x","p1_rot_x", rotPtr, modelRotFs, modelRotDs)){ + update = true; + } + ImGui::SameLine(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("y","p1_rot_y", rotPtr+1, modelRotFs, modelRotDs)){ + update = true; + } + ImGui::SameLine(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("z","p1_rot_z", rotPtr+2, modelRotFs, 
modelRotDs)){ + update = true; + } + ImGui::Text("Translation (mm):"); + auto trPtr = filters.p1Pos.array.data(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("x","p1_tr_x", trPtr, modelTrFs, modelTrDs)){ + update = true; + } + ImGui::SameLine(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("y","p1_tr_y", trPtr+1, modelTrFs, modelTrDs)){ + update = true; + } + ImGui::SameLine(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("z","p1_tr_z", trPtr+2, modelTrFs, modelTrDs)){ + update = true; + } + + ImGui::Unindent(); + } + + ImGuiUiDrawer::title2("COLOR"); + { + ImGui::Text("JPEG compression rate"); + ImGui::SameLine(); + ImGui::SetNextItemWidth(75.f); + int compressionRate = filters.jpegCompressionRate; + if(ImGui::SliderInt("###compression_rate", &compressionRate, 5, 100)){ + filters.jpegCompressionRate = compressionRate; + update = true; + } + + if(ImGui::Checkbox("Filter depth with color###settings_filter_depth_with_color_checkbox", &filters.filterDepthWithColor)){ + update = true; + } + + if(filters.filterDepthWithColor){ + + ImGui::Indent(); + + ImGui::Text("Filtered color"); + ImGui::Indent(); + if(ImGui::ColorEdit3("###settings_filtered_color_coloredit3", filters.filterColor.rgb().array.data())){ + filters.filterColor.clamp(0.f,1.f); + update = true; + } + ImGui::Unindent(); + + + ImGui::Text("Max diff color"); + ImGui::Indent(); + auto mdcd = filters.maxDiffColor.array.data(); + +// static tool::gl::Texture2D tex; +// static std::vector tData; +// if(tData.size() != 100*25*3){ +// tData.resize(100*25*3); +// } + + ImGuiFloatS fs; + fs.defaultValue = 0.f; + fs.min = 0.f; + fs.max = 360.f; + fs.speedDrag = 0.1f; + fs.speedInc = 1.f; + fs.format = "%.1f"; + + ImGuiDragS ds; + ds.widthDrag = 60.f; + ds.defaultButton = false; + ds.decButton = true; + ds.incButton = true; + ds.displayText = true; + ds.displayTextLeft = true; + if(ImGuiUiDrawer::draw_drag_float_with_buttons("Hue","hue", mdcd, fs, ds)){ + update = true; + } +// ImGui::SameLine(); +// 
tex.init_or_update_8ui(100,25,3, tData.data()); +// ImGui::Image(tex.id(), {100,25}, ImVec2(0,0), ImVec2(1,1)); + + fs.speedDrag = 0.001f; + fs.speedInc = 0.01f; + fs.max = 1.f; + fs.format = "%.3f"; + if(ImGuiUiDrawer::draw_drag_float_with_buttons("Saturation","saturation", mdcd+1, fs, ds)){ + update = true; + } +// ImGui::SameLine(); +// tex.init_or_update_8ui(100,25,3, tData.data()); +// ImGui::Image(tex.id(), {100,25}, ImVec2(0,0), ImVec2(1,1)); + + if(ImGuiUiDrawer::draw_drag_float_with_buttons("Value","value", mdcd+2, fs, ds)){ + update = true; + } +// ImGui::SameLine(); +// tex.init_or_update_8ui(100,25,3, tData.data()); +// ImGui::Image(tex.id(), {100,25}, ImVec2(0,0), ImVec2(1,1)); + + ImGui::Unindent(); + } + } + + ImGuiUiDrawer::title2("GEOMETRY"); + { + + ImGuiIntS is; + is.defaultValue = 1; + is.min = 1; + is.max = 8; + is.speedInc =1; + is.speedDrag = 1; + + ImGuiFloatS fs; + fs.defaultValue = 10.f; + fs.min = 0.f; + fs.max = 100.f; + fs.speedInc = 1.f; + fs.speedDrag = 0.1f; + fs.format = "%.1f"; + + ImGuiDragS ds; + ds.widthDrag = 60.f; + ds.decButton = true; + ds.incButton = true; + ds.displayText = true; + ds.displayTextLeft = true; + ds.defaultButton = false; + + if(ImGui::Checkbox("Do local depth difference filtering", &filters.doLocalDiffFiltering)){ + update = true; + } + + ImGui::Indent(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("Max value:","settings_local_diff_dragfloat", &filters.maxLocalDiff, fs, ds)){ + update = true; + } + ImGui::Unindent(); + + if(ImGui::Checkbox("Do minimum neighbours filtering", &filters.doMinNeighboursFiltering)){ + update = true; + } + ImGui::Indent(); + ds.widthDrag = 40.f; + int nbMinN = filters.nbMinNeighbours; + if(ImGuiUiDrawer::draw_drag_int_with_buttons("Min nb of neighbours","settings_nb_min_neighbours", &nbMinN, is, ds)){ + filters.nbMinNeighbours = nbMinN; + update = true; + } + + is.max = 10; + int nbMinL = filters.minNeighboursLoops; + 
if(ImGuiUiDrawer::draw_drag_int_with_buttons("Loops:","settings_min_neighbours_nb_loop", &nbMinL, is, ds)){ + filters.minNeighboursLoops = nbMinL; + update = true; + } + ImGui::Unindent(); + + + if(ImGui::Checkbox("Do erosion", &filters.doErosion)){ + update = true; + } + ImGui::Indent(); + int nbErosionL = filters.erosionLoops; + if(ImGuiUiDrawer::draw_drag_int_with_buttons("Loops:","settings_erosion_nb_loop", &nbErosionL, is, ds)){ + filters.erosionLoops = nbErosionL; + update = true; + } + ImGui::Unindent(); + + if(ImGui::Checkbox("Keep only bigger cluster###settings_keep_only_biggest_cluster_checkbox", &filters.keepOnlyBiggestCluster)){ + update = true; + } + } + + ImGuiUiDrawer::title2("INVALIDATE"); + { + if(ImGui::Checkbox("Color from depth###settings_invalidate_color_from_depth_checkbox", &filters.invalidateColorFromDepth)){ + update = true; + } + + ImGui::SameLine(); + if(ImGui::Checkbox("Infra from depth###settings_invalidate_infra_from_depth_checkbox", &filters.invalidateInfraFromDepth)){ + update = true; + } + } + + ImGui::EndTabItem(); + + return {true, update}; +} + +auto K4UIDrawer::draw_scene_display_setings_tab_item(const std::string &tabItemName, camera::K4SceneDisplaySettings &display, bool &autoUpdate) -> bool { + + if (!ImGui::BeginTabItem(tabItemName.c_str())){ + return false; + } + + bool update = false; + + if(ImGui::ColorEdit4("Background color###background_scene_color", display.backgroundColor.array.data())){ + update = true; + } + + ImGui::Spacing(); + ImGui::Separator(); + + bool manualUpdate = false; + if(ImGui::Button("Update###scene_display_update_button")){ + manualUpdate = true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("Auto update###scene_display_auto_update_cb", &autoUpdate)){} + + ImGui::EndTabItem(); + + return (update && autoUpdate) || manualUpdate; +} + +auto K4UIDrawer::draw_cloud_display_setings_tab_item(const std::string &tabItemName, camera::K4CloudDisplaySettings &display, bool &autoUpdate) -> bool { + + if 
(!ImGuiUiDrawer::begin_tab_item(tabItemName.c_str())){ + return false; + } + + bool update = false; + + if(ImGui::Checkbox("Cloud visible###display_cloud_visible", &display.cloudVisible)){ + update = true; + } + if(ImGui::Checkbox("Force cloud color###display_force_cloud_color", &display.forceCloudColor)){ + update = true; + } + if(ImGui::ColorEdit4("Cloud color###display_cloud_color", display.cloudColor.array.data())){ + update = true; + } + ImGui::SetNextItemWidth(100.f); + if(ImGui::DragFloat("Color factor", &display.factorUnicolor, 0.01f, 0.f, 1.f)){ + update = true; + } + + ImGui::Separator(); + if(ImGui::Checkbox("Use voxels###display_use_voxels", &display.useVoxels)){ + update = true; + } + + ImGui::SetNextItemWidth(100.f); + if(ImGui::DragFloat("Size points###display_size_points", &display.sizePoints, 0.1f, 0.1f, 30.f, "%.1f")){ + update = true; + } + + ImGuiDragS voxelsD = {100.f, true, true, true, true, false}; + ImGuiFloatS voxelFS = {0.005f, 0.001f, 0.05f, 0.0001f, 0.001f}; + + float sizeV = display.sizeVoxels; + if(ImGuiUiDrawer::draw_drag_float_with_buttons("Size voxel","display_size_voxels", &sizeV, voxelFS, voxelsD)){ + update = true; + display.sizeVoxels = sizeV; + } + + ImGui::Spacing(); + ImGui::Separator(); + + bool manualUpdate = false; + if(ImGui::Button("Update###display_cloud_update_button")){ + manualUpdate = true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("Auto update###display_cloud_auto_update_cb", &autoUpdate)){} + + ImGui::EndTabItem(); + + return (update && autoUpdate) || manualUpdate; +} + +auto K4UIDrawer::draw_calibration_tab_item(const std::string &tabItemName, camera::K4Model &model, bool &autoUpdate) -> bool{ + + if (!ImGuiUiDrawer::begin_tab_item(tabItemName.c_str())){ + return false; + } + + bool update = false; + auto d = model.transformation.array.data(); + + ImGuiUiDrawer::title2("Calibration matrix"); + if(ImGui::DragFloat4("###r0", d, 0.001f)){ + update = true; + } + if(ImGui::DragFloat4("###r1", d + 4, 0.001f)){ + 
update = true; + } + if(ImGui::DragFloat4("###r2", d + 8, 0.001f)){ + update = true; + } + if(ImGui::DragFloat4("###r3", d + 12, 0.001f)){ + update = true; + } + + if(ImGui::Button("Reset###model_matrix_reset_button")){ + model.transformation = geo::Mat4f::identity(); + update = true; + } + ImGui::Spacing(); + ImGui::Separator(); + + ImGuiUiDrawer::title2("Additional transformation"); + + ImGui::Spacing(); + ImGui::Text("Transformation to be aplied on the model matrix"); + ImGui::Text("Rotation (euler angles):"); + + auto rotPtr = model.rotation.array.data(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("x","rot_x", rotPtr, modelRotFs, modelRotDs)){ + update = true; + } + ImGui::SameLine(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("y","rot_y", rotPtr+1, modelRotFs, modelRotDs)){ + update = true; + } + ImGui::SameLine(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("z","rot_z", rotPtr+2, modelRotFs, modelRotDs)){ + update = true; + } + + ImGui::Text("Translation (mm):"); + auto trPtr = model.translation.array.data(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("x","tr_x", trPtr, modelTrFs, modelTrDs)){ + update = true; + } + ImGui::SameLine(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("y","tr_y", trPtr+1, modelTrFs, modelTrDs)){ + update = true; + } + ImGui::SameLine(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("z","tr_z", trPtr+2, modelTrFs, modelTrDs)){ + update = true; + } + + ImGui::Text("Scaling:"); + auto scPtr = model.scaling.array.data(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("x","sc_x", scPtr, modelScFs, modelScDs)){ + update = true; + } + ImGui::SameLine(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("y","sc_y", scPtr+1, modelScFs, modelScDs)){ + update = true; + } + ImGui::SameLine(); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("z","sc_z", scPtr+2, modelScFs, modelScDs)){ + update = true; + } + + if(ImGui::Button("Apply###model_apply_transformation_button")){ + model.transformation = 
geo::transform(model.scaling,model.rotation,model.translation) * model.transformation;
+        model.rotation = {};
+        model.translation = {};
+        model.scaling = {1.f,1.f,1.f};
+        update = true;
+    }
+    ImGui::SameLine();
+    if(ImGui::Button("Reset###model_reset_transformation_button")){
+        model.rotation = {};
+        model.translation = {};
+        model.scaling = {1.f,1.f,1.f};
+        update = true;
+    }
+
+    ImGui::EndTabItem();
+
+    return update;
+}
+
+// Draws the recorder tab: start/stop/reset buttons, per-camera frame counters,
+// a timeline scrubber, a "Save" file dialog and the recorder settings.
+// Button presses are reported by raising the matching request flag in rStates
+// (startRecording, stopRecording, resetRecording, moveTime).
+// Returns false when begin_tab_item() reports the tab is not drawn; otherwise
+// true when a setting changed while autoUpdate is checked, or when the
+// "Update" button was pressed.
+auto K4UIDrawer::draw_recording_tab_item(
+    const std::string &tabItemName,
+    camera::K4RecorderStates &rStates,
+    camera::K4RecorderSettings &rSettings,
+    bool &autoUpdate) -> bool{
+
+
+    if (!ImGuiUiDrawer::begin_tab_item(tabItemName.data())){
+        return false;
+    }
+
+    ImGuiUiDrawer::title2("ACTIONS");
+    if(!rStates.isRecording){
+        if(ImGui::Button("Start")){
+            rStates.startRecording = true;
+        }
+    }else{
+        if(ImGui::Button("Stop")){
+            rStates.stopRecording = true;
+        }
+    }
+    ImGui::SameLine();
+    if(!rStates.isRecording){
+        if(ImGui::Button("Reset")){
+            rStates.resetRecording = true;
+        }
+    }
+
+    // duration is divided by 1000.0 for display
+    ImGuiUiDrawer::text(std::format("Recording duration: {:6.4}", rStates.duration/1000.0));
+
+    ImGui::Text("Nb frames recorded:");
+    ImGui::Indent();
+    // NOTE(review): currentFrames is indexed with nbFramesRecorded's size;
+    // this assumes both containers have the same length - confirm at the producer.
+    for(size_t ii = 0; ii < rStates.nbFramesRecorded.size(); ++ii){
+        ImGuiUiDrawer::text(std::format("Camera {}: Count: {} Current frame id: {}", ii, rStates.nbFramesRecorded[ii], rStates.currentFrames[ii]));
+    }
+    ImGui::Unindent();
+
+    // Timeline scrubber bounds: [0, recording duration]
+    ImGuiIntS iSettings;
+    iSettings.min = 0;
+    iSettings.max = static_cast<int>(rStates.duration);
+    iSettings.speedInc = 30;
+    iSettings.speedDrag = 10;
+
+    ImGuiDragS dSettings;
+    dSettings.widthDrag = 150;
+    dSettings.decButton = true;
+    dSettings.incButton = true;
+    dSettings.displayText = true;
+    dSettings.defaultButton = false;
+    dSettings.displayTextLeft = true;
+
+    // The widget edits an int copy; the state is only written back when the user moved it.
+    int currentTime = static_cast<int>(rStates.currentTime);
+    if(ImGuiUiDrawer::draw_drag_int_with_buttons("Timeline", "recorder_timeline", &currentTime, iSettings, dSettings)){
+        rStates.currentTime = currentTime;
+        
rStates.moveTime = true; + } + + if(ImGui::Button("Save")){ + ImGuiFileDialog::Instance()->OpenDialog("Save recording", "Choose file to save", ".kvid", "."); + } + + ImGuiUiDrawer::title2("SETTINGS"); + bool update = false; + ImGui::SetNextItemWidth(50.f); + if(ImGui::DragInt("Max number of frames per camera", &rSettings.cameraMaxFramesToRecord, 1.0f, 1, 100000)){ + update = true; + } + +// double maxDurationS = 500.; +// // output +// std::vector camerasToRecord; +// bool recordAllData = true; +// bool recordColor = true; +// bool recordDepth = true; +// bool recordInfra = true; +// bool recordCloud = true; +// bool recordIMU = true; +// bool recordAudio = true; +// bool recordBodies = true; + + ImGui::Spacing(); + ImGui::Separator(); + ImGui::Spacing(); + bool manualUpdate = false; + if(ImGui::Button("Update###recording_update_button")){ + manualUpdate = true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("Auto update###recording_auto_update_cb", &autoUpdate)){} + + + ImGui::EndTabItem(); + + return (update && autoUpdate) || manualUpdate; +} + +auto K4UIDrawer::draw_player_tab_item( + const std::string &tabItemName, + camera::K4PlayerStates &pStates, + camera::K4PlayerSettings &pSettings, + bool &autoUpdate) -> bool{ + + if (!ImGuiUiDrawer::begin_tab_item(tabItemName.data())){ + return false; + } + + ImGuiUiDrawer::title2("ACTIONS"); + + if(!pStates.isPlaying){ + if(ImGui::Button("Play")){ + pStates.play = true; + } + }else{ + if(ImGui::Button("Pause")){ + pStates.pause = true; + } + } + ImGui::SameLine(); + if(ImGui::Button("Restart")){ + pStates.restart = true; + } + + ImGuiUiDrawer::text(std::format("Current time: {:6.4}", pStates.currentTime/1000.0)); + ImGui::SameLine(); + ImGuiUiDrawer::text(std::format("Duration: [{:6.4}]", pStates.duration/1000.0)); + + ImGui::Text("Frames:"); + ImGui::Indent(); + for(size_t ii = 0; ii < pStates.nbFrames.size(); ++ii){ + ImGuiUiDrawer::text(std::format("Camera {}: Count: {} Current frame id: {}", ii, pStates.nbFrames[ii], 
pStates.currentFrames[ii]));
+    }
+    ImGui::Unindent();
+
+    // Timeline scrubber bounds: [0, video duration]
+    ImGuiIntS iSettings;
+    iSettings.min = 0;
+    iSettings.max = static_cast<int>(pStates.duration);
+    iSettings.speedInc = 30;
+    iSettings.speedDrag = 10;
+
+    ImGuiDragS dSettings;
+    dSettings.widthDrag = 150;
+    dSettings.decButton = true;
+    dSettings.incButton = true;
+    dSettings.displayText = true;
+    dSettings.defaultButton = false;
+    dSettings.displayTextLeft = true;
+
+    // The widget edits an int copy; the state is only written back (and
+    // moveTime raised) when the user actually moved the timeline.
+    int currentTime = static_cast<int>(pStates.currentTime);
+    if(ImGuiUiDrawer::draw_drag_int_with_buttons("Timeline", "player_timeline", &currentTime, iSettings, dSettings)){
+        pStates.moveTime = true;
+        pStates.currentTime = currentTime;
+    }
+
+    // Edit actions: each button only raises a request flag in pStates.
+    ImGui::Text("Edit");
+    ImGui::Indent();
+    if(ImGui::Button("Remove until current frame")){
+        pStates.removeUntil = true;
+    }
+    if(ImGui::Button("Remove after current frame")){
+        pStates.removeAfter = true;
+    }
+    if(ImGui::Button("Merge")){
+        pStates.merge = true;
+    }
+    if(ImGui::Button("Info")){
+        pStates.info = true;
+    }
+    ImGui::Unindent();
+
+    // I/O actions: each button opens a file dialog identified by its first argument.
+    ImGui::Text("I/O");
+    ImGui::Indent();
+    if(ImGui::Button("Load video")){
+        ImGuiFileDialog::Instance()->OpenDialog("Load video", "Choose video to load", ".kvid", ".");
+    }
+    ImGui::SameLine();
+    if(ImGui::Button("Save video")){
+        ImGuiFileDialog::Instance()->OpenDialog("Save video", "Choose video to save", ".kvid", ".");
+    }
+    ImGui::SameLine();
+    if(ImGui::Button("Save cloud")){
+        ImGuiFileDialog::Instance()->OpenDialog("Save cloud", "Choose cloud to save", ".obj", ".");
+    }
+    ImGui::Unindent();
+
+    ImGuiUiDrawer::title2("SETTINGS");
+    bool update = false;
+
+    if(ImGui::Checkbox("Do loop", &pSettings.doLoop)){
+        update = true;
+    }
+
+    bool manualUpdate = false;
+    if(ImGui::Button("Update###player_update_button")){
+        manualUpdate = true;
+    }
+    ImGui::SameLine();
+    if(ImGui::Checkbox("Auto update###player_auto_update_cb", &autoUpdate)){}
+
+
+    ImGui::EndTabItem();
+
+    // A changed setting is only reported when auto update is on; the button always reports.
+    return (update && autoUpdate) || manualUpdate;
+}
+
+auto K4UIDrawer::draw_calibrator_tab_item(
+    const 
std::string &tabItemName, + bool useNormalFilteringSettings, + camera::K4CalibratorStates &cStates, + K4CalibratorDrawerSettings &cdSettings, + camera::K4CalibratorSettings &cSettings, + bool &autoUpdate) -> bool { + + if (!ImGuiUiDrawer::begin_tab_item(tabItemName.data())){ + return false; + } + bool update = false; + + int filteringMode = useNormalFilteringSettings ? 0 : 1; + ImGui::Text("Use filtering mode:"); + ImGui::SameLine(); + if(ImGui::RadioButton("Normal###filters_normal_mode", &filteringMode, 0)){ + cStates.updateFilteringMode = true; + cStates.filteringMode = filteringMode; + } + ImGui::SameLine(); + if(ImGui::RadioButton("Calibration###filters_calibration_mode", &filteringMode, 1)){ + cStates.updateFilteringMode = true; + cStates.filteringMode = filteringMode; + } + ImGui::Spacing(); + + + ImGuiDragS ds; + ds.widthDrag = 80.f; + ds.defaultButton = true; + ds.displayTextLeft = false; + + ImGuiUiDrawer::title2("GRABBERS TO USE"); + { + ImGui::Text("Model:"); + ImGui::SameLine(); + ImGui::SetNextItemWidth(60); + if(ImGuiUiDrawer::combo("###model_calibrator", &cSettings.modelSelectionId, cSettings.models)){ + update = true; + cStates.updateDisplaySettings = true; + } + + ImGui::SameLine(); + ImGui::Text("Source:"); + ImGui::SameLine(); + ImGui::SetNextItemWidth(60); + if(ImGuiUiDrawer::combo("###source_calibrator", &cSettings.sourceSelectionId, cSettings.sources)){ + update = true; + cStates.updateDisplaySettings = true; + } + } + + ImGuiUiDrawer::title2("CAPTURE"); + { + int duration = static_cast(cSettings.durationMs); + if(ImGuiUiDrawer::draw_drag_int_with_buttons("Duration (ms)", "capture duration", &duration, {5000, 100, 120000, 100, 1000}, ds)){ + cSettings.durationMs = duration; + update = true; + } + + int timeToWait = static_cast(cSettings.timeToWaitBeforeRegisteringMs); + if(ImGuiUiDrawer::draw_drag_int_with_buttons("Time to wait before registering (ms)", "capture wait", &timeToWait, {0, 100, 120000, 100, 1000}, ds)){ + 
cSettings.timeToWaitBeforeRegisteringMs = timeToWait; + update = true; + } + + if(ImGuiUiDrawer::draw_drag_int_with_buttons("Max number of frames", "capture max frames nb", &cSettings.maxFramesToRecord, {1000, 1, 10000, 1, 100}, ds)){ + update = true; + } + + ImGuiUiDrawer::text(std::format("Time elasped [{}] ms", cStates.elapsedTime.count())); + ImGuiUiDrawer::text("Frames registered count:"sv); + for(size_t ii = 0; ii < cStates.nbFramesRegistered.size(); ++ii){ + ImGuiUiDrawer::text(std::format("Cloud [{}], nb frames [{}]", ii, cStates.nbFramesRegistered[ii])); + } + + if(ImGui::Button("Reset")){ + cStates.resetCalibration = true; + } + ImGui::SameLine(); + if(!cStates.isRegistering){ + if(ImGui::Button("Start")){ + cStates.startCalibration = true; + } + }else{ + if(ImGui::Button("Stop")){ + cStates.stopCalibration = true; + } + } + } + + ImGuiUiDrawer::title2("PROCESSING"); + { + if(ImGui::Checkbox("Use processed clouds", &cSettings.useProcessed)){ + update = true; + cStates.recomputeRegisteringProcessing = true; + } + + if(ImGui::Checkbox("Remove outliers", &cSettings.removeOutliers)){ + update = true; + cStates.recomputeRegisteringProcessing = true; + } + ImGui::SameLine(); + ImGui::Text("with max distance"); + ImGui::SameLine(); + ImGui::SetNextItemWidth(50); + if(ImGui::DragFloat("###Max distance of outliers", &cSettings.maxDistanceOutlier, 0.01f, 0.01f, 2.f)){ + update = true; + cStates.recomputeRegisteringProcessing = true; + } + + ImGui::Spacing(); + + if(ImGui::Checkbox("Downsample", &cSettings.downSample)){ + update = true; + cStates.recomputeRegisteringProcessing = true; + } + ImGui::SameLine(); + ImGui::Text("with voxel size"); + ImGui::SameLine(); + ImGui::SetNextItemWidth(50); + float dVS = static_cast(cSettings.downSampleVoxelSize); + if(ImGui::DragFloat("###Downsample voxel size", &dVS, 0.01f, 0.01f, 1.f)){ + cSettings.downSampleVoxelSize = dVS; + update = true; + cStates.recomputeRegisteringProcessing = true; + } + + if(ImGui::Checkbox("Compute 
sphere center", &cSettings.computeSphereCenter)){ + update = true; + cStates.recomputeRegisteringProcessing = true; + } + ImGui::SameLine(); + ImGui::Text("with ray"); + ImGui::SameLine(); + ImGui::SetNextItemWidth(50); + if(ImGui::DragFloat("###Sphere ray", &cSettings.ballRay, 0.01f, 0.01f, 2.f)){ + update = true; + cStates.recomputeRegisteringProcessing = true; + } + } + + ImGuiUiDrawer::title2("DISPLAY"); + { + if(ImGui::Checkbox("Calibration clouds", &cdSettings.displayCalibrationCloud)){ + cStates.updateDisplaySettings = true; + update = true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("Processed clouds", &cdSettings.displayProcessedCloud)){ + cStates.updateDisplaySettings = true; + update = true; + } + } + + ImGuiUiDrawer::title2("CALIBRATION"); + { + ImGui::Text("Registation:"); + ImGui::SameLine(); + if (ImGui::RadioButton("RANSAC", cSettings.doRansac)) { + cSettings.doRansac = true; + update = true; + } + ImGui::SameLine(); + if(ImGui::RadioButton("ICP", !cSettings.doRansac)){ + cSettings.doRansac = false; + update = true; + } + + ImGui::Checkbox("Show advanced parameters", &cSettings.advancedParameters); + if(cSettings.advancedParameters){ + if(cSettings.doRansac){ + + if(ImGuiUiDrawer::draw_drag_int_with_buttons("Nb Tries", "Ransac nb tries", &cSettings.ransac.nbTries, {10,1,1000, 1, 1}, ds)){ + update = true; + } + if(ImGuiUiDrawer::draw_drag_int_with_buttons("N", "Ransac N", &cSettings.ransac.N, {3,1,100, 1, 1}, ds)){ + update = true; + } + if(ImGuiUiDrawer::draw_drag_int_with_buttons("Max iteration", "Ransac max ite", &cSettings.ransac.maxIteration, {100000,1,100000000, 1, 1000}, ds)){ + update = true; + } + + ImGuiFloatS fs; + fs.defaultValue = 0.999f; + fs.min = 0.009f; + fs.max = 0.99999f; + fs.speedDrag = 0.00001f; + fs.speedInc = 0.001f; + fs.format = "%.5f"; + + float confidence = static_cast(cSettings.ransac.confidence); + if(ImGuiUiDrawer::draw_drag_float_with_buttons("Confidence","Ransac confidence", &confidence, fs, ds)){ + 
cSettings.ransac.confidence = confidence;
+                    update = true;
+                }
+
+                // Reuses the confidence slider settings (fs) for the distance threshold.
+                float distanceThreshold = static_cast<float>(cSettings.ransac.distanceThreshold);
+                if(ImGuiUiDrawer::draw_drag_float_with_buttons("Distance threshold","Ransac dist thresh", &distanceThreshold, fs, ds)){
+                    cSettings.ransac.distanceThreshold = distanceThreshold;
+                    update = true;
+                }
+
+                // Narrower range and coarser step for the similarities threshold.
+                fs.defaultValue = 0.9f;
+                fs.min = 0.8f;
+                fs.max = 0.99f;
+                fs.speedDrag = 0.001f;
+                fs.speedInc = 0.01f;
+                fs.format = "%.2f";
+                float similaritiesThreshold = static_cast<float>(cSettings.ransac.similaritiesThreshold);
+                if(ImGuiUiDrawer::draw_drag_float_with_buttons("Similarities threshold","Ransac sim thresh", &similaritiesThreshold, fs, ds)){
+                    cSettings.ransac.similaritiesThreshold = similaritiesThreshold;
+                    update = true;
+                }
+
+                if(ImGui::Checkbox("Mutal filter", &cSettings.ransac.mutualFilter)){
+                    update = true;
+                }
+                // No ImGui::Unindent() here: there is no matching ImGui::Indent() in this
+                // function, so unindenting would shift every following widget to the left.
+            }else{
+
+                // ICP parameters
+                if(ImGuiUiDrawer::draw_drag_int_with_buttons("Max iteration", "ICP max it", &cSettings.icpMaxIteration, {30,1,1000, 1, 1}, ds)){
+                    update = true;
+                }
+
+                ImGuiFloatS fs;
+                fs.defaultValue = 0.999f;
+                fs.min = 0.009f;
+                fs.max = 0.99999f;
+                fs.speedDrag = 0.00001f;
+                fs.speedInc = 0.001f;
+                fs.format = "%.5f";
+
+                // The widget edits a float copy; the setting is written back only on change.
+                float maxDistCorr = static_cast<float>(cSettings.icpMaxDistanceCorr);
+                if(ImGuiUiDrawer::draw_drag_float_with_buttons("Max distance correspondance","ICP max dist corr", &maxDistCorr, fs, ds)){
+                    cSettings.icpMaxDistanceCorr = maxDistCorr;
+                    update = true;
+                }
+            }
+        }
+
+        // These buttons raise request flags only; they do not mark the settings as changed.
+        if(ImGui::Button("Calibrate")){
+            cStates.calibrate = true;
+        }
+        ImGui::SameLine();
+        if(ImGui::Button("Validate calibration")){
+            cStates.validateCalibration = true;
+        }
+    }
+
+    bool manualUpdate = false;
+    if(ImGui::Button("Update###calibration_update_button")){
+        manualUpdate = true;
+    }
+    ImGui::SameLine();
+    if(ImGui::Checkbox("Auto update###calibration_auto_update_cb", &autoUpdate)){}
+
+    ImGui::EndTabItem();
+
+    // A changed setting is only reported when auto update is on; the button always reports.
+    return (update && autoUpdate) || manualUpdate;
+}
+
+auto 
K4UIDrawer::draw_device_settings_tab_item(
+    const std::string &tabItemName,
+    const std::vector<std::string> &devicesName,
+    camera::K4DeviceSettings &device,
+    bool &updateDeviceList,
+    bool &autoUpdate) -> bool{
+
+    if (!ImGuiUiDrawer::begin_tab_item(tabItemName.c_str())){
+        return false;
+    }
+    bool update = false;
+
+    // Each section sets `update` when one of its widgets was edited.
+    draw_config(devicesName, device.configS, updateDeviceList, update);
+    draw_data_settings(device.dataS, update);
+    draw_actions_settings(device.actionsS, update);
+
+    ImGui::Spacing();
+    ImGui::Separator();
+
+    bool manualUpdate = false;
+    if(ImGui::Button("Update###settings_update_button")){
+        manualUpdate = true;
+    }
+    ImGui::SameLine();
+    if(ImGui::Checkbox("Auto update###settings_auto_update_cb", &autoUpdate)){}
+
+    ImGui::EndTabItem();
+
+    // A changed setting is only reported when auto update is on; the button always reports.
+    return (update && autoUpdate) || manualUpdate;
+}
+
+// Draws the "CONFIG" section: device selection, capture mode, synch mode,
+// subordinate delay, color/depth synchronisation and LED toggle.
+// devicesName      labels shown in the device combo (may be empty)
+// config           settings edited in place
+// updateDeviceList overwritten each call with the state of the
+//                  "Refresh devices list" button
+// updateP          set to true when any setting was edited; never reset here
+auto K4UIDrawer::draw_config(const std::vector<std::string> &devicesName, camera::K4ConfigSettings &config, bool &updateDeviceList, bool &updateP) -> void{
+
+    ImGui::Spacing();
+    ImGuiUiDrawer::text_centered("CONFIG");
+    ImGui::Separator();
+
+    ImGui::Spacing();
+    ImGui::Text("Device id:");
+    ImGui::Indent();
+    ImGui::SetNextItemWidth(150.f);
+
+    if(!devicesName.empty()){
+        // config.idDevice may not be a valid index into devicesName (e.g. the list
+        // shrank after a refresh): show an empty preview instead of reading out of range.
+        const auto currentId = static_cast<size_t>(config.idDevice);
+        const char *preview = currentId < devicesName.size() ? devicesName[currentId].c_str() : "";
+        if(ImGui::BeginCombo("###settings_device_id", preview)){
+            for(size_t ii = 0; ii < devicesName.size(); ++ii){
+                const bool selected = (ii == currentId);
+                if (ImGui::Selectable(devicesName[ii].c_str(),selected)){
+                    config.idDevice = static_cast<decltype(config.idDevice)>(ii);
+                    updateP = true;
+                }
+                if(selected){
+                    ImGui::SetItemDefaultFocus();
+                }
+            }
+            ImGui::EndCombo();
+        }
+    }else{
+        ImGui::Text("No device found.");
+    }
+
+    updateDeviceList = ImGui::Button("Refresh devices list");
+    ImGui::Unindent();
+
+    ImGui::Spacing();
+    ImGui::Text("Mode:");
+    ImGui::Indent();
+    // The mode is edited through its index in modeItems.
+    int guiCurrentModeSelection = static_cast<int>(config.mode);
+
+    if(ImGui::Button("<###settings_mode_left")){
+        if(guiCurrentModeSelection > 0){
+            --guiCurrentModeSelection;
+            updateP = true;
+            config.mode = static_cast<decltype(config.mode)>(guiCurrentModeSelection);
+        }
+    }
+    ImGui::SameLine();
+    ImGui::SetNextItemWidth(150.f);
+    if(ImGui::Combo("###settings_mode_combo", &guiCurrentModeSelection, modeItems, IM_ARRAYSIZE(modeItems))){
+        updateP = true;
+        config.mode = static_cast<decltype(config.mode)>(guiCurrentModeSelection);
+    }
+    ImGui::SameLine();
+    if(ImGui::Button(">###settings_mode_right")){
+        if(guiCurrentModeSelection < IM_ARRAYSIZE(modeItems)-1){
+            ++guiCurrentModeSelection;
+            updateP = true;
+            config.mode = static_cast<decltype(config.mode)>(guiCurrentModeSelection);
+        }
+    }
+
+    ImGui::Unindent();
+    ImGui::Spacing();
+
+    ImGui::Text("Synch mode:");
+    ImGui::Indent();
+    ImGui::SetNextItemWidth(100.f);
+    // The synch mode is edited through its index in synchItems.
+    int guiCurrentSynchModeSelection = static_cast<int>(config.synchMode);
+    if(ImGui::Combo("###settings_synch_mode_combo", &guiCurrentSynchModeSelection, synchItems, IM_ARRAYSIZE(synchItems))){
+        updateP = true;
+        config.synchMode = static_cast<decltype(config.synchMode)>(guiCurrentSynchModeSelection);
+    }
+    ImGui::Unindent();
+
+    ImGui::Text("Subordinate delay (usec):");
+    ImGui::SetNextItemWidth(80.f);
+    ImGui::Indent();
+    if(ImGui::DragInt("###subordinate_delay_usec", &config.subordinateDelayUsec, 1.0f, 0, 100000)){
+        updateP = true;
+    }
+    ImGui::Unindent();
+    ImGui::Spacing();
+
+    if(ImGui::Checkbox("Synch color and depth images", &config.synchronizeColorAndDepth)){
+        updateP = true;
+    }
+
+    ImGui::Text("Delay between color and depth (usec)");
+    ImGui::SetNextItemWidth(80.f);
+    ImGui::Indent();
+    // Symmetric range: with min == max ImGui applies no bound at all.
+    // NOTE(review): negative values presumably mean depth is captured before
+    // color, as in the Azure Kinect SDK - confirm against the device code.
+    if(ImGui::DragInt("###delayt_between_color_and_depth_usec", &config.delayBetweenColorAndDepthUsec, 1.0f, -100000, 100000)){
+        updateP = true;
+    }
+    ImGui::Unindent();
+
+    ImGui::Spacing();
+    if(ImGui::Checkbox("Disable LED", &config.disableLED)){
+        updateP = true;
+    }
+
+    ImGui::Spacing();
+}
+
+auto K4UIDrawer::draw_data_settings(camera::K4DataSettings &data, bool &updateP) -> void{
+
+    ImGui::Spacing();
+    ImGuiUiDrawer::text_centered("DATA");
+    ImGui::Separator();
+
+    ImGui::Spacing();
+    ImGui::Text("Capture:");
+    if(ImGui::Checkbox("audio###settings_capture_audio", &data.captureAudio)){
+        updateP = 
true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("IMU###settings_capture_imu", &data.captureIMU)){ + updateP = true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("bodies (GPU-heavy)###settings_capture_bodies", &data.captureBodies)){ + updateP = true; + } + + ImGui::Spacing(); + ImGui::Text("Send:"); + if(ImGui::Checkbox("RGB###settings_send_rgb", &data.sendColor)){ + updateP = true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("Depth###settings_send_depth", &data.sendDepth)){ + updateP = true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("Infra###settings_send_infra", &data.sendInfra)){ + updateP = true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("Cloud###settings_send_cloud", &data.sendCloud)){ + updateP = true; + } + + if(ImGui::Checkbox("IMU###settings_send_imu", &data.sendIMU)){ + updateP = true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("Audio###settings_send_audio", &data.sendAudio)){ + updateP = true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("Bodies###settings_send_bodies", &data.sendBodies)){ + updateP = true; + } + + ImGui::Spacing(); + ImGui::Text("Display locally on grabber (disable for efficiency):"); + if(ImGui::Checkbox("RGB###settings_display_rgb", &data.generateRGBLocalFrame)){ + updateP = true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("Depth###settings_display_depth", &data.generateDepthLocalFrame)){ + updateP = true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("Infra###settings_display_infra", &data.generateInfraLocalFrame)){ + updateP = true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("Cloud###settings_display_cloud", &data.generateCloudLocal)){ + updateP = true; + } +} + +auto K4UIDrawer::draw_actions_settings(camera::K4ActionsSettings &actions, bool &updateP) -> void{ + + ImGui::Spacing(); + ImGuiUiDrawer::text_centered("ACTIONS TO DO"); + ImGui::Separator(); + ImGui::Spacing(); + + if(ImGui::Checkbox("Start device###settings_start_device", &actions.startDevice)){ + updateP = true; + } + ImGui::SameLine(); + 
if(ImGui::Checkbox("Open camera###settings_open_camera", &actions.openCamera)){ + updateP = true; + } + + ImGui::Spacing(); +} + + +auto K4UIDrawer::draw_colors_settings_tab_item(const std::string &tabItemName, camera::K4ColorSettings &colors, bool &autoUpdate) -> bool{ + + if (!ImGuiUiDrawer::begin_tab_item(tabItemName.c_str())){ + return false; + } + + int guiSel = static_cast(colors.exposureTimeAbsolute); + bool update = false; + + ImGuiUiDrawer::title2("CAMERA HARDWARE SETTINGS"); + if(ImGui::Button("D###default_exposure_time")){ + colors.exposureTimeAbsolute = 5; + update = true; + } + ImGui::SameLine(); + ImGui::SetNextItemWidth(100); + if(ImGui::Combo("###exposure_time", &guiSel, exposureTimesMicroSTimes, IM_ARRAYSIZE(exposureTimesMicroSTimes))){ + colors.exposureTimeAbsolute = guiSel; + update = true; + } + ImGui::SameLine(); + ImGui::Text("Exposure time"); + ImGui::SameLine(); + if(ImGui::Checkbox("Auto###auto_exposure_time", &colors.autoExposureTime)){ + update = true; + } + ImGui::Spacing(); + + + int value = colors.brightness; + ImGuiDragS ds; + ds.widthDrag = 100.f; + + if(ImGuiUiDrawer::draw_drag_int_with_buttons("Brightness", "color_brightness", &value, ImGuiIntS{128,0,255,1.f,1}, ds)){ + colors.brightness = value; + update = true; + } + ImGui::Spacing(); + + value = colors.contrast; + if(ImGuiUiDrawer::draw_drag_int_with_buttons("Contrast", "color_contrast", &value, ImGuiIntS{5,0,10,1.f,1}, ds)){ + colors.contrast = value; + update = true; + } + + ImGui::Spacing(); + + value = colors.saturation; + if(ImGuiUiDrawer::draw_drag_int_with_buttons("Saturation", "color_saturation", &value, ImGuiIntS{32, 0, 63, 1.f, 1}, ds)){ + colors.saturation = value; + update = true; + } + + ImGui::Spacing(); + + value = colors.sharpness; + if(ImGuiUiDrawer::draw_drag_int_with_buttons("Sharpness", "color_sharpness", &value, ImGuiIntS{4,0,4,1.f,1}, ds)){ + colors.sharpness = value; + update = true; + } + + ImGui::Spacing(); + + value = colors.gain; + 
if(ImGuiUiDrawer::draw_drag_int_with_buttons("Gain", "color_gain", &value, ImGuiIntS{128, 0, 255, 1.f, 1}, ds)){ + colors.gain = value; + update = true; + } + + ImGui::Spacing(); + + value = colors.whiteBalance; + if(ImGuiUiDrawer::draw_drag_int_with_buttons("White balance", "color_white_balance", &value, ImGuiIntS{4500, 2500, 12500, 1, 10}, ds)){ + colors.whiteBalance = value; + update = true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("A###auto_white_balance", &colors.autoWhiteBalance)){ + update = true; + } + + ImGui::Spacing(); + + if(ImGui::Button("D###default_Backlight compensation")){ + colors.backlightCompensation = false; + update = true; + } + ImGui::SameLine(); + if(ImGui::Checkbox("Backlight compensation", &colors.backlightCompensation)){ + update = true; + } + ImGui::Spacing(); + + if(ImGui::Button("D###default_Powerline frequency")){ + colors.powerlineFrequency = camera::K4PowerlineFrequency::F60; + update = true; + } + ImGui::SameLine(); + + guiSel = colors.powerlineFrequency == camera::K4PowerlineFrequency::F50 ? 0 : 1; + ImGui::SetNextItemWidth(100.f); + if(ImGui::Combo("###settings_mode_combo", &guiSel, powerlineFrequencyItems, IM_ARRAYSIZE(powerlineFrequencyItems))){ + update = true; + colors.powerlineFrequency = guiSel == 0 ? 
camera::K4PowerlineFrequency::F50 : camera::K4PowerlineFrequency::F60; + } + ImGui::SameLine(); + ImGui::Text("Powerline frequency:"); + + ImGui::EndTabItem(); + + return update; +} + + diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_k4_ui_drawer.hpp b/cpp-projects/3d-engine/imgui-tb/imgui_k4_ui_drawer.hpp new file mode 100644 index 0000000..f46856e --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_k4_ui_drawer.hpp @@ -0,0 +1,118 @@ + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// base +#include "camera/kinect4/k4_device_settings.hpp" +#include "camera/kinect4/k4_filters.hpp" +#include "camera/kinect4/k4_display_settings.hpp" +#include "camera/kinect4/k4_recorder_settings.hpp" +#include "camera/kinect4/k4_player_settings.hpp" +#include "camera/kinect4/k4_calibrator_settings.hpp" +#include "camera/kinect4/k4_color_settings.hpp" +#include "camera/kinect4/k4_model.hpp" + +// opengl +#include "opengl/draw/drawer.hpp" + +// local +#include "imgui_types.hpp" + +namespace tool::graphics { + +[[maybe_unused]] static constexpr const char* modeItems[] = { + "Cloud_320x288", + "Cloud_640x576_MJPEG","Cloud_640x576_YUY2", "Cloud_640x576_NV12", "Cloud_640x576_BGRA32", + "Cloud_512x512", "Cloud_1024x1024", + "Full_frame_320x288", "Full_frame_640x576", "Full_frame_512x512", "Full_frame_1024x1024", + "Only_color_1280x720", "Only_color_1920x1080", "Only_color_2560x1440", "Only_color_2048x1536", "Only_color_3840x2160", "Only_color_4096x3072" +}; + +[[maybe_unused]] static constexpr const char* compressModeItems[] = { + "Full", "Cloud", "None" +}; + +[[maybe_unused]] static constexpr const char* synchItems[] = { + "Standalone", "Master", "Subordinate" +}; + +[[maybe_unused]] static constexpr const char* powerlineFrequencyItems[] = { + "50", "60" +}; + +[[maybe_unused]] static constexpr const char* sizesPencilItems[] = { + "10", "40", "100" +}; + +[[maybe_unused]] static constexpr const char* exposureTimesMicroSTimes[] = { + "500", "1250", "2500", "8330", "16670", "33330" +}; + +class K4UIDrawer{ + +public: + + static auto draw_config(const std::vector &devicesName, camera::K4ConfigSettings &config, bool &updateDeviceList, bool &updateP) -> void; + static auto draw_data_settings(camera::K4DataSettings &data, bool &updateP) -> void; + static auto draw_actions_settings(camera::K4ActionsSettings &actions, bool &updateP) -> void; + static auto 
draw_device_settings_tab_item( + const std::string &tabItemName, + const std::vector &devicesName, + camera::K4DeviceSettings &device, + bool &updateDeviceList, + bool &autoUpdate + ) -> bool; + + static auto draw_colors_settings_tab_item(const std::string &tabItemName, camera::K4ColorSettings &colors, bool &autoUpdate) -> bool; + static auto draw_filters_tab_item(const std::string &tabItemName, camera::K4Mode mode, camera::K4Filters &filters, bool &autoUpdate) -> std::tuple; + static auto draw_scene_display_setings_tab_item(const std::string &tabItemName, camera::K4SceneDisplaySettings &display, bool &autoUpdate) -> bool; + static auto draw_cloud_display_setings_tab_item(const std::string &tabItemName, camera::K4CloudDisplaySettings &display, bool &autoUpdate) -> bool; + static auto draw_calibration_tab_item(const std::string &tabItemName, camera::K4Model &model, bool &autoUpdate) -> bool; + static auto draw_recording_tab_item(const std::string &tabItemName, camera::K4RecorderStates &rStates, camera::K4RecorderSettings &rSettings, bool &autoUpdate) -> bool; + static auto draw_player_tab_item(const std::string &tabItemName, camera::K4PlayerStates &pStates, camera::K4PlayerSettings &pSettings, bool &autoUpdate) -> bool; + static auto draw_calibrator_tab_item(const std::string &tabItemName, bool useNormalFilteringSettings, camera::K4CalibratorStates &cStates, K4CalibratorDrawerSettings &cdSettings, camera::K4CalibratorSettings &cSettings, bool &autoUpdate) -> bool; + + static inline ImGuiDragS modelRotDs = {45.f, true, true, false, true, true}; + static inline ImGuiDragS modelTrDs = {45.f, true, true, false, true, true}; + static inline ImGuiDragS modelScDs = {45.f, true, true, false, true, true}; + static inline ImGuiFloatS modelRotFs = {0.f, -360.f, 360.f, 0.01f, 0.01f, "%.2f"}; + static inline ImGuiFloatS modelTrFs = {0.f, -10.f, 10.f, 0.001f, 0.001f, "%.3f"}; + static inline ImGuiFloatS modelScFs = {1.f, -100.f, 100.f, 0.01f, 0.01f, "%.3f"}; +}; + + +struct 
CloudPointsFrameDrawer{ + int currentFrameId = 0; + int startFrameId = 0; + int endFrameId = 0; + gl::CloudPointsDrawer cloudD; +}; + +} + + + diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_logs.cpp b/cpp-projects/3d-engine/imgui-tb/imgui_logs.cpp new file mode 100644 index 0000000..ed8b7c8 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_logs.cpp @@ -0,0 +1,134 @@ + + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + + +#include "imgui_logs.hpp" + +using namespace tool::graphics; + +ImguiLogs::ImguiLogs(){ + clear(); +} + +void ImguiLogs::clear(){ + buffer.clear(); + lineOffsets.clear(); + lineOffsets.push_back(0); +} + +void ImguiLogs::draw(const char* name){ + + ImGui::BeginChild(name, ImVec2(0, 0), false); + + // Options menu + if (ImGui::BeginPopup("Options")){ + ImGui::Checkbox("Auto-scroll", &autoScroll); + ImGui::EndPopup(); + } + + // Main window + if (ImGui::Button("Options")){ + ImGui::OpenPopup("Options"); + } + ImGui::SameLine(); + bool clearB = ImGui::Button("Clear"); + ImGui::SameLine(); + bool copy = ImGui::Button("Copy"); + ImGui::SameLine(); + filter.Draw("Filter", -100.0f); + + ImGui::Separator(); + ImGui::BeginChild("scrolling", ImVec2(0, 0), false, ImGuiWindowFlags_HorizontalScrollbar | ImGuiWindowFlags_AlwaysVerticalScrollbar); + + if (clearB){ + clear(); + } + if (copy){ + ImGui::LogToClipboard(); + } + + ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(0, 0)); + const char* buf = buffer.begin(); + const char* buf_end = buffer.end(); + if (filter.IsActive()){ + // In this example we don't use the clipper when Filter is enabled. + // This is because we don't have a random access on the result on our filter. + // A real application processing logs with ten of thousands of entries may want to store the result of + // search/filter.. especially if the filtering function is not trivial (e.g. reg-exp). + for (int line_no = 0; line_no < lineOffsets.Size; line_no++){ + const char* line_start = buf + lineOffsets[line_no]; + const char* line_end = (line_no + 1 < lineOffsets.Size) ? 
(buf + lineOffsets[line_no + 1] - 1) : buf_end; + if (filter.PassFilter(line_start, line_end)){ + ImGui::TextUnformatted(line_start, line_end); + } + } + }else{ + // The simplest and easy way to display the entire buffer: + // ImGui::TextUnformatted(buf_begin, buf_end); + // And it'll just work. TextUnformatted() has specialization for large blob of text and will fast-forward + // to skip non-visible lines. Here we instead demonstrate using the clipper to only process lines that are + // within the visible area. + // If you have tens of thousands of items and their processing cost is non-negligible, coarse clipping them + // on your side is recommended. Using ImGuiListClipper requires + // - A) random access into your data + // - B) items all being the same height, + // both of which we can handle since we an array pointing to the beginning of each line of text. + // When using the filter (in the block of code above) we don't have random access into the data to display + // anymore, which is why we don't use the clipper. Storing or skimming through the search result would make + // it possible (and would be recommended if you want to search through tens of thousands of entries). + ImGuiListClipper clipper; + clipper.Begin(lineOffsets.Size); + while (clipper.Step()) + { + for (int line_no = clipper.DisplayStart; line_no < clipper.DisplayEnd; line_no++) + { + const char* line_start = buf + lineOffsets[line_no]; + const char* line_end = (line_no + 1 < lineOffsets.Size) ? 
(buf + lineOffsets[line_no + 1] - 1) : buf_end; + ImGui::TextUnformatted(line_start, line_end); + } + } + clipper.End(); + } + ImGui::PopStyleVar(); + + if (autoScroll && ImGui::GetScrollY() >= ImGui::GetScrollMaxY()){ + ImGui::SetScrollHereY(1.0f); + } + + ImGui::EndChild(); + ImGui::EndChild(); +} + +void ImguiLogs2::draw(const char *name){ + + ImGuiWindowFlags window_flags = ImGuiWindowFlags_HorizontalScrollbar; + ImGui::BeginChild(name, ImVec2(ImGui::GetWindowContentRegionWidth() * 0.5f, 260), false, window_flags); + for(const auto &log : logs){ + ImGui::Selectable(log.c_str()); + } + ImGui::EndChild(); +} diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_logs.hpp b/cpp-projects/3d-engine/imgui-tb/imgui_logs.hpp new file mode 100644 index 0000000..06eb558 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_logs.hpp @@ -0,0 +1,72 @@ + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// std +#include +#include + +// local +#include "imgui/imgui.h" + + +namespace tool::graphics { + +struct ImguiLogs{ + + ImGuiTextBuffer buffer; + ImGuiTextFilter filter; + ImVector lineOffsets; // Index to lines offset. We maintain this with AddLog() calls. + bool autoScroll = true; // Keep scrolling if already at the bottom. + + ImguiLogs(); + void clear(); + + void add_log(const char* fmt, ...){// IM_FMTARGS(2){ + int old_size = buffer.size(); + va_list args; + va_start(args, fmt); + buffer.appendfv(fmt, args); + va_end(args); + for (int new_size = buffer.size(); old_size < new_size; old_size++){ + if (buffer[old_size] == '\n'){ + lineOffsets.push_back(old_size + 1); + } + } + } + + void draw(const char* name); +}; + +struct ImguiLogs2{ + + + std::vector logs; + ImguiLogs2(){} + void draw(const char* name); +}; +} diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_texture_ui_drawer.cpp b/cpp-projects/3d-engine/imgui-tb/imgui_texture_ui_drawer.cpp new file mode 100644 index 0000000..400fe2a --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_texture_ui_drawer.cpp @@ -0,0 +1,257 @@ + + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, 
sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "imgui_texture_ui_drawer.hpp" + +// std +#include + +// local +#include "imgui-tb/imgui_ui_drawer.hpp" +#include "imgui-tb/imgui_helper.hpp" + +using namespace tool; +using namespace tool::graphics; + +void ImGuiTextureUiDrawer::init(gl::TBO *texture, bool invert){ + m_invert = invert; + m_texture = texture; +} + +void ImGuiTextureUiDrawer::draw_child(const std::string &windowName, geo::Pt2f sizeWindow, std::optional text){ + + if(!m_texture){ + return; + } + + if(ImGui::BeginChild(std::format("{}Window",windowName).c_str(), to_iv2(sizeWindow),false,ImGuiWindowFlags_NoScrollWithMouse)){ + + if(ImGui::BeginTabBar(std::format("{}Tab",windowName).c_str(), ImGuiTabBarFlags_None)){ + if(ImGui::BeginTabItem(windowName.c_str())){ + + auto size = content_region_size_available(); + float scale = std::min(1.f*size.y() / m_texture->height(), 1.f*size.x() / m_texture->width()); + auto sizeI = ImVec2(m_texture->width() * scale, m_texture->height() * scale); + + auto cursorScreenPos = ImGui::GetCursorScreenPos(); + + if(m_texture->id() == 0){ + 
ImGui::Text("Texture not initialized."); + }else{ + + if(m_invert){ + ImGui::Image(m_texture->id(), sizeI, ImVec2(0,1), ImVec2(1,0)); + }else{ + ImGui::Image(m_texture->id(), sizeI, ImVec2(0,0), ImVec2(1,1)); + } + + auto io = ImGui::GetIO(); + auto min = ImGui::GetItemRectMin(); + auto size = ImGui::GetItemRectSize(); + auto mousePos = io.MousePos; + auto diff = geo::Pt2f{mousePos.x-min.x, mousePos.y-min.y}; + + if(diff.x() > 0 && diff.x() < size.x && diff.y() > 0 && diff.y() < size.y){ + + hoveringPixel = (diff / scale).conv(); + for(int ii = 0; ii < 5; ++ii){ + if(io.MouseDown[ii]){ + mouseButtonsPressed[ii] = true; + }else if(io.MouseReleased[ii]){ + mouseButtonsPressed[ii] = false; + } + } + + }else{ + hoveringPixel = {-1,-1}; + for(int ii = 0; ii < 5; ++ii){ + mouseButtonsPressed[ii] = false; + } + } + + if(text.has_value()){ + auto newCursorPos = ImGui::GetCursorScreenPos(); + ImGui::SetCursorScreenPos(cursorScreenPos); + ImGuiUiDrawer::text_colored(ImVec4(1,0,0,1), text.value()); + ImGui::SetCursorScreenPos(newCursorPos); + } + } + ImGui::EndTabItem(); + } + if(ImGui::BeginTabItem("Infos")){ + auto size = content_region_size_available(); + ImGui::Text("pointer = %d", m_texture->id()); + ImGui::Text("original size = %d x %d", m_texture->width(), m_texture->height()); + ImGui::Text("window size = %d x %d", static_cast(size.x()), static_cast(size.y())); + ImGui::EndTabItem(); + } + + ImGui::EndTabBar(); + } + } + ImGui::EndChild(); + +} + +void ImGuiTextureUiDrawer::draw_at_position(const geo::Pt2f &screenPos, const geo::Pt2f &sizeTexture, std::optional text){ + + if(!m_texture){ + return; + } + + ImGui::SetCursorScreenPos(to_iv2(screenPos)); + + if(m_texture->id() == 0){ + ImGui::Text("Texture not initialized."); + }else{ + + auto cursorScreenPos = ImGui::GetCursorScreenPos(); + + if(m_invert){ + ImGui::Image(m_texture->id(), to_iv2(sizeTexture), ImVec2(0,1), ImVec2(1,0)); + }else{ + ImGui::Image(m_texture->id(), to_iv2(sizeTexture), ImVec2(0,0), 
ImVec2(1,1)); + } + + auto io = ImGui::GetIO(); + auto min = ImGui::GetItemRectMin(); + auto size = ImGui::GetItemRectSize(); + auto mousePos = io.MousePos; + auto diff = geo::Pt2f{mousePos.x-min.x, mousePos.y-min.y}; + + if(diff.x() > 0 && diff.x() < size.x && diff.y() > 0 && diff.y() < size.y){ + + hoveringPixel = (diff).conv(); + for(int ii = 0; ii < 5; ++ii){ + if(io.MouseDown[ii]){ + mouseButtonsPressed[ii] = true; + }else if(io.MouseReleased[ii]){ + mouseButtonsPressed[ii] = false; + } + } + + }else{ + hoveringPixel = {-1,-1}; + for(int ii = 0; ii < 5; ++ii){ + mouseButtonsPressed[ii] = false; + } + } + + if(text.has_value()){ + auto newCursorPos = ImGui::GetCursorScreenPos(); + ImGui::SetCursorScreenPos(cursorScreenPos); + ImGuiUiDrawer::text_colored(ImVec4(1,0,0,1), text.value()); + ImGui::SetCursorScreenPos(newCursorPos); + } + } + + ImGui::SetCursorScreenPos(to_iv2(screenPos)); +} + + + +//std::optional> ImguiTextureDrawer::draw_texture(const std::string &name, geo::Pt2f screenPos, geo::Pt2f sizeTexture, const gl::TBO *texture, bool invert){ + +// std::optional> hoveringPixel = std::nullopt; + +//// auto pos = ImGui::GetCursorScreenPos(); +// ImGui::SetCursorScreenPos(to_iv2(screenPos)); + +// if(texture->id() == 0){ +// ImGui::Text(std::format("{}: texture not initialized.", name)); +// }else{ + +// if(invert){ +// ImGui::Image(texture->id(), to_iv2(sizeTexture), ImVec2(0,1), ImVec2(1,0)); +// }else{ +// ImGui::Image(texture->id(), to_iv2(sizeTexture), ImVec2(0,0), ImVec2(1,1)); +// } + +// auto min = ImGui::GetItemRectMin(); +// auto mousePos = ImGui::GetIO().MousePos; +// hoveringPixel = geo::Pt2{static_cast(mousePos.x-min.x), static_cast(mousePos.y-min.y)}; + +// ImGui::SetCursorScreenPos(to_iv2(screenPos)); +// ImGui::TextColored(ImVec4(1,0,0,1),name); +// } + +//// ImGui::SetCursorScreenPos(pos); +// return hoveringPixel; +//} + + +//std::optional> ImguiTextureDrawer::draw_texture_tab_child(const std::string &windowName, geo::Pt2f sizeWindow, const 
gl::TBO *texture, bool invert, const std::string &optionalText){ + +// std::optional> hoveringPixel = std::nullopt; +// if(ImGui::BeginChild(std::format("{}Window",windowName).c_str(), to_iv2(sizeWindow),false,ImGuiWindowFlags_NoScrollWithMouse)){ + +// if(ImGui::BeginTabBar(std::format("{}Tab",windowName).c_str(), ImGuiTabBarFlags_None)){ +// if(ImGui::BeginTabItem(windowName.c_str())){ + +// auto size = ImGui::content_region_size_available(); +// float scale = std::min(1.f*size.y() / texture->height(), 1.f*size.x() / texture->width()); +// auto sizeI = ImVec2(texture->width() * scale, texture->height() * scale); + +// auto cursorScreenPos = ImGui::GetCursorScreenPos(); + +// if(texture->id() == 0){ +// ImGui::Text("Texture not initialized."); +// }else{ + +// if(invert){ +// ImGui::Image(texture->id(), sizeI, ImVec2(0,1), ImVec2(1,0)); +// }else{ +// ImGui::Image(texture->id(), sizeI, ImVec2(0,0), ImVec2(1,1)); +// } + +// auto min = ImGui::GetItemRectMin(); +// auto mousePos = ImGui::GetIO().MousePos; +// hoveringPixel = geo::Pt2{static_cast((mousePos.x-min.x)/ scale), static_cast((mousePos.y-min.y)/ scale)} ; + +// if(optionalText.length() > 0){ +// auto newCursorPos = ImGui::GetCursorScreenPos(); +// ImGui::SetCursorScreenPos(cursorScreenPos); +// ImGui::TextColored(ImVec4(1,0,0,1), optionalText); +// ImGui::SetCursorScreenPos(newCursorPos); +// } +// } +// ImGui::EndTabItem(); +// } +// if(ImGui::BeginTabItem("Infos")){ +// auto size = ImGui::content_region_size_available(); +// ImGui::Text("pointer = %d", texture->id()); +// ImGui::Text("original size = %d x %d", texture->width(), texture->height()); +// ImGui::Text("window size = %d x %d", static_cast(size.x()), static_cast(size.y())); +// ImGui::EndTabItem(); +// } + +// ImGui::EndTabBar(); +// } +// } +// ImGui::EndChild(); +// return hoveringPixel; +//} diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_texture_ui_drawer.hpp b/cpp-projects/3d-engine/imgui-tb/imgui_texture_ui_drawer.hpp new file mode 
100644 index 0000000..a3722f1 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_texture_ui_drawer.hpp @@ -0,0 +1,52 @@ + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// base +#include "geometry/point2.hpp" + +// opengl-utility +#include "opengl/buffer/texture_buffer_object.hpp" + +namespace tool::graphics { + + +class ImGuiTextureUiDrawer{ +public: + + auto init(gl::TBO *texture, bool invert = false) -> void; + auto draw_child(const std::string &windowName, geo::Pt2f sizeWindow, std::optional text = std::nullopt) -> void; + auto draw_at_position(const geo::Pt2f &screenPos, const geo::Pt2f &sizeTexture, std::optional text = std::nullopt) -> void; + + geo::Pt2 hoveringPixel = {-1,-1}; + std::array mouseButtonsPressed; + +private: + bool m_invert = false; + gl::TBO *m_texture = nullptr; +}; +} diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_types.hpp b/cpp-projects/3d-engine/imgui-tb/imgui_types.hpp new file mode 100644 index 0000000..384e664 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_types.hpp @@ -0,0 +1,62 @@ + + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. 
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// std +#include + +namespace tool{ + +using ImGuiId = unsigned int; + +struct ImGuiDragS{ + float widthDrag = 100.f; + bool decButton = true; + bool incButton = true; + bool defaultButton = true; + bool displayText = true; + bool displayTextLeft = false; +}; + +struct ImGuiIntS{ + int defaultValue = 0; + int min = 0; + int max = 100; + float speedDrag = 0.1f; + int speedInc = 1; +}; + +struct ImGuiFloatS{ + float defaultValue = 0.f; + float min = 0.f; + float max = 1.f; + float speedDrag = 0.01f; + float speedInc = 0.1f; + std::string format = "%.3f"; +}; +} diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_ui_drawer.cpp b/cpp-projects/3d-engine/imgui-tb/imgui_ui_drawer.cpp new file mode 100644 index 0000000..e9bc6dd --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_ui_drawer.cpp @@ -0,0 +1,324 @@ + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to 
permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + + +#include "imgui_ui_drawer.hpp" + +// std +#include + +// local +#include "imgui_convert.hpp" + +using namespace tool::geo; + +static auto vector_getter = [](void* vec, int idx, const char** out_text){ + auto& vector = *static_cast*>(vec); + if (idx < 0 || idx >= static_cast(vector.size())) { + return false; + } + *out_text = vector.at(idx).c_str(); + return true; +}; + +auto tool::ImGuiUiDrawer::text_centered(const char* text, ...) 
-> void{ + + va_list vaList = nullptr; + va_start(vaList, text); + + float font_size = ImGui::GetFontSize() * strlen(text) / 2; + ImGui::SameLine( + ImGui::GetWindowSize().x / 2 - + font_size + (font_size / 2) + ); + + ImGui::TextV(text, vaList); + + va_end(vaList); +} + + +auto tool::ImGuiUiDrawer::text_colored(const ImVec4 &col, const std::string &text) -> void{ + auto d = text.c_str(); + ImGui::TextColored(col, d, d + text.size()); +} + +auto tool::ImGuiUiDrawer::text_wrapped(const std::string &text, std::optional color) -> void{ + auto d = text.c_str(); + if(!color.has_value()){ + ImGui::TextWrapped(d, d + text.size()); + }else{ + ImGui::PushStyleColor(ImGuiCol_Text, to_iv4(color.value())); + ImGui::TextWrapped(d, d + text.size()); + ImGui::PopStyleColor(); + } +} + +auto tool::ImGuiUiDrawer::text_wrapped(std::string_view text, std::optional color) -> void{ + auto d = text.data(); + if(!color.has_value()){ + ImGui::TextWrapped(d, d + text.size()); + }else{ + ImGui::PushStyleColor(ImGuiCol_Text, to_iv4(color.value())); + ImGui::TextWrapped(d, d + text.size()); + ImGui::PopStyleColor(); + } +} + +auto tool::ImGuiUiDrawer::text(const std::string &text, std::optional color) -> void{ + auto d = text.c_str(); + if(!color.has_value()){ + ImGui::Text(d, d + text.size()); + }else{ + ImGui::TextColored(to_iv4(color.value()), d, d + text.size()); + } +} + +auto tool::ImGuiUiDrawer::text(std::string_view text, std::optional color) -> void{ + auto d = text.data(); + if(!color.has_value()){ + ImGui::Text(d, d + text.size()); + }else{ + ImGui::TextColored(to_iv4(color.value()), d, d + text.size()); + } +} + +auto tool::ImGuiUiDrawer::list_box(const char *label, int *currIndex, std::vector &values) -> bool{ + if (values.empty()) { + return false; + } + return ImGui::ListBox(label, currIndex, vector_getter,static_cast(&values), static_cast(values.size())); +} + +auto tool::ImGuiUiDrawer::combo(const char *label, int *currIndex, std::vector &values) -> bool{ + if 
(values.empty()) { + return false; + } + return ImGui::Combo(label, currIndex, vector_getter,static_cast(&values), static_cast(values.size())); +} + + +auto tool::ImGuiUiDrawer::draw_drag_float_with_buttons( + std::string_view displayName, std::string_view itemName, float *value, ImGuiFloatS fSettings, ImGuiDragS dSettings) -> bool{ + + bool update = false; + if(dSettings.defaultButton){ + if(ImGui::Button(std::format("D###default_{}",itemName).c_str())){ + *value = fSettings.defaultValue; + update = true; + } + ImGui::SameLine(); + } + + if(dSettings.displayTextLeft && dSettings.displayText){ + text(displayName); + ImGui::SameLine(); + } + + if(dSettings.decButton){ + if(ImGui::Button(std::format("-###minus_{}",itemName).c_str())){ + if((*value) > fSettings.min+fSettings.speedInc){ + (*value) -= fSettings.speedInc; + update = true; + }else{ + (*value) = fSettings.min; + update = true; + } + } + ImGui::SameLine(); + } + + ImGui::SetNextItemWidth(dSettings.widthDrag); + auto currentV = *value; + + if(ImGui::DragFloat(std::format("###drag_{}", itemName).c_str(), value, fSettings.speedDrag, fSettings.min, fSettings.max, fSettings.format.c_str(), ImGuiSliderFlags_AlwaysClamp)){ + + if((*value) < fSettings.min){ + *value = fSettings.min; + } + if((*value) > fSettings.max){ + *value = fSettings.max; + } + + if(((*value) >= fSettings.min || (*value) <= fSettings.max) && currentV != (*value)){ + update = true; + } + } + + if(dSettings.incButton){ + ImGui::SameLine(); + if(ImGui::Button(std::format("+###plus_{}",itemName).c_str())){ + if((*value) < fSettings.max-fSettings.speedInc){ + (*value) +=fSettings.speedInc; + update = true; + }else{ + (*value) = fSettings.max; + update = true; + } + } + } + + if(!dSettings.displayTextLeft && dSettings.displayText){ + ImGui::SameLine(); + text(displayName); + } + + return update; +} + +auto tool::ImGuiUiDrawer::title2(std::string_view text) -> void{ + ImGui::Spacing(); + ImGui::Spacing(); + ImGui::PushStyleColor(ImGuiCol_Text, 
IM_COL32(0, 255, 0, 255)); + auto d = text.data(); + ImGuiUiDrawer::text_centered(d, d + text.size()); + ImGui::PopStyleColor(); + ImGui::Separator(); +} + +auto tool::ImGuiUiDrawer::begin_tab_bar(unsigned int *tabId, const char *label, ImGuiTabBarFlags flags) -> bool{ + m_tabId = tabId; + return ImGui::BeginTabBar(label, flags); +} + +auto tool::ImGuiUiDrawer::begin_tab_item(const char *label, const ImVec4 &activeColor, const ImVec4 &inactiveColor, bool *pOpen, ImGuiTabItemFlags flags) -> bool{ + + if(!m_tabId){ + return ImGui::BeginTabItem(label, pOpen, flags); + } + + ImGuiID id = ImGui::GetID(label); + + bool wasActive = (*m_tabId == id); + + if (wasActive){ + ImGui::PushStyleColor(ImGuiCol_Text, activeColor); + }else{ + ImGui::PushStyleColor(ImGuiCol_Text, inactiveColor); + } + + bool isActive = ImGui::BeginTabItem(label, pOpen, flags); + ImGui::PopStyleColor(); + + if (isActive){ + *m_tabId = id; + } + + return isActive; +} + +//auto tool::ImGuiUiDrawer::begin_tab_item(const char *label, ImGuiID &activeTab, const ImVec4 &activeColor, const ImVec4 &inactiveColor, bool *pOpen, ImGuiTabItemFlags flags) -> bool{ + +// ImGuiID id = ImGui::GetID(label); +// bool wasActive = (activeTab == id); + +// if (wasActive){ +// ImGui::PushStyleColor(ImGuiCol_Text, activeColor); +// }else{ +// ImGui::PushStyleColor(ImGuiCol_Text, inactiveColor); +// } + +// bool isActive = ImGui::BeginTabItem(label, pOpen, flags); +// ImGui::PopStyleColor(); + +// if (isActive){ +// activeTab = id; +// } + +// return isActive; +//} + +auto tool::ImGuiUiDrawer::draw_drag_int_with_buttons(std::string_view displayName, std::string_view itemName, int *value, ImGuiIntS iSettings, ImGuiDragS dSettings) -> bool{ + + bool update = false; + if(dSettings.defaultButton){ + if(ImGui::Button(std::format("D###default_{}",itemName).c_str())){ + *value = iSettings.defaultValue; + update = true; + } + ImGui::SameLine(); + } + + if(dSettings.displayTextLeft && dSettings.displayText){ + text(displayName); + 
ImGui::SameLine(); + } + + if(dSettings.decButton){ + if(ImGui::Button(std::format("-###minus_{}",itemName).c_str())){ + if((*value) > iSettings.min+iSettings.speedInc){ + (*value) -= iSettings.speedInc; + update = true; + }else{ + (*value) = iSettings.min; + update = true; + } + } + ImGui::SameLine(); + } + + ImGui::SetNextItemWidth(dSettings.widthDrag); + auto currentV = *value; + + if(ImGui::DragInt(std::format("###drag_{}", itemName).c_str(), value, iSettings.speedDrag, iSettings.min, iSettings.max, "%d", ImGuiSliderFlags_AlwaysClamp)){ + + if((*value) < iSettings.min){ + *value = iSettings.min; + } + if((*value) > iSettings.max){ + *value = iSettings.max; + } + + if(((*value) >= iSettings.min || (*value) <= iSettings.max) && currentV != (*value)){ + update = true; + } + } + + if(dSettings.incButton){ + ImGui::SameLine(); + if(ImGui::Button(std::format("+###plus_{}",itemName).c_str())){ + if((*value) < iSettings.max-iSettings.speedInc){ + (*value) +=iSettings.speedInc; + update = true; + }else{ + (*value) = iSettings.max; + update = true; + } + } + } + + if(!dSettings.displayTextLeft && dSettings.displayText){ + ImGui::SameLine(); + text(displayName); + } + + return update; +} + + +//PushStyleColor(ImGuiCol_Text, col); Text(fmt, ...); PopStyleColor(); +// shortcut for PushTextWrapPos(0.0f); Text(fmt, ...); PopTextWrapPos();. +//Note that this won't work on an auto-resizing window if there's no other widgets to extend the window width, yoy may need to set a size using SetNextWindowSize(). 
diff --git a/cpp-projects/3d-engine/imgui-tb/imgui_ui_drawer.hpp b/cpp-projects/3d-engine/imgui-tb/imgui_ui_drawer.hpp new file mode 100644 index 0000000..a12b4d1 --- /dev/null +++ b/cpp-projects/3d-engine/imgui-tb/imgui_ui_drawer.hpp @@ -0,0 +1,77 @@ + + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/

#pragma once

// std
// NOTE(review): the three include targets below are missing in this copy of the file - restore them from the repository.
#include
#include
#include

// imgui
// NOTE(review): include target missing in this copy of the file as well.
#include

// local
#include "imgui_types.hpp"
#include "geometry/point4.hpp"

namespace tool{

// Placeholder for style definitions; currently empty.
struct ImGuiStyles{
    struct Colors{

    };
};

// Collection of static ImGui drawing helpers.
// NOTE(review): template arguments (std::vector, std::optional) are missing in the
// signatures below in this copy of the file - restore them from the repository.
struct ImGuiUiDrawer{


// Combo / list box over a container of strings; both return false when `values` is empty.
[[maybe_unused]] static auto combo(const char* label, int* currIndex, std::vector& values) -> bool;
[[maybe_unused]] static auto list_box(const char* label, int* currIndex, std::vector& values) -> bool;
// Text helpers; `color`, when set, is applied to the text.
[[maybe_unused]] static auto text(std::string_view text, std::optional color = std::nullopt) -> void;
[[maybe_unused]] static auto text(const std::string &text, std::optional color = std::nullopt) -> void;
[[maybe_unused]] static auto text_wrapped(std::string_view text, std::optional color = std::nullopt) -> void;
[[maybe_unused]] static auto text_wrapped(const std::string &text, std::optional color = std::nullopt) -> void;
[[maybe_unused]] static auto text_colored(const ImVec4& col, const std::string &text) -> void;

// printf-style text placed on the current line at an offset derived from the window width.
[[maybe_unused]] static auto text_centered(const char* text, ...) -> void;

// Drag field with optional default / minus / plus buttons; return true when *value was updated.
[[maybe_unused]] static auto draw_drag_int_with_buttons(std::string_view displayName, std::string_view itemName, int *value, ImGuiIntS iSettings, ImGuiDragS dSettings) -> bool;
[[maybe_unused]] static auto draw_drag_float_with_buttons(std::string_view displayName, std::string_view itemName, float *value, ImGuiFloatS fSettings, ImGuiDragS dSettings) -> bool;

// Green title preceded by spacing and followed by a separator.
[[maybe_unused]] static auto title2(std::string_view text) -> void;

// Tab helpers: begin_tab_bar() registers where the active tab id is stored,
// begin_tab_item() colours the label depending on whether the tab is the active one.
[[maybe_unused]] static auto begin_tab_bar(unsigned int *tabId, const char* label, ImGuiTabBarFlags flags = 0) -> bool;
//[[maybe_unused]] static auto begin_tab_item(const char* label, ImGuiID &activeTab, const ImVec4 &activeColor = ImVec4(0, 1, 0, 1), const ImVec4 &inactiveColor = ImVec4(1, 1, 1, 1), bool* pOpen = nullptr, ImGuiTabItemFlags flags = 0) -> bool;
[[maybe_unused]] static auto begin_tab_item(const char* label, const ImVec4 &activeColor = ImVec4(0, 1, 0, 1), const ImVec4 &inactiveColor = ImVec4(1, 1, 1, 1), bool* pOpen = nullptr, ImGuiTabItemFlags flags = 0) -> bool;


private:
    // Active-tab id storage registered by the last begin_tab_bar() call (shared, static).
    static inline unsigned int *m_tabId = nullptr;
};

}
diff --git a/cpp-projects/3d-engine/imgui/extra/ImGuiFileDialog.cpp b/cpp-projects/3d-engine/imgui/extra/ImGuiFileDialog.cpp new file mode 100644 index 0000000..c7262b2 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/ImGuiFileDialog.cpp @@ -0,0 +1,5053 @@ +// This is an independent project of an individual developer. Dear PVS-Studio, please check it. 
+// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com + +/* +MIT License + +Copyright (c) 2019-2020 Stephane Cuillerdier (aka aiekick) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +#include "ImGuiFileDialog.h" + +#ifdef __cplusplus + +#include +#include // stricmp / strcasecmp +#include // variadic +#include +#include +#include +#include +#include +// this option need c++17 +#ifdef USE_STD_FILESYSTEM + #include +#endif +#if defined (__EMSCRIPTEN__) // EMSCRIPTEN + #include +#endif // EMSCRIPTEN +#if defined(__WIN32__) || defined(_WIN32) + #ifndef WIN32 + #define WIN32 + #endif // WIN32 + #define stat _stat + #define stricmp _stricmp + #include + // this option need c++17 + #ifdef USE_STD_FILESYSTEM + #include + #else + #include "dirent/dirent.h" // directly open the dirent file attached to this lib + #endif // USE_STD_FILESYSTEM + #define PATH_SEP '\\' + #ifndef PATH_MAX + #define PATH_MAX 260 + #endif // PATH_MAX +#elif defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__APPLE__) || defined (__EMSCRIPTEN__) + #define UNIX + #define stricmp strcasecmp + #include + // this option need c++17 + #ifndef USE_STD_FILESYSTEM + #include + #endif // USE_STD_FILESYSTEM + #define PATH_SEP '/' +#endif // defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__APPLE__) + +#include "imgui.h" +#ifndef IMGUI_DEFINE_MATH_OPERATORS + #define IMGUI_DEFINE_MATH_OPERATORS +#endif // IMGUI_DEFINE_MATH_OPERATORS +#include "imgui_internal.h" + +#include +#include +#include + +#ifdef USE_THUMBNAILS +#ifndef DONT_DEFINE_AGAIN__STB_IMAGE_IMPLEMENTATION +#ifndef STB_IMAGE_IMPLEMENTATION +#define STB_IMAGE_IMPLEMENTATION +#endif // STB_IMAGE_IMPLEMENTATION +#endif // DONT_DEFINE_AGAIN__STB_IMAGE_IMPLEMENTATION +#include "stb/stb_image.h" +#ifndef DONT_DEFINE_AGAIN__STB_IMAGE_RESIZE_IMPLEMENTATION +#ifndef STB_IMAGE_RESIZE_IMPLEMENTATION +#define STB_IMAGE_RESIZE_IMPLEMENTATION +#endif // STB_IMAGE_RESIZE_IMPLEMENTATION +#endif // DONT_DEFINE_AGAIN__STB_IMAGE_RESIZE_IMPLEMENTATION +#include "stb/stb_image_resize.h" +#endif // USE_THUMBNAILS + +namespace IGFD +{ +// float comparisons +#ifndef IS_FLOAT_DIFFERENT 
+#define IS_FLOAT_DIFFERENT(a,b) (fabs((a) - (b)) > FLT_EPSILON) +#endif // IS_FLOAT_DIFFERENT +#ifndef IS_FLOAT_EQUAL +#define IS_FLOAT_EQUAL(a,b) (fabs((a) - (b)) < FLT_EPSILON) +#endif // IS_FLOAT_EQUAL +// width of filter combobox +#ifndef FILTER_COMBO_WIDTH +#define FILTER_COMBO_WIDTH 150.0f +#endif // FILTER_COMBO_WIDTH +// for lets you define your button widget +// if you have like me a special bi-color button +#ifndef IMGUI_PATH_BUTTON +#define IMGUI_PATH_BUTTON ImGui::Button +#endif // IMGUI_PATH_BUTTON +#ifndef IMGUI_BUTTON +#define IMGUI_BUTTON ImGui::Button +#endif // IMGUI_BUTTON +// locales +#ifndef createDirButtonString +#define createDirButtonString "+" +#endif // createDirButtonString +#ifndef okButtonString +#define okButtonString "OK" +#endif // okButtonString +#ifndef cancelButtonString +#define cancelButtonString "Cancel" +#endif // cancelButtonString +#ifndef resetButtonString +#define resetButtonString "R" +#endif // resetButtonString +#ifndef drivesButtonString +#define drivesButtonString "Drives" +#endif // drivesButtonString +#ifndef editPathButtonString +#define editPathButtonString "E" +#endif // editPathButtonString +#ifndef searchString +#define searchString "Search :" +#endif // searchString +#ifndef dirEntryString +#define dirEntryString "[Dir]" +#endif // dirEntryString +#ifndef linkEntryString +#define linkEntryString "[Link]" +#endif // linkEntryString +#ifndef fileEntryString +#define fileEntryString "[File]" +#endif // fileEntryString +#ifndef fileNameString +#define fileNameString "File Name :" +#endif // fileNameString +#ifndef dirNameString +#define dirNameString "Directory Path :" +#endif // dirNameString +#ifndef buttonResetSearchString +#define buttonResetSearchString "Reset search" +#endif // buttonResetSearchString +#ifndef buttonDriveString +#define buttonDriveString "Drives" +#endif // buttonDriveString +#ifndef buttonEditPathString +#define buttonEditPathString "Edit path\nYou can also right click on path buttons" 
+#endif // buttonEditPathString +#ifndef buttonResetPathString +#define buttonResetPathString "Reset to current directory" +#endif // buttonResetPathString +#ifndef buttonCreateDirString +#define buttonCreateDirString "Create Directory" +#endif // buttonCreateDirString +#ifndef tableHeaderAscendingIcon +#define tableHeaderAscendingIcon "A|" +#endif // tableHeaderAscendingIcon +#ifndef tableHeaderDescendingIcon +#define tableHeaderDescendingIcon "D|" +#endif // tableHeaderDescendingIcon +#ifndef tableHeaderFileNameString +#define tableHeaderFileNameString "File name" +#endif // tableHeaderFileNameString +#ifndef tableHeaderFileTypeString +#define tableHeaderFileTypeString "Type" +#endif // tableHeaderFileTypeString +#ifndef tableHeaderFileSizeString +#define tableHeaderFileSizeString "Size" +#endif // tableHeaderFileSizeString +#ifndef tableHeaderFileDateString +#define tableHeaderFileDateString "Date" +#endif // tableHeaderFileDateString +#ifndef OverWriteDialogTitleString +#define OverWriteDialogTitleString "The file Already Exist !" +#endif // OverWriteDialogTitleString +#ifndef OverWriteDialogMessageString +#define OverWriteDialogMessageString "Would you like to OverWrite it ?" 
+#endif // OverWriteDialogMessageString +#ifndef OverWriteDialogConfirmButtonString +#define OverWriteDialogConfirmButtonString "Confirm" +#endif // OverWriteDialogConfirmButtonString +#ifndef OverWriteDialogCancelButtonString +#define OverWriteDialogCancelButtonString "Cancel" +#endif // OverWriteDialogCancelButtonString +// see strftime functionin for customize +#ifndef DateTimeFormat +#define DateTimeFormat "%Y/%m/%d %H:%M" +#endif // DateTimeFormat +#ifdef USE_THUMBNAILS +#ifndef tableHeaderFileThumbnailsString +#define tableHeaderFileThumbnailsString "Thumbnails" +#endif // tableHeaderFileThumbnailsString +#ifndef DisplayMode_FilesList_ButtonString +#define DisplayMode_FilesList_ButtonString "FL" +#endif // DisplayMode_FilesList_ButtonString +#ifndef DisplayMode_FilesList_ButtonHelp +#define DisplayMode_FilesList_ButtonHelp "File List" +#endif // DisplayMode_FilesList_ButtonHelp +#ifndef DisplayMode_ThumbailsList_ButtonString +#define DisplayMode_ThumbailsList_ButtonString "TL" +#endif // DisplayMode_ThumbailsList_ButtonString +#ifndef DisplayMode_ThumbailsList_ButtonHelp +#define DisplayMode_ThumbailsList_ButtonHelp "Thumbnails List" +#endif // DisplayMode_ThumbailsList_ButtonHelp +#ifndef DisplayMode_ThumbailsGrid_ButtonString +#define DisplayMode_ThumbailsGrid_ButtonString "TG" +#endif // DisplayMode_ThumbailsGrid_ButtonString +#ifndef DisplayMode_ThumbailsGrid_ButtonHelp +#define DisplayMode_ThumbailsGrid_ButtonHelp "Thumbnails Grid" +#endif // DisplayMode_ThumbailsGrid_ButtonHelp +#ifndef DisplayMode_ThumbailsList_ImageHeight +#define DisplayMode_ThumbailsList_ImageHeight 32.0f +#endif // DisplayMode_ThumbailsList_ImageHeight +#ifndef IMGUI_RADIO_BUTTON + inline bool inRadioButton(const char* vLabel, bool vToggled) + { + bool pressed = false; + + if (vToggled) + { + ImVec4 bua = ImGui::GetStyleColorVec4(ImGuiCol_ButtonActive); + ImVec4 te = ImGui::GetStyleColorVec4(ImGuiCol_Text); + ImGui::PushStyleColor(ImGuiCol_Button, te); + 
ImGui::PushStyleColor(ImGuiCol_ButtonActive, te); + ImGui::PushStyleColor(ImGuiCol_ButtonHovered, te); + ImGui::PushStyleColor(ImGuiCol_Text, bua); + } + + pressed = IMGUI_BUTTON(vLabel); + + if (vToggled) + { + ImGui::PopStyleColor(4); //-V112 + } + + return pressed; + } +#define IMGUI_RADIO_BUTTON inRadioButton +#endif // IMGUI_RADIO_BUTTON +#endif // USE_THUMBNAILS +#ifdef USE_BOOKMARK +#ifndef defaultBookmarkPaneWith +#define defaultBookmarkPaneWith 150.0f +#endif // defaultBookmarkPaneWith +#ifndef bookmarksButtonString +#define bookmarksButtonString "Bookmark" +#endif // bookmarksButtonString +#ifndef bookmarksButtonHelpString +#define bookmarksButtonHelpString "Bookmark" +#endif // bookmarksButtonHelpString +#ifndef addBookmarkButtonString +#define addBookmarkButtonString "+" +#endif // addBookmarkButtonString +#ifndef removeBookmarkButtonString +#define removeBookmarkButtonString "-" +#endif // removeBookmarkButtonString +#ifndef IMGUI_TOGGLE_BUTTON + inline bool inToggleButton(const char* vLabel, bool* vToggled) + { + bool pressed = false; + + if (vToggled && *vToggled) + { + ImVec4 bua = ImGui::GetStyleColorVec4(ImGuiCol_ButtonActive); + //ImVec4 buh = ImGui::GetStyleColorVec4(ImGuiCol_ButtonHovered); + //ImVec4 bu = ImGui::GetStyleColorVec4(ImGuiCol_Button); + ImVec4 te = ImGui::GetStyleColorVec4(ImGuiCol_Text); + ImGui::PushStyleColor(ImGuiCol_Button, te); + ImGui::PushStyleColor(ImGuiCol_ButtonActive, te); + ImGui::PushStyleColor(ImGuiCol_ButtonHovered, te); + ImGui::PushStyleColor(ImGuiCol_Text, bua); + } + + pressed = IMGUI_BUTTON(vLabel); + + if (vToggled && *vToggled) + { + ImGui::PopStyleColor(4); //-V112 + } + + if (vToggled && pressed) + *vToggled = !*vToggled; + + return pressed; + } +#define IMGUI_TOGGLE_BUTTON inToggleButton +#endif // IMGUI_TOGGLE_BUTTON +#endif // USE_BOOKMARK + + ///////////////////////////////////////////////////////////////////////////////////// + //// INLINE FUNCTIONS 
/////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////// + +#ifndef USE_STD_FILESYSTEM + inline int inAlphaSort(const struct dirent** a, const struct dirent** b) + { + return strcoll((*a)->d_name, (*b)->d_name); + } +#endif + + ///////////////////////////////////////////////////////////////////////////////////// + //// FILE EXTENTIONS INFOS ////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////// + + IGFD::FileStyle::FileStyle() + : color(0, 0, 0, 0) + { + + } + + IGFD::FileStyle::FileStyle(const FileStyle& vStyle) + { + color = vStyle.color; + icon = vStyle.icon; + font = vStyle.font; + flags = vStyle.flags; + } + + IGFD::FileStyle::FileStyle(const ImVec4& vColor, const std::string& vIcon, ImFont* vFont) + : color(vColor), icon(vIcon), font(vFont) + { + + } + + ///////////////////////////////////////////////////////////////////////////////////// + //// FILE INFOS ///////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////// + + // https://github.com/ocornut/imgui/issues/1720 + bool IGFD::Utils::Splitter(bool split_vertically, float thickness, float* size1, float* size2, float min_size1, float min_size2, float splitter_long_axis_size) + { + using namespace ImGui; + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + ImGuiID id = window->GetID("##Splitter"); + ImRect bb; + bb.Min = window->DC.CursorPos + (split_vertically ? ImVec2(*size1, 0.0f) : ImVec2(0.0f, *size1)); + bb.Max = bb.Min + CalcItemSize(split_vertically ? ImVec2(thickness, splitter_long_axis_size) : ImVec2(splitter_long_axis_size, thickness), 0.0f, 0.0f); + return SplitterBehavior(bb, id, split_vertically ? 
ImGuiAxis_X : ImGuiAxis_Y, size1, size2, min_size1, min_size2, 1.0f); + } + +#ifdef WIN32 + bool IGFD::Utils::WReplaceString(std::wstring& str, const std::wstring& oldStr, const std::wstring& newStr) + { + bool found = false; + size_t pos = 0; + while ((pos = str.find(oldStr, pos)) != std::wstring::npos) + { + found = true; + str.replace(pos, oldStr.length(), newStr); + pos += newStr.length(); + } + return found; + } + + std::vector IGFD::Utils::WSplitStringToVector(const std::wstring& text, char delimiter, bool pushEmpty) + { + std::vector arr; + if (!text.empty()) + { + std::wstring::size_type start = 0; + std::wstring::size_type end = text.find(delimiter, start); + while (end != std::wstring::npos) + { + std::wstring token = text.substr(start, end - start); + if (!token.empty() || (token.empty() && pushEmpty)) //-V728 + arr.push_back(token); + start = end + 1; + end = text.find(delimiter, start); + } + std::wstring token = text.substr(start); + if (!token.empty() || (token.empty() && pushEmpty)) //-V728 + arr.push_back(token); + } + return arr; + } + + std::wstring IGFD::Utils::string_to_wstring(const std::string& str) + { + std::wstring ret; + if (!str.empty()) + { + size_t sz = std::mbstowcs(nullptr, str.c_str(), str.size()); + if (sz) + { + ret.resize(sz); + std::mbstowcs((wchar_t*)ret.data(), str.c_str(), sz); + } + } + return ret; + } + + std::string IGFD::Utils::wstring_to_string(const std::wstring& str) + { + std::string ret; + if (!str.empty()) + { + size_t sz = std::wcstombs(nullptr, str.c_str(), str.size()); + if (sz) + { + ret.resize(sz); + std::wcstombs((char*)ret.data(), str.c_str(), sz); + } + } + return ret; + } +#endif // WIN32 + + bool IGFD::Utils::ReplaceString(std::string& str, const std::string& oldStr, const std::string& newStr) + { + bool found = false; + size_t pos = 0; + while ((pos = str.find(oldStr, pos)) != std::string::npos) + { + found = true; + str.replace(pos, oldStr.length(), newStr); + pos += newStr.length(); + } + return found; 
+ } + + std::vector IGFD::Utils::SplitStringToVector(const std::string& text, char delimiter, bool pushEmpty) + { + std::vector arr; + if (!text.empty()) + { + size_t start = 0; + size_t end = text.find(delimiter, start); + while (end != std::string::npos) + { + auto token = text.substr(start, end - start); + if (!token.empty() || (token.empty() && pushEmpty)) //-V728 + arr.push_back(token); + start = end + 1; + end = text.find(delimiter, start); + } + auto token = text.substr(start); + if (!token.empty() || (token.empty() && pushEmpty)) //-V728 + arr.push_back(token); + } + return arr; + } + + std::vector IGFD::Utils::GetDrivesList() + { + std::vector res; + +#ifdef WIN32 + const DWORD mydrives = 2048; + char lpBuffer[2048]; +#define mini(a,b) (((a) < (b)) ? (a) : (b)) + const DWORD countChars = mini(GetLogicalDriveStringsA(mydrives, lpBuffer), 2047); +#undef mini + if (countChars > 0) + { + std::string var = std::string(lpBuffer, (size_t)countChars); + IGFD::Utils::ReplaceString(var, "\\", ""); + res = IGFD::Utils::SplitStringToVector(var, '\0', false); + } +#endif // WIN32 + + return res; + } + + bool IGFD::Utils::IsDirectoryExist(const std::string& name) + { + bool bExists = false; + + if (!name.empty()) + { +#ifdef USE_STD_FILESYSTEM + namespace fs = std::filesystem; +#ifdef WIN32 + std::wstring wname = IGFD::Utils::string_to_wstring(name.c_str()); + fs::path pathName = fs::path(wname); +#else + fs::path pathName = fs::path(name); +#endif + bExists = fs::is_directory(pathName); +#else + DIR* pDir = nullptr; + pDir = opendir(name.c_str()); + if (pDir != nullptr) + { + bExists = true; + (void)closedir(pDir); + } +#endif // USE_STD_FILESYSTEM + } + + return bExists; // this is not a directory! 
+ } + + bool IGFD::Utils::CreateDirectoryIfNotExist(const std::string& name) + { + bool res = false; + + if (!name.empty()) + { + if (!IsDirectoryExist(name)) + { +#ifdef WIN32 +#ifdef USE_STD_FILESYSTEM + namespace fs = std::filesystem; + std::wstring wname = IGFD::Utils::string_to_wstring(name.c_str()); + fs::path pathName = fs::path(wname); + res = fs::create_directory(pathName); +#else + std::wstring wname = IGFD::Utils::string_to_wstring(name); + if (CreateDirectoryW(wname.c_str(), nullptr)) + { + res = true; + } +#endif // USE_STD_FILESYSTEM +#elif defined(__EMSCRIPTEN__) + std::string str = std::string("FS.mkdir('") + name + "');"; + emscripten_run_script(str.c_str()); + res = true; +#elif defined(UNIX) + char buffer[PATH_MAX] = {}; + snprintf(buffer, PATH_MAX, "mkdir -p %s", name.c_str()); + const int dir_err = std::system(buffer); + if (dir_err != -1) + { + res = true; + } +#endif // WIN32 + if (!res) { + std::cout << "Error creating directory " << name << std::endl; + } + } + } + + return res; + } + +#ifdef USE_STD_FILESYSTEM + // https://github.com/aiekick/ImGuiFileDialog/issues/54 + IGFD::Utils::PathStruct IGFD::Utils::ParsePathFileName(const std::string& vPathFileName) + { + namespace fs = std::filesystem; + PathStruct res; + if (vPathFileName.empty()) + return res; + + auto fsPath = fs::path(vPathFileName); + + if (fs::is_regular_file(fsPath)) { + res.name = fsPath.string(); + res.path = fsPath.parent_path().string(); + res.isOk = true; + } + + return res; + } +#else + IGFD::Utils::PathStruct IGFD::Utils::ParsePathFileName(const std::string& vPathFileName) + { + PathStruct res; + + if (!vPathFileName.empty()) + { + std::string pfn = vPathFileName; + std::string separator(1u, PATH_SEP); + IGFD::Utils::ReplaceString(pfn, "\\", separator); + IGFD::Utils::ReplaceString(pfn, "/", separator); + + size_t lastSlash = pfn.find_last_of(separator); + if (lastSlash != std::string::npos) + { + res.name = pfn.substr(lastSlash + 1); + res.path = pfn.substr(0, 
lastSlash); + res.isOk = true; + } + + size_t lastPoint = pfn.find_last_of('.'); + if (lastPoint != std::string::npos) + { + if (!res.isOk) + { + res.name = pfn; + res.isOk = true; + } + res.ext = pfn.substr(lastPoint + 1); + IGFD::Utils::ReplaceString(res.name, "." + res.ext, ""); + } + + if (!res.isOk) + { + res.name = std::move(pfn); + res.isOk = true; + } + } + + return res; + } +#endif // USE_STD_FILESYSTEM + void IGFD::Utils::AppendToBuffer(char* vBuffer, size_t vBufferLen, const std::string& vStr) + { + std::string st = vStr; + size_t len = vBufferLen - 1u; + size_t slen = strlen(vBuffer); + + if (!st.empty() && st != "\n") + { + IGFD::Utils::ReplaceString(st, "\n", ""); + IGFD::Utils::ReplaceString(st, "\r", ""); + } + vBuffer[slen] = '\0'; + std::string str = std::string(vBuffer); + //if (!str.empty()) str += "\n"; + str += vStr; + if (len > str.size()) len = str.size(); +#ifdef MSVC + strncpy_s(vBuffer, vBufferLen, str.c_str(), len); +#else // MSVC + strncpy(vBuffer, str.c_str(), len); +#endif // MSVC + vBuffer[len] = '\0'; + } + + void IGFD::Utils::ResetBuffer(char* vBuffer) + { + vBuffer[0] = '\0'; + } + + void IGFD::Utils::SetBuffer(char* vBuffer, size_t vBufferLen, const std::string& vStr) + { + ResetBuffer(vBuffer); + AppendToBuffer(vBuffer, vBufferLen, vStr); + } + + ///////////////////////////////////////////////////////////////////////////////////// + //// FILE INFOS ///////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////// + + bool IGFD::FileInfos::IsTagFound(const std::string& vTag) const + { + if (!vTag.empty()) + { + if (fileNameExt_optimized == "..") return true; + + return + fileNameExt_optimized.find(vTag) != std::string::npos || // first try wihtout case and accents + fileNameExt.find(vTag) != std::string::npos; // second if searched with case and accents + } + + // if tag is empty => its a special case but all is found + return true; + } + 
+ ///////////////////////////////////////////////////////////////////////////////////// + //// SEARCH MANAGER ///////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////// + + void IGFD::SearchManager::Clear() + { + puSearchTag.clear(); + IGFD::Utils::ResetBuffer(puSearchBuffer); + } + + void IGFD::SearchManager::DrawSearchBar(FileDialogInternal& vFileDialogInternal) + { + // search field + if (IMGUI_BUTTON(resetButtonString "##BtnImGuiFileDialogSearchField")) + { + Clear(); + vFileDialogInternal.puFileManager.ApplyFilteringOnFileList(vFileDialogInternal); + } + if (ImGui::IsItemHovered()) + ImGui::SetTooltip(buttonResetSearchString); + ImGui::SameLine(); + ImGui::Text(searchString); + ImGui::SameLine(); + ImGui::PushItemWidth(ImGui::GetContentRegionAvail().x); + bool edited = ImGui::InputText("##InputImGuiFileDialogSearchField", puSearchBuffer, MAX_FILE_DIALOG_NAME_BUFFER); + if (ImGui::GetItemID() == ImGui::GetActiveID()) + puSearchInputIsActive = true; + ImGui::PopItemWidth(); + if (edited) + { + puSearchTag = puSearchBuffer; + vFileDialogInternal.puFileManager.ApplyFilteringOnFileList(vFileDialogInternal); + } + } + + ///////////////////////////////////////////////////////////////////////////////////// + //// FILTER INFOS /////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////// + + void IGFD::FilterManager::FilterInfos::clear() + { + filter.clear(); + collectionfilters.clear(); + } + + bool IGFD::FilterManager::FilterInfos::empty() const + { + return filter.empty() && collectionfilters.empty(); + } + + bool IGFD::FilterManager::FilterInfos::exist(const std::string& vFilter) const + { + return filter == vFilter || (collectionfilters.find(vFilter) != collectionfilters.end()); + } + + ///////////////////////////////////////////////////////////////////////////////////// + //// 
FILTER MANAGER ///////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////// + + void IGFD::FilterManager::ParseFilters(const char* vFilters) + { + prParsedFilters.clear(); + + if (vFilters) + puDLGFilters = vFilters; // file mode + else + puDLGFilters.clear(); // directory mode + + if (!puDLGFilters.empty()) + { + // ".*,.cpp,.h,.hpp" + // "Source files{.cpp,.h,.hpp},Image files{.png,.gif,.jpg,.jpeg},.md" + + bool currentFilterFound = false; + + size_t nan = std::string::npos; + size_t p = 0, lp = 0; + while ((p = puDLGFilters.find_first_of("{,", p)) != nan) + { + FilterInfos infos; + + if (puDLGFilters[p] == '{') // { + { + infos.filter = puDLGFilters.substr(lp, p - lp); + p++; + lp = puDLGFilters.find('}', p); + if (lp != nan) + { + std::string fs = puDLGFilters.substr(p, lp - p); + auto arr = IGFD::Utils::SplitStringToVector(fs, ',', false); + for (auto a : arr) + { + infos.collectionfilters.emplace(a); + } + } + p = lp + 1; + } + else // , + { + infos.filter = puDLGFilters.substr(lp, p - lp); + p++; + } + + if (!currentFilterFound && prSelectedFilter.filter == infos.filter) + { + currentFilterFound = true; + prSelectedFilter = infos; + } + + lp = p; + if (!infos.empty()) + prParsedFilters.emplace_back(infos); + } + + std::string token = puDLGFilters.substr(lp); + if (!token.empty()) + { + FilterInfos infos; + infos.filter = std::move(token); + prParsedFilters.emplace_back(infos); + } + + if (!currentFilterFound) + if (!prParsedFilters.empty()) + prSelectedFilter = *prParsedFilters.begin(); + } + } + + void IGFD::FilterManager::SetSelectedFilterWithExt(const std::string& vFilter) + { + if (!prParsedFilters.empty()) + { + if (!vFilter.empty()) + { + // std::map + for (const auto& infos : prParsedFilters) + { + if (vFilter == infos.filter) + { + prSelectedFilter = infos; + } + else + { + // maybe this ext is in an extention so we will + // explore the collections is they 
are existing + for (const auto& filter : infos.collectionfilters) + { + if (vFilter == filter) + { + prSelectedFilter = infos; + } + } + } + } + } + + if (prSelectedFilter.empty()) + prSelectedFilter = *prParsedFilters.begin(); + } + } + + void IGFD::FilterManager::SetFileStyle(const IGFD_FileStyleFlags& vFlags, const char* vCriteria, const FileStyle& vInfos) + { + std::string _criteria; + if (vCriteria) + _criteria = std::string(vCriteria); + prFilesStyle[vFlags][_criteria] = std::make_shared(vInfos); + prFilesStyle[vFlags][_criteria]->flags = vFlags; + } + + // will be called internally + // will not been exposed to IGFD API + bool IGFD::FilterManager::prFillFileStyle(std::shared_ptr vFileInfos) const + { + if (vFileInfos.use_count() && !prFilesStyle.empty()) + { + for (const auto& _flag : prFilesStyle) + { + for (const auto& _file : _flag.second) + { + if (_flag.first & IGFD_FileStyleByTypeDir && vFileInfos->fileType == 'd') + { + if (_file.first.empty()) // for all dirs + { + vFileInfos->fileStyle = _file.second; + } + else if (_file.first == vFileInfos->fileNameExt) // for dirs who are equal to style criteria + { + vFileInfos->fileStyle = _file.second; + } + } + else if (_flag.first & IGFD_FileStyleByTypeFile && vFileInfos->fileType == 'f') + { + if (_file.first.empty()) // for all files + { + vFileInfos->fileStyle = _file.second; + } + else if (_file.first == vFileInfos->fileNameExt) // for files who are equal to style criteria + { + vFileInfos->fileStyle = _file.second; + } + } + else if (_flag.first & IGFD_FileStyleByTypeLink && vFileInfos->fileType == 'l') + { + if (_file.first.empty()) // for all links + { + vFileInfos->fileStyle = _file.second; + } + else if (_file.first == vFileInfos->fileNameExt) // for links who are equal to style criteria + { + vFileInfos->fileStyle = _file.second; + } + } + + if (_flag.first & IGFD_FileStyleByExtention) + { + if (_file.first == vFileInfos->fileExt) + { + vFileInfos->fileStyle = _file.second; + } + + // can make 
sense for some dirs like the hidden by ex ".git" + if (_flag.first & IGFD_FileStyleByTypeDir && vFileInfos->fileType == 'd') + { + if (_file.first == vFileInfos->fileExt) + { + vFileInfos->fileStyle = _file.second; + } + } + else if (_flag.first & IGFD_FileStyleByTypeFile && vFileInfos->fileType == 'f') + { + if (_file.first == vFileInfos->fileExt) + { + vFileInfos->fileStyle = _file.second; + } + } + else if (_flag.first & IGFD_FileStyleByTypeLink && vFileInfos->fileType == 'l') + { + if (_file.first == vFileInfos->fileExt) + { + vFileInfos->fileStyle = _file.second; + } + } + } + if (_flag.first & IGFD_FileStyleByFullName) + { + if (_file.first == vFileInfos->fileNameExt) + { + vFileInfos->fileStyle = _file.second; + } + + if (_flag.first & IGFD_FileStyleByTypeDir && vFileInfos->fileType == 'd') + { + if (_file.first == vFileInfos->fileNameExt) + { + vFileInfos->fileStyle = _file.second; + } + } + else if (_flag.first & IGFD_FileStyleByTypeFile && vFileInfos->fileType == 'f') + { + if (_file.first == vFileInfos->fileNameExt) + { + vFileInfos->fileStyle = _file.second; + } + } + else if (_flag.first & IGFD_FileStyleByTypeLink && vFileInfos->fileType == 'l') + { + if (_file.first == vFileInfos->fileNameExt) + { + vFileInfos->fileStyle = _file.second; + } + } + } + if (_flag.first & IGFD_FileStyleByContainedInFullName) + { + if (vFileInfos->fileNameExt.find(_file.first) != std::string::npos) + { + vFileInfos->fileStyle = _file.second; + } + + if (_flag.first & IGFD_FileStyleByTypeDir && vFileInfos->fileType == 'd') + { + if (vFileInfos->fileNameExt.find(_file.first) != std::string::npos) + { + vFileInfos->fileStyle = _file.second; + } + } + else if (_flag.first & IGFD_FileStyleByTypeFile && vFileInfos->fileType == 'f') + { + if (vFileInfos->fileNameExt.find(_file.first) != std::string::npos) + { + vFileInfos->fileStyle = _file.second; + } + } + else if (_flag.first & IGFD_FileStyleByTypeLink && vFileInfos->fileType == 'l') + { + if 
(vFileInfos->fileNameExt.find(_file.first) != std::string::npos) + { + vFileInfos->fileStyle = _file.second; + } + } + } + + if (vFileInfos->fileStyle.use_count()) + return true; + } + } + } + + return false; + } + + void IGFD::FilterManager::SetFileStyle(const IGFD_FileStyleFlags& vFlags, const char* vCriteria, const ImVec4& vColor, const std::string& vIcon, ImFont* vFont) + { + std::string _criteria; + if (vCriteria) + _criteria = std::string(vCriteria); + prFilesStyle[vFlags][_criteria] = std::make_shared(vColor, vIcon, vFont); + prFilesStyle[vFlags][_criteria]->flags = vFlags; + } + + // todo : to refactor this fucking function + bool IGFD::FilterManager::GetFileStyle(const IGFD_FileStyleFlags& vFlags, const std::string& vCriteria, ImVec4* vOutColor, std::string* vOutIcon, ImFont **vOutFont) + { + if (vOutColor) + { + if (!prFilesStyle.empty()) + { + if (prFilesStyle.find(vFlags) != prFilesStyle.end()) // found + { + if (vFlags & IGFD_FileStyleByContainedInFullName) + { + // search for vCriteria who are containing the criteria + for (const auto& _file : prFilesStyle.at(vFlags)) + { + if (vCriteria.find(_file.first) != std::string::npos) + { + if (_file.second.use_count()) + { + *vOutColor = _file.second->color; + if (vOutIcon) + *vOutIcon = _file.second->icon; + if (vOutFont) + *vOutFont = _file.second->font; + return true; + } + } + } + } + else + { + if (prFilesStyle.at(vFlags).find(vCriteria) != prFilesStyle.at(vFlags).end()) // found + { + *vOutColor = prFilesStyle[vFlags][vCriteria]->color; + if (vOutIcon) + *vOutIcon = prFilesStyle[vFlags][vCriteria]->icon; + if (vOutFont) + *vOutFont = prFilesStyle[vFlags][vCriteria]->font; + return true; + } + } + } + else + { + // search for flag composition + for (const auto& _flag : prFilesStyle) + { + if (_flag.first & vFlags) + { + if (_flag.first & IGFD_FileStyleByContainedInFullName) + { + // search for vCriteria who are containing the criteria + for (const auto& _file : prFilesStyle.at(_flag.first)) + { + if 
(vCriteria.find(_file.first) != std::string::npos) + { + if (_file.second.use_count()) + { + *vOutColor = _file.second->color; + if (vOutIcon) + *vOutIcon = _file.second->icon; + if (vOutFont) + *vOutFont = _file.second->font; + return true; + } + } + } + } + else + { + if (prFilesStyle.at(_flag.first).find(vCriteria) != prFilesStyle.at(_flag.first).end()) // found + { + *vOutColor = prFilesStyle[_flag.first][vCriteria]->color; + if (vOutIcon) + *vOutIcon = prFilesStyle[_flag.first][vCriteria]->icon; + if (vOutFont) + *vOutFont = prFilesStyle[_flag.first][vCriteria]->font; + return true; + } + } + } + } + } + } + } + return false; + } + + void IGFD::FilterManager::ClearFilesStyle() + { + prFilesStyle.clear(); + } + + bool IGFD::FilterManager::IsCoveredByFilters(const std::string& vTag) const + { + if (!puDLGFilters.empty() && !prSelectedFilter.empty()) + { + // check if current file extention is covered by current filter + // we do that here, for avoid doing that during filelist display + // for better fps + if (prSelectedFilter.exist(vTag) || prSelectedFilter.filter == ".*") + { + return true; + } + } + + return false; + } + + bool IGFD::FilterManager::DrawFilterComboBox(FileDialogInternal& vFileDialogInternal) + { + // combobox of filters + if (!puDLGFilters.empty()) + { + ImGui::SameLine(); + + bool needToApllyNewFilter = false; + + ImGui::PushItemWidth(FILTER_COMBO_WIDTH); + if (ImGui::BeginCombo("##Filters", prSelectedFilter.filter.c_str(), ImGuiComboFlags_None)) + { + intptr_t i = 0; + for (const auto& filter : prParsedFilters) + { + const bool item_selected = (filter.filter == prSelectedFilter.filter); + ImGui::PushID((void*)(intptr_t)i++); + if (ImGui::Selectable(filter.filter.c_str(), item_selected)) + { + prSelectedFilter = filter; + needToApllyNewFilter = true; + } + ImGui::PopID(); + } + + ImGui::EndCombo(); + } + ImGui::PopItemWidth(); + + if (needToApllyNewFilter) + { + vFileDialogInternal.puFileManager.OpenCurrentPath(vFileDialogInternal); + } + + 
return needToApllyNewFilter; + } + + return false; + } + + IGFD::FilterManager::FilterInfos IGFD::FilterManager::GetSelectedFilter() + { + return prSelectedFilter; + } + + std::string IGFD::FilterManager::ReplaceExtentionWithCurrentFilter(const std::string& vFile) const + { + auto result = vFile; + + if (!result.empty()) + { + // if not a collection we can replace the filter by the extention we want + if (prSelectedFilter.collectionfilters.empty()) + { + size_t lastPoint = vFile.find_last_of('.'); + if (lastPoint != std::string::npos) + { + result = result.substr(0, lastPoint); + } + + result += prSelectedFilter.filter; + } + } + + return result; + } + + void IGFD::FilterManager::SetDefaultFilterIfNotDefined() + { + if (prSelectedFilter.empty() && // no filter selected + !prParsedFilters.empty()) // filter exist + prSelectedFilter = *prParsedFilters.begin(); // we take the first filter + } + + ///////////////////////////////////////////////////////////////////////////////////// + //// FILE MANAGER /////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////// + + IGFD::FileManager::FileManager() + { + puFsRoot = std::string(1u, PATH_SEP); + } + + void IGFD::FileManager::OpenCurrentPath(const FileDialogInternal& vFileDialogInternal) + { + puShowDrives = false; + ClearComposer(); + ClearFileLists(); + if (puDLGDirectoryMode) // directory mode + SetDefaultFileName("."); + else + SetDefaultFileName(puDLGDefaultFileName); + ScanDir(vFileDialogInternal, GetCurrentPath()); + } + + void IGFD::FileManager::SortFields(const FileDialogInternal& vFileDialogInternal, const SortingFieldEnum& vSortingField, const bool& vCanChangeOrder) + { + if (vSortingField != SortingFieldEnum::FIELD_NONE) + { + puHeaderFileName = tableHeaderFileNameString; + puHeaderFileType = tableHeaderFileTypeString; + puHeaderFileSize = tableHeaderFileSizeString; + puHeaderFileDate = tableHeaderFileDateString; 
+#ifdef USE_THUMBNAILS + puHeaderFileThumbnails = tableHeaderFileThumbnailsString; +#endif // #ifdef USE_THUMBNAILS + } + + if (vSortingField == SortingFieldEnum::FIELD_FILENAME) + { + if (vCanChangeOrder && puSortingField == vSortingField) + puSortingDirection[0] = !puSortingDirection[0]; + + if (puSortingDirection[0]) + { +#ifdef USE_CUSTOM_SORTING_ICON + puHeaderFileName = tableHeaderDescendingIcon + puHeaderFileName; +#endif // USE_CUSTOM_SORTING_ICON + std::sort(prFileList.begin(), prFileList.end(), + [](const std::shared_ptr& a, const std::shared_ptr& b) -> bool + { + if (!a.use_count() || !b.use_count()) + return false; + + // this code fail in c:\\Users with the link "All users". got a invalid comparator + /* + // use code from https://github.com/jackm97/ImGuiFileDialog/commit/bf40515f5a1de3043e60562dc1a494ee7ecd3571 + // strict ordering for file/directory types beginning in '.' + // common on Linux platforms + if (a->fileNameExt[0] == '.' && b->fileNameExt[0] != '.') + return false; + if (a->fileNameExt[0] != '.' && b->fileNameExt[0] == '.') + return true; + if (a->fileNameExt[0] == '.' && b->fileNameExt[0] == '.') + { + return (stricmp(a->fileNameExt.c_str(), b->fileNameExt.c_str()) < 0); // sort in insensitive case + } + */ + if (a->fileType != b->fileType) return (a->fileType == 'd'); // directory in first + return (stricmp(a->fileNameExt.c_str(), b->fileNameExt.c_str()) < 0); // sort in insensitive case + }); + } + else + { +#ifdef USE_CUSTOM_SORTING_ICON + puHeaderFileName = tableHeaderAscendingIcon + puHeaderFileName; +#endif // USE_CUSTOM_SORTING_ICON + std::sort(prFileList.begin(), prFileList.end(), + [](const std::shared_ptr& a, const std::shared_ptr& b) -> bool + { + if (!a.use_count() || !b.use_count()) + return false; + + // this code fail in c:\\Users with the link "All users". 
got a invalid comparator + /* + // use code from https://github.com/jackm97/ImGuiFileDialog/commit/bf40515f5a1de3043e60562dc1a494ee7ecd3571 + // strict ordering for file/directory types beginning in '.' + // common on Linux platforms + if (a->fileNameExt[0] == '.' && b->fileNameExt[0] != '.') + return false; + if (a->fileNameExt[0] != '.' && b->fileNameExt[0] == '.') + return true; + if (a->fileNameExt[0] == '.' && b->fileNameExt[0] == '.') + { + return (stricmp(a->fileNameExt.c_str(), b->fileNameExt.c_str()) > 0); // sort in insensitive case + } + */ + return (stricmp(a->fileNameExt.c_str(), b->fileNameExt.c_str()) > 0); // sort in insensitive case + }); + } + } + else if (vSortingField == SortingFieldEnum::FIELD_TYPE) + { + if (vCanChangeOrder && puSortingField == vSortingField) + puSortingDirection[1] = !puSortingDirection[1]; + + if (puSortingDirection[1]) + { +#ifdef USE_CUSTOM_SORTING_ICON + puHeaderFileType = tableHeaderDescendingIcon + puHeaderFileType; +#endif // USE_CUSTOM_SORTING_ICON + std::sort(prFileList.begin(), prFileList.end(), + [](const std::shared_ptr& a, const std::shared_ptr& b) -> bool + { + if (!a.use_count() || !b.use_count()) + return false; + + if (a->fileType != b->fileType) return (a->fileType == 'd'); // directory in first + return (a->fileExt < b->fileExt); // else + }); + } + else + { +#ifdef USE_CUSTOM_SORTING_ICON + puHeaderFileType = tableHeaderAscendingIcon + puHeaderFileType; +#endif // USE_CUSTOM_SORTING_ICON + std::sort(prFileList.begin(), prFileList.end(), + [](const std::shared_ptr& a, const std::shared_ptr& b) -> bool + { + if (!a.use_count() || !b.use_count()) + return false; + + if (a->fileType != b->fileType) return (a->fileType != 'd'); // directory in last + return (a->fileExt > b->fileExt); // else + }); + } + } + else if (vSortingField == SortingFieldEnum::FIELD_SIZE) + { + if (vCanChangeOrder && puSortingField == vSortingField) + puSortingDirection[2] = !puSortingDirection[2]; + + if (puSortingDirection[2]) + { 
+#ifdef USE_CUSTOM_SORTING_ICON + puHeaderFileSize = tableHeaderDescendingIcon + puHeaderFileSize; +#endif // USE_CUSTOM_SORTING_ICON + std::sort(prFileList.begin(), prFileList.end(), + [](const std::shared_ptr& a, const std::shared_ptr& b) -> bool + { + if (!a.use_count() || !b.use_count()) + return false; + + if (a->fileType != b->fileType) return (a->fileType == 'd'); // directory in first + return (a->fileSize < b->fileSize); // else + }); + } + else + { +#ifdef USE_CUSTOM_SORTING_ICON + puHeaderFileSize = tableHeaderAscendingIcon + puHeaderFileSize; +#endif // USE_CUSTOM_SORTING_ICON + std::sort(prFileList.begin(), prFileList.end(), + [](const std::shared_ptr& a, const std::shared_ptr& b) -> bool + { + if (!a.use_count() || !b.use_count()) + return false; + + if (a->fileType != b->fileType) return (a->fileType != 'd'); // directory in last + return (a->fileSize > b->fileSize); // else + }); + } + } + else if (vSortingField == SortingFieldEnum::FIELD_DATE) + { + if (vCanChangeOrder && puSortingField == vSortingField) + puSortingDirection[3] = !puSortingDirection[3]; + + if (puSortingDirection[3]) + { +#ifdef USE_CUSTOM_SORTING_ICON + puHeaderFileDate = tableHeaderDescendingIcon + puHeaderFileDate; +#endif // USE_CUSTOM_SORTING_ICON + std::sort(prFileList.begin(), prFileList.end(), + [](const std::shared_ptr& a, const std::shared_ptr& b) -> bool + { + if (!a.use_count() || !b.use_count()) + return false; + + if (a->fileType != b->fileType) return (a->fileType == 'd'); // directory in first + return (a->fileModifDate < b->fileModifDate); // else + }); + } + else + { +#ifdef USE_CUSTOM_SORTING_ICON + puHeaderFileDate = tableHeaderAscendingIcon + puHeaderFileDate; +#endif // USE_CUSTOM_SORTING_ICON + std::sort(prFileList.begin(), prFileList.end(), + [](const std::shared_ptr& a, const std::shared_ptr& b) -> bool + { + if (!a.use_count() || !b.use_count()) + return false; + + if (a->fileType != b->fileType) return (a->fileType != 'd'); // directory in last + return 
(a->fileModifDate > b->fileModifDate); // else + }); + } + } +#ifdef USE_THUMBNAILS + else if (vSortingField == SortingFieldEnum::FIELD_THUMBNAILS) + { + if (vCanChangeOrder && puSortingField == vSortingField) + puSortingDirection[4] = !puSortingDirection[4]; + + // we will compare thumbnails by : + // 1) width + // 2) height + + if (puSortingDirection[4]) + { +#ifdef USE_CUSTOM_SORTING_ICON + puHeaderFileThumbnails = tableHeaderDescendingIcon + puHeaderFileThumbnails; +#endif // USE_CUSTOM_SORTING_ICON + std::sort(prFileList.begin(), prFileList.end(), + [](const std::shared_ptr& a, const std::shared_ptr& b) -> bool + { + if (!a.use_count() || !b.use_count()) + return false; + + if (a->fileType != b->fileType) return (a->fileType == 'd'); // directory in first + if (a->thumbnailInfo.textureWidth == b->thumbnailInfo.textureWidth) + return (a->thumbnailInfo.textureHeight < b->thumbnailInfo.textureHeight); + return (a->thumbnailInfo.textureWidth < b->thumbnailInfo.textureWidth); + }); + } + + else + { +#ifdef USE_CUSTOM_SORTING_ICON + puHeaderFileThumbnails = tableHeaderAscendingIcon + puHeaderFileThumbnails; +#endif // USE_CUSTOM_SORTING_ICON + std::sort(prFileList.begin(), prFileList.end(), + [](const std::shared_ptr& a, const std::shared_ptr& b) -> bool + { + if (!a.use_count() || !b.use_count()) + return false; + + if (a->fileType != b->fileType) return (a->fileType != 'd'); // directory in last + if (a->thumbnailInfo.textureWidth == b->thumbnailInfo.textureWidth) + return (a->thumbnailInfo.textureHeight > b->thumbnailInfo.textureHeight); + return (a->thumbnailInfo.textureWidth > b->thumbnailInfo.textureWidth); + }); + } + } +#endif // USE_THUMBNAILS + + if (vSortingField != SortingFieldEnum::FIELD_NONE) + { + puSortingField = vSortingField; + } + + ApplyFilteringOnFileList(vFileDialogInternal); + } + + void IGFD::FileManager::ClearFileLists() + { + prFilteredFileList.clear(); + prFileList.clear(); + } + + std::string 
IGFD::FileManager::prOptimizeFilenameForSearchOperations(const std::string& vFileNameExt) + { + auto fileNameExt = vFileNameExt; + // convert to lower case + for (char& c : fileNameExt) + c = (char)std::tolower(c); + return fileNameExt; + } + + void IGFD::FileManager::AddFile(const FileDialogInternal& vFileDialogInternal, const std::string& vPath, const std::string& vFileName, const char& vFileType) + { + auto infos = std::make_shared(); + + infos->filePath = vPath; + infos->fileNameExt = vFileName; + infos->fileNameExt_optimized = prOptimizeFilenameForSearchOperations(infos->fileNameExt); + infos->fileType = vFileType; + + if (infos->fileNameExt.empty() || (infos->fileNameExt == "." && !vFileDialogInternal.puFilterManager.puDLGFilters.empty())) return; // filename empty or filename is the current dir '.' //-V807 + if (infos->fileNameExt != ".." && (vFileDialogInternal.puDLGflags & ImGuiFileDialogFlags_DontShowHiddenFiles) && infos->fileNameExt[0] == '.') // dont show hidden files + if (!vFileDialogInternal.puFilterManager.puDLGFilters.empty() || (vFileDialogInternal.puFilterManager.puDLGFilters.empty() && infos->fileNameExt != ".")) // except "." 
if in directory mode //-V728 + return; + + if (infos->fileType == 'f' || + infos->fileType == 'l') // link can have the same extention of a file + { + size_t lpt = infos->fileNameExt.find_last_of('.'); + if (lpt != std::string::npos) + { + infos->fileExt = infos->fileNameExt.substr(lpt); + } + + if (!vFileDialogInternal.puFilterManager.IsCoveredByFilters(infos->fileExt)) + { + return; + } + } + + vFileDialogInternal.puFilterManager.prFillFileStyle(infos); + + prCompleteFileInfos(infos); + prFileList.push_back(infos); + } + + void IGFD::FileManager::ScanDir(const FileDialogInternal& vFileDialogInternal, const std::string& vPath) + { + std::string path = vPath; + + if (prCurrentPathDecomposition.empty()) + { + SetCurrentDir(path); + } + + if (!prCurrentPathDecomposition.empty()) + { +#ifdef WIN32 + if (path == puFsRoot) + path += std::string(1u, PATH_SEP); +#endif // WIN32 + + ClearFileLists(); + +#ifdef USE_STD_FILESYSTEM + //const auto wpath = IGFD::Utils::WGetString(path.c_str()); + const std::filesystem::path fspath(path); + const auto dir_iter = std::filesystem::directory_iterator(fspath); + AddFile(vFileDialogInternal, path, "..", 'd'); + for (const auto& file : dir_iter) + { + char fileType = 0; + if (file.is_symlink()) + fileType = 'l'; + else if (file.is_directory()) + fileType = 'd'; + else + fileType = 'f'; + auto fileNameExt = file.path().filename().string(); + AddFile(vFileDialogInternal, path, fileNameExt, fileType); + } +#else // dirent + struct dirent** files = nullptr; + size_t n = scandir(path.c_str(), &files, nullptr, inAlphaSort); + if (n) + { + size_t i; + + for (i = 0; i < n; i++) + { + struct dirent* ent = files[i]; + + char fileType = 0; + switch (ent->d_type) + { + case DT_REG: + fileType = 'f'; break; + case DT_DIR: + fileType = 'd'; break; + case DT_LNK: + fileType = 'l'; break; + } + + auto fileNameExt = ent->d_name; + + AddFile(vFileDialogInternal, path, fileNameExt, fileType); + } + + for (i = 0; i < n; i++) + { + free(files[i]); + } + + 
free(files); + } +#endif // USE_STD_FILESYSTEM + + SortFields(vFileDialogInternal, puSortingField, false); + } + } + + bool IGFD::FileManager::GetDrives() + { + auto drives = IGFD::Utils::GetDrivesList(); + if (!drives.empty()) + { + prCurrentPath.clear(); + prCurrentPathDecomposition.clear(); + ClearFileLists(); + for (auto& drive : drives) + { + auto info = std::make_shared(); + info->fileNameExt = drive; + info->fileNameExt_optimized = prOptimizeFilenameForSearchOperations(drive); + info->fileType = 'd'; + + if (!info->fileNameExt.empty()) + { + prFileList.push_back(info); + } + } + puShowDrives = true; + return true; + } + return false; + } + + bool IGFD::FileManager::IsComposerEmpty() + { + return prCurrentPathDecomposition.empty(); + } + + size_t IGFD::FileManager::GetComposerSize() + { + return prCurrentPathDecomposition.size(); + } + + bool IGFD::FileManager::IsFileListEmpty() + { + return prFileList.empty(); + } + + size_t IGFD::FileManager::GetFullFileListSize() + { + return prFileList.size(); + } + + std::shared_ptr IGFD::FileManager::GetFullFileAt(size_t vIdx) + { + if (vIdx < prFileList.size()) + return prFileList[vIdx]; + return nullptr; + } + + bool IGFD::FileManager::IsFilteredListEmpty() + { + return prFilteredFileList.empty(); + } + + size_t IGFD::FileManager::GetFilteredListSize() + { + return prFilteredFileList.size(); + } + + std::shared_ptr IGFD::FileManager::GetFilteredFileAt(size_t vIdx) + { + if (vIdx < prFilteredFileList.size()) + return prFilteredFileList[vIdx]; + return nullptr; + } + + bool IGFD::FileManager::IsFileNameSelected(const std::string& vFileName) + { + return prSelectedFileNames.find(vFileName) != prSelectedFileNames.end(); + } + + std::string IGFD::FileManager::GetBack() + { + return prCurrentPathDecomposition.back(); + } + + void IGFD::FileManager::ClearComposer() + { + prCurrentPathDecomposition.clear(); + } + + void IGFD::FileManager::ClearAll() + { + ClearComposer(); + ClearFileLists(); + } + + void 
IGFD::FileManager::ApplyFilteringOnFileList(const FileDialogInternal& vFileDialogInternal) + { + prFilteredFileList.clear(); + for (const auto& file : prFileList) + { + if (!file.use_count()) + continue; + bool show = true; + if (!file->IsTagFound(vFileDialogInternal.puSearchManager.puSearchTag)) // if search tag + show = false; + if (puDLGDirectoryMode && file->fileType != 'd') // directory mode + show = false; + if (show) + prFilteredFileList.push_back(file); + } + } + + std::string IGFD::FileManager::prRoundNumber(double vvalue, int n) + { + std::stringstream tmp; + tmp << std::setprecision(n) << std::fixed << vvalue; + return tmp.str(); + } + + std::string IGFD::FileManager::prFormatFileSize(size_t vByteSize) + { + if (vByteSize != 0) + { + static double lo = 1024.0; + static double ko = 1024.0 * 1024.0; + static double mo = 1024.0 * 1024.0 * 1024.0; + + auto v = (double)vByteSize; + + if (v < lo) + return prRoundNumber(v, 0) + " o"; // octet + else if (v < ko) + return prRoundNumber(v / lo, 2) + " Ko"; // ko + else if (v < mo) + return prRoundNumber(v / ko, 2) + " Mo"; // Mo + else + return prRoundNumber(v / mo, 2) + " Go"; // Go + } + + return ""; + } + + void IGFD::FileManager::prCompleteFileInfos(const std::shared_ptr& vInfos) + { + if (!vInfos.use_count()) + return; + + if (vInfos->fileNameExt != "." 
&& + vInfos->fileNameExt != "..") + { + // _stat struct : + //dev_t st_dev; /* ID of device containing file */ + //ino_t st_ino; /* inode number */ + //mode_t st_mode; /* protection */ + //nlink_t st_nlink; /* number of hard links */ + //uid_t st_uid; /* user ID of owner */ + //gid_t st_gid; /* group ID of owner */ + //dev_t st_rdev; /* device ID (if special file) */ + //off_t st_size; /* total size, in bytes */ + //blksize_t st_blksize; /* blocksize for file system I/O */ + //blkcnt_t st_blocks; /* number of 512B blocks allocated */ + //time_t st_atime; /* time of last access - not sure out of ntfs */ + //time_t st_mtime; /* time of last modification - not sure out of ntfs */ + //time_t st_ctime; /* time of last status change - not sure out of ntfs */ + + std::string fpn; + + if (vInfos->fileType == 'f' || vInfos->fileType == 'l' || vInfos->fileType == 'd') // file + fpn = vInfos->filePath + std::string(1u, PATH_SEP) + vInfos->fileNameExt; + + struct stat statInfos = {}; + char timebuf[100]; + int result = stat(fpn.c_str(), &statInfos); + if (!result) + { + if (vInfos->fileType != 'd') + { + vInfos->fileSize = (size_t)statInfos.st_size; + vInfos->formatedFileSize = prFormatFileSize(vInfos->fileSize); + } + + size_t len = 0; +#ifdef MSVC + struct tm _tm; + errno_t err = localtime_s(&_tm, &statInfos.st_mtime); + if (!err) len = strftime(timebuf, 99, DateTimeFormat, &_tm); +#else // MSVC + struct tm* _tm = localtime(&statInfos.st_mtime); + if (_tm) len = strftime(timebuf, 99, DateTimeFormat, _tm); +#endif // MSVC + if (len) + { + vInfos->fileModifDate = std::string(timebuf, len); + } + } + } + } + + void IGFD::FileManager::prRemoveFileNameInSelection(const std::string& vFileName) + { + prSelectedFileNames.erase(vFileName); + + if (prSelectedFileNames.size() == 1) + { + snprintf(puFileNameBuffer, MAX_FILE_DIALOG_NAME_BUFFER, "%s", vFileName.c_str()); + } + else + { + snprintf(puFileNameBuffer, MAX_FILE_DIALOG_NAME_BUFFER, "%zu files Selected", 
prSelectedFileNames.size()); + } + } + + void IGFD::FileManager::prAddFileNameInSelection(const std::string& vFileName, bool vSetLastSelectionFileName) + { + prSelectedFileNames.emplace(vFileName); + + if (prSelectedFileNames.size() == 1) + { + snprintf(puFileNameBuffer, MAX_FILE_DIALOG_NAME_BUFFER, "%s", vFileName.c_str()); + } + else + { + snprintf(puFileNameBuffer, MAX_FILE_DIALOG_NAME_BUFFER, "%zu files Selected", prSelectedFileNames.size()); + } + + if (vSetLastSelectionFileName) + prLastSelectedFileName = vFileName; + } + + void IGFD::FileManager::SetCurrentDir(const std::string& vPath) + { + std::string path = vPath; +#ifdef WIN32 + if (puFsRoot == path) + path += std::string(1u, PATH_SEP); +#endif // WIN32 + +#ifdef USE_STD_FILESYSTEM + namespace fs = std::filesystem; + bool dir_opened = fs::is_directory(vPath); + if (!dir_opened) + { + path = "."; + dir_opened = fs::is_directory(vPath); + } + if (dir_opened) +#else + DIR* dir = opendir(path.c_str()); + if (dir == nullptr) + { + path = "."; + dir = opendir(path.c_str()); + } + + if (dir != nullptr) +#endif // USE_STD_FILESYSTEM + { +#ifdef WIN32 + DWORD numchar = 0; + // numchar = GetFullPathNameA(path.c_str(), PATH_MAX, real_path, nullptr); + std::wstring wpath = IGFD::Utils::string_to_wstring(path); + numchar = GetFullPathNameW(wpath.c_str(), 0, nullptr, nullptr); + std::wstring fpath(numchar, 0); + GetFullPathNameW(wpath.c_str(), numchar, (wchar_t*)fpath.data(), nullptr); + std::string real_path = IGFD::Utils::wstring_to_string(fpath); + if (real_path.back() == '\0') // for fix issue we can have with std::string concatenation.. 
if there is a \0 at end + real_path = real_path.substr(0, real_path.size() - 1U); + if (!real_path.empty()) +#elif defined(UNIX) // UNIX is LINUX or APPLE + char real_path[PATH_MAX]; + char* numchar = realpath(path.c_str(), real_path); + if (numchar != nullptr) +#endif // WIN32 + { + prCurrentPath = std::move(real_path); + if (prCurrentPath[prCurrentPath.size() - 1] == PATH_SEP) + { + prCurrentPath = prCurrentPath.substr(0, prCurrentPath.size() - 1); + } + IGFD::Utils::SetBuffer(puInputPathBuffer, MAX_PATH_BUFFER_SIZE, prCurrentPath); + prCurrentPathDecomposition = IGFD::Utils::SplitStringToVector(prCurrentPath, PATH_SEP, false); +#ifdef UNIX // UNIX is LINUX or APPLE + prCurrentPathDecomposition.insert(prCurrentPathDecomposition.begin(), std::string(1u, PATH_SEP)); +#endif // UNIX + if (!prCurrentPathDecomposition.empty()) + { +#ifdef WIN32 + puFsRoot = prCurrentPathDecomposition[0]; +#endif // WIN32 + } + } +#ifndef USE_STD_FILESYSTEM + closedir(dir); +#endif + } + } + + bool IGFD::FileManager::CreateDir(const std::string& vPath) + { + bool res = false; + + if (!vPath.empty()) + { + std::string path = prCurrentPath + std::string(1u, PATH_SEP) + vPath; + + res = IGFD::Utils::CreateDirectoryIfNotExist(path); + } + + return res; + } + + void IGFD::FileManager::ComposeNewPath(std::vector::iterator vIter) + { + std::string res; + + while (true) + { + if (!res.empty()) + { +#ifdef WIN32 + res = *vIter + std::string(1u, PATH_SEP) + res; +#elif defined(UNIX) // UNIX is LINUX or APPLE + if (*vIter == puFsRoot) + res = *vIter + res; + else + res = *vIter + PATH_SEP + res; +#endif // WIN32 + } + else + res = *vIter; + + if (vIter == prCurrentPathDecomposition.begin()) + { +#if defined(UNIX) // UNIX is LINUX or APPLE + if (res[0] != PATH_SEP) + res = PATH_SEP + res; +#endif // defined(UNIX) + break; + } + + --vIter; + } + + prCurrentPath = std::move(res); + } + + bool IGFD::FileManager::SetPathOnParentDirectoryIfAny() + { + if (prCurrentPathDecomposition.size() > 1) + { + 
ComposeNewPath(prCurrentPathDecomposition.end() - 2); + return true; + } + return false; + } + + std::string IGFD::FileManager::GetCurrentPath() + { + if (prCurrentPath.empty()) + prCurrentPath = "."; + return prCurrentPath; + } + + void IGFD::FileManager::SetCurrentPath(const std::string& vCurrentPath) + { + if (vCurrentPath.empty()) + prCurrentPath = "."; + else + prCurrentPath = vCurrentPath; + } + + bool IGFD::FileManager::IsFileExist(const std::string& vFile) + { + std::ifstream docFile(vFile, std::ios::in); + if (docFile.is_open()) + { + docFile.close(); + return true; + } + return false; + } + + void IGFD::FileManager::SetDefaultFileName(const std::string& vFileName) + { + puDLGDefaultFileName = vFileName; + IGFD::Utils::SetBuffer(puFileNameBuffer, MAX_FILE_DIALOG_NAME_BUFFER, vFileName); + } + + bool IGFD::FileManager::SelectDirectory(const std::shared_ptr& vInfos) + { + if (!vInfos.use_count()) + return false; + + bool pathClick = false; + + if (vInfos->fileNameExt == "..") + { + pathClick = SetPathOnParentDirectoryIfAny(); + } + else + { + std::string newPath; + + if (puShowDrives) + { + newPath = vInfos->fileNameExt + std::string(1u, PATH_SEP); + } + else + { +#ifdef __linux__ + if (puFsRoot == prCurrentPath) + newPath = prCurrentPath + vInfos->fileNameExt; + else +#endif // __linux__ + newPath = prCurrentPath + std::string(1u, PATH_SEP) + vInfos->fileNameExt; + } + + if (IGFD::Utils::IsDirectoryExist(newPath)) + { + if (puShowDrives) + { + prCurrentPath = vInfos->fileNameExt; + puFsRoot = prCurrentPath; + } + else + { + prCurrentPath = newPath; //-V820 + } + pathClick = true; + } + } + + return pathClick; + } + + void IGFD::FileManager::SelectFileName(const FileDialogInternal& vFileDialogInternal, const std::shared_ptr& vInfos) + { + if (!vInfos.use_count()) + return; + + if (ImGui::GetIO().KeyCtrl) + { + if (puDLGcountSelectionMax == 0) // infinite selection + { + if (prSelectedFileNames.find(vInfos->fileNameExt) == prSelectedFileNames.end()) // not 
found +> add + { + prAddFileNameInSelection(vInfos->fileNameExt, true); + } + else // found +> remove + { + prRemoveFileNameInSelection(vInfos->fileNameExt); + } + } + else // selection limited by size + { + if (prSelectedFileNames.size() < puDLGcountSelectionMax) + { + if (prSelectedFileNames.find(vInfos->fileNameExt) == prSelectedFileNames.end()) // not found +> add + { + prAddFileNameInSelection(vInfos->fileNameExt, true); + } + else // found +> remove + { + prRemoveFileNameInSelection(vInfos->fileNameExt); + } + } + } + } + else if (ImGui::GetIO().KeyShift) + { + if (puDLGcountSelectionMax != 1) + { + prSelectedFileNames.clear(); + // we will iterate filelist and get the last selection after the start selection + bool startMultiSelection = false; + std::string fileNameToSelect = vInfos->fileNameExt; + std::string savedLastSelectedFileName; // for invert selection mode + for (const auto& file : prFileList) + { + if (!file.use_count()) + continue; + + bool canTake = true; + if (!file->IsTagFound(vFileDialogInternal.puSearchManager.puSearchTag)) canTake = false; + if (canTake) // if not filtered, we will take files who are filtered by the dialog + { + if (file->fileNameExt == prLastSelectedFileName) + { + startMultiSelection = true; + prAddFileNameInSelection(prLastSelectedFileName, false); + } + else if (startMultiSelection) + { + if (puDLGcountSelectionMax == 0) // infinite selection + { + prAddFileNameInSelection(file->fileNameExt, false); + } + else // selection limited by size + { + if (prSelectedFileNames.size() < puDLGcountSelectionMax) + { + prAddFileNameInSelection(file->fileNameExt, false); + } + else + { + startMultiSelection = false; + if (!savedLastSelectedFileName.empty()) + prLastSelectedFileName = savedLastSelectedFileName; + break; + } + } + } + + if (file->fileNameExt == fileNameToSelect) + { + if (!startMultiSelection) // we are before the last Selected FileName, so we must inverse + { + savedLastSelectedFileName = prLastSelectedFileName; + 
prLastSelectedFileName = fileNameToSelect; + fileNameToSelect = savedLastSelectedFileName; + startMultiSelection = true; + prAddFileNameInSelection(prLastSelectedFileName, false); + } + else + { + startMultiSelection = false; + if (!savedLastSelectedFileName.empty()) + prLastSelectedFileName = savedLastSelectedFileName; + break; + } + } + } + } + } + } + else + { + prSelectedFileNames.clear(); + IGFD::Utils::ResetBuffer(puFileNameBuffer); + prAddFileNameInSelection(vInfos->fileNameExt, true); + } + } + + void IGFD::FileManager::DrawDirectoryCreation(const FileDialogInternal& vFileDialogInternal) + { + if (vFileDialogInternal.puDLGflags & ImGuiFileDialogFlags_DisableCreateDirectoryButton) + return; + + if (IMGUI_BUTTON(createDirButtonString)) + { + if (!prCreateDirectoryMode) + { + prCreateDirectoryMode = true; + IGFD::Utils::ResetBuffer(puDirectoryNameBuffer); + } + } + if (ImGui::IsItemHovered()) + ImGui::SetTooltip(buttonCreateDirString); + + if (prCreateDirectoryMode) + { + ImGui::SameLine(); + + ImGui::PushItemWidth(100.0f); + ImGui::InputText("##DirectoryFileName", puDirectoryNameBuffer, MAX_FILE_DIALOG_NAME_BUFFER); + ImGui::PopItemWidth(); + + ImGui::SameLine(); + + if (IMGUI_BUTTON(okButtonString)) + { + std::string newDir = std::string(puDirectoryNameBuffer); + if (CreateDir(newDir)) + { + SetCurrentPath(prCurrentPath + std::string(1u, PATH_SEP) + newDir); + OpenCurrentPath(vFileDialogInternal); + } + + prCreateDirectoryMode = false; + } + + ImGui::SameLine(); + + if (IMGUI_BUTTON(cancelButtonString)) + { + prCreateDirectoryMode = false; + } + } + } + + void IGFD::FileManager::DrawPathComposer(const FileDialogInternal& vFileDialogInternal) + { + if (IMGUI_BUTTON(resetButtonString)) + { + SetCurrentPath("."); + OpenCurrentPath(vFileDialogInternal); + } + if (ImGui::IsItemHovered()) + ImGui::SetTooltip(buttonResetPathString); + +#ifdef WIN32 + ImGui::SameLine(); + + if (IMGUI_BUTTON(drivesButtonString)) + { + puDrivesClicked = true; + } + if 
(ImGui::IsItemHovered()) + ImGui::SetTooltip(buttonDriveString); +#endif // WIN32 + + ImGui::SameLine(); + + if (IMGUI_BUTTON(editPathButtonString)) + { + puInputPathActivated = true; + } + if (ImGui::IsItemHovered()) + ImGui::SetTooltip(buttonEditPathString); + + ImGui::SameLine(); + + ImGui::SeparatorEx(ImGuiSeparatorFlags_Vertical); + + // show current path + if (!prCurrentPathDecomposition.empty()) + { + ImGui::SameLine(); + + if (puInputPathActivated) + { + ImGui::PushItemWidth(ImGui::GetContentRegionAvail().x); + ImGui::InputText("##pathedition", puInputPathBuffer, MAX_PATH_BUFFER_SIZE); + ImGui::PopItemWidth(); + } + else + { + int _id = 0; + for (auto itPathDecomp = prCurrentPathDecomposition.begin(); + itPathDecomp != prCurrentPathDecomposition.end(); ++itPathDecomp) + { + if (itPathDecomp != prCurrentPathDecomposition.begin()) + ImGui::SameLine(); + ImGui::PushID(_id++); + bool click = IMGUI_PATH_BUTTON((*itPathDecomp).c_str()); + ImGui::PopID(); + if (click) + { + ComposeNewPath(itPathDecomp); + puPathClicked = true; + break; + } + // activate input for path + if (ImGui::IsItemClicked(ImGuiMouseButton_Right)) + { + ComposeNewPath(itPathDecomp); + IGFD::Utils::SetBuffer(puInputPathBuffer, MAX_PATH_BUFFER_SIZE, prCurrentPath); + puInputPathActivated = true; + break; + } + } + } + } + } + + std::string IGFD::FileManager::GetResultingPath() + { + std::string path = prCurrentPath; + + if (puDLGDirectoryMode) // if directory mode + { + std::string selectedDirectory = puFileNameBuffer; + if (!selectedDirectory.empty() && + selectedDirectory != ".") + path += std::string(1u, PATH_SEP) + selectedDirectory; + } + + return path; + } + + std::string IGFD::FileManager::GetResultingFileName(FileDialogInternal& vFileDialogInternal) + { + if (!puDLGDirectoryMode) // if not directory mode + { + return vFileDialogInternal.puFilterManager.ReplaceExtentionWithCurrentFilter(std::string(puFileNameBuffer)); + } + + return ""; // directory mode + } + + std::string 
IGFD::FileManager::GetResultingFilePathName(FileDialogInternal& vFileDialogInternal) + { + std::string result = GetResultingPath(); + + std::string filename = GetResultingFileName(vFileDialogInternal); + if (!filename.empty()) + { +#ifdef UNIX + if (puFsRoot != result) +#endif // UNIX + result += std::string(1u, PATH_SEP); + + result += filename; + } + + return result; + } + + std::map IGFD::FileManager::GetResultingSelection() + { + std::map res; + + for (auto& selectedFileName : prSelectedFileNames) + { + std::string result = GetResultingPath(); + +#ifdef UNIX + if (puFsRoot != result) +#endif // UNIX + result += std::string(1u, PATH_SEP); + + result += selectedFileName; + + res[selectedFileName] = result; + } + + return res; + } + + ///////////////////////////////////////////////////////////////////////////////////// + //// FILE DIALOG INTERNAL /////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////// + + void IGFD::FileDialogInternal::NewFrame() + { + puCanWeContinue = true; // reset flag for possibily validate the dialog + puIsOk = false; // reset dialog result + puFileManager.puDrivesClicked = false; + puFileManager.puPathClicked = false; + + puNeedToExitDialog = false; + +#ifdef USE_DIALOG_EXIT_WITH_KEY + if (ImGui::IsKeyPressed(IGFD_EXIT_KEY)) + { + // we do that here with the data's defined at the last frame + // because escape key can quit input activation and at the end of the frame all flag will be false + // so we will detect nothing + if (!(puFileManager.puInputPathActivated || + puSearchManager.puSearchInputIsActive || + puFileInputIsActive || + puFileListViewIsActive)) + { + puNeedToExitDialog = true; // need to quit dialog + } + } + else +#endif + { + puSearchManager.puSearchInputIsActive = false; + puFileInputIsActive = false; + puFileListViewIsActive = false; + } + } + + void IGFD::FileDialogInternal::EndFrame() + { + // directory change + if 
(puFileManager.puPathClicked) + { + puFileManager.OpenCurrentPath(*this); + } + + if (puFileManager.puDrivesClicked) + { + if (puFileManager.GetDrives()) + { + puFileManager.ApplyFilteringOnFileList(*this); + } + } + + if (puFileManager.puInputPathActivated) + { + auto gio = ImGui::GetIO(); + if (ImGui::IsKeyReleased(gio.KeyMap[ImGuiKey_Enter])) + { + puFileManager.SetCurrentPath(std::string(puFileManager.puInputPathBuffer)); + puFileManager.OpenCurrentPath(*this); + puFileManager.puInputPathActivated = false; + } + if (ImGui::IsKeyReleased(gio.KeyMap[ImGuiKey_Escape])) + { + puFileManager.puInputPathActivated = false; + } + } + } + + void IGFD::FileDialogInternal::ResetForNewDialog() + { + + } + + ///////////////////////////////////////////////////////////////////////////////////// + //// THUMBNAIL FEATURE ////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////// + + IGFD::ThumbnailFeature::ThumbnailFeature() + { +#ifdef USE_THUMBNAILS + prDisplayMode = DisplayModeEnum::FILE_LIST; +#endif + } + + IGFD::ThumbnailFeature::~ThumbnailFeature() = default; + + void IGFD::ThumbnailFeature::NewThumbnailFrame(FileDialogInternal& vFileDialogInternal) + { + (void)vFileDialogInternal; + +#ifdef USE_THUMBNAILS + prStartThumbnailFileDatasExtraction(); +#endif + } + + void IGFD::ThumbnailFeature::EndThumbnailFrame(FileDialogInternal& vFileDialogInternal) + { +#ifdef USE_THUMBNAILS + prClearThumbnails(vFileDialogInternal); +#endif + } + + void IGFD::ThumbnailFeature::QuitThumbnailFrame(FileDialogInternal& vFileDialogInternal) + { +#ifdef USE_THUMBNAILS + prStopThumbnailFileDatasExtraction(); + prClearThumbnails(vFileDialogInternal); +#endif + } + +#ifdef USE_THUMBNAILS + void IGFD::ThumbnailFeature::prStartThumbnailFileDatasExtraction() + { + const bool res = prThumbnailGenerationThread.use_count() && prThumbnailGenerationThread->joinable(); + if (!res) + { + prIsWorking = true; + 
prCountFiles = 0U; + prThumbnailGenerationThread = std::shared_ptr( + new std::thread(&IGFD::ThumbnailFeature::prThreadThumbnailFileDatasExtractionFunc, this), + [this](std::thread* obj) + { + prIsWorking = false; + if (obj) + obj->join(); + }); + } + } + + bool IGFD::ThumbnailFeature::prStopThumbnailFileDatasExtraction() + { + const bool res = prThumbnailGenerationThread.use_count() && prThumbnailGenerationThread->joinable(); + if (res) + { + prThumbnailGenerationThread.reset(); + } + + return res; + } + + void IGFD::ThumbnailFeature::prThreadThumbnailFileDatasExtractionFunc() + { + prCountFiles = 0U; + prIsWorking = true; + + // infinite loop while is thread working + while(prIsWorking) + { + if (!prThumbnailFileDatasToGet.empty()) + { + std::shared_ptr file = nullptr; + prThumbnailFileDatasToGetMutex.lock(); + //get the first file in the list + file = (*prThumbnailFileDatasToGet.begin()); + prThumbnailFileDatasToGetMutex.unlock(); + + // retrieve datas of the texture file if its an image file + if (file.use_count()) + { + if (file->fileType == 'f') //-V522 + { + if (file->fileExt == ".png" + || file->fileExt == ".bmp" + || file->fileExt == ".tga" + || file->fileExt == ".jpg" || file->fileExt == ".jpeg" + || file->fileExt == ".gif" + || file->fileExt == ".psd" + || file->fileExt == ".pic" + || file->fileExt == ".ppm" || file->fileExt == ".pgm" + //|| file->fileExt == ".hdr" => format float so in few times + ) + { + auto fpn = file->filePath + std::string(1u, PATH_SEP) + file->fileNameExt; + + int w = 0; + int h = 0; + int chans = 0; + uint8_t *datas = stbi_load(fpn.c_str(), &w, &h, &chans, STBI_rgb_alpha); + if (datas) + { + if (w && h) + { + // resize with respect to glyph ratio + const float ratioX = (float)w / (float)h; + const float newX = DisplayMode_ThumbailsList_ImageHeight * ratioX; + float newY = w / ratioX; + if (newX < w) + newY = DisplayMode_ThumbailsList_ImageHeight; + + const auto newWidth = (int)newX; + const auto newHeight = (int)newY; + const 
auto newBufSize = (size_t)(newWidth * newHeight * 4U); //-V112 //-V1028 + auto resizedData = new uint8_t[newBufSize]; + + const int resizeSucceeded = stbir_resize_uint8( + datas, w, h, 0, + resizedData, newWidth, newHeight, 0, + 4); //-V112 + + if (resizeSucceeded) + { + auto th = &file->thumbnailInfo; + + th->textureFileDatas = resizedData; + th->textureWidth = newWidth; + th->textureHeight = newHeight; + th->textureChannels = 4; //-V112 + + // we set that at least, because will launch the gpu creation of the texture in the main thread + th->isReadyToUpload = true; + + // need gpu loading + prAddThumbnailToCreate(file); + } + } + else + { + printf("image loading fail : w:%i h:%i c:%i\n", w, h, 4); //-V112 + } + + stbi_image_free(datas); + } + } + } + + // peu importe le resultat on vire le fichicer + // remove form this list + // write => thread concurency issues + prThumbnailFileDatasToGetMutex.lock(); + prThumbnailFileDatasToGet.pop_front(); + prThumbnailFileDatasToGetMutex.unlock(); + } + } + } + } + + inline void inVariadicProgressBar(float fraction, const ImVec2& size_arg, const char* fmt, ...) 
+ { + va_list args; + va_start(args, fmt); + char TempBuffer[512]; + const int w = vsnprintf(TempBuffer, 511, fmt, args); + va_end(args); + if (w) + { + ImGui::ProgressBar(fraction, size_arg, TempBuffer); + } + } + + void IGFD::ThumbnailFeature::prDrawThumbnailGenerationProgress() + { + if (prThumbnailGenerationThread.use_count() && prThumbnailGenerationThread->joinable()) + { + if (!prThumbnailFileDatasToGet.empty()) + { + const auto p = (float)((double)prCountFiles / (double)prThumbnailFileDatasToGet.size()); // read => no thread concurency issues + inVariadicProgressBar(p, ImVec2(50, 0), "%u/%u", prCountFiles, (uint32_t)prThumbnailFileDatasToGet.size()); // read => no thread concurency issues + ImGui::SameLine(); + } + } + } + + void IGFD::ThumbnailFeature::prAddThumbnailToLoad(const std::shared_ptr& vFileInfos) + { + if (vFileInfos.use_count()) + { + if (vFileInfos->fileType == 'f') + { + if (vFileInfos->fileExt == ".png" + || vFileInfos->fileExt == ".bmp" + || vFileInfos->fileExt == ".tga" + || vFileInfos->fileExt == ".jpg" || vFileInfos->fileExt == ".jpeg" + || vFileInfos->fileExt == ".gif" + || vFileInfos->fileExt == ".psd" + || vFileInfos->fileExt == ".pic" + || vFileInfos->fileExt == ".ppm" || vFileInfos->fileExt == ".pgm" + //|| file->fileExt == ".hdr" => format float so in few times + ) + { + // write => thread concurency issues + prThumbnailFileDatasToGetMutex.lock(); + prThumbnailFileDatasToGet.push_back(vFileInfos); + vFileInfos->thumbnailInfo.isLoadingOrLoaded = true; + prThumbnailFileDatasToGetMutex.unlock(); + } + } + } + } + + void IGFD::ThumbnailFeature::prAddThumbnailToCreate(const std::shared_ptr& vFileInfos) + { + if (vFileInfos.use_count()) + { + // write => thread concurency issues + prThumbnailToCreateMutex.lock(); + prThumbnailToCreate.push_back(vFileInfos); + prThumbnailToCreateMutex.unlock(); + } + } + + void IGFD::ThumbnailFeature::prAddThumbnailToDestroy(const IGFD_Thumbnail_Info& vIGFD_Thumbnail_Info) + { + // write => thread 
concurency issues + prThumbnailToDestroyMutex.lock(); + prThumbnailToDestroy.push_back(vIGFD_Thumbnail_Info); + prThumbnailToDestroyMutex.unlock(); + } + + void IGFD::ThumbnailFeature::prDrawDisplayModeToolBar() + { + if (IMGUI_RADIO_BUTTON(DisplayMode_FilesList_ButtonString, + prDisplayMode == DisplayModeEnum::FILE_LIST)) + prDisplayMode = DisplayModeEnum::FILE_LIST; + if (ImGui::IsItemHovered()) ImGui::SetTooltip(DisplayMode_FilesList_ButtonHelp); + ImGui::SameLine(); + if (IMGUI_RADIO_BUTTON(DisplayMode_ThumbailsList_ButtonString, + prDisplayMode == DisplayModeEnum::THUMBNAILS_LIST)) + prDisplayMode = DisplayModeEnum::THUMBNAILS_LIST; + if (ImGui::IsItemHovered()) ImGui::SetTooltip(DisplayMode_ThumbailsList_ButtonHelp); + ImGui::SameLine(); + /* todo + if (IMGUI_RADIO_BUTTON(DisplayMode_ThumbailsGrid_ButtonString, + prDisplayMode == DisplayModeEnum::THUMBNAILS_GRID)) + prDisplayMode = DisplayModeEnum::THUMBNAILS_GRID; + if (ImGui::IsItemHovered()) ImGui::SetTooltip(DisplayMode_ThumbailsGrid_ButtonHelp); + ImGui::SameLine(); + */ + prDrawThumbnailGenerationProgress(); + } + + void IGFD::ThumbnailFeature::prClearThumbnails(FileDialogInternal& vFileDialogInternal) + { + // directory wil be changed so the file list will be erased + if (vFileDialogInternal.puFileManager.puPathClicked) + { + size_t count = vFileDialogInternal.puFileManager.GetFullFileListSize(); + for (size_t idx = 0U; idx < count; idx++) + { + auto file = vFileDialogInternal.puFileManager.GetFullFileAt(idx); + if (file.use_count()) + { + if (file->thumbnailInfo.isReadyToDisplay) //-V522 + { + prAddThumbnailToDestroy(file->thumbnailInfo); + } + } + } + } + } + + void IGFD::ThumbnailFeature::SetCreateThumbnailCallback(const CreateThumbnailFun& vCreateThumbnailFun) + { + prCreateThumbnailFun = vCreateThumbnailFun; + } + + void IGFD::ThumbnailFeature::SetDestroyThumbnailCallback(const DestroyThumbnailFun& vCreateThumbnailFun) + { + prDestroyThumbnailFun = vCreateThumbnailFun; + } + + void 
IGFD::ThumbnailFeature::ManageGPUThumbnails() + { + if (prCreateThumbnailFun) + { + if (!prThumbnailToCreate.empty()) + { + for (const auto& file : prThumbnailToCreate) + { + if (file.use_count()) + { + prCreateThumbnailFun(&file->thumbnailInfo); + } + } + prThumbnailToCreateMutex.lock(); + prThumbnailToCreate.clear(); + prThumbnailToCreateMutex.unlock(); + } + } + else + { + printf("No Callback found for create texture\nYou need to define the callback with a call to SetCreateThumbnailCallback\n"); + } + + if (prDestroyThumbnailFun) + { + if (!prThumbnailToDestroy.empty()) + { + for (auto thumbnail : prThumbnailToDestroy) + { + prDestroyThumbnailFun(&thumbnail); + } + prThumbnailToDestroyMutex.lock(); + prThumbnailToDestroy.clear(); + prThumbnailToDestroyMutex.unlock(); + } + } + else + { + printf("No Callback found for destroy texture\nYou need to define the callback with a call to SetCreateThumbnailCallback\n"); + } + } + +#endif // USE_THUMBNAILS + + ///////////////////////////////////////////////////////////////////////////////////// + //// BOOKMARK FEATURE /////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////// + + IGFD::BookMarkFeature::BookMarkFeature() + { +#ifdef USE_BOOKMARK + prBookmarkWidth = defaultBookmarkPaneWith; +#endif // USE_BOOKMARK + } + +#ifdef USE_BOOKMARK + void IGFD::BookMarkFeature::prDrawBookmarkButton() + { + IMGUI_TOGGLE_BUTTON(bookmarksButtonString, &prBookmarkPaneShown); + + if (ImGui::IsItemHovered()) + ImGui::SetTooltip(bookmarksButtonHelpString); + } + bool IGFD::BookMarkFeature::prDrawBookmarkPane(FileDialogInternal& vFileDialogInternal, const ImVec2& vSize) + { + bool res = false; + + ImGui::BeginChild("##bookmarkpane", vSize); + + static int selectedBookmarkForEdition = -1; + + if (IMGUI_BUTTON(addBookmarkButtonString "##ImGuiFileDialogAddBookmark")) + { + if (!vFileDialogInternal.puFileManager.IsComposerEmpty()) + { + BookmarkStruct 
bookmark; + bookmark.name = vFileDialogInternal.puFileManager.GetBack(); + bookmark.path = vFileDialogInternal.puFileManager.GetCurrentPath(); + prBookmarks.push_back(bookmark); + } + } + if (selectedBookmarkForEdition >= 0 && + selectedBookmarkForEdition < (int)prBookmarks.size()) + { + ImGui::SameLine(); + if (IMGUI_BUTTON(removeBookmarkButtonString "##ImGuiFileDialogAddBookmark")) + { + prBookmarks.erase(prBookmarks.begin() + selectedBookmarkForEdition); + if (selectedBookmarkForEdition == (int)prBookmarks.size()) + selectedBookmarkForEdition--; + } + + if (selectedBookmarkForEdition >= 0 && + selectedBookmarkForEdition < (int)prBookmarks.size()) + { + ImGui::SameLine(); + + ImGui::PushItemWidth(vSize.x - ImGui::GetCursorPosX()); + if (ImGui::InputText("##ImGuiFileDialogBookmarkEdit", prBookmarkEditBuffer, MAX_FILE_DIALOG_NAME_BUFFER)) + { + prBookmarks[(size_t)selectedBookmarkForEdition].name = std::string(prBookmarkEditBuffer); + } + ImGui::PopItemWidth(); + } + } + + ImGui::Separator(); + + if (!prBookmarks.empty()) + { + prBookmarkClipper.Begin((int)prBookmarks.size(), ImGui::GetTextLineHeightWithSpacing()); + while (prBookmarkClipper.Step()) + { + for (int i = prBookmarkClipper.DisplayStart; i < prBookmarkClipper.DisplayEnd; i++) + { + if (i < 0) continue; + const BookmarkStruct& bookmark = prBookmarks[(size_t)i]; + ImGui::PushID(i); + if (ImGui::Selectable(bookmark.name.c_str(), selectedBookmarkForEdition == i, + ImGuiSelectableFlags_AllowDoubleClick) | + (selectedBookmarkForEdition == -1 && + bookmark.path == vFileDialogInternal.puFileManager.GetCurrentPath())) // select if path is current + { + selectedBookmarkForEdition = i; + IGFD::Utils::ResetBuffer(prBookmarkEditBuffer); + IGFD::Utils::AppendToBuffer(prBookmarkEditBuffer, MAX_FILE_DIALOG_NAME_BUFFER, bookmark.name); + + if (ImGui::IsMouseDoubleClicked(0)) // apply path + { + vFileDialogInternal.puFileManager.SetCurrentPath(bookmark.path); + 
vFileDialogInternal.puFileManager.OpenCurrentPath(vFileDialogInternal); + res = true; + } + } + ImGui::PopID(); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("%s", bookmark.path.c_str()); //-V111 + } + } + prBookmarkClipper.End(); + } + + ImGui::EndChild(); + + return res; + } + + std::string IGFD::BookMarkFeature::SerializeBookmarks() + { + std::string res; + + size_t idx = 0; + for (auto& it : prBookmarks) + { + if (idx++ != 0) + res += "##"; // ## because reserved by imgui, so an input text cant have ## + res += it.name + "##" + it.path; + } + + return res; + } + + void IGFD::BookMarkFeature::DeserializeBookmarks(const std::string& vBookmarks) + { + if (!vBookmarks.empty()) + { + prBookmarks.clear(); + auto arr = IGFD::Utils::SplitStringToVector(vBookmarks, '#', false); + for (size_t i = 0; i < arr.size(); i += 2) + { + BookmarkStruct bookmark; + bookmark.name = arr[i]; + if (i + 1 < arr.size()) // for avoid crash if arr size is impair due to user mistake after edition + { + // if bad format we jump this bookmark + bookmark.path = arr[i + 1]; + prBookmarks.push_back(bookmark); + } + } + } + } +#endif // USE_BOOKMARK + + ///////////////////////////////////////////////////////////////////////////////////// + //// KEY EXPLORER FEATURE /////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////// + + KeyExplorerFeature::KeyExplorerFeature() = default; + +#ifdef USE_EXPLORATION_BY_KEYS + bool IGFD::KeyExplorerFeature::prLocateItem_Loop(FileDialogInternal& vFileDialogInternal, ImWchar vC) + { + bool found = false; + + auto& fdi = vFileDialogInternal.puFileManager; + if (!fdi.IsFilteredListEmpty()) + { + auto countFiles = fdi.GetFilteredListSize(); + for (size_t i = prLocateFileByInputChar_lastFileIdx; i < countFiles; i++) + { + auto nfo = fdi.GetFilteredFileAt(i); + if (nfo.use_count()) + { + if (nfo->fileNameExt_optimized[0] == vC || // lower case search //-V522 + 
nfo->fileNameExt[0] == vC) // maybe upper case search + { + //float p = ((float)i) * ImGui::GetTextLineHeightWithSpacing(); + float p = (float)((double)i / (double)countFiles) * ImGui::GetScrollMaxY(); + ImGui::SetScrollY(p); + prLocateFileByInputChar_lastFound = true; + prLocateFileByInputChar_lastFileIdx = i; + prStartFlashItem(prLocateFileByInputChar_lastFileIdx); + + auto infos = fdi.GetFilteredFileAt(prLocateFileByInputChar_lastFileIdx); + if (infos.use_count()) + { + if (infos->fileType == 'd') //-V522 + { + if (fdi.puDLGDirectoryMode) // directory chooser + { + fdi.SelectFileName(vFileDialogInternal, infos); + } + } + else + { + fdi.SelectFileName(vFileDialogInternal, infos); + } + + found = true; + break; + } + } + } + } + } + + return found; + } + + void IGFD::KeyExplorerFeature::prLocateByInputKey(FileDialogInternal& vFileDialogInternal) + { + ImGuiContext& g = *GImGui; + auto& fdi = vFileDialogInternal.puFileManager; + if (!g.ActiveId && !fdi.IsFilteredListEmpty()) + { + auto& queueChar = ImGui::GetIO().InputQueueCharacters; + auto countFiles = fdi.GetFilteredListSize(); + + // point by char + if (!queueChar.empty()) + { + ImWchar c = queueChar.back(); + if (prLocateFileByInputChar_InputQueueCharactersSize != queueChar.size()) + { + if (c == prLocateFileByInputChar_lastChar) // next file starting with same char until + { + if (prLocateFileByInputChar_lastFileIdx < countFiles - 1U) + prLocateFileByInputChar_lastFileIdx++; + else + prLocateFileByInputChar_lastFileIdx = 0; + } + + if (!prLocateItem_Loop(vFileDialogInternal, c)) + { + // not found, loop again from 0 this time + prLocateFileByInputChar_lastFileIdx = 0; + prLocateItem_Loop(vFileDialogInternal, c); + } + + prLocateFileByInputChar_lastChar = c; + } + } + + prLocateFileByInputChar_InputQueueCharactersSize = queueChar.size(); + } + } + + void IGFD::KeyExplorerFeature::prExploreWithkeys(FileDialogInternal& vFileDialogInternal, ImGuiID vListViewID) + { + auto& fdi = 
vFileDialogInternal.puFileManager; + if (!fdi.IsFilteredListEmpty()) + { + bool canWeExplore = false; + bool hasNav = (ImGui::GetIO().ConfigFlags & ImGuiConfigFlags_NavEnableKeyboard); + + ImGuiContext& g = *GImGui; + if (!hasNav && !g.ActiveId) // no nav and no activated inputs + canWeExplore = true; + + if (g.NavId && g.NavId == vListViewID) + { + if (ImGui::IsKeyPressedMap(IGFD_KEY_ENTER) || + ImGui::IsKeyPressedMap(ImGuiKey_KeyPadEnter) || + ImGui::IsKeyPressedMap(ImGuiKey_Space)) + { + ImGui::ActivateItem(vListViewID); + ImGui::SetActiveID(vListViewID, g.CurrentWindow); + } + } + + if (vListViewID == g.LastActiveId-1) // if listview id is the last acticated nav id (ImGui::ActivateItem(vListViewID);) + canWeExplore = true; + + if (canWeExplore) + { + if (ImGui::IsKeyPressedMap(ImGuiKey_Escape)) + { + ImGui::ClearActiveID(); + g.LastActiveId = 0; + } + + auto countFiles = fdi.GetFilteredListSize(); + + // explore + bool exploreByKey = false; + bool enterInDirectory = false; + bool exitDirectory = false; + + if ((hasNav && ImGui::IsKeyPressedMap(ImGuiKey_UpArrow)) || (!hasNav && ImGui::IsKeyPressed(IGFD_KEY_UP))) + { + exploreByKey = true; + if (prLocateFileByInputChar_lastFileIdx > 0) + prLocateFileByInputChar_lastFileIdx--; + else + prLocateFileByInputChar_lastFileIdx = countFiles - 1U; + } + else if ((hasNav && ImGui::IsKeyPressedMap(ImGuiKey_DownArrow)) || (!hasNav && ImGui::IsKeyPressed(IGFD_KEY_DOWN))) + { + exploreByKey = true; + if (prLocateFileByInputChar_lastFileIdx < countFiles - 1U) + prLocateFileByInputChar_lastFileIdx++; + else + prLocateFileByInputChar_lastFileIdx = 0U; + } + else if (ImGui::IsKeyReleased(IGFD_KEY_ENTER)) + { + exploreByKey = true; + enterInDirectory = true; + } + else if (ImGui::IsKeyReleased(IGFD_KEY_BACKSPACE)) + { + exploreByKey = true; + exitDirectory = true; + } + + if (exploreByKey) + { + //float totalHeight = prFilteredFileList.size() * ImGui::GetTextLineHeightWithSpacing(); + float p = 
(float)((double)prLocateFileByInputChar_lastFileIdx / (double)(countFiles - 1U)) * ImGui::GetScrollMaxY();// seems not udpated in tables version outside tables + //float p = ((float)locateFileByInputChar_lastFileIdx) * ImGui::GetTextLineHeightWithSpacing(); + ImGui::SetScrollY(p); + prStartFlashItem(prLocateFileByInputChar_lastFileIdx); + + auto infos = fdi.GetFilteredFileAt(prLocateFileByInputChar_lastFileIdx); + if (infos.use_count()) + { + if (infos->fileType == 'd') //-V522 + { + if (!fdi.puDLGDirectoryMode || enterInDirectory) + { + if (enterInDirectory) + { + if (fdi.SelectDirectory(infos)) + { + // changement de repertoire + vFileDialogInternal.puFileManager.OpenCurrentPath(vFileDialogInternal); + if (prLocateFileByInputChar_lastFileIdx > countFiles - 1U) + { + prLocateFileByInputChar_lastFileIdx = 0; + } + } + } + } + else // directory chooser + { + fdi.SelectFileName(vFileDialogInternal, infos); + } + } + else + { + fdi.SelectFileName(vFileDialogInternal, infos); + } + + if (exitDirectory) + { + auto nfo = std::make_shared(); + nfo->fileNameExt = ".."; + + if (fdi.SelectDirectory(nfo)) + { + // changement de repertoire + vFileDialogInternal.puFileManager.OpenCurrentPath(vFileDialogInternal); + if (prLocateFileByInputChar_lastFileIdx > countFiles - 1U) + { + prLocateFileByInputChar_lastFileIdx = 0; + } + } +#ifdef WIN32 + else + { + if (fdi.GetComposerSize() == 1U) + { + if (fdi.GetDrives()) + { + fdi.ApplyFilteringOnFileList(vFileDialogInternal); + } + } + } +#endif // WIN32 + } + } + } + } + } + } + + bool IGFD::KeyExplorerFeature::prFlashableSelectable(const char* label, bool selected, + ImGuiSelectableFlags flags, bool vFlashing, const ImVec2& size_arg) + { + using namespace ImGui; + + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return false; + + ImGuiContext& g = *GImGui; + const ImGuiStyle& style = g.Style; + + // Submit label or explicit size to ItemSize(), whereas ItemAdd() will submit a larger/spanning rectangle. 
+ ImGuiID id = window->GetID(label); + ImVec2 label_size = CalcTextSize(label, nullptr, true); + ImVec2 size(size_arg.x != 0.0f ? size_arg.x : label_size.x, size_arg.y != 0.0f ? size_arg.y : label_size.y); //-V550 + ImVec2 pos = window->DC.CursorPos; + pos.y += window->DC.CurrLineTextBaseOffset; + ItemSize(size, 0.0f); + + // Fill horizontal space + // We don't support (size < 0.0f) in Selectable() because the ItemSpacing extension would make explicitly right-aligned sizes not visibly match other widgets. + const bool span_all_columns = (flags & ImGuiSelectableFlags_SpanAllColumns) != 0; + const float min_x = span_all_columns ? window->ParentWorkRect.Min.x : pos.x; + const float max_x = span_all_columns ? window->ParentWorkRect.Max.x : window->WorkRect.Max.x; + if (fabs(size_arg.x) < FLT_EPSILON || (flags & ImGuiSelectableFlags_SpanAvailWidth)) + size.x = ImMax(label_size.x, max_x - min_x); + + // Text stays at the submission position, but bounding box may be extended on both sides + const ImVec2 text_min = pos; + const ImVec2 text_max(min_x + size.x, pos.y + size.y); + + // Selectables are meant to be tightly packed together with no click-gap, so we extend their box to cover spacing between selectable. + ImRect bb(min_x, pos.y, text_max.x, text_max.y); + if ((flags & ImGuiSelectableFlags_NoPadWithHalfSpacing) == 0) + { + const float spacing_x = span_all_columns ? 0.0f : style.ItemSpacing.x; + const float spacing_y = style.ItemSpacing.y; + const float spacing_L = IM_FLOOR(spacing_x * 0.50f); + const float spacing_U = IM_FLOOR(spacing_y * 0.50f); + bb.Min.x -= spacing_L; + bb.Min.y -= spacing_U; + bb.Max.x += (spacing_x - spacing_L); + bb.Max.y += (spacing_y - spacing_U); + } + //if (g.IO.KeyCtrl) { GetForegroundDrawList()->AddRect(bb.Min, bb.Max, IM_COL32(0, 255, 0, 255)); } + + // Modify ClipRect for the ItemAdd(), faster than doing a PushColumnsBackground/PushTableBackground for every Selectable.. 
+ const float backup_clip_rect_min_x = window->ClipRect.Min.x; + const float backup_clip_rect_max_x = window->ClipRect.Max.x; + if (span_all_columns) + { + window->ClipRect.Min.x = window->ParentWorkRect.Min.x; + window->ClipRect.Max.x = window->ParentWorkRect.Max.x; + } + + bool item_add; + const bool disabled_item = (flags & ImGuiSelectableFlags_Disabled) != 0; + if (disabled_item) + { + ImGuiItemFlags backup_item_flags = g.CurrentItemFlags; + g.CurrentItemFlags |= ImGuiItemFlags_Disabled; + item_add = ItemAdd(bb, id); + g.CurrentItemFlags = backup_item_flags; + } + else + { + item_add = ItemAdd(bb, id); + } + + if (span_all_columns) + { + window->ClipRect.Min.x = backup_clip_rect_min_x; + window->ClipRect.Max.x = backup_clip_rect_max_x; + } + + if (!item_add) + return false; + + const bool disabled_global = (g.CurrentItemFlags & ImGuiItemFlags_Disabled) != 0; + if (disabled_item && !disabled_global) // Only testing this as an optimization + BeginDisabled(true); + + // FIXME: We can standardize the behavior of those two, we could also keep the fast path of override ClipRect + full push on render only, + // which would be advantageous since most selectable are not selected. 
+ if (span_all_columns && window->DC.CurrentColumns) + PushColumnsBackground(); + else if (span_all_columns && g.CurrentTable) + TablePushBackgroundChannel(); + + // We use NoHoldingActiveID on menus so user can click and _hold_ on a menu then drag to browse child entries + ImGuiButtonFlags button_flags = 0; + if (flags & ImGuiSelectableFlags_NoHoldingActiveID) { button_flags |= ImGuiButtonFlags_NoHoldingActiveId; } + if (flags & ImGuiSelectableFlags_SelectOnClick) { button_flags |= ImGuiButtonFlags_PressedOnClick; } + if (flags & ImGuiSelectableFlags_SelectOnRelease) { button_flags |= ImGuiButtonFlags_PressedOnRelease; } + if (flags & ImGuiSelectableFlags_AllowDoubleClick) { button_flags |= ImGuiButtonFlags_PressedOnClickRelease | ImGuiButtonFlags_PressedOnDoubleClick; } + if (flags & ImGuiSelectableFlags_AllowItemOverlap) { button_flags |= ImGuiButtonFlags_AllowItemOverlap; } + + const bool was_selected = selected; + bool hovered, held; + bool pressed = ButtonBehavior(bb, id, &hovered, &held, button_flags); + + // Auto-select when moved into + // - This will be more fully fleshed in the range-select branch + // - This is not exposed as it won't nicely work with some user side handling of shift/control + // - We cannot do 'if (g.NavJustMovedToId != id) { selected = false; pressed = was_selected; }' for two reasons + // - (1) it would require focus scope to be set, need exposing PushFocusScope() or equivalent (e.g. BeginSelection() calling PushFocusScope()) + // - (2) usage will fail with clipped items + // The multi-select API aim to fix those issues, e.g. may be replaced with a BeginSelection() API. 
+ if ((flags & ImGuiSelectableFlags_SelectOnNav) && g.NavJustMovedToId != 0 && g.NavJustMovedToFocusScopeId == window->DC.NavFocusScopeIdCurrent) + if (g.NavJustMovedToId == id) + selected = pressed = true; + + // Update NavId when clicking or when Hovering (this doesn't happen on most widgets), so navigation can be resumed with gamepad/keyboard + if (pressed || (hovered && (flags & ImGuiSelectableFlags_SetNavIdOnHover))) + { + if (!g.NavDisableMouseHover && g.NavWindow == window && g.NavLayer == window->DC.NavLayerCurrent) + { + SetNavID(id, window->DC.NavLayerCurrent, window->DC.NavFocusScopeIdCurrent, ImRect(bb.Min - window->Pos, bb.Max - window->Pos)); + g.NavDisableHighlight = true; + } + } + if (pressed) + MarkItemEdited(id); + + if (flags & ImGuiSelectableFlags_AllowItemOverlap) + SetItemAllowOverlap(); + + // In this branch, Selectable() cannot toggle the selection so this will never trigger. + if (selected != was_selected) //-V547 + g.LastItemData.StatusFlags |= ImGuiItemStatusFlags_ToggledSelection; + + // Render + if ((held && (flags & ImGuiSelectableFlags_DrawHoveredWhenHeld)) || vFlashing) + hovered = true; + if (hovered || selected) + { + const ImU32 col = GetColorU32((held && hovered) ? ImGuiCol_HeaderActive : hovered ? 
ImGuiCol_HeaderHovered : ImGuiCol_Header); + RenderFrame(bb.Min, bb.Max, col, false, 0.0f); + } + RenderNavHighlight(bb, id, ImGuiNavHighlightFlags_TypeThin | ImGuiNavHighlightFlags_NoRounding); + + if (span_all_columns && window->DC.CurrentColumns) + PopColumnsBackground(); + else if (span_all_columns && g.CurrentTable) + TablePopBackgroundChannel(); + + RenderTextClipped(text_min, text_max, label, nullptr, &label_size, style.SelectableTextAlign, &bb); + + // Automatically close popups + if (pressed && (window->Flags & ImGuiWindowFlags_Popup) && !(flags & ImGuiSelectableFlags_DontClosePopups) && !(g.LastItemData.InFlags & ImGuiItemFlags_SelectableDontClosePopup)) + CloseCurrentPopup(); + + if (disabled_item && !disabled_global) + EndDisabled(); + + IMGUI_TEST_ENGINE_ITEM_INFO(id, label, g.LastItemData.StatusFlags); + return pressed; //-V1020 + } + + void IGFD::KeyExplorerFeature::prStartFlashItem(size_t vIdx) + { + prFlashAlpha = 1.0f; + prFlashedItem = vIdx; + } + + bool IGFD::KeyExplorerFeature::prBeginFlashItem(size_t vIdx) + { + bool res = false; + + if (prFlashedItem == vIdx && + std::abs(prFlashAlpha - 0.0f) > 0.00001f) + { + prFlashAlpha -= prFlashAlphaAttenInSecs * ImGui::GetIO().DeltaTime; + if (prFlashAlpha < 0.0f) prFlashAlpha = 0.0f; + + ImVec4 hov = ImGui::GetStyleColorVec4(ImGuiCol_HeaderHovered); + hov.w = prFlashAlpha; + ImGui::PushStyleColor(ImGuiCol_HeaderHovered, hov); + res = true; + } + + return res; + } + + void IGFD::KeyExplorerFeature::prEndFlashItem() + { + ImGui::PopStyleColor(); + } + + void IGFD::KeyExplorerFeature::SetFlashingAttenuationInSeconds(float vAttenValue) + { + prFlashAlphaAttenInSecs = 1.0f / ImMax(vAttenValue, 0.01f); + } +#endif // USE_EXPLORATION_BY_KEYS + + ///////////////////////////////////////////////////////////////////////////////////// + //// FILE DIALOG CONSTRUCTOR / DESTRUCTOR /////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////// + + 
IGFD::FileDialog::FileDialog() : BookMarkFeature(), KeyExplorerFeature(), ThumbnailFeature() {} + IGFD::FileDialog::~FileDialog() = default; + + ////////////////////////////////////////////////////////////////////////////////////////////////// + ///// FILE DIALOG STANDARD DIALOG //////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////// + + // path and fileNameExt can be specified + void IGFD::FileDialog::OpenDialog( + const std::string& vKey, + const std::string& vTitle, + const char* vFilters, + const std::string& vPath, + const std::string& vFileName, + const int& vCountSelectionMax, + UserDatas vUserDatas, + ImGuiFileDialogFlags vFlags) + { + if (prFileDialogInternal.puShowDialog) // if already opened, quit + return; + + prFileDialogInternal.ResetForNewDialog(); + + prFileDialogInternal.puDLGkey = vKey; + prFileDialogInternal.puDLGtitle = vTitle; + prFileDialogInternal.puDLGuserDatas = vUserDatas; + prFileDialogInternal.puDLGflags = vFlags; + prFileDialogInternal.puDLGoptionsPane = nullptr; + prFileDialogInternal.puDLGoptionsPaneWidth = 0.0f; + prFileDialogInternal.puDLGmodal = false; + + prFileDialogInternal.puFilterManager.puDLGdefaultExt.clear(); + prFileDialogInternal.puFilterManager.ParseFilters(vFilters); + + prFileDialogInternal.puFileManager.puDLGDirectoryMode = (vFilters == nullptr); + if (vPath.empty()) + prFileDialogInternal.puFileManager.puDLGpath = prFileDialogInternal.puFileManager.GetCurrentPath(); + else + prFileDialogInternal.puFileManager.puDLGpath = vPath; + prFileDialogInternal.puFileManager.SetCurrentPath(vPath); + prFileDialogInternal.puFileManager.puDLGcountSelectionMax = (size_t)vCountSelectionMax; + prFileDialogInternal.puFileManager.SetDefaultFileName(vFileName); + + prFileDialogInternal.puFileManager.ClearAll(); + + prFileDialogInternal.puShowDialog = true; // open dialog + } + + // path and filename are obtained from 
filePathName + void IGFD::FileDialog::OpenDialog( + const std::string& vKey, + const std::string& vTitle, + const char* vFilters, + const std::string& vFilePathName, + const int& vCountSelectionMax, + UserDatas vUserDatas, + ImGuiFileDialogFlags vFlags) + { + if (prFileDialogInternal.puShowDialog) // if already opened, quit + return; + + prFileDialogInternal.ResetForNewDialog(); + + prFileDialogInternal.puDLGkey = vKey; + prFileDialogInternal.puDLGtitle = vTitle; + prFileDialogInternal.puDLGoptionsPane = nullptr; + prFileDialogInternal.puDLGoptionsPaneWidth = 0.0f; + prFileDialogInternal.puDLGuserDatas = vUserDatas; + prFileDialogInternal.puDLGflags = vFlags; + prFileDialogInternal.puDLGmodal = false; + + auto ps = IGFD::Utils::ParsePathFileName(vFilePathName); + if (ps.isOk) + { + prFileDialogInternal.puFileManager.puDLGpath = ps.path; + prFileDialogInternal.puFileManager.SetDefaultFileName(vFilePathName); + prFileDialogInternal.puFilterManager.puDLGdefaultExt = "." + ps.ext; + } + else + { + prFileDialogInternal.puFileManager.puDLGpath = prFileDialogInternal.puFileManager.GetCurrentPath(); + prFileDialogInternal.puFileManager.SetDefaultFileName(""); + prFileDialogInternal.puFilterManager.puDLGdefaultExt.clear(); + } + + prFileDialogInternal.puFilterManager.ParseFilters(vFilters); + prFileDialogInternal.puFilterManager.SetSelectedFilterWithExt( + prFileDialogInternal.puFilterManager.puDLGdefaultExt); + + prFileDialogInternal.puFileManager.SetCurrentPath(prFileDialogInternal.puFileManager.puDLGpath); + + prFileDialogInternal.puFileManager.puDLGDirectoryMode = (vFilters == nullptr); + prFileDialogInternal.puFileManager.puDLGcountSelectionMax = vCountSelectionMax; //-V101 + + prFileDialogInternal.puFileManager.ClearAll(); + + prFileDialogInternal.puShowDialog = true; + } + + // with pane + // path and fileNameExt can be specified + void IGFD::FileDialog::OpenDialog( + const std::string& vKey, + const std::string& vTitle, + const char* vFilters, + const std::string& 
vPath, + const std::string& vFileName, + const PaneFun& vSidePane, + const float& vSidePaneWidth, + const int& vCountSelectionMax, + UserDatas vUserDatas, + ImGuiFileDialogFlags vFlags) + { + if (prFileDialogInternal.puShowDialog) // if already opened, quit + return; + + prFileDialogInternal.ResetForNewDialog(); + + prFileDialogInternal.puDLGkey = vKey; + prFileDialogInternal.puDLGtitle = vTitle; + prFileDialogInternal.puDLGuserDatas = vUserDatas; + prFileDialogInternal.puDLGflags = vFlags; + prFileDialogInternal.puDLGoptionsPane = vSidePane; + prFileDialogInternal.puDLGoptionsPaneWidth = vSidePaneWidth; + prFileDialogInternal.puDLGmodal = false; + + prFileDialogInternal.puFilterManager.puDLGdefaultExt.clear(); + prFileDialogInternal.puFilterManager.ParseFilters(vFilters); + + prFileDialogInternal.puFileManager.puDLGcountSelectionMax = (size_t)vCountSelectionMax; + prFileDialogInternal.puFileManager.puDLGDirectoryMode = (vFilters == nullptr); + if (vPath.empty()) + prFileDialogInternal.puFileManager.puDLGpath = prFileDialogInternal.puFileManager.GetCurrentPath(); + else + prFileDialogInternal.puFileManager.puDLGpath = vPath; + + prFileDialogInternal.puFileManager.SetCurrentPath(prFileDialogInternal.puFileManager.puDLGpath); + + prFileDialogInternal.puFileManager.SetDefaultFileName(vFileName); + + prFileDialogInternal.puFileManager.ClearAll(); + + prFileDialogInternal.puShowDialog = true; // open dialog + } + + // with pane + // path and filename are obtained from filePathName + void IGFD::FileDialog::OpenDialog( + const std::string& vKey, + const std::string& vTitle, + const char* vFilters, + const std::string& vFilePathName, + const PaneFun& vSidePane, + const float& vSidePaneWidth, + const int& vCountSelectionMax, + UserDatas vUserDatas, + ImGuiFileDialogFlags vFlags) + { + if (prFileDialogInternal.puShowDialog) // if already opened, quit + return; + + prFileDialogInternal.ResetForNewDialog(); + + prFileDialogInternal.puDLGkey = vKey; + 
prFileDialogInternal.puDLGtitle = vTitle; + prFileDialogInternal.puDLGoptionsPane = vSidePane; + prFileDialogInternal.puDLGoptionsPaneWidth = vSidePaneWidth; + prFileDialogInternal.puDLGuserDatas = vUserDatas; + prFileDialogInternal.puDLGflags = vFlags; + prFileDialogInternal.puDLGmodal = false; + + auto ps = IGFD::Utils::ParsePathFileName(vFilePathName); + if (ps.isOk) + { + prFileDialogInternal.puFileManager.puDLGpath = ps.path; + prFileDialogInternal.puFileManager.SetDefaultFileName(vFilePathName); + prFileDialogInternal.puFilterManager.puDLGdefaultExt = "." + ps.ext; + } + else + { + prFileDialogInternal.puFileManager.puDLGpath = prFileDialogInternal.puFileManager.GetCurrentPath(); + prFileDialogInternal.puFileManager.SetDefaultFileName(""); + prFileDialogInternal.puFilterManager.puDLGdefaultExt.clear(); + } + + prFileDialogInternal.puFileManager.SetCurrentPath(prFileDialogInternal.puFileManager.puDLGpath); + + prFileDialogInternal.puFileManager.puDLGcountSelectionMax = vCountSelectionMax; //-V101 + prFileDialogInternal.puFileManager.puDLGDirectoryMode = (vFilters == nullptr); + prFileDialogInternal.puFilterManager.ParseFilters(vFilters); + prFileDialogInternal.puFilterManager.SetSelectedFilterWithExt( + prFileDialogInternal.puFilterManager.puDLGdefaultExt); + + prFileDialogInternal.puFileManager.ClearAll(); + + prFileDialogInternal.puShowDialog = true; + } + + ////////////////////////////////////////////////////////////////////////////////////////////////// + ///// FILE DIALOG MODAL DIALOG /////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////// + + void IGFD::FileDialog::OpenModal( + const std::string& vKey, + const std::string& vTitle, + const char* vFilters, + const std::string& vPath, + const std::string& vFileName, + const int& vCountSelectionMax, + UserDatas vUserDatas, + ImGuiFileDialogFlags vFlags) + { + if (prFileDialogInternal.puShowDialog) 
// if already opened, quit + return; + + OpenDialog( + vKey, vTitle, vFilters, + vPath, vFileName, + vCountSelectionMax, vUserDatas, vFlags); + + prFileDialogInternal.puDLGmodal = true; + } + + void IGFD::FileDialog::OpenModal( + const std::string& vKey, + const std::string& vTitle, + const char* vFilters, + const std::string& vFilePathName, + const int& vCountSelectionMax, + UserDatas vUserDatas, + ImGuiFileDialogFlags vFlags) + { + if (prFileDialogInternal.puShowDialog) // if already opened, quit + return; + + OpenDialog( + vKey, vTitle, vFilters, + vFilePathName, + vCountSelectionMax, vUserDatas, vFlags); + + prFileDialogInternal.puDLGmodal = true; + } + + // with pane + // path and fileNameExt can be specified + void IGFD::FileDialog::OpenModal( + const std::string& vKey, + const std::string& vTitle, + const char* vFilters, + const std::string& vPath, + const std::string& vFileName, + const PaneFun& vSidePane, + const float& vSidePaneWidth, + const int& vCountSelectionMax, + UserDatas vUserDatas, + ImGuiFileDialogFlags vFlags) + { + if (prFileDialogInternal.puShowDialog) // if already opened, quit + return; + + OpenDialog( + vKey, vTitle, vFilters, + vPath, vFileName, + vSidePane, vSidePaneWidth, + vCountSelectionMax, vUserDatas, vFlags); + + prFileDialogInternal.puDLGmodal = true; + } + + // with pane + // path and filename are obtained from filePathName + void IGFD::FileDialog::OpenModal( + const std::string& vKey, + const std::string& vTitle, + const char* vFilters, + const std::string& vFilePathName, + const PaneFun& vSidePane, + const float& vSidePaneWidth, + const int& vCountSelectionMax, + UserDatas vUserDatas, + ImGuiFileDialogFlags vFlags) + { + if (prFileDialogInternal.puShowDialog) // if already opened, quit + return; + + OpenDialog( + vKey, vTitle, vFilters, + vFilePathName, + vSidePane, vSidePaneWidth, + vCountSelectionMax, vUserDatas, vFlags); + + prFileDialogInternal.puDLGmodal = true; + } + + 
////////////////////////////////////////////////////////////////////////////////////////////////// + ///// FILE DIALOG DISPLAY FUNCTION /////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////// + + bool IGFD::FileDialog::Display(const std::string& vKey, ImGuiWindowFlags vFlags, ImVec2 vMinSize, ImVec2 vMaxSize) + { + bool res = false; + + if (prFileDialogInternal.puShowDialog && prFileDialogInternal.puDLGkey == vKey) + { + if (prFileDialogInternal.puUseCustomLocale) + setlocale(prFileDialogInternal.puLocaleCategory, prFileDialogInternal.puLocaleBegin.c_str()); + + auto& fdFile = prFileDialogInternal.puFileManager; + auto& fdFilter = prFileDialogInternal.puFilterManager; + + static ImGuiWindowFlags flags; + + // to be sure than only one dialog is displayed per frame + ImGuiContext& g = *GImGui; + if (g.FrameCount == prFileDialogInternal.puLastImGuiFrameCount) // one instance was displayed this frame before for this key +> quit + return res; + prFileDialogInternal.puLastImGuiFrameCount = g.FrameCount; // mark this instance as used this frame + + std::string name = prFileDialogInternal.puDLGtitle + "##" + prFileDialogInternal.puDLGkey; + if (prFileDialogInternal.puName != name) + { + fdFile.ClearComposer(); + fdFile.ClearFileLists(); + flags = vFlags; + } + + NewFrame(); + +#ifdef IMGUI_HAS_VIEWPORT + if (!ImGui::GetIO().ConfigViewportsNoDecoration) + { + // https://github.com/ocornut/imgui/issues/4534 + ImGuiWindowClass window_class; + window_class.ViewportFlagsOverrideClear = ImGuiViewportFlags_NoDecoration; + ImGui::SetNextWindowClass(&window_class); + } +#endif // IMGUI_HAS_VIEWPORT + + ImGui::SetNextWindowSizeConstraints(vMinSize, vMaxSize); + + bool beg = false; + if (prFileDialogInternal.puDLGmodal && + !prFileDialogInternal.puOkResultToConfirm) // disable modal because the confirm dialog for overwrite is a new modal + { + ImGui::OpenPopup(name.c_str()); + 
beg = ImGui::BeginPopupModal(name.c_str(), (bool*)nullptr, + flags | ImGuiWindowFlags_NoScrollbar); + } + else + { + beg = ImGui::Begin(name.c_str(), (bool*)nullptr, flags | ImGuiWindowFlags_NoScrollbar); + } + if (beg) + { +#ifdef IMGUI_HAS_VIEWPORT + // if decoration is enabled we disable the resizing feature of imgui for avoid crash with SDL2 and GLFW3 + if (ImGui::GetIO().ConfigViewportsNoDecoration) + { + flags = vFlags; + } + else + { + auto win = ImGui::GetCurrentWindowRead(); + if (win->Viewport->Idx != 0) + flags |= ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoTitleBar; + else + flags = vFlags; + } +#endif // IMGUI_HAS_VIEWPORT + + prFileDialogInternal.puName = name; //-V820 + puAnyWindowsHovered |= ImGui::IsWindowHovered(); + + if (fdFile.puDLGpath.empty()) + fdFile.puDLGpath = "."; // defaut path is '.' + + fdFilter.SetDefaultFilterIfNotDefined(); + + // init list of files + if (fdFile.IsFileListEmpty() && !fdFile.puShowDrives) + { + IGFD::Utils::ReplaceString(fdFile.puDLGDefaultFileName, fdFile.puDLGpath, ""); // local path + if (!fdFile.puDLGDefaultFileName.empty()) + { + fdFile.SetDefaultFileName(fdFile.puDLGDefaultFileName); + fdFilter.SetSelectedFilterWithExt(fdFilter.puDLGdefaultExt); + } + else if (fdFile.puDLGDirectoryMode) // directory mode + fdFile.SetDefaultFileName("."); + fdFile.ScanDir(prFileDialogInternal, fdFile.puDLGpath); + } + + // draw dialog parts + prDrawHeader(); // bookmark, directory, path + prDrawContent(); // bookmark, files view, side pane + res = prDrawFooter(); // file field, filter combobox, ok/cancel buttons + + EndFrame(); + + // for display in dialog center, the confirm to overwrite dlg + prFileDialogInternal.puDialogCenterPos = ImGui::GetCurrentWindowRead()->ContentRegionRect.GetCenter(); + + // when the confirm to overwrite dialog will appear we need to + // disable the modal mode of the main file dialog + // see prOkResultToConfirm under + if (prFileDialogInternal.puDLGmodal && + 
!prFileDialogInternal.puOkResultToConfirm) + ImGui::EndPopup(); + } + + // same things here regarding prOkResultToConfirm + if (!prFileDialogInternal.puDLGmodal || prFileDialogInternal.puOkResultToConfirm) + ImGui::End(); + + // confirm the result and show the confirm to overwrite dialog if needed + res = prConfirm_Or_OpenOverWriteFileDialog_IfNeeded(res, vFlags); + + if (prFileDialogInternal.puUseCustomLocale) + setlocale(prFileDialogInternal.puLocaleCategory, prFileDialogInternal.puLocaleEnd.c_str()); + } + + return res; + } + + void IGFD::FileDialog::NewFrame() + { + prFileDialogInternal.NewFrame(); + NewThumbnailFrame(prFileDialogInternal); + } + + void IGFD::FileDialog::EndFrame() + { + EndThumbnailFrame(prFileDialogInternal); + prFileDialogInternal.EndFrame(); + + } + void IGFD::FileDialog::QuitFrame() + { + QuitThumbnailFrame(prFileDialogInternal); + } + + void IGFD::FileDialog::prDrawHeader() + { +#ifdef USE_BOOKMARK + prDrawBookmarkButton(); + ImGui::SameLine(); +#endif // USE_BOOKMARK + + prFileDialogInternal.puFileManager.DrawDirectoryCreation(prFileDialogInternal); + ImGui::SameLine(); + ImGui::SeparatorEx(ImGuiSeparatorFlags_Vertical); + ImGui::SameLine(); + prFileDialogInternal.puFileManager.DrawPathComposer(prFileDialogInternal); + +#ifdef USE_THUMBNAILS + if (!(prFileDialogInternal.puDLGflags & ImGuiFileDialogFlags_DisableThumbnailMode)) + { + prDrawDisplayModeToolBar(); + ImGui::SameLine(); + ImGui::SeparatorEx(ImGuiSeparatorFlags_Vertical); + ImGui::SameLine(); + } +#endif // USE_THUMBNAILS + + prFileDialogInternal.puSearchManager.DrawSearchBar(prFileDialogInternal); + } + + void IGFD::FileDialog::prDrawContent() + { + ImVec2 size = ImGui::GetContentRegionAvail() - ImVec2(0.0f, prFileDialogInternal.puFooterHeight); + +#ifdef USE_BOOKMARK + if (prBookmarkPaneShown) + { + //size.x -= prBookmarkWidth; + float otherWidth = size.x - prBookmarkWidth; + ImGui::PushID("##splitterbookmark"); + IGFD::Utils::Splitter(true, 4.0f, + &prBookmarkWidth, 
&otherWidth, 10.0f, + 10.0f + prFileDialogInternal.puDLGoptionsPaneWidth, size.y); + ImGui::PopID(); + size.x -= otherWidth; + prDrawBookmarkPane(prFileDialogInternal, size); + ImGui::SameLine(); + } +#endif // USE_BOOKMARK + + size.x = ImGui::GetContentRegionAvail().x - prFileDialogInternal.puDLGoptionsPaneWidth; + + if (prFileDialogInternal.puDLGoptionsPane) + { + ImGui::PushID("##splittersidepane"); + IGFD::Utils::Splitter(true, 4.0f, &size.x, &prFileDialogInternal.puDLGoptionsPaneWidth, 10.0f, 10.0f, size.y); + ImGui::PopID(); + } + +#ifdef USE_THUMBNAILS + if (prFileDialogInternal.puDLGflags & ImGuiFileDialogFlags_DisableThumbnailMode) + { + prDrawFileListView(size); + } + else + { + switch (prDisplayMode) + { + case DisplayModeEnum::FILE_LIST: + prDrawFileListView(size); + break; + case DisplayModeEnum::THUMBNAILS_LIST: + prDrawThumbnailsListView(size); + break; + case DisplayModeEnum::THUMBNAILS_GRID: + prDrawThumbnailsGridView(size); + } + } +#else + prDrawFileListView(size); +#endif // USE_THUMBNAILS + + if (prFileDialogInternal.puDLGoptionsPane) + { + prDrawSidePane(size.y); + } + } + + bool IGFD::FileDialog::prDrawFooter() + { + auto& fdFile = prFileDialogInternal.puFileManager; + + float posY = ImGui::GetCursorPos().y; // height of last bar calc + + if (!fdFile.puDLGDirectoryMode) + ImGui::Text(fileNameString); + else // directory chooser + ImGui::Text(dirNameString); + + ImGui::SameLine(); + + // Input file fields + float width = ImGui::GetContentRegionAvail().x; + if (!fdFile.puDLGDirectoryMode) + width -= FILTER_COMBO_WIDTH; + ImGui::PushItemWidth(width); + ImGui::InputText("##FileName", fdFile.puFileNameBuffer, MAX_FILE_DIALOG_NAME_BUFFER); + if (ImGui::GetItemID() == ImGui::GetActiveID()) + prFileDialogInternal.puFileInputIsActive = true; + ImGui::PopItemWidth(); + + // combobox of filters + prFileDialogInternal.puFilterManager.DrawFilterComboBox(prFileDialogInternal); + + bool res = false; + + // OK Button + if 
(prFileDialogInternal.puCanWeContinue && strlen(fdFile.puFileNameBuffer)) + { + if (IMGUI_BUTTON(okButtonString "##validationdialog")) + { + prFileDialogInternal.puIsOk = true; + res = true; + } + + ImGui::SameLine(); + } + + // Cancel Button + if (IMGUI_BUTTON(cancelButtonString "##validationdialog") || + prFileDialogInternal.puNeedToExitDialog) // dialog exit asked + { + prFileDialogInternal.puIsOk = false; + res = true; + } + + prFileDialogInternal.puFooterHeight = ImGui::GetCursorPosY() - posY; + + return res; + } + + bool IGFD::FileDialog::prSelectableItem(int vidx, std::shared_ptr vInfos, bool vSelected, const char* vFmt, ...) + { + if (!vInfos.use_count()) + return false; + + auto& fdi = prFileDialogInternal.puFileManager; + + static ImGuiSelectableFlags selectableFlags = ImGuiSelectableFlags_AllowDoubleClick | + ImGuiSelectableFlags_SpanAllColumns | ImGuiSelectableFlags_SpanAvailWidth; + + va_list args; + va_start(args, vFmt); + vsnprintf(fdi.puVariadicBuffer, MAX_FILE_DIALOG_NAME_BUFFER, vFmt, args); + va_end(args); + + float h = 0.0f; +#ifdef USE_THUMBNAILS + if (prDisplayMode == DisplayModeEnum::THUMBNAILS_LIST) + h = DisplayMode_ThumbailsList_ImageHeight; +#endif // USE_THUMBNAILS +#ifdef USE_EXPLORATION_BY_KEYS + bool flashed = prBeginFlashItem((size_t)vidx); + bool res = prFlashableSelectable(fdi.puVariadicBuffer, vSelected, selectableFlags, + flashed, ImVec2(-1.0f, h)); + if (flashed) + prEndFlashItem(); +#else // USE_EXPLORATION_BY_KEYS + (void)vidx; // remove a warnings ofr unused var + + bool res = ImGui::Selectable(fdi.puVariadicBuffer, vSelected, selectableFlags, ImVec2(-1.0f, h)); +#endif // USE_EXPLORATION_BY_KEYS + if (res) + { + if (vInfos->fileType == 'd') + { + // nav system, selectebale cause open directory or select directory + if (ImGui::GetIO().ConfigFlags & ImGuiConfigFlags_NavEnableKeyboard) + { + if (fdi.puDLGDirectoryMode) // directory chooser + { + fdi.SelectFileName(prFileDialogInternal, vInfos); + } + else + { + 
fdi.puPathClicked = fdi.SelectDirectory(vInfos); + } + } + else // no nav system => classic behavior + { + if (ImGui::IsMouseDoubleClicked(0)) // 0 -> left mouse button double click + { + fdi.puPathClicked = fdi.SelectDirectory(vInfos); + } + else if (fdi.puDLGDirectoryMode) // directory chooser + { + fdi.SelectFileName(prFileDialogInternal, vInfos); + } + } + + return true; // needToBreakTheloop + } + else + { + fdi.SelectFileName(prFileDialogInternal, vInfos); + } + } + + return false; + } + + void IGFD::FileDialog::prBeginFileColorIconStyle(std::shared_ptr vFileInfos, bool& vOutShowColor, std::string& vOutStr, ImFont** vOutFont) + { + vOutStr.clear(); + vOutShowColor = false; + + if (vFileInfos->fileStyle.use_count()) //-V807 //-V522 + { + vOutShowColor = true; + + *vOutFont = vFileInfos->fileStyle->font; + } + + if (vOutShowColor && !vFileInfos->fileStyle->icon.empty()) vOutStr = vFileInfos->fileStyle->icon; + else if (vFileInfos->fileType == 'd') vOutStr = dirEntryString; + else if (vFileInfos->fileType == 'l') vOutStr = linkEntryString; + else if (vFileInfos->fileType == 'f') vOutStr = fileEntryString; + + vOutStr += " " + vFileInfos->fileNameExt; + + if (vOutShowColor) + ImGui::PushStyleColor(ImGuiCol_Text, vFileInfos->fileStyle->color); + if (*vOutFont) + ImGui::PushFont(*vOutFont); + } + + void IGFD::FileDialog::prEndFileColorIconStyle(const bool& vShowColor, ImFont* vFont) + { + if (vFont) + ImGui::PopFont(); + if (vShowColor) + ImGui::PopStyleColor(); + } + + void IGFD::FileDialog::prDrawFileListView(ImVec2 vSize) + { + auto& fdi = prFileDialogInternal.puFileManager; + + ImGui::PushID(this); + + static ImGuiTableFlags flags = ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_RowBg | + ImGuiTableFlags_Hideable | ImGuiTableFlags_ScrollY | + ImGuiTableFlags_NoHostExtendY +#ifndef USE_CUSTOM_SORTING_ICON + | ImGuiTableFlags_Sortable +#endif // USE_CUSTOM_SORTING_ICON + ; + auto listViewID = ImGui::GetID("##FileDialog_fileTable"); + if 
(ImGui::BeginTableEx("##FileDialog_fileTable", listViewID, 4, flags, vSize, 0.0f)) //-V112 + { + ImGui::TableSetupScrollFreeze(0, 1); // Make header always visible + ImGui::TableSetupColumn(fdi.puHeaderFileName.c_str(), ImGuiTableColumnFlags_WidthStretch, -1, 0); + ImGui::TableSetupColumn(fdi.puHeaderFileType.c_str(), ImGuiTableColumnFlags_WidthFixed | + ((prFileDialogInternal.puDLGflags & ImGuiFileDialogFlags_HideColumnType) ? ImGuiTableColumnFlags_DefaultHide : 0), -1, 1); + ImGui::TableSetupColumn(fdi.puHeaderFileSize.c_str(), ImGuiTableColumnFlags_WidthFixed | + ((prFileDialogInternal.puDLGflags & ImGuiFileDialogFlags_HideColumnSize) ? ImGuiTableColumnFlags_DefaultHide : 0), -1, 2); + ImGui::TableSetupColumn(fdi.puHeaderFileDate.c_str(), ImGuiTableColumnFlags_WidthFixed | + ((prFileDialogInternal.puDLGflags & ImGuiFileDialogFlags_HideColumnDate) ? ImGuiTableColumnFlags_DefaultHide : 0), -1, 3); + +#ifndef USE_CUSTOM_SORTING_ICON + // Sort our data if sort specs have been changed! + if (ImGuiTableSortSpecs* sorts_specs = ImGui::TableGetSortSpecs()) + { + if (sorts_specs->SpecsDirty && !fdi.IsFileListEmpty()) + { + if (sorts_specs->Specs->ColumnUserID == 0) + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_FILENAME, true); + else if (sorts_specs->Specs->ColumnUserID == 1) + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_TYPE, true); + else if (sorts_specs->Specs->ColumnUserID == 2) + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_SIZE, true); + else //if (sorts_specs->Specs->ColumnUserID == 3) => alwayd true for the moment, to uncomment if we add a fourth column + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_DATE, true); + + sorts_specs->SpecsDirty = false; + } + } + + ImGui::TableHeadersRow(); +#else // USE_CUSTOM_SORTING_ICON + ImGui::TableNextRow(ImGuiTableRowFlags_Headers); + for (int column = 0; column < 4; column++) //-V112 + 
{ + ImGui::TableSetColumnIndex(column); + const char* column_name = ImGui::TableGetColumnName(column); // Retrieve name passed to TableSetupColumn() + ImGui::PushID(column); + ImGui::TableHeader(column_name); + ImGui::PopID(); + if (ImGui::IsItemClicked()) + { + if (column == 0) + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_FILENAME, true); + else if (column == 1) + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_TYPE, true); + else if (column == 2) + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_SIZE, true); + else //if (column == 3) => alwayd true for the moment, to uncomment if we add a fourth column + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_DATE, true); + } + } +#endif // USE_CUSTOM_SORTING_ICON + if (!fdi.IsFilteredListEmpty()) + { + std::string _str; + ImFont* _font = nullptr; + bool _showColor = false; + + prFileListClipper.Begin((int)fdi.GetFilteredListSize(), ImGui::GetTextLineHeightWithSpacing()); + while (prFileListClipper.Step()) + { + for (int i = prFileListClipper.DisplayStart; i < prFileListClipper.DisplayEnd; i++) + { + if (i < 0) continue; + + auto infos = fdi.GetFilteredFileAt((size_t)i); + if (!infos.use_count()) + continue; + + prBeginFileColorIconStyle(infos, _showColor, _str, &_font); + + bool selected = fdi.IsFileNameSelected(infos->fileNameExt); // found + + ImGui::TableNextRow(); + + bool needToBreakTheloop = false; + + if (ImGui::TableNextColumn()) // file name + { + needToBreakTheloop = prSelectableItem(i, infos, selected, _str.c_str()); + } + if (ImGui::TableNextColumn()) // file type + { + ImGui::Text("%s", infos->fileExt.c_str()); + } + if (ImGui::TableNextColumn()) // file size + { + if (infos->fileType != 'd') + { + ImGui::Text("%s ", infos->formatedFileSize.c_str()); + } + else + { + ImGui::Text(""); + } + } + if (ImGui::TableNextColumn()) // file date + time + { + ImGui::Text("%s", 
infos->fileModifDate.c_str()); + } + + prEndFileColorIconStyle(_showColor, _font); + + if (needToBreakTheloop) + break; + } + } + prFileListClipper.End(); + } + +#ifdef USE_EXPLORATION_BY_KEYS + if (!fdi.puInputPathActivated) + { + prLocateByInputKey(prFileDialogInternal); + prExploreWithkeys(prFileDialogInternal, listViewID); + } +#endif // USE_EXPLORATION_BY_KEYS + + ImGuiContext& g = *GImGui; + if (g.LastActiveId - 1 == listViewID || g.LastActiveId == listViewID) + { + prFileDialogInternal.puFileListViewIsActive = true; + } + + ImGui::EndTable(); + } + + ImGui::PopID(); + } + +#ifdef USE_THUMBNAILS + void IGFD::FileDialog::prDrawThumbnailsListView(ImVec2 vSize) + { + auto& fdi = prFileDialogInternal.puFileManager; + + ImGui::PushID(this); + + static ImGuiTableFlags flags = ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_RowBg | + ImGuiTableFlags_Hideable | ImGuiTableFlags_ScrollY | + ImGuiTableFlags_NoHostExtendY +#ifndef USE_CUSTOM_SORTING_ICON + | ImGuiTableFlags_Sortable +#endif // USE_CUSTOM_SORTING_ICON + ; + auto listViewID = ImGui::GetID("##FileDialog_fileTable"); + if (ImGui::BeginTableEx("##FileDialog_fileTable", listViewID, 5, flags, vSize, 0.0f)) + { + ImGui::TableSetupScrollFreeze(0, 1); // Make header always visible + ImGui::TableSetupColumn(fdi.puHeaderFileName.c_str(), ImGuiTableColumnFlags_WidthStretch, -1, 0); + ImGui::TableSetupColumn(fdi.puHeaderFileType.c_str(), ImGuiTableColumnFlags_WidthFixed | + ((prFileDialogInternal.puDLGflags & ImGuiFileDialogFlags_HideColumnType) ? ImGuiTableColumnFlags_DefaultHide : 0), -1, 1); + ImGui::TableSetupColumn(fdi.puHeaderFileSize.c_str(), ImGuiTableColumnFlags_WidthFixed | + ((prFileDialogInternal.puDLGflags & ImGuiFileDialogFlags_HideColumnSize) ? ImGuiTableColumnFlags_DefaultHide : 0), -1, 2); + ImGui::TableSetupColumn(fdi.puHeaderFileDate.c_str(), ImGuiTableColumnFlags_WidthFixed | + ((prFileDialogInternal.puDLGflags & ImGuiFileDialogFlags_HideColumnDate) ? 
ImGuiTableColumnFlags_DefaultHide : 0), -1, 3); + // not needed to have an option for hide the thumbnails since this is why this view is used + ImGui::TableSetupColumn(fdi.puHeaderFileThumbnails.c_str(), ImGuiTableColumnFlags_WidthFixed, -1, 4); //-V112 + +#ifndef USE_CUSTOM_SORTING_ICON + // Sort our data if sort specs have been changed! + if (ImGuiTableSortSpecs* sorts_specs = ImGui::TableGetSortSpecs()) + { + if (sorts_specs->SpecsDirty && !fdi.IsFileListEmpty()) + { + if (sorts_specs->Specs->ColumnUserID == 0) + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_FILENAME, true); + else if (sorts_specs->Specs->ColumnUserID == 1) + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_TYPE, true); + else if (sorts_specs->Specs->ColumnUserID == 2) + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_SIZE, true); + else if (sorts_specs->Specs->ColumnUserID == 3) + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_DATE, true); + else // if (sorts_specs->Specs->ColumnUserID == 4) = > always true for the moment, to uncomment if we add another column + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_THUMBNAILS, true); + sorts_specs->SpecsDirty = false; + } + } + + ImGui::TableHeadersRow(); +#else // USE_CUSTOM_SORTING_ICON + ImGui::TableNextRow(ImGuiTableRowFlags_Headers); + for (int column = 0; column < 5; column++) + { + ImGui::TableSetColumnIndex(column); + const char* column_name = ImGui::TableGetColumnName(column); // Retrieve name passed to TableSetupColumn() + ImGui::PushID(column); + ImGui::TableHeader(column_name); + ImGui::PopID(); + if (ImGui::IsItemClicked()) + { + if (column == 0) + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_FILENAME, true); + else if (column == 1) + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_TYPE, true); + else if (column 
== 2) + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_SIZE, true); + else if (column == 3) + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_DATE, true); + else // if (column == 4) = > always true for the moment, to uncomment if we add another column + fdi.SortFields(prFileDialogInternal, IGFD::FileManager::SortingFieldEnum::FIELD_THUMBNAILS, true); + } + } +#endif // USE_CUSTOM_SORTING_ICON + if (!fdi.IsFilteredListEmpty()) + { + std::string _str; + ImFont* _font = nullptr; + bool _showColor = false; + + ImGuiContext& g = *GImGui; + const float itemHeight = ImMax(g.FontSize, DisplayMode_ThumbailsList_ImageHeight) + g.Style.ItemSpacing.y; + + prFileListClipper.Begin((int)fdi.GetFilteredListSize(), itemHeight); + while (prFileListClipper.Step()) + { + for (int i = prFileListClipper.DisplayStart; i < prFileListClipper.DisplayEnd; i++) + { + if (i < 0) continue; + + auto infos = fdi.GetFilteredFileAt((size_t)i); + if (!infos.use_count()) + continue; + + prBeginFileColorIconStyle(infos, _showColor, _str, &_font); + + bool selected = fdi.IsFileNameSelected(infos->fileNameExt); // found + + ImGui::TableNextRow(); + + bool needToBreakTheloop = false; + + if (ImGui::TableNextColumn()) // file name + { + needToBreakTheloop = prSelectableItem(i, infos, selected, _str.c_str()); + } + if (ImGui::TableNextColumn()) // file type + { + ImGui::Text("%s", infos->fileExt.c_str()); + } + if (ImGui::TableNextColumn()) // file size + { + if (infos->fileType != 'd') + { + ImGui::Text("%s ", infos->formatedFileSize.c_str()); + } + else + { + ImGui::Text(""); + } + } + if (ImGui::TableNextColumn()) // file date + time + { + ImGui::Text("%s", infos->fileModifDate.c_str()); + } + if (ImGui::TableNextColumn()) // file thumbnails + { + auto th = &infos->thumbnailInfo; + + if (!th->isLoadingOrLoaded) + { + prAddThumbnailToLoad(infos); + } + if (th->isReadyToDisplay && + th->textureID) + { + 
ImGui::Image((ImTextureID)th->textureID, + ImVec2((float)th->textureWidth, + (float)th->textureHeight)); + } + } + + prEndFileColorIconStyle(_showColor, _font); + + if (needToBreakTheloop) + break; + } + } + prFileListClipper.End(); + } + +#ifdef USE_EXPLORATION_BY_KEYS + if (!fdi.puInputPathActivated) + { + prLocateByInputKey(prFileDialogInternal); + prExploreWithkeys(prFileDialogInternal, listViewID); + } +#endif // USE_EXPLORATION_BY_KEYS + + ImGuiContext& g = *GImGui; + if (g.LastActiveId - 1 == listViewID || g.LastActiveId == listViewID) + { + prFileDialogInternal.puFileListViewIsActive = true; + } + + ImGui::EndTable(); + } + + ImGui::PopID(); + } + + void IGFD::FileDialog::prDrawThumbnailsGridView(ImVec2 vSize) + { + if (ImGui::BeginChild("##thumbnailsGridsFiles", vSize)) + { + // todo + } + + ImGui::EndChild(); + } + +#endif + + void IGFD::FileDialog::prDrawSidePane(float vHeight) + { + ImGui::SameLine(); + + ImGui::BeginChild("##FileTypes", ImVec2(0, vHeight)); + + prFileDialogInternal.puDLGoptionsPane( + prFileDialogInternal.puFilterManager.GetSelectedFilter().filter.c_str(), + prFileDialogInternal.puDLGuserDatas, &prFileDialogInternal.puCanWeContinue); + + ImGui::EndChild(); + } + + void IGFD::FileDialog::Close() + { + prFileDialogInternal.puDLGkey.clear(); + prFileDialogInternal.puShowDialog = false; + } + + bool IGFD::FileDialog::WasOpenedThisFrame(const std::string& vKey) const + { + bool res = prFileDialogInternal.puShowDialog && prFileDialogInternal.puDLGkey == vKey; + if (res) + { + ImGuiContext& g = *GImGui; + res &= prFileDialogInternal.puLastImGuiFrameCount == g.FrameCount; // return true if a dialog was displayed in this frame + } + return res; + } + + bool IGFD::FileDialog::WasOpenedThisFrame() const + { + bool res = prFileDialogInternal.puShowDialog; + if (res) + { + ImGuiContext& g = *GImGui; + res &= prFileDialogInternal.puLastImGuiFrameCount == g.FrameCount; // return true if a dialog was displayed in this frame + } + return res; + } + + 
bool IGFD::FileDialog::IsOpened(const std::string& vKey) const + { + return (prFileDialogInternal.puShowDialog && prFileDialogInternal.puDLGkey == vKey); + } + + bool IGFD::FileDialog::IsOpened() const + { + return prFileDialogInternal.puShowDialog; + } + + std::string IGFD::FileDialog::GetOpenedKey() const + { + if (prFileDialogInternal.puShowDialog) + return prFileDialogInternal.puDLGkey; + return ""; + } + + std::string IGFD::FileDialog::GetFilePathName() + { + return prFileDialogInternal.puFileManager.GetResultingFilePathName(prFileDialogInternal); + } + + std::string IGFD::FileDialog::GetCurrentPath() + { + return prFileDialogInternal.puFileManager.GetResultingPath(); + } + + std::string IGFD::FileDialog::GetCurrentFileName() + { + return prFileDialogInternal.puFileManager.GetResultingFileName(prFileDialogInternal); + } + + std::string IGFD::FileDialog::GetCurrentFilter() + { + return prFileDialogInternal.puFilterManager.GetSelectedFilter().filter; + } + + std::map IGFD::FileDialog::GetSelection() + { + return prFileDialogInternal.puFileManager.GetResultingSelection(); + } + + UserDatas IGFD::FileDialog::GetUserDatas() const + { + return prFileDialogInternal.puDLGuserDatas; + } + + bool IGFD::FileDialog::IsOk() const + { + return prFileDialogInternal.puIsOk; + } + + void IGFD::FileDialog::SetFileStyle(const IGFD_FileStyleFlags& vFlags, const char* vCriteria, const FileStyle& vInfos) + { + prFileDialogInternal.puFilterManager.SetFileStyle(vFlags, vCriteria, vInfos); + } + + void IGFD::FileDialog::SetFileStyle(const IGFD_FileStyleFlags& vFlags, const char* vCriteria, const ImVec4& vColor, const std::string& vIcon, ImFont* vFont) + { + prFileDialogInternal.puFilterManager.SetFileStyle(vFlags, vCriteria, vColor, vIcon, vFont); + } + + bool IGFD::FileDialog::GetFileStyle(const IGFD_FileStyleFlags& vFlags, const std::string& vCriteria, ImVec4* vOutColor, std::string* vOutIcon, ImFont **vOutFont) + { + return prFileDialogInternal.puFilterManager.GetFileStyle(vFlags, 
vCriteria, vOutColor, vOutIcon, vOutFont); + } + + void IGFD::FileDialog::ClearFilesStyle() + { + prFileDialogInternal.puFilterManager.ClearFilesStyle(); + } + + void IGFD::FileDialog::SetLocales(const int& vLocaleCategory, const std::string& vLocaleBegin, const std::string& vLocaleEnd) + { + prFileDialogInternal.puUseCustomLocale = true; + prFileDialogInternal.puLocaleBegin = vLocaleBegin; + prFileDialogInternal.puLocaleEnd = vLocaleEnd; + } + + ////////////////////////////////////////////////////////////////////////////// + //// OVERWRITE DIALOG //////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////// + + bool IGFD::FileDialog::prConfirm_Or_OpenOverWriteFileDialog_IfNeeded(bool vLastAction, ImGuiWindowFlags vFlags) + { + // if confirmation => return true for confirm the overwrite et quit the dialog + // if cancel => return false && set IsOk to false for keep inside the dialog + + // if IsOk == false => return false for quit the dialog + if (!prFileDialogInternal.puIsOk && vLastAction) + { + QuitFrame(); + return true; + } + + // if IsOk == true && no check of overwrite => return true for confirm the dialog + if (prFileDialogInternal.puIsOk && vLastAction && !(prFileDialogInternal.puDLGflags & ImGuiFileDialogFlags_ConfirmOverwrite)) + { + QuitFrame(); + return true; + } + + // if IsOk == true && check of overwrite => return false and show confirm to overwrite dialog + if ((prFileDialogInternal.puOkResultToConfirm || (prFileDialogInternal.puIsOk && vLastAction)) && + (prFileDialogInternal.puDLGflags & ImGuiFileDialogFlags_ConfirmOverwrite)) + { + if (prFileDialogInternal.puIsOk) // catched only one time + { + if (!prFileDialogInternal.puFileManager.IsFileExist(GetFilePathName())) // not existing => quit dialog + { + QuitFrame(); + return true; + } + else // existing => confirm dialog to open + { + prFileDialogInternal.puIsOk = false; + prFileDialogInternal.puOkResultToConfirm = true; + 
} + } + + std::string name = OverWriteDialogTitleString "##" + prFileDialogInternal.puDLGtitle + prFileDialogInternal.puDLGkey + "OverWriteDialog"; + + bool res = false; + + ImGui::OpenPopup(name.c_str()); + if (ImGui::BeginPopupModal(name.c_str(), (bool*)0, + vFlags | ImGuiWindowFlags_AlwaysAutoResize | + ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove)) + { + ImGui::SetWindowPos(prFileDialogInternal.puDialogCenterPos - ImGui::GetWindowSize() * 0.5f); // next frame needed for GetWindowSize to work + + ImGui::Text("%s", OverWriteDialogMessageString); + + if (IMGUI_BUTTON(OverWriteDialogConfirmButtonString)) + { + prFileDialogInternal.puOkResultToConfirm = false; + prFileDialogInternal.puIsOk = true; + res = true; + ImGui::CloseCurrentPopup(); + } + + ImGui::SameLine(); + + if (IMGUI_BUTTON(OverWriteDialogCancelButtonString)) + { + prFileDialogInternal.puOkResultToConfirm = false; + prFileDialogInternal.puIsOk = false; + res = false; + ImGui::CloseCurrentPopup(); + } + + ImGui::EndPopup(); + } + + if (res) + { + QuitFrame(); + } + return res; + } + + return false; + } +} + +#endif // __cplusplus + +///////////////////////////////////////////////////////////////// +///// C Interface /////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// + +// Return an initialized IGFD_Selection_Pair +IMGUIFILEDIALOG_API IGFD_Selection_Pair IGFD_Selection_Pair_Get(void) +{ + IGFD_Selection_Pair res = {}; + res.fileName = nullptr; + res.filePathName = nullptr; + return res; +} + +// destroy only the content of vSelection_Pair +IMGUIFILEDIALOG_API void IGFD_Selection_Pair_DestroyContent(IGFD_Selection_Pair* vSelection_Pair) +{ + if (vSelection_Pair) + { + delete[] vSelection_Pair->fileName; + delete[] vSelection_Pair->filePathName; + } +} + +// Return an initialized IGFD_Selection +IMGUIFILEDIALOG_API IGFD_Selection IGFD_Selection_Get(void) +{ + return { nullptr, 0U }; +} + +// destroy only the content of vSelection 
+IMGUIFILEDIALOG_API void IGFD_Selection_DestroyContent(IGFD_Selection* vSelection) +{ + if (vSelection) + { + if (vSelection->table) + { + for (size_t i = 0U; i < vSelection->count; i++) + { + IGFD_Selection_Pair_DestroyContent(&vSelection->table[i]); + } + delete[] vSelection->table; + } + vSelection->count = 0U; + } +} + +// create an instance of ImGuiFileDialog +IMGUIFILEDIALOG_API ImGuiFileDialog* IGFD_Create(void) +{ + return new ImGuiFileDialog(); +} + +// destroy the instance of ImGuiFileDialog +IMGUIFILEDIALOG_API void IGFD_Destroy(ImGuiFileDialog* vContext) +{ + if (vContext) + { + delete vContext; + vContext = nullptr; + } +} + +// standard dialog +IMGUIFILEDIALOG_API void IGFD_OpenDialog( + ImGuiFileDialog* vContext, + const char* vKey, + const char* vTitle, + const char* vFilters, + const char* vPath, + const char* vFileName, + const int vCountSelectionMax, + void* vUserDatas, + ImGuiFileDialogFlags flags) +{ + if (vContext) + { + vContext->OpenDialog( + vKey, vTitle, vFilters, vPath, vFileName, + vCountSelectionMax, vUserDatas, flags); + } +} + +IMGUIFILEDIALOG_API void IGFD_OpenDialog2( + ImGuiFileDialog* vContext, + const char* vKey, + const char* vTitle, + const char* vFilters, + const char* vFilePathName, + const int vCountSelectionMax, + void* vUserDatas, + ImGuiFileDialogFlags flags) +{ + if (vContext) + { + vContext->OpenDialog( + vKey, vTitle, vFilters, vFilePathName, + vCountSelectionMax, vUserDatas, flags); + } +} + +IMGUIFILEDIALOG_API void IGFD_OpenPaneDialog( + ImGuiFileDialog* vContext, + const char* vKey, + const char* vTitle, + const char* vFilters, + const char* vPath, + const char* vFileName, + IGFD_PaneFun vSidePane, + const float vSidePaneWidth, + const int vCountSelectionMax, + void* vUserDatas, + ImGuiFileDialogFlags flags) +{ + if (vContext) + { + vContext->OpenDialog( + vKey, vTitle, vFilters, + vPath, vFileName, + vSidePane, vSidePaneWidth, + vCountSelectionMax, vUserDatas, flags); + } +} + +IMGUIFILEDIALOG_API void 
IGFD_OpenPaneDialog2( + ImGuiFileDialog* vContext, + const char* vKey, + const char* vTitle, + const char* vFilters, + const char* vFilePathName, + IGFD_PaneFun vSidePane, + const float vSidePaneWidth, + const int vCountSelectionMax, + void* vUserDatas, + ImGuiFileDialogFlags flags) +{ + if (vContext) // a null context is silently ignored + { + vContext->OpenDialog( + vKey, vTitle, vFilters, + vFilePathName, + vSidePane, vSidePaneWidth, + vCountSelectionMax, vUserDatas, flags); + } +} + +// modal dialog +IMGUIFILEDIALOG_API void IGFD_OpenModal( + ImGuiFileDialog* vContext, + const char* vKey, + const char* vTitle, + const char* vFilters, + const char* vPath, + const char* vFileName, + const int vCountSelectionMax, + void* vUserDatas, + ImGuiFileDialogFlags flags) +{ + if (vContext) // a null context is silently ignored + { + vContext->OpenModal( + vKey, vTitle, vFilters, vPath, vFileName, + vCountSelectionMax, vUserDatas, flags); + } +} + +IMGUIFILEDIALOG_API void IGFD_OpenModal2( // same as IGFD_OpenModal but takes vFilePathName in place of vPath and vFileName + ImGuiFileDialog* vContext, + const char* vKey, + const char* vTitle, + const char* vFilters, + const char* vFilePathName, + const int vCountSelectionMax, + void* vUserDatas, + ImGuiFileDialogFlags flags) +{ + if (vContext) + { + vContext->OpenModal( + vKey, vTitle, vFilters, vFilePathName, + vCountSelectionMax, vUserDatas, flags); + } +} + +IMGUIFILEDIALOG_API void IGFD_OpenPaneModal( // modal dialog with a side pane callback and its width + ImGuiFileDialog* vContext, + const char* vKey, + const char* vTitle, + const char* vFilters, + const char* vPath, + const char* vFileName, + IGFD_PaneFun vSidePane, + const float vSidePaneWidth, + const int vCountSelectionMax, + void* vUserDatas, + ImGuiFileDialogFlags flags) +{ + if (vContext) + { + vContext->OpenModal( + vKey, vTitle, vFilters, + vPath, vFileName, + vSidePane, vSidePaneWidth, + vCountSelectionMax, vUserDatas, flags); + } +} + +IMGUIFILEDIALOG_API void IGFD_OpenPaneModal2( // pane modal variant taking vFilePathName in place of vPath and vFileName + ImGuiFileDialog* vContext, + const char* vKey, + const char* vTitle, + const char* vFilters, + const char* vFilePathName, + IGFD_PaneFun vSidePane, + const float vSidePaneWidth, + const int 
vCountSelectionMax, + void* vUserDatas, + ImGuiFileDialogFlags flags) +{ + if (vContext) + { + vContext->OpenModal( + vKey, vTitle, vFilters, + vFilePathName, + vSidePane, vSidePaneWidth, + vCountSelectionMax, vUserDatas, flags); + } +} + +IMGUIFILEDIALOG_API bool IGFD_DisplayDialog(ImGuiFileDialog* vContext, + const char* vKey, ImGuiWindowFlags vFlags, ImVec2 vMinSize, ImVec2 vMaxSize) +{ + if (vContext) + { + return vContext->Display(vKey, vFlags, vMinSize, vMaxSize); + } + + return false; // no context +} + +IMGUIFILEDIALOG_API void IGFD_CloseDialog(ImGuiFileDialog* vContext) +{ + if (vContext) + { + vContext->Close(); + } +} + +IMGUIFILEDIALOG_API bool IGFD_IsOk(ImGuiFileDialog* vContext) +{ + if (vContext) + { + return vContext->IsOk(); + } + + return false; // no context +} + +IMGUIFILEDIALOG_API bool IGFD_WasKeyOpenedThisFrame(ImGuiFileDialog* vContext, + const char* vKey) +{ + if (vContext) + { + return vContext->WasOpenedThisFrame(vKey); // forward the answer (it used to be computed and then discarded) + } + + return false; // no context +} + +IMGUIFILEDIALOG_API bool IGFD_WasOpenedThisFrame(ImGuiFileDialog* vContext) +{ + if (vContext) + { + return vContext->WasOpenedThisFrame(); // forward the answer instead of always reporting false + } + + return false; // no context +} + +IMGUIFILEDIALOG_API bool IGFD_IsKeyOpened(ImGuiFileDialog* vContext, + const char* vCurrentOpenedKey) +{ + if (vContext) + { + return vContext->IsOpened(vCurrentOpenedKey); // forward the answer instead of always reporting false + } + + return false; // no context +} + +IMGUIFILEDIALOG_API bool IGFD_IsOpened(ImGuiFileDialog* vContext) +{ + if (vContext) + { + return vContext->IsOpened(); // forward the answer instead of always reporting false + } + + return false; // no context +} + +IMGUIFILEDIALOG_API IGFD_Selection IGFD_GetSelection(ImGuiFileDialog* vContext) +{ + IGFD_Selection res = IGFD_Selection_Get(); + + if (vContext) + { + auto sel = vContext->GetSelection(); + if (!sel.empty()) + { + res.count = sel.size(); + res.table = new IGFD_Selection_Pair[res.count](); // value-initialized so a pair whose string is empty keeps null pointers for the later delete[] + + size_t idx = 0U; + for (const auto& s : sel) + { + IGFD_Selection_Pair* pair = res.table + idx++; + + // fileNameExt + if (!s.first.empty()) + { + size_t siz = s.first.size() + 1U; + pair->fileName = new char[siz]; +#ifndef MSVC + strncpy(pair->fileName, 
s.first.c_str(), siz); +#else + strncpy_s(pair->fileName, siz, s.first.c_str(), siz); +#endif + pair->fileName[siz - 1U] = '\0'; + } + + // filePathName + if (!s.second.empty()) + { + size_t siz = s.second.size() + 1U; // size the buffer from the path itself, not from the file name + pair->filePathName = new char[siz]; +#ifndef MSVC + strncpy(pair->filePathName, s.second.c_str(), siz); +#else + strncpy_s(pair->filePathName, siz, s.second.c_str(), siz); +#endif + pair->filePathName[siz - 1U] = '\0'; + } + } + + return res; + } + } + + return res; +} + +IMGUIFILEDIALOG_API char* IGFD_GetFilePathName(ImGuiFileDialog* vContext) +{ + char* res = nullptr; // stays null when there is no context or the string is empty + + if (vContext) + { + auto s = vContext->GetFilePathName(); + if (!s.empty()) + { + size_t siz = s.size() + 1U; + res = new char[siz]; // allocated with new[]; the buffer is handed to the caller +#ifndef MSVC + strncpy(res, s.c_str(), siz); +#else + strncpy_s(res, siz, s.c_str(), siz); +#endif + res[siz - 1U] = '\0'; + } + } + + return res; +} + +IMGUIFILEDIALOG_API char* IGFD_GetCurrentFileName(ImGuiFileDialog* vContext) +{ + char* res = nullptr; // stays null when there is no context or the string is empty + + if (vContext) + { + auto s = vContext->GetCurrentFileName(); + if (!s.empty()) + { + size_t siz = s.size() + 1U; + res = new char[siz]; // allocated with new[]; the buffer is handed to the caller +#ifndef MSVC + strncpy(res, s.c_str(), siz); +#else + strncpy_s(res, siz, s.c_str(), siz); +#endif + res[siz - 1U] = '\0'; + } + } + + return res; +} + +IMGUIFILEDIALOG_API char* IGFD_GetCurrentPath(ImGuiFileDialog* vContext) +{ + char* res = nullptr; // stays null when there is no context or the string is empty + + if (vContext) + { + auto s = vContext->GetCurrentPath(); + if (!s.empty()) + { + size_t siz = s.size() + 1U; + res = new char[siz]; // allocated with new[]; the buffer is handed to the caller +#ifndef MSVC + strncpy(res, s.c_str(), siz); +#else + strncpy_s(res, siz, s.c_str(), siz); +#endif + res[siz - 1U] = '\0'; + } + } + + return res; +} + +IMGUIFILEDIALOG_API char* IGFD_GetCurrentFilter(ImGuiFileDialog* vContext) +{ + char* res = nullptr; + + if (vContext) + { + auto s = vContext->GetCurrentFilter(); + if (!s.empty()) + { + size_t siz = s.size() + 1U; + res = new char[siz]; +#ifndef MSVC + strncpy(res, s.c_str(), siz); +#else + strncpy_s(res, siz, s.c_str(), siz); 
+#endif + res[siz - 1U] = '\0'; + } + } + + return res; +} + +IMGUIFILEDIALOG_API void* IGFD_GetUserDatas(ImGuiFileDialog* vContext) +{ + if (vContext) + { + return vContext->GetUserDatas(); + } + + return nullptr; // no context +} + +IMGUIFILEDIALOG_API void IGFD_SetFileStyle(ImGuiFileDialog* vContext, + IGFD_FileStyleFlags vFlags, const char* vCriteria, ImVec4 vColor, const char* vIcon, ImFont* vFont) //-V813 +{ + if (vContext) // a null context is silently ignored + { + vContext->SetFileStyle(vFlags, vCriteria, vColor, vIcon, vFont); + } +} + +IMGUIFILEDIALOG_API void IGFD_SetFileStyle2(ImGuiFileDialog* vContext, + IGFD_FileStyleFlags vFlags, const char* vCriteria, float vR, float vG, float vB, float vA, const char* vIcon, ImFont* vFont) +{ + if (vContext) + { + vContext->SetFileStyle(vFlags, vCriteria, ImVec4(vR, vG, vB, vA), vIcon, vFont); // the four floats are packed into an ImVec4 color + } +} + +IMGUIFILEDIALOG_API bool IGFD_GetFileStyle(ImGuiFileDialog* vContext, + IGFD_FileStyleFlags vFlags, const char* vCriteria, ImVec4* vOutColor, char** vOutIcon, ImFont** vOutFont) +{ + if (vContext) + { + std::string icon; + bool res = vContext->GetFileStyle(vFlags, vCriteria, vOutColor, &icon, vOutFont); + if (!icon.empty() && vOutIcon) // *vOutIcon is left untouched when the icon is empty + { + size_t siz = icon.size() + 1U; + *vOutIcon = new char[siz]; // allocated with new[]; the buffer is handed to the caller +#ifndef MSVC + strncpy(*vOutIcon, icon.c_str(), siz); +#else + strncpy_s(*vOutIcon, siz, icon.c_str(), siz); +#endif + (*vOutIcon)[siz - 1U] = '\0'; + } + return res; + } + + return false; // no context +} + +IMGUIFILEDIALOG_API void IGFD_ClearFilesStyle(ImGuiFileDialog* vContext) +{ + if (vContext) + { + vContext->ClearFilesStyle(); + } +} + +IMGUIFILEDIALOG_API void SetLocales(ImGuiFileDialog* vContext, const int vCategory, const char* vBeginLocale, const char* vEndLocale) +{ + if (vContext) + { + vContext->SetLocales(vCategory, (vBeginLocale ? vBeginLocale : ""), (vEndLocale ? 
vEndLocale : "")); + } +} + +#ifdef USE_EXPLORATION_BY_KEYS +IMGUIFILEDIALOG_API void IGFD_SetFlashingAttenuationInSeconds(ImGuiFileDialog* vContext, float vAttenValue) +{ + if (vContext) + { + vContext->SetFlashingAttenuationInSeconds(vAttenValue); + } +} +#endif + +#ifdef USE_BOOKMARK +IMGUIFILEDIALOG_API char* IGFD_SerializeBookmarks(ImGuiFileDialog* vContext) +{ + char* res = nullptr; + + if (vContext) + { + auto s = vContext->SerializeBookmarks(); + if (!s.empty()) + { + size_t siz = s.size() + 1U; + res = new char[siz]; +#ifndef MSVC + strncpy(res, s.c_str(), siz); +#else + strncpy_s(res, siz, s.c_str(), siz); +#endif + res[siz - 1U] = '\0'; + } + } + + return res; +} + +IMGUIFILEDIALOG_API void IGFD_DeserializeBookmarks(ImGuiFileDialog* vContext, const char* vBookmarks) +{ + if (vContext) + { + vContext->DeserializeBookmarks(vBookmarks); + } +} +#endif + +#ifdef USE_THUMBNAILS +IMGUIFILEDIALOG_API void SetCreateThumbnailCallback(ImGuiFileDialog* vContext, const IGFD_CreateThumbnailFun vCreateThumbnailFun) +{ + if (vContext) + { + vContext->SetCreateThumbnailCallback(vCreateThumbnailFun); + } +} + +IMGUIFILEDIALOG_API void SetDestroyThumbnailCallback(ImGuiFileDialog* vContext, const IGFD_DestroyThumbnailFun vDestroyThumbnailFun) +{ + if (vContext) + { + vContext->SetDestroyThumbnailCallback(vDestroyThumbnailFun); + } +} + +IMGUIFILEDIALOG_API void ManageGPUThumbnails(ImGuiFileDialog* vContext) +{ + if (vContext) + { + vContext->ManageGPUThumbnails(); + } +} +#endif // USE_THUMBNAILS diff --git a/cpp-projects/3d-engine/imgui/extra/ImGuiFileDialog.h b/cpp-projects/3d-engine/imgui/extra/ImGuiFileDialog.h new file mode 100644 index 0000000..625ece3 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/ImGuiFileDialog.h @@ -0,0 +1,1594 @@ +/* +MIT License + +Copyright (c) 2019-2021 Stephane Cuillerdier (aka aiekick) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the 
"Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +/* +----------------------------------------------------------------------------------------------------------------- +----------------------------------------------------------------------------------------------------------------- + +github repo : https://github.com/aiekick/ImGuiFileDialog + +----------------------------------------------------------------------------------------------------------------- +## Description : +----------------------------------------------------------------------------------------------------------------- + +this File Dialog is build on top of DearImGui +(On windows, need te lib Dirent : https://github.com/tronkko/dirent, use the branch 1.23 for avoid any issues) +Complete readme here : https://github.com/aiekick/ImGuiFileDialog/blob/master/README.md) + +this filedialog was created principally for have custom pane with widgets accrdoing to file extention. 
+it was not possible with native filedialog + +An example of the File Dialog integrated within the ImGui Demo App + +----------------------------------------------------------------------------------------------------------------- +## Features : +----------------------------------------------------------------------------------------------------------------- + +- Separate system for call and display + - can be many func calls with different params for one display func by ex +- Can use custom pane via function binding + - this pane can block the validation of the dialog + - can also display different things according to current filter and User Datas +- Support of Filter Custom Coloring / Icons / text +- Multi Selection (ctrl/shift + click) : + - 0 => infinite + - 1 => one file (default) + - n => n files +- Compatible with MacOs, Linux, Win + - On Win version you can list Drives +- Support of Modal/Standard dialog type +- Support both Mode : File Chooser or Directory Chooser +- Support filter collection / Custom filter name +- Support files Exploring with keys : Up / Down / Enter (open dir) / Backspace (come back) +- Support files Exploring by input char (case insensitive) +- Support bookmark creation/edition/call for directory (can have custom name corresponding to a path) +- Support input path edition by right click on a path button +- Support of a "Confirm to Overwrite" dialog if File Exist + + +----------------------------------------------------------------------------------------------------------------- +## NameSpace / SingleTon +----------------------------------------------------------------------------------------------------------------- + +Use the Namespace IGFD (to avoid conflicts with variable, struct and class names) + +you can display only one dialog at a time, this class is a singleton and must be called like that : +ImGuiFileDialog::Instance()->method_of_your_choice() + 
+----------------------------------------------------------------------------------------------------------------- +## Simple Dialog : +----------------------------------------------------------------------------------------------------------------- + +Example code : +void drawGui() +{ + // open Dialog Simple + if (ImGui::Button("Open File Dialog")) + ImGuiFileDialog::Instance()->OpenDialog("ChooseFileDlgKey", "Choose File", ".cpp,.h,.hpp", "."); + + // display + if (ImGuiFileDialog::Instance()->FileDialog("ChooseFileDlgKey")) + { + // action if OK + if (ImGuiFileDialog::Instance()->IsOk == true) + { + std::string filePathName = ImGuiFileDialog::Instance()->GetFilePathName(); + std::string filePath = ImGuiFileDialog::Instance()->GetCurrentPath(); + // action + } + // close + ImGuiFileDialog::Instance()->CloseDialog("ChooseFileDlgKey"); + } +} + +----------------------------------------------------------------------------------------------------------------- +## Directory Chooser : +----------------------------------------------------------------------------------------------------------------- + +For have only a directory chooser, you just need to specify a filter null : + +Example code : +ImGuiFileDialog::Instance()->OpenDialog("ChooseDirDlgKey", "Choose a Directory", 0, "."); + +In this mode you can select any directory with one click, and open directory with double click + +----------------------------------------------------------------------------------------------------------------- +## Dialog with Custom Pane : +----------------------------------------------------------------------------------------------------------------- + +Example code : +static bool canValidateDialog = false; +inline void InfosPane(std::string& vFilter, IGFD::UserDatas vUserDatas, bool *vCantContinue) // if vCantContinue is false, the user cant validate the dialog +{ + ImGui::TextColored(ImVec4(0, 1, 1, 1), "Infos Pane"); + ImGui::Text("Selected Filter : %s", vFilter.c_str()); + if 
(vUserDatas) + ImGui::Text("UserDatas : %s", vUserDatas); + ImGui::Checkbox("if not checked you cant validate the dialog", &canValidateDialog); + if (vCantContinue) + *vCantContinue = canValidateDialog; +} + +void drawGui() +{ + // open Dialog with Pane + if (ImGui::Button("Open File Dialog with a custom pane")) + ImGuiFileDialog::Instance()->OpenDialog("ChooseFileDlgKey", "Choose File", ".cpp,.h,.hpp", + ".", "", std::bind(&InfosPane, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3), 350, 1, IGFD::UserDatas("InfosPane")); + + // display and action if ok + if (ImGuiFileDialog::Instance()->FileDialog("ChooseFileDlgKey")) + { + if (ImGuiFileDialog::Instance()->IsOk == true) + { + std::string filePathName = ImGuiFileDialog::Instance()->GetFilePathName(); + std::string filePath = ImGuiFileDialog::Instance()->GetCurrentPath(); + std::string filter = ImGuiFileDialog::Instance()->GetCurrentFilter(); + // here convert from string because a string was passed as a userDatas, but it can be what you want + std::string userDatas; + if (ImGuiFileDialog::Instance()->GetUserDatas()) + userDatas = std::string((const char*)ImGuiFileDialog::Instance()->GetUserDatas()); + auto selection = ImGuiFileDialog::Instance()->GetSelection(); // multiselection + + // action + } + // close + ImGuiFileDialog::Instance()->CloseDialog("ChooseFileDlgKey"); + } +} + +----------------------------------------------------------------------------------------------------------------- +## File Style +----------------------------------------------------------------------------------------------------------------- + +You can define style for files/dirs/links in many ways : + +the style can be colors, icons and fonts + +the general form is : +ImGuiFileDialog::Instance()->SetFileStyle(styleType, criteria, color, icon, font); + +styleType can be thoses : +IGFD_FileStyle_None // define none style +IGFD_FileStyleByTypeFile // define style for all files +IGFD_FileStyleByTypeDir // define style 
for all dir +IGFD_FileStyleByTypeLink // define style for all link +IGFD_FileStyleByExtention // define style by extention, for files or links +IGFD_FileStyleByFullName // define style for particular file/dir/link full name (filename + extention) +IGFD_FileStyleByContainedInFullName // define style for file/dir/link when criteria is contained in full name + +samples : + +define style by file extention +ImGuiFileDialog::Instance()->SetFileStyle(IGFD_FileStyleByExtention, ".png", ImVec4(0.0f, 1.0f, 1.0f, 0.9f), ICON_IGFD_FILE_PIC, font1); +ImGuiFileDialog::Instance()->SetFileStyle(IGFD_FileStyleByExtention, ".gif", ImVec4(0.0f, 1.0f, 0.5f, 0.9f), "[GIF]"); + +define style for all directories +ImGuiFileDialog::Instance()->SetFileStyle(IGFD_FileStyleByTypeDir, "", ImVec4(0.5f, 1.0f, 0.9f, 0.9f), ICON_IGFD_FOLDER); +can be for a specific directory +ImGuiFileDialog::Instance()->SetFileStyle(IGFD_FileStyleByTypeDir, ".git", ImVec4(0.5f, 1.0f, 0.9f, 0.9f), ICON_IGFD_FOLDER); + +define style for all files +ImGuiFileDialog::Instance()->SetFileStyle(IGFD_FileStyleByTypeFile, "", ImVec4(0.5f, 1.0f, 0.9f, 0.9f), ICON_IGFD_FILE); +can be for a specific file +ImGuiFileDialog::Instance()->SetFileStyle(IGFD_FileStyleByTypeFile, ".git", ImVec4(0.5f, 1.0f, 0.9f, 0.9f), ICON_IGFD_FILE); + +define style for all links +ImGuiFileDialog::Instance()->SetFileStyle(IGFD_FileStyleByTypeLink, "", ImVec4(0.5f, 1.0f, 0.9f, 0.9f)); +can be for a specific link +ImGuiFileDialog::Instance()->SetFileStyle(IGFD_FileStyleByTypeLink, "Readme.md", ImVec4(0.5f, 1.0f, 0.9f, 0.9f)); + +define style for any files/dirs/links by fullname +ImGuiFileDialog::Instance()->SetFileStyle(IGFD_FileStyleByFullName, "doc", ImVec4(0.9f, 0.2f, 0.0f, 0.9f), ICON_IGFD_FILE_PIC); + +define style for any dirs by file who are containing this string +ImGuiFileDialog::Instance()->SetFileStyle(IGFD_FileStyleByTypeDir | IGFD_FileStyleByContainedInFullName, ".git", ImVec4(0.9f, 0.2f, 0.0f, 0.9f), ICON_IGFD_BOOKMARK); + +define style 
for any files by file who are containing this string +ImGuiFileDialog::Instance()->SetFileStyle(IGFD_FileStyleByTypeFile | IGFD_FileStyleByContainedInFullName, ".git", ImVec4(0.5f, 0.8f, 0.5f, 0.9f), ICON_IGFD_SAVE); + +----------------------------------------------------------------------------------------------------------------- +## Filter Collections +----------------------------------------------------------------------------------------------------------------- + +you can define a custom filter name that corresponds to a group of filters + +you must use this syntax : custom_name1{filter1,filter2,filter3},custom_name2{filter1,filter2},filter1 +when you select custom_name1, the group of filters 1 to 3 will be applied +the reserved chars are {}, you can't use them to define a filter name. + +Example code : +const char *filters = "Source files (*.cpp *.h *.hpp){.cpp,.h,.hpp},Image files (*.png *.gif *.jpg *.jpeg){.png,.gif,.jpg,.jpeg},.md"; +ImGuiFileDialog::Instance()->OpenDialog("ChooseFileDlgKey", ICON_IMFDLG_FOLDER_OPEN " Choose a File", filters, "."); + +## Multi Selection + +You can define in the OpenDialog/OpenModal call the number of files you want to select : +- 0 => infinite +- 1 => one file only (default) +- n => n files only + +See the define at the end of these funcs after path. 
+ +Example code : +ImGuiFileDialog::Instance()->OpenDialog("ChooseFileDlgKey", "Choose File", ".*,.cpp,.h,.hpp", "."); +ImGuiFileDialog::Instance()->OpenDialog("ChooseFileDlgKey", "Choose 1 File", ".*,.cpp,.h,.hpp", ".", 1); +ImGuiFileDialog::Instance()->OpenDialog("ChooseFileDlgKey", "Choose 5 File", ".*,.cpp,.h,.hpp", ".", 5); +ImGuiFileDialog::Instance()->OpenDialog("ChooseFileDlgKey", "Choose many File", ".*,.cpp,.h,.hpp", ".", 0); +ImGuiFileDialog::Instance()->OpenDialog("ChooseFileDlgKey", "Choose File", ".png,.jpg", + ".", "", std::bind(&InfosPane, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3), 350, 1, "SaveFile"); // 1 file + +----------------------------------------------------------------------------------------------------------------- +## File Dialog Constraints +----------------------------------------------------------------------------------------------------------------- + +you can define min/max size of the dialog when you display It + +by ex : + +* MaxSize is the full display size +* MinSize in the half display size. + +Example code : +ImVec2 maxSize = ImVec2((float)display_w, (float)display_h); +ImVec2 minSize = maxSize * 0.5f; +ImGuiFileDialog::Instance()->FileDialog("ChooseFileDlgKey", ImGuiWindowFlags_NoCollapse, minSize, maxSize); + +----------------------------------------------------------------------------------------------------------------- +## Detail View Mode +----------------------------------------------------------------------------------------------------------------- + +You can have tables display like that. 
+ +- uncomment "#define USE_IMGUI_TABLES" in you custom config file (CustomImGuiFileDialogConfig.h in this example) + +----------------------------------------------------------------------------------------------------------------- +## Exploring by keys +----------------------------------------------------------------------------------------------------------------- + +you can activate this feature by uncomment : "#define USE_EXPLORATION_BY_KEYS" +in you custom config file (CustomImGuiFileDialogConfig.h in this example) + +you can also uncomment the next lines for define your keys : + +* IGFD_KEY_UP => Up key for explore to the top +* IGFD_KEY_DOWN => Down key for explore to the bottom +* IGFD_KEY_ENTER => Enter key for open directory +* IGFD_KEY_BACKSPACE => BackSpace for comming back to the last directory + +you can also explore a file list by use the current key char. + +as you see the current item is flashed (by default for 1 sec) +you can define the flashing life time by yourself with the function + +Example code : +ImGuiFileDialog::Instance()->SetFlashingAttenuationInSeconds(1.0f); + +----------------------------------------------------------------------------------------------------------------- +## Bookmarks +----------------------------------------------------------------------------------------------------------------- + +you can create/edit/call path bookmarks and load/save them in file + +you can activate it by uncomment : "#define USE_BOOKMARK" + +in you custom config file (CustomImGuiFileDialogConfig.h in this example) + +you can also uncomment the next lines for customize it : +Example code : +#define bookmarkPaneWith 150.0f => width of the bookmark pane +#define IMGUI_TOGGLE_BUTTON ToggleButton => customize the Toggled button (button stamp must be : (const char* label, bool *toggle) +#define bookmarksButtonString "Bookmark" => the text in the toggle button +#define bookmarksButtonHelpString "Bookmark" => the helper text when mouse over the button 
+#define addBookmarkButtonString "+" => the button for add a bookmark +#define removeBookmarkButtonString "-" => the button for remove the selected bookmark + + +* you can select each bookmark for edit the displayed name corresponding to a path +* you must double click on the label for apply the bookmark + +you can also serialize/deserialize bookmarks by ex for load/save from/to file : (check the app sample by ex) +Example code : +Load => ImGuiFileDialog::Instance()->DeserializeBookmarks(bookmarString); +Save => std::string bookmarkString = ImGuiFileDialog::Instance()->SerializeBookmarks(); + +----------------------------------------------------------------------------------------------------------------- +## Path Edition : +----------------------------------------------------------------------------------------------------------------- + +if you click right on one of any path button, you can input or modify the path pointed by this button. +then press the validate key (Enter by default with GLFW) for validate the new path +or press the escape key (Escape by default with GLFW) for quit the input path edition + +see in this gif doc/inputPathEdition.gif : +1) button edition with mouse button right and escape key for quit the edition +2) focus the input and press validation for set path + +----------------------------------------------------------------------------------------------------------------- +## Confirm to OverWrite Dialog : +----------------------------------------------------------------------------------------------------------------- + +If you want avoid OverWrite your files after confirmation, +you can show a Dialog for confirm or cancel the OverWrite operation. + +You just need to define the flag ImGuiFileDialogFlags_ConfirmOverwrite +in your call to OpenDialog/OpenModal + +By default this flag is not set, since there is no pre-defined way to +define if a dialog will be for Open or Save behavior. 
(and its wanted :) ) + +Example code For Standard Dialog : +Example code : +ImGuiFileDialog::Instance()->OpenDialog("ChooseFileDlgKey", + ICON_IGFD_SAVE " Choose a File", filters, + ".", "", 1, nullptr, ImGuiFileDialogFlags_ConfirmOverwrite); + +Example code For Modal Dialog : +Example code : +ImGuiFileDialog::Instance()->OpenModal("ChooseFileDlgKey", + ICON_IGFD_SAVE " Choose a File", filters, + ".", "", 1, nullptr, ImGuiFileDialogFlags_ConfirmOverwrite); + +This dialog will only verify the file in the file field. +So Not to be used with GetSelection() + +The Confirm dialog will be a forced Modal Dialog, not moveable, displayed +in the center of the current FileDialog. + +As usual you can customize the dialog, +in you custom config file (CustomImGuiFileDialogConfig.h in this example) + +you can uncomment the next lines for customize it : + +Example code : +#define OverWriteDialogTitleString "The file Already Exist !" +#define OverWriteDialogMessageString "Would you like to OverWrite it ?" +#define OverWriteDialogConfirmButtonString "Confirm" +#define OverWriteDialogCancelButtonString "Cancel" + +----------------------------------------------------------------------------------------------------------------- +## Flags : +----------------------------------------------------------------------------------------------------------------- + +flag must be specified in OpenDialog or OpenModal +* ImGuiFileDialogFlags_ConfirmOverwrite => show confirm to overwrite dialog +* ImGuiFileDialogFlags_DontShowHiddenFiles => dont show hidden file (file starting with a .) + +----------------------------------------------------------------------------------------------------------------- +## Open / Save dialog Behavior : +----------------------------------------------------------------------------------------------------------------- + +There is no way to distinguish the "open dialog" behavior than "save dialog" behavior. 
+So you must adapt the return according to your need : + +if you want to open file(s) or directory(s), you must use the GetSelection() method. you will obtain a std::map of the selection +if you want to create a file, you must use : GetFilePathName()/GetCurrentFileName() + +the return methods and comments : + +Example code : +std::map<std::string, std::string> GetSelection(); // Open File behavior : will return selection via a map +std::string GetFilePathName(); // Create File behavior : will always return the content of the field with current filter extension and current path +std::string GetCurrentFileName(); // Create File behavior : will always return the content of the field with current filter extension +std::string GetCurrentPath(); // will return current path +std::string GetCurrentFilter(); // get selected filter +UserDatas GetUserDatas(); // get user datas sent with Open Dialog + +----------------------------------------------------------------------------------------------------------------- +## Thumbnails Display +----------------------------------------------------------------------------------------------------------------- + +You can now display thumbnails of pictures. + +The file resize uses stb/image so the following file extensions are supported : +(.png, .bmp, .tga, .jpg, .jpeg, .gif, .psd, .pic, .ppm, .pgm) +only tested with .png, .bmp, .tga, .jpg, .jpeg and .gif by the way + +Corresponding to your backend (ex : OpenGl) you need to define two callbacks : +* the first is a callback that will be called by ImGuiFileDialog to create the backend texture +* the second is a callback that will be called by ImGuiFileDialog to destroy the backend texture + +After that you need to call the function that is responsible for creating / destroying the textures. +this function must be called in your GPU Rendering zone to avoid destroying a texture that is still in use. +if you do that at the same place as your imgui code, some backends can crash your app, by ex with vulkan. 
+ +ex, for opengl : + +Example code : +// Create thumbnails texture +ImGuiFileDialog::Instance()->SetCreateThumbnailCallback([](IGFD_Thumbnail_Info *vThumbnail_Info) -> void +{ + if (vThumbnail_Info && + vThumbnail_Info->isReadyToUpload && + vThumbnail_Info->textureFileDatas) + { + GLuint textureId = 0; + glGenTextures(1, &textureId); + vThumbnail_Info->textureID = (void*)textureId; + + glBindTexture(GL_TEXTURE_2D, textureId); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, + (GLsizei)vThumbnail_Info->textureWidth, (GLsizei)vThumbnail_Info->textureHeight, + 0, GL_RGBA, GL_UNSIGNED_BYTE, vThumbnail_Info->textureFileDatas); + glFinish(); + glBindTexture(GL_TEXTURE_2D, 0); + + delete[] vThumbnail_Info->textureFileDatas; + vThumbnail_Info->textureFileDatas = nullptr; + + vThumbnail_Info->isReadyToUpload = false; + vThumbnail_Info->isReadyToDisplay = true; + } +}); + +Example code : +// Destroy thumbnails texture +ImGuiFileDialog::Instance()->SetDestroyThumbnailCallback([](IGFD_Thumbnail_Info* vThumbnail_Info) +{ + if (vThumbnail_Info) + { + GLuint texID = (GLuint)vThumbnail_Info->textureID; + glDeleteTextures(1, &texID); + glFinish(); + } +}); + +Example code : +// GPU Rendering Zone // To call for Create/ Destroy Textures +ImGuiFileDialog::Instance()->ManageGPUThumbnails(); + +----------------------------------------------------------------------------------------------------------------- +## C API +----------------------------------------------------------------------------------------------------------------- + +A C API is available let you include ImGuiFileDialog in your C project. 
+Note : ImGuiFileDialog depends on ImGui and dirent (for windows) + +Sample code with cimgui : + +// create ImGuiFileDialog +ImGuiFileDialog *cfiledialog = IGFD_Create(); + +// open dialog +if (igButton("Open File", buttonSize)) +{ + IGFD_OpenDialog(cfiledialog, + "filedlg", // dialog key (makes it possible to have a different treatment depending on the dialog key) + "Open a File", // dialog title + "c files(*.c *.h){.c,.h}", // dialog filter syntax : simple => .h,.c,.pp, etc and collections : text1{filter0,filter1,filter2}, text2{filter0,filter1,filter2}, etc.. + ".", // base directory for files scan + "", // base filename + 0, // a function to display a right pane if you want + 0.0f, // base width of the pane + 0, // count selection : 0 infinite, 1 one file (default), n (n files) + "User data !", // some user datas + ImGuiFileDialogFlags_ConfirmOverwrite); // ImGuiFileDialogFlags +} + +ImGuiIO* ioptr = igGetIO(); +ImVec2 maxSize; +maxSize.x = ioptr->DisplaySize.x * 0.8f; +maxSize.y = ioptr->DisplaySize.y * 0.8f; +ImVec2 minSize; +minSize.x = maxSize.x * 0.25f; +minSize.y = maxSize.y * 0.25f; + +// display dialog +if (IGFD_DisplayDialog(cfiledialog, "filedlg", ImGuiWindowFlags_NoCollapse, minSize, maxSize)) +{ + if (IGFD_IsOk(cfiledialog)) // result ok + { + char* cfilePathName = IGFD_GetFilePathName(cfiledialog); + printf("GetFilePathName : %s\n", cfilePathName); + char* cfilePath = IGFD_GetCurrentPath(cfiledialog); + printf("GetCurrentPath : %s\n", cfilePath); + char* cfilter = IGFD_GetCurrentFilter(cfiledialog); + printf("GetCurrentFilter : %s\n", cfilter); + // here convert from string because a string was passed as a userDatas, but it can be what you want + void* cdatas = IGFD_GetUserDatas(cfiledialog); + if (cdatas) + printf("GetUserDatas : %s\n", (const char*)cdatas); + struct IGFD_Selection csel = IGFD_GetSelection(cfiledialog); // multi selection + printf("Selection :\n"); + for (int i = 0; i < (int)csel.count; i++) + { + printf("(%i) FileName %s => path %s\n", i, 
csel.table[i].fileName, csel.table[i].filePathName); + } + // action + + // destroy + if (cfilePathName) free(cfilePathName); + if (cfilePath) free(cfilePath); + if (cfilter) free(cfilter); + + IGFD_Selection_DestroyContent(&csel); + } + IGFD_CloseDialog(cfiledialog); +} + +// destroy ImGuiFileDialog +IGFD_Destroy(cfiledialog); + +----------------------------------------------------------------------------------------------------------------- +## Std::filesystem (c++17) can be used instead of dirent.h +----------------------------------------------------------------------------------------------------------------- + +you just need to uncomment this in the config file + +#define USE_STD_FILESYSTEM + +in this mode dirent is no longer required + +----------------------------------------------------------------------------------------------------------------- +## How to Integrate ImGuiFileDialog in your project +----------------------------------------------------------------------------------------------------------------- + +### ImGuiFileDialog requires : + +* dirent v1.23 (only when USE_STD_FILESYSTEM is not defined) (https://github.com/tronkko/dirent/tree/v1.23) lib, only for windows. Successfully tested with version v1.23 only +* Dear ImGui (https://github.com/ocornut/imgui/tree/master) (with/without tables widgets) + +### Customize ImGuiFileDialog : + +You just need to write your own config file by overriding the file : ImGuiFileDialog/ImGuiFileDialogConfig.h +like I do here with CustomImGuiFileDialogConfig.h + +After that, to let ImGuiFileDialog use your own custom file, +you must define the preprocessor directive CUSTOM_IMGUIFILEDIALOG_CONFIG with the path of your custom config file. +This path must be relative to the directory where you put the ImGuiFileDialog module. 
+ +----------------------------------------------------------------------------------------------------------------- +----------------------------------------------------------------------------------------------------------------- + +Thats all. + +You can check by example in this repo with the file CustomImGuiFileDialogConfig.h : +- this trick was used for have custom icon font instead of labels for buttons or messages titles +- you can also use your custom imgui button, the button call stamp must be same by the way :) + +The Custom Icon Font (in CustomFont.cpp and CustomFont.h) was made with ImGuiFontStudio (https://github.com/aiekick/ImGuiFontStudio) i wrote for that :) +ImGuiFontStudio is using also ImGuiFileDialog. + +----------------------------------------------------------------------------------------------------------------- +----------------------------------------------------------------------------------------------------------------- +*/ + +#ifndef IMGUIFILEDIALOG_H +#define IMGUIFILEDIALOG_H + +#define IMGUIFILEDIALOG_VERSION "v0.6.4" + +#ifndef CUSTOM_IMGUIFILEDIALOG_CONFIG +#include "ImGuiFileDialogConfig.h" +#else // CUSTOM_IMGUIFILEDIALOG_CONFIG +#include CUSTOM_IMGUIFILEDIALOG_CONFIG +#endif // CUSTOM_IMGUIFILEDIALOG_CONFIG + +// file style flags used for file display (color, icon, font) +typedef int IGFD_FileStyleFlags; // -> enum IGFD_FileStyleFlags_ +enum IGFD_FileStyleFlags_ // by evaluation / priority order +{ + IGFD_FileStyle_None = 0, // no style defined + IGFD_FileStyleByTypeFile = (1 << 0), // define style for all files + IGFD_FileStyleByTypeDir = (1 << 1), // define style for all directories + IGFD_FileStyleByTypeLink = (1 << 2), // define style for all links + IGFD_FileStyleByExtention = (1 << 3), // define style by extension, for files or links + IGFD_FileStyleByFullName = (1 << 4), // define style for a particular file/dir/link full name (filename + extension) + IGFD_FileStyleByContainedInFullName = (1 << 5), // define style for file/dir/link when 
criteria is contained in full name +}; + +typedef int ImGuiFileDialogFlags; // -> enum ImGuiFileDialogFlags_ +enum ImGuiFileDialogFlags_ +{ + ImGuiFileDialogFlags_None = 0, + ImGuiFileDialogFlags_ConfirmOverwrite = (1 << 0), // show the confirm-to-overwrite dialog + ImGuiFileDialogFlags_DontShowHiddenFiles = (1 << 1), // don't show hidden files (files starting with a .) + ImGuiFileDialogFlags_DisableCreateDirectoryButton = (1 << 2), // disable the create directory button + ImGuiFileDialogFlags_HideColumnType = (1 << 3), // hide column file type + ImGuiFileDialogFlags_HideColumnSize = (1 << 4), // hide column file size + ImGuiFileDialogFlags_HideColumnDate = (1 << 5), // hide column file date +#ifdef USE_THUMBNAILS + ImGuiFileDialogFlags_DisableThumbnailMode = (1 << 6), // disable the thumbnail mode +#endif + ImGuiFileDialogFlags_Default = ImGuiFileDialogFlags_ConfirmOverwrite +}; + +#ifdef USE_THUMBNAILS +struct IGFD_Thumbnail_Info +{ + int isReadyToDisplay = 0; // ready to be rendered, so texture created + int isReadyToUpload = 0; // ready to upload to gpu + int isLoadingOrLoaded = 0; // was sent to loading or is loaded + void* textureID = 0; // 2d texture id (void* is like the ImTextureID type) (GL, DX, VK, Etc..) 
+ unsigned char* textureFileDatas = 0; // file texture datas, will be reset to null after gpu upload + int textureWidth = 0; // width of the texture to upload + int textureHeight = 0; // height of the texture to upload + int textureChannels = 0; // count channels of the texture to upload + void* userDatas = 0; // user datas +}; +#endif // USE_THUMBNAILS + +#ifdef __cplusplus + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace IGFD +{ +#ifndef MAX_FILE_DIALOG_NAME_BUFFER +#define MAX_FILE_DIALOG_NAME_BUFFER 1024 +#endif // MAX_FILE_DIALOG_NAME_BUFFER + +#ifndef MAX_PATH_BUFFER_SIZE +#define MAX_PATH_BUFFER_SIZE 1024 +#endif // MAX_PATH_BUFFER_SIZE + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + class FileDialogInternal; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + class SearchManager + { + public: + std::string puSearchTag; + char puSearchBuffer[MAX_FILE_DIALOG_NAME_BUFFER] = ""; + bool puSearchInputIsActive = false; + + public: + void Clear(); // clear the datas + void DrawSearchBar(FileDialogInternal& vFileDialogInternal); // draw the search bar + }; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + class Utils + { + public: + struct PathStruct + { + std::string path; + std::string name; + std::string ext; + bool isOk = false; + }; + + public: + static bool Splitter(bool split_vertically, float thickness, float* size1, float* size2, float min_size1, float min_size2, float splitter_long_axis_size = -1.0f); + static bool ReplaceString(std::string& str, const std::string& oldStr, const std::string& newStr); + static bool IsDirectoryExist(const std::string& name); + static bool CreateDirectoryIfNotExist(const std::string& name); + static PathStruct ParsePathFileName(const std::string& vPathFileName); + static void AppendToBuffer(char* vBuffer, size_t vBufferLen, const std::string& vStr); + static void ResetBuffer(char* vBuffer); + static void SetBuffer(char* vBuffer, size_t vBufferLen, const std::string& vStr); +#ifdef WIN32 + static bool WReplaceString(std::wstring& str, const std::wstring& oldStr, const std::wstring& newStr); + static std::vector WSplitStringToVector(const std::wstring& text, char delimiter, bool pushEmpty); + static std::string wstring_to_string(const std::wstring& wstr); + static std::wstring string_to_wstring(const std::string& mbstr); +#endif + static std::vector SplitStringToVector(const std::string& text, char delimiter, bool pushEmpty); + static std::vector GetDrivesList(); + }; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + class FileStyle + { + public: + ImVec4 color = 
ImVec4(0, 0, 0, 0); + std::string icon; + ImFont* font = nullptr; + IGFD_FileStyleFlags flags = 0; + + public: + FileStyle(); + FileStyle(const FileStyle& vStyle); + FileStyle(const ImVec4& vColor, const std::string& vIcon = "", ImFont* vFont = nullptr); + }; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + class FileInfos; + class FilterManager + { + public: + class FilterInfos + { + public: + std::string filter; + std::set collectionfilters; + + public: + void clear(); // clear the datas + bool empty() const; // is the filter empty + bool exist(const std::string& vFilter) const; // does the filter exist + }; + + private: + std::vector prParsedFilters; + std::unordered_map>> prFilesStyle; // file styles, for file extension only + FilterInfos prSelectedFilter; + + public: + std::string puDLGFilters; + std::string puDLGdefaultExt; + + public: + void ParseFilters(const char* vFilters); // Parse filter syntax, detect and parse filter collection + void SetSelectedFilterWithExt(const std::string& vFilter); // Select filter + + bool prFillFileStyle(std::shared_ptr vFileInfos) const; // fill with the right style + + void SetFileStyle( + const IGFD_FileStyleFlags& vFlags, + const char* vCriteria, + const FileStyle& vInfos); // Set FileStyle + void SetFileStyle( + const IGFD_FileStyleFlags& vFlags, + const char* vCriteria, + const ImVec4& vColor, + const std::string& vIcon, + ImFont* vFont); // link file style to Color and Icon and Font + bool GetFileStyle( + const IGFD_FileStyleFlags& vFlags, + const std::string& vCriteria, + ImVec4* vOutColor, + std::string* vOutIcon, + ImFont** vOutFont); // Get Color, Icon and Font for Filter + void ClearFilesStyle(); // clear prFilesStyle 
+ + bool IsCoveredByFilters(const std::string& vTag) const; // check if current file extension (vTag) is covered by current filter + bool DrawFilterComboBox(FileDialogInternal& vFileDialogInternal); // draw the filter combobox + FilterInfos GetSelectedFilter(); // get the current selected filter + std::string ReplaceExtentionWithCurrentFilter(const std::string& vFile) const; // replace the extension of the current file by the selected filter + void SetDefaultFilterIfNotDefined(); // define the first filter if no filter is selected + }; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + class FileInfos + { + public: + char fileType = ' '; // dirent fileType (f:file, d:directory, l:link) + std::string filePath; // path of the file + std::string fileNameExt; // filename of the file (file name + extension) (but no path) + std::string fileNameExt_optimized; // optimized for search => case insensitive + std::string fileExt; // extension of the file + size_t fileSize = 0; // for sorting operations + std::string formatedFileSize; // file size formatted (10 o, 10 ko, 10 mo, 10 go) + std::string fileModifDate; // file user defined format of the date (date + time by default) + std::shared_ptr fileStyle = nullptr; // style of the file +#ifdef USE_THUMBNAILS + IGFD_Thumbnail_Info thumbnailInfo; // structure for the display of the image file texture +#endif // USE_THUMBNAILS + + public: + bool IsTagFound(const std::string& vTag) const; + }; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + class FileManager + { + public: // types + enum class SortingFieldEnum // sorting for filtering of the file list + { + FIELD_NONE = 0, // no sorting preference, result undetermined + FIELD_FILENAME, // sorted by filename + FIELD_TYPE, // sorted by filetype + FIELD_SIZE, // sorted by filesize (formatted file size) + FIELD_DATE, // sorted by filedate +#ifdef USE_THUMBNAILS + FIELD_THUMBNAILS, // sorted by thumbnails (comparison by width then by height) +#endif // USE_THUMBNAILS + }; + + private: + std::string prCurrentPath; // current path (to be decomposed in prCurrentPathDecomposition) + std::vector prCurrentPathDecomposition; // part words + std::vector> prFileList; // base container + std::vector> prFilteredFileList; // filtered container (search, sorting, etc..) 
+ std::string prLastSelectedFileName; // for shift multi selection + std::set prSelectedFileNames; // the user selection of FilePathNames + bool prCreateDirectoryMode = false; // for create directory widget + + public: + char puVariadicBuffer[MAX_FILE_DIALOG_NAME_BUFFER] = ""; // called by prSelectableItem + bool puInputPathActivated = false; // show input for path edition + bool puDrivesClicked = false; // event when a drive button is clicked + bool puPathClicked = false; // event when a path button was clicked + char puInputPathBuffer[MAX_PATH_BUFFER_SIZE] = ""; // input path buffer for imgui widget input text (displayed in place of composer) + char puFileNameBuffer[MAX_FILE_DIALOG_NAME_BUFFER] = ""; // file name buffer in footer for imgui widget input text + char puDirectoryNameBuffer[MAX_FILE_DIALOG_NAME_BUFFER] = ""; // directory name buffer in footer for imgui widget input text (when in directory mode) + std::string puHeaderFileName; // detail view name of column file + std::string puHeaderFileType; // detail view name of column type + std::string puHeaderFileSize; // detail view name of column size + std::string puHeaderFileDate; // detail view name of column date + time +#ifdef USE_THUMBNAILS + std::string puHeaderFileThumbnails; // detail view name of column thumbnails + bool puSortingDirection[5] = { true, true, true, true, true }; // detail view // true => Descending, false => Ascending +#else + bool puSortingDirection[4] = { true, true, true, true }; // detail view // true => Descending, false => Ascending +#endif + SortingFieldEnum puSortingField = SortingFieldEnum::FIELD_FILENAME; // detail view sorting column + bool puShowDrives = false; // drives are shown (only on os windows) + + std::string puDLGpath; // base path set by user when OpenDialog/OpenModal was called + std::string puDLGDefaultFileName; // base default file path name set by user when OpenDialog/OpenModal was called + size_t puDLGcountSelectionMax = 1U; // 0 for infinite // base max 
selection count set by user when OpenDialog/OpenModal was called + bool puDLGDirectoryMode = false; // is directory mode (defined like : puDLGDirectoryMode = (filters.empty())) + + std::string puFsRoot; + + private: + static std::string prRoundNumber(double vvalue, int n); // custom rounding number + static std::string prFormatFileSize(size_t vByteSize); // format file size field + static std::string prOptimizeFilenameForSearchOperations(const std::string& vFileNameExt); // turn all text in lower case to facilitate search + static void prCompleteFileInfos(const std::shared_ptr& FileInfos); // set time and date infos of a file (detail view mode) + void prRemoveFileNameInSelection(const std::string& vFileName); // selection : remove a file name + void prAddFileNameInSelection(const std::string& vFileName, bool vSetLastSelectionFileName); // selection : add a file name + void AddFile(const FileDialogInternal& vFileDialogInternal, + const std::string& vPath, const std::string& vFileName, const char& vFileType); // add file called by scandir + + public: + FileManager(); + bool IsComposerEmpty(); + size_t GetComposerSize(); + bool IsFileListEmpty(); + bool IsFilteredListEmpty(); + size_t GetFullFileListSize(); + std::shared_ptr GetFullFileAt(size_t vIdx); + size_t GetFilteredListSize(); + std::shared_ptr GetFilteredFileAt(size_t vIdx); + bool IsFileNameSelected(const std::string& vFileName); + std::string GetBack(); + void ClearComposer(); + void ClearFileLists(); // clear file list, will destroy thumbnail textures + void ClearAll(); + void ApplyFilteringOnFileList(const FileDialogInternal& vFileDialogInternal); + void OpenCurrentPath(const FileDialogInternal& vFileDialogInternal); // set the path of the dialog, will launch the directory scan to populate the file listview + void SortFields(const FileDialogInternal& vFileDialogInternal, + const SortingFieldEnum& vSortingField, const bool& vCanChangeOrder); // will sort a column + bool GetDrives(); // list drives on 
windows platform + bool CreateDir(const std::string& vPath); // create a directory on the file system + void ComposeNewPath(std::vector::iterator vIter); // compose a path from the compose path widget + bool SetPathOnParentDirectoryIfAny(); // compose path on parent directory + std::string GetCurrentPath(); // get the current path + void SetCurrentPath(const std::string& vCurrentPath); // set the current path + static bool IsFileExist(const std::string& vFile); + void SetDefaultFileName(const std::string& vFileName); + bool SelectDirectory(const std::shared_ptr& vInfos); // enter directory + void SelectFileName(const FileDialogInternal& vFileDialogInternal, + const std::shared_ptr& vInfos); // select filename + + //depends on dirent.h + void SetCurrentDir(const std::string& vPath); // define current directory for scan + void ScanDir(const FileDialogInternal& vFileDialogInternal, const std::string& vPath); // scan the directory to retrieve the file list + + public: + std::string GetResultingPath(); + std::string GetResultingFileName(FileDialogInternal& vFileDialogInternal); + std::string GetResultingFilePathName(FileDialogInternal& vFileDialogInternal); + std::map GetResultingSelection(); + + public: + void DrawDirectoryCreation(const FileDialogInternal& vFileDialogInternal); // draw directory creation widget + void DrawPathComposer(const FileDialogInternal& vFileDialogInternal); // draw path composer widget + }; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef USE_THUMBNAILS + typedef std::function CreateThumbnailFun; // texture 2d creation function binding + typedef std::function DestroyThumbnailFun; // texture 2d destroy function binding 
+#endif + class ThumbnailFeature + { + protected: + ThumbnailFeature(); + ~ThumbnailFeature(); + + void NewThumbnailFrame(FileDialogInternal& vFileDialogInternal); + void EndThumbnailFrame(FileDialogInternal& vFileDialogInternal); + void QuitThumbnailFrame(FileDialogInternal& vFileDialogInternal); + +#ifdef USE_THUMBNAILS + protected: + enum class DisplayModeEnum + { + FILE_LIST = 0, + THUMBNAILS_LIST, + THUMBNAILS_GRID + }; + + private: + uint32_t prCountFiles = 0U; + bool prIsWorking = false; + std::shared_ptr prThumbnailGenerationThread = nullptr; + std::list> prThumbnailFileDatasToGet; // base container + std::mutex prThumbnailFileDatasToGetMutex; + std::list> prThumbnailToCreate; // base container + std::mutex prThumbnailToCreateMutex; + std::list prThumbnailToDestroy; // base container + std::mutex prThumbnailToDestroyMutex; + + CreateThumbnailFun prCreateThumbnailFun = nullptr; + DestroyThumbnailFun prDestroyThumbnailFun = nullptr; + + protected: + DisplayModeEnum prDisplayMode = DisplayModeEnum::FILE_LIST; + + protected: + // will be called in cpu zone (imgui computations, will call a texture file retrieval thread) + void prStartThumbnailFileDatasExtraction(); // start the thread which will get byte buffer from image files + bool prStopThumbnailFileDatasExtraction(); // stop the thread which will get byte buffer from image files + void prThreadThumbnailFileDatasExtractionFunc(); // the thread which will get byte buffer from image files + void prDrawThumbnailGenerationProgress(); // a little progressbar which will display the texture gen status + void prAddThumbnailToLoad(const std::shared_ptr& vFileInfos); // add texture to load in the thread + void prAddThumbnailToCreate(const std::shared_ptr& vFileInfos); + void prAddThumbnailToDestroy(const IGFD_Thumbnail_Info& vIGFD_Thumbnail_Info); + void prDrawDisplayModeToolBar(); // draw display mode toolbar (file list, thumbnails list, small thumbnails grid, big thumbnails grid) + void prClearThumbnails(FileDialogInternal& 
vFileDialogInternal); + + public: + void SetCreateThumbnailCallback(const CreateThumbnailFun& vCreateThumbnailFun); + void SetDestroyThumbnailCallback(const DestroyThumbnailFun& vCreateThumbnailFun); + + // must be called in gpu zone (rendering, possibly one rendering thread) + void ManageGPUThumbnails(); // in gpu rendering zone, will create or destroy texture +#endif + }; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + class BookMarkFeature + { + protected: + BookMarkFeature(); + +#ifdef USE_BOOKMARK + private: + struct BookmarkStruct + { + std::string name; // name of the bookmark + + // todo: the path could be relative, better if the app is moved, but a bookmarked path can be outside of the app + std::string path; // absolute path of the bookmarked directory + }; + + private: + ImGuiListClipper prBookmarkClipper; + std::vector prBookmarks; + char prBookmarkEditBuffer[MAX_FILE_DIALOG_NAME_BUFFER] = ""; + + protected: + float prBookmarkWidth = 200.0f; + bool prBookmarkPaneShown = false; + + protected: + void prDrawBookmarkButton(); // draw bookmark button + bool prDrawBookmarkPane(FileDialogInternal& vFileDialogInternal, const ImVec2& vSize); // draw bookmark Pane + + public: + std::string SerializeBookmarks(); // serialize bookmarks : return bookmark buffer to save in a file + void DeserializeBookmarks( // deserialize bookmarks : load bookmark buffer to load in the dialog (saved from previous use with SerializeBookmarks()) + const std::string& vBookmarks); // bookmark buffer to load +#endif // USE_BOOKMARK + }; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + // file localization by input char // widget flashing + class KeyExplorerFeature + { + protected: + KeyExplorerFeature(); + +#ifdef USE_EXPLORATION_BY_KEYS + private: + size_t prFlashedItem = 0; // flash when select by char + float prFlashAlpha = 0.0f; // flash when select by char + float prFlashAlphaAttenInSecs = 1.0f; // fps display dependent + size_t prLocateFileByInputChar_lastFileIdx = 0; + ImWchar prLocateFileByInputChar_lastChar = 0; + int prLocateFileByInputChar_InputQueueCharactersSize = 0; + bool prLocateFileByInputChar_lastFound = false; + + protected: + void prLocateByInputKey(FileDialogInternal& vFileDialogInternal); // select a file line in listview according to char key + bool prLocateItem_Loop(FileDialogInternal& vFileDialogInternal, ImWchar vC); // restart from the start of the list view if a corresponding file was not found + void prExploreWithkeys(FileDialogInternal& vFileDialogInternal, ImGuiID vListViewID); // select file/directory line in listview according to up/down enter/backspace keys + static bool prFlashableSelectable( // custom flashing selectable widget, to flash the selected line for a short time + const char* label, bool selected = false, ImGuiSelectableFlags flags = 0, + bool vFlashing = false, const ImVec2& size = ImVec2(0, 0)); + void prStartFlashItem(size_t vIdx); // define that an item must be flashed + bool prBeginFlashItem(size_t vIdx); // start the flashing of a line in list view + static void prEndFlashItem(); // end the flashing according to var prFlashAlphaAttenInSecs + + public: + void SetFlashingAttenuationInSeconds( // set the flashing time of the line in file list when using exploration keys + float vAttenValue); // set the attenuation (from flashed to not flashed) in seconds +#endif // 
USE_EXPLORATION_BY_KEYS + }; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + typedef void* UserDatas; + typedef std::function PaneFun; // side pane function binding + class FileDialogInternal + { + public: + FileManager puFileManager; + FilterManager puFilterManager; + SearchManager puSearchManager; + + public: + std::string puName; + bool puShowDialog = false; + ImVec2 puDialogCenterPos = ImVec2(0, 0); // center pos for display the confirm overwrite dialog + int puLastImGuiFrameCount = 0; // to be sure that only one dialog is displayed per frame + float puFooterHeight = 0.0f; + bool puCanWeContinue = true; // events + bool puOkResultToConfirm = false; // to confirm if ok for OverWrite + bool puIsOk = false; + bool puFileInputIsActive = false; // when input text for file or directory is active + bool puFileListViewIsActive = false; // when list view is active + std::string puDLGkey; + std::string puDLGtitle; + ImGuiFileDialogFlags puDLGflags = ImGuiFileDialogFlags_None; + UserDatas puDLGuserDatas = nullptr; + PaneFun puDLGoptionsPane = nullptr; + float puDLGoptionsPaneWidth = 0.0f; + bool puDLGmodal = false; + bool puNeedToExitDialog = false; + + bool puUseCustomLocale = false; + int puLocaleCategory = LC_ALL; // locale category to use + std::string puLocaleBegin; // the locale which will be applied at start of the display dialog + std::string puLocaleEnd; // the locale which will be applied at end of the display dialog + + public: + void NewFrame(); // new frame, so maybe needed to do some things, like reset events + void EndFrame(); // end frame, so maybe needed to do some things after all + void ResetForNewDialog(); // reset what is needed to reset for 
the openging of a new dialog + }; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + class FileDialog : + public BookMarkFeature, + public KeyExplorerFeature, + public ThumbnailFeature + { + private: + FileDialogInternal prFileDialogInternal; + ImGuiListClipper prFileListClipper; + + public: + bool puAnyWindowsHovered = false; // not remember why haha :) todo : to check if we can remove + + public: + static FileDialog* Instance() // Singleton for easier access from anywhere but only one dialog at a time + { + static FileDialog _instance; + return &_instance; + } + + public: + FileDialog(); // ImGuiFileDialog Constructor. can be used to have many dialogs at the same time (not possible with singleton) + virtual ~FileDialog(); // ImGuiFileDialog Destructor + + // standard dialog + void OpenDialog( // open simple dialog (path and fileName can be specified) + const std::string& vKey, // key dialog + const std::string& vTitle, // title + const char* vFilters, // filters + const std::string& vPath, // path + const std::string& vFileName, // default file name + const int& vCountSelectionMax = 1, // count selection max + UserDatas vUserDatas = nullptr, // user datas (can be retrieved in pane) + ImGuiFileDialogFlags vFlags = 0); // ImGuiFileDialogFlags + + void OpenDialog( // open simple dialog (path and filename are obtained from filePathName) + const std::string& vKey, // key dialog + const std::string& vTitle, // title + const char* vFilters, // filters + const std::string& vFilePathName, // file path name (will be decomposed in path and fileName) + const int& vCountSelectionMax = 1, // count selection max + UserDatas vUserDatas = nullptr, // user datas (can be 
retrieved in pane) + ImGuiFileDialogFlags vFlags = 0); // ImGuiFileDialogFlags + + // with pane + void OpenDialog( // open dialog with custom right pane (path and fileName can be specified) + const std::string& vKey, // key dialog + const std::string& vTitle, // title + const char* vFilters, // filters + const std::string& vPath, // path + const std::string& vFileName, // default file name + const PaneFun& vSidePane, // side pane + const float& vSidePaneWidth = 250.0f, // side pane width + const int& vCountSelectionMax = 1, // count selection max + UserDatas vUserDatas = nullptr, // user datas (can be retrieved in pane) + ImGuiFileDialogFlags vFlags = 0); // ImGuiFileDialogFlags + + void OpenDialog( // open dialog with custom right pane (path and filename are obtained from filePathName) + const std::string& vKey, // key dialog + const std::string& vTitle, // title + const char* vFilters, // filters + const std::string& vFilePathName, // file path name (will be decomposed in path and fileName) + const PaneFun& vSidePane, // side pane + const float& vSidePaneWidth = 250.0f, // side pane width + const int& vCountSelectionMax = 1, // count selection max + UserDatas vUserDatas = nullptr, // user datas (can be retrieved in pane) + ImGuiFileDialogFlags vFlags = 0); // ImGuiFileDialogFlags + + // modal dialog + void OpenModal( // open simple modal (path and fileName can be specified) + const std::string& vKey, // key dialog + const std::string& vTitle, // title + const char* vFilters, // filters + const std::string& vPath, // path + const std::string& vFileName, // default file name + const int& vCountSelectionMax = 1, // count selection max + UserDatas vUserDatas = nullptr, // user datas (can be retrieved in pane) + ImGuiFileDialogFlags vFlags = 0); // ImGuiFileDialogFlags + + void OpenModal( // open simple modal (path and filename are obtained from filePathName) + const std::string& vKey, // key dialog + const std::string& vTitle, // title + const char* vFilters, // 
filters + const std::string& vFilePathName, // file path name (will be decomposed in path and fileName) + const int& vCountSelectionMax = 1, // count selection max + UserDatas vUserDatas = nullptr, // user datas (can be retrieved in pane) + ImGuiFileDialogFlags vFlags = 0); // ImGuiFileDialogFlags + + // with pane + void OpenModal( // open modal with custom right pane (path and fileName can be specified) + const std::string& vKey, // key dialog + const std::string& vTitle, // title + const char* vFilters, // filters + const std::string& vPath, // path + const std::string& vFileName, // default file name + const PaneFun& vSidePane, // side pane + const float& vSidePaneWidth = 250.0f, // side pane width + const int& vCountSelectionMax = 1, // count selection max + UserDatas vUserDatas = nullptr, // user datas (can be retrieved in pane) + ImGuiFileDialogFlags vFlags = 0); // ImGuiFileDialogFlags + + void OpenModal( // open modal with custom right pane (path and filename are obtained from filePathName) + const std::string& vKey, // key dialog + const std::string& vTitle, // title + const char* vFilters, // filters + const std::string& vFilePathName, // file path name (will be decomposed in path and fileName) + const PaneFun& vSidePane, // side pane + const float& vSidePaneWidth = 250.0f, // side pane width + const int& vCountSelectionMax = 1, // count selection max + UserDatas vUserDatas = nullptr, // user datas (can be retrieved in pane) + ImGuiFileDialogFlags vFlags = 0); // ImGuiFileDialogFlags + + // Display / Close dialog form + bool Display( // Display the dialog. 
return true if a result was obtained (Ok or not) + const std::string& vKey, // key dialog to display (if not the same key as defined by OpenDialog/Modal => no opening) + ImGuiWindowFlags vFlags = ImGuiWindowFlags_NoCollapse, // ImGuiWindowFlags + ImVec2 vMinSize = ImVec2(0, 0), // minimal size constraint for the ImGuiWindow + ImVec2 vMaxSize = ImVec2(FLT_MAX, FLT_MAX)); // maximal size constraint for the ImGuiWindow + void Close(); // close dialog + + // queries + bool WasOpenedThisFrame(const std::string& vKey) const; // say if the dialog key was already opened this frame + bool WasOpenedThisFrame() const; // say if the dialog was already opened this frame + bool IsOpened(const std::string& vKey) const; // say if the key is opened + bool IsOpened() const; // say if the dialog is opened somewhere + std::string GetOpenedKey() const; // return the dialog key which is opened, return nothing if not opened + + // get result + bool IsOk() const; // true => Dialog Closed with Ok result / false : Dialog closed with cancel result + std::map GetSelection(); // Open File behavior : will return selection via a map + std::string GetFilePathName(); // Save File behavior : will always return the content of the field with current filter extension and current path + std::string GetCurrentFileName(); // Save File behavior : will always return the content of the field with current filter extension + std::string GetCurrentPath(); // will return current path + std::string GetCurrentFilter(); // will return selected filter + UserDatas GetUserDatas() const; // will return user datas sent with Open Dialog/Modal + + // file style by extensions + void SetFileStyle( // set extension datas to have a custom display of a particular file type + const IGFD_FileStyleFlags& vFlags, // file style + const char* vCriteria, // extension filter to tune + const FileStyle& vInfos); // filter extension struct which contains Color and Icon/Text for the display of the file with the extension filter + void SetFileStyle( // 
SetExtention datas for have custom display of particular file type + const IGFD_FileStyleFlags& vFlags, // file style + const char* vCriteria, // extention filter to tune + const ImVec4& vColor, // wanted color for the display of the file with extention filter + const std::string& vIcon = "", // wanted text or icon of the file with extention filter + ImFont *vFont = nullptr); // wantes font + bool GetFileStyle( // GetExtention datas. return true is extention exist + const IGFD_FileStyleFlags& vFlags, // file style + const std::string& vCriteria, // extention filter (same as used in SetExtentionInfos) + ImVec4* vOutColor, // color to retrieve + std::string* vOutIcon = nullptr, // icon or text to retrieve + ImFont** vOutFont = nullptr); // font to retreive + void ClearFilesStyle(); // clear extentions setttings + + void SetLocales( // set locales to use before and after the dialog display + const int& vLocaleCategory, // set local category + const std::string& vLocaleBegin, // locale to use at begining of the dialog display + const std::string& vLocaleEnd); // locale to use at the end of the dialog display + + protected: + void NewFrame(); // new frame just at begining of display + void EndFrame(); // end frame just at end of display + void QuitFrame(); // quit frame when qui quit the dialog + + // others + bool prConfirm_Or_OpenOverWriteFileDialog_IfNeeded( + bool vLastAction, ImGuiWindowFlags vFlags); // treatment of the result, start the confirm to overwrite dialog if needed (if defined with flag) + + public: + // dialog parts + virtual void prDrawHeader(); // draw header part of the dialog (bookmark btn, dir creation, path composer, search bar) + virtual void prDrawContent(); // draw content part of the dialog (bookmark pane, file list, side pane) + virtual bool prDrawFooter(); // draw footer part of the dialog (file field, fitler combobox, ok/cancel btn's) + + // widgets components + virtual void prDrawSidePane(float vHeight); // draw side pane + virtual bool 
prSelectableItem(int vidx, + std::shared_ptr vInfos, + bool vSelected, const char* vFmt, ...); // draw a custom selectable behavior item + virtual void prDrawFileListView(ImVec2 vSize); // draw file list view (default mode) + +#ifdef USE_THUMBNAILS + virtual void prDrawThumbnailsListView(ImVec2 vSize); // draw file list view with small thumbnails on the same line + virtual void prDrawThumbnailsGridView(ImVec2 vSize); // draw a grid of small thumbnails +#endif + + // to be called only by these function and theirs overrides + // - prDrawFileListView + // - prDrawThumbnailsListView + // - prDrawThumbnailsGridView + void prBeginFileColorIconStyle( + std::shared_ptr vFileInfos, + bool& vOutShowColor, + std::string& vOutStr, + ImFont** vOutFont); // begin style apply of filter with color an icon if any + void prEndFileColorIconStyle( + const bool& vShowColor, + ImFont* vFont); // end style apply of filter + }; +} + +typedef IGFD::UserDatas IGFDUserDatas; +typedef IGFD::PaneFun IGFDPaneFun; +typedef IGFD::FileDialog ImGuiFileDialog; +#else // __cplusplus +typedef struct ImGuiFileDialog ImGuiFileDialog; +typedef struct IGFD_Selection_Pair IGFD_Selection_Pair; +typedef struct IGFD_Selection IGFD_Selection; +#endif // __cplusplus + +// C Interface + +#include + +#if defined _WIN32 || defined __CYGWIN__ +#ifdef IMGUIFILEDIALOG_NO_EXPORT +#define API +#else // IMGUIFILEDIALOG_NO_EXPORT +#define API __declspec(dllexport) +#endif // IMGUIFILEDIALOG_NO_EXPORT +#else // defined _WIN32 || defined __CYGWIN__ +#ifdef __GNUC__ +#define API __attribute__((__visibility__("default"))) +#else // __GNUC__ +#define API +#endif // __GNUC__ +#endif // defined _WIN32 || defined __CYGWIN__ + +#ifdef __cplusplus +#define IMGUIFILEDIALOG_API extern "C" API +#else // __cplusplus +#define IMGUIFILEDIALOG_API +#endif // __cplusplus + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +///// C API 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +struct IGFD_Selection_Pair +{ + char* fileName; + char* filePathName; +}; + +IMGUIFILEDIALOG_API IGFD_Selection_Pair IGFD_Selection_Pair_Get(); // return an initialized IGFD_Selection_Pair +IMGUIFILEDIALOG_API void IGFD_Selection_Pair_DestroyContent(IGFD_Selection_Pair* vSelection_Pair); // destroy the content of a IGFD_Selection_Pair + +struct IGFD_Selection +{ + IGFD_Selection_Pair* table; // 0 + size_t count; // 0U +}; + +IMGUIFILEDIALOG_API IGFD_Selection IGFD_Selection_Get(); // return an initialized IGFD_Selection +IMGUIFILEDIALOG_API void IGFD_Selection_DestroyContent(IGFD_Selection* vSelection); // destroy the content of a IGFD_Selection + +// constructor / destructor +IMGUIFILEDIALOG_API ImGuiFileDialog* IGFD_Create(void); // create the filedialog context +IMGUIFILEDIALOG_API void IGFD_Destroy(ImGuiFileDialog* vContext); // destroy the filedialog context + +typedef void (*IGFD_PaneFun)(const char*, void*, bool*); // callback fucntion for display the pane + +#ifdef USE_THUMBNAILS +typedef void (*IGFD_CreateThumbnailFun)(IGFD_Thumbnail_Info*); // callback function for create thumbnail texture +typedef void (*IGFD_DestroyThumbnailFun)(IGFD_Thumbnail_Info*); // callback fucntion for destroy thumbnail texture +#endif // USE_THUMBNAILS + +IMGUIFILEDIALOG_API void IGFD_OpenDialog( // open a standard dialog + ImGuiFileDialog* vContext, // ImGuiFileDialog context + const char* vKey, // key dialog + const char* vTitle, // title + const char* vFilters, // filters/filter collections. 
set it to null for directory mode + const char* vPath, // path + const char* vFileName, // defaut file name + const int vCountSelectionMax, // count selection max + void* vUserDatas, // user datas (can be retrieved in pane) + ImGuiFileDialogFlags vFlags); // ImGuiFileDialogFlags + +IMGUIFILEDIALOG_API void IGFD_OpenDialog2( // open a standard dialog + ImGuiFileDialog* vContext, // ImGuiFileDialog context + const char* vKey, // key dialog + const char* vTitle, // title + const char* vFilters, // filters/filter collections. set it to null for directory mode + const char* vFilePathName, // defaut file path name (path and filename witl be extracted from it) + const int vCountSelectionMax, // count selection max + void* vUserDatas, // user datas (can be retrieved in pane) + ImGuiFileDialogFlags vFlags); // ImGuiFileDialogFlags + +IMGUIFILEDIALOG_API void IGFD_OpenPaneDialog( // open a standard dialog with pane + ImGuiFileDialog* vContext, // ImGuiFileDialog context + const char* vKey, // key dialog + const char* vTitle, // title + const char* vFilters, // filters/filter collections. set it to null for directory mode + const char* vPath, // path + const char* vFileName, // defaut file name + const IGFD_PaneFun vSidePane, // side pane + const float vSidePaneWidth, // side pane base width + const int vCountSelectionMax, // count selection max + void* vUserDatas, // user datas (can be retrieved in pane) + ImGuiFileDialogFlags vFlags); // ImGuiFileDialogFlags + +IMGUIFILEDIALOG_API void IGFD_OpenPaneDialog2( // open a standard dialog with pane + ImGuiFileDialog* vContext, // ImGuiFileDialog context + const char* vKey, // key dialog + const char* vTitle, // title + const char* vFilters, // filters/filter collections. 
set it to null for directory mode + const char* vFilePathName, // defaut file name (path and filename witl be extracted from it) + const IGFD_PaneFun vSidePane, // side pane + const float vSidePaneWidth, // side pane base width + const int vCountSelectionMax, // count selection max + void* vUserDatas, // user datas (can be retrieved in pane) + ImGuiFileDialogFlags vFlags); // ImGuiFileDialogFlags + +IMGUIFILEDIALOG_API void IGFD_OpenModal( // open a modal dialog + ImGuiFileDialog* vContext, // ImGuiFileDialog context + const char* vKey, // key dialog + const char* vTitle, // title + const char* vFilters, // filters/filter collections. set it to null for directory mode + const char* vPath, // path + const char* vFileName, // defaut file name + const int vCountSelectionMax, // count selection max + void* vUserDatas, // user datas (can be retrieved in pane) + ImGuiFileDialogFlags vFlags); // ImGuiFileDialogFlags + +IMGUIFILEDIALOG_API void IGFD_OpenModal2( // open a modal dialog + ImGuiFileDialog* vContext, // ImGuiFileDialog context + const char* vKey, // key dialog + const char* vTitle, // title + const char* vFilters, // filters/filter collections. set it to null for directory mode + const char* vFilePathName, // defaut file name (path and filename witl be extracted from it) + const int vCountSelectionMax, // count selection max + void* vUserDatas, // user datas (can be retrieved in pane) + ImGuiFileDialogFlags vFlags); // ImGuiFileDialogFlags + +IMGUIFILEDIALOG_API void IGFD_OpenPaneModal( // open a modal dialog with pane + ImGuiFileDialog* vContext, // ImGuiFileDialog context + const char* vKey, // key dialog + const char* vTitle, // title + const char* vFilters, // filters/filter collections. 
set it to null for directory mode + const char* vPath, // path + const char* vFileName, // defaut file name + const IGFD_PaneFun vSidePane, // side pane + const float vSidePaneWidth, // side pane base width + const int vCountSelectionMax, // count selection max + void* vUserDatas, // user datas (can be retrieved in pane) + ImGuiFileDialogFlags vFlags); // ImGuiFileDialogFlags + +IMGUIFILEDIALOG_API void IGFD_OpenPaneModal2( // open a modal dialog with pane + ImGuiFileDialog* vContext, // ImGuiFileDialog context + const char* vKey, // key dialog + const char* vTitle, // title + const char* vFilters, // filters/filter collections. set it to null for directory mode + const char* vFilePathName, // defaut file name (path and filename witl be extracted from it) + const IGFD_PaneFun vSidePane, // side pane + const float vSidePaneWidth, // side pane base width + const int vCountSelectionMax, // count selection max + void* vUserDatas, // user datas (can be retrieved in pane) + ImGuiFileDialogFlags vFlags); // ImGuiFileDialogFlags + +IMGUIFILEDIALOG_API bool IGFD_DisplayDialog( // Display the dialog + ImGuiFileDialog* vContext, // ImGuiFileDialog context + const char* vKey, // key dialog to display (if not the same key as defined by OpenDialog/Modal => no opening) + ImGuiWindowFlags vFlags, // ImGuiWindowFlags + ImVec2 vMinSize, // mininmal size contraint for the ImGuiWindow + ImVec2 vMaxSize); // maximal size contraint for the ImGuiWindow + +IMGUIFILEDIALOG_API void IGFD_CloseDialog( // Close the dialog + ImGuiFileDialog* vContext); // ImGuiFileDialog context + +IMGUIFILEDIALOG_API bool IGFD_IsOk( // true => Dialog Closed with Ok result / false : Dialog closed with cancel result + ImGuiFileDialog* vContext); // ImGuiFileDialog context + +IMGUIFILEDIALOG_API bool IGFD_WasKeyOpenedThisFrame( // say if the dialog key was already opened this frame + ImGuiFileDialog* vContext, // ImGuiFileDialog context + const char* vKey); + +IMGUIFILEDIALOG_API bool IGFD_WasOpenedThisFrame( // 
say if the dialog was already opened this frame + ImGuiFileDialog* vContext); // ImGuiFileDialog context + +IMGUIFILEDIALOG_API bool IGFD_IsKeyOpened( // say if the dialog key is opened + ImGuiFileDialog* vContext, // ImGuiFileDialog context + const char* vCurrentOpenedKey); // the dialog key + +IMGUIFILEDIALOG_API bool IGFD_IsOpened( // say if the dialog is opened somewhere + ImGuiFileDialog* vContext); // ImGuiFileDialog context + +IMGUIFILEDIALOG_API IGFD_Selection IGFD_GetSelection( // Open File behavior : will return selection via a map + ImGuiFileDialog* vContext); // ImGuiFileDialog context + +IMGUIFILEDIALOG_API char* IGFD_GetFilePathName( // Save File behavior : will always return the content of the field with current filter extention and current path + ImGuiFileDialog* vContext); // ImGuiFileDialog context + +IMGUIFILEDIALOG_API char* IGFD_GetCurrentFileName( // Save File behavior : will always return the content of the field with current filter extention + ImGuiFileDialog* vContext); // ImGuiFileDialog context + +IMGUIFILEDIALOG_API char* IGFD_GetCurrentPath( // will return current path + ImGuiFileDialog* vContext); // ImGuiFileDialog context + +IMGUIFILEDIALOG_API char* IGFD_GetCurrentFilter( // will return selected filter + ImGuiFileDialog* vContext); // ImGuiFileDialog context + +IMGUIFILEDIALOG_API void* IGFD_GetUserDatas( // will return user datas send with Open Dialog/Modal + ImGuiFileDialog* vContext); // ImGuiFileDialog context + +IMGUIFILEDIALOG_API void IGFD_SetFileStyle( // SetExtention datas for have custom display of particular file type + ImGuiFileDialog* vContext, // ImGuiFileDialog context + IGFD_FileStyleFlags vFileStyleFlags, // file style type + const char* vFilter, // extention filter to tune + ImVec4 vColor, // wanted color for the display of the file with extention filter + const char* vIconText, // wanted text or icon of the file with extention filter (can be sued with font icon) + ImFont* vFont); // wanted font pointer + 
+IMGUIFILEDIALOG_API void IGFD_SetFileStyle2( // SetExtention datas for have custom display of particular file type + ImGuiFileDialog* vContext, // ImGuiFileDialog context + IGFD_FileStyleFlags vFileStyleFlags, // file style type + const char* vFilter, // extention filter to tune + float vR, float vG, float vB, float vA, // wanted color channels RGBA for the display of the file with extention filter + const char* vIconText, // wanted text or icon of the file with extention filter (can be sued with font icon) + ImFont* vFont); // wanted font pointer + +IMGUIFILEDIALOG_API bool IGFD_GetFileStyle( + ImGuiFileDialog* vContext, // ImGuiFileDialog context + IGFD_FileStyleFlags vFileStyleFlags, // file style type + const char* vFilter, // extention filter (same as used in SetExtentionInfos) + ImVec4* vOutColor, // color to retrieve + char** vOutIconText, // icon or text to retrieve + ImFont** vOutFont); // font pointer to retrived + +IMGUIFILEDIALOG_API void IGFD_ClearFilesStyle( // clear extentions setttings + ImGuiFileDialog* vContext); // ImGuiFileDialog context + +IMGUIFILEDIALOG_API void SetLocales( // set locales to use before and after display + ImGuiFileDialog* vContext, // ImGuiFileDialog context + const int vCategory, // set local category + const char* vBeginLocale, // locale to use at begining of the dialog display + const char* vEndLocale); // locale to set at end of the dialog display + +#ifdef USE_EXPLORATION_BY_KEYS +IMGUIFILEDIALOG_API void IGFD_SetFlashingAttenuationInSeconds( // set the flashing time of the line in file list when use exploration keys + ImGuiFileDialog* vContext, // ImGuiFileDialog context + float vAttenValue); // set the attenuation (from flashed to not flashed) in seconds +#endif + +#ifdef USE_BOOKMARK +IMGUIFILEDIALOG_API char* IGFD_SerializeBookmarks( // serialize bookmarks : return bookmark buffer to save in a file + ImGuiFileDialog* vContext); // ImGuiFileDialog context + +IMGUIFILEDIALOG_API void IGFD_DeserializeBookmarks( // 
deserialize bookmarks : load bookmar buffer to load in the dialog (saved from previous use with SerializeBookmarks()) + ImGuiFileDialog* vContext, // ImGuiFileDialog context + const char* vBookmarks); // bookmark buffer to load +#endif + +#ifdef USE_THUMBNAILS +IMGUIFILEDIALOG_API void SetCreateThumbnailCallback( // define the callback for create the thumbnails texture + ImGuiFileDialog* vContext, // ImGuiFileDialog context + IGFD_CreateThumbnailFun vCreateThumbnailFun); // the callback for create the thumbnails texture + +IMGUIFILEDIALOG_API void SetDestroyThumbnailCallback( // define the callback for destroy the thumbnails texture + ImGuiFileDialog* vContext, // ImGuiFileDialog context + IGFD_DestroyThumbnailFun vDestroyThumbnailFun); // the callback for destroy the thumbnails texture + +IMGUIFILEDIALOG_API void ManageGPUThumbnails( // must be call in gpu zone, possibly a thread, will call the callback for create / destroy the textures + ImGuiFileDialog* vContext); // ImGuiFileDialog context +#endif // USE_THUMBNAILS + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#endif // IMGUIFILEDIALOG_H diff --git a/cpp-projects/3d-engine/imgui/extra/ImGuiFileDialogConfig.h b/cpp-projects/3d-engine/imgui/extra/ImGuiFileDialogConfig.h new file mode 100644 index 0000000..3416b02 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/ImGuiFileDialogConfig.h @@ -0,0 +1,97 @@ +#pragma once + +// uncomment and modify defines under for customize ImGuiFileDialog + +//this options need c++17 +//#define USE_STD_FILESYSTEM + +//#define MAX_FILE_DIALOG_NAME_BUFFER 1024 +//#define 
MAX_PATH_BUFFER_SIZE 1024 + +//#define USE_THUMBNAILS +//thumbnail generation uses the stb_image and stb_image_resize libs, which need their implementation to be defined +//if you already use them in your app, you can get a compiler error because the implementation is then defined twice +//so uncomment these lines to prevent the implementation of these libs from being created again +//#define DONT_DEFINE_AGAIN__STB_IMAGE_IMPLEMENTATION +//#define DONT_DEFINE_AGAIN__STB_IMAGE_RESIZE_IMPLEMENTATION +//#define IMGUI_RADIO_BUTTON RadioButton +//#define DisplayMode_ThumbailsList_ImageHeight 32.0f +//#define tableHeaderFileThumbnailsString "Thumbnails" +//#define DisplayMode_FilesList_ButtonString "FL" +//#define DisplayMode_FilesList_ButtonHelp "File List" +//#define DisplayMode_ThumbailsList_ButtonString "TL" +//#define DisplayMode_ThumbailsList_ButtonHelp "Thumbnails List" +// todo +//#define DisplayMode_ThumbailsGrid_ButtonString "TG" +//#define DisplayMode_ThumbailsGrid_ButtonHelp "Thumbnails Grid" + + +//#define USE_EXPLORATION_BY_KEYS +// the default mapping is for GLFW, but you can use another key source +//#include <GLFW/glfw3.h> +// Up key to explore toward the top +//#define IGFD_KEY_UP GLFW_KEY_UP +// Down key to explore toward the bottom +//#define IGFD_KEY_DOWN GLFW_KEY_DOWN +// Enter key to open a directory +//#define IGFD_KEY_ENTER GLFW_KEY_ENTER +// Backspace key to come back to the last directory +//#define IGFD_KEY_BACKSPACE GLFW_KEY_BACKSPACE + +// e.g. you can quit the dialog by pressing the Escape key +//#define USE_DIALOG_EXIT_WITH_KEY +//#define IGFD_EXIT_KEY GLFW_KEY_ESCAPE + +// widget +// filter combobox width +//#define FILTER_COMBO_WIDTH 120.0f +// button widget used to compose the path +//#define IMGUI_PATH_BUTTON ImGui::Button +// standard button +//#define IMGUI_BUTTON ImGui::Button + +// locales string +//#define createDirButtonString "+" +//#define okButtonString " OK" +//#define cancelButtonString " Cancel" +//#define resetButtonString "R" +//#define drivesButtonString "Drives" +//#define
editPathButtonString "E" +//#define searchString "Search" +//#define dirEntryString "[DIR] " +//#define linkEntryString "[LINK] " +//#define fileEntryString "[FILE] " +//#define fileNameString "File Name : " +//#define dirNameString "Directory Path :" +//#define buttonResetSearchString "Reset search" +//#define buttonDriveString "Drives" +//#define buttonEditPathString "Edit path\nYou can also right click on path buttons" +//#define buttonResetPathString "Reset to current directory" +//#define buttonCreateDirString "Create Directory" +//#define OverWriteDialogTitleString "The file Already Exist !" +//#define OverWriteDialogMessageString "Would you like to OverWrite it ?" +//#define OverWriteDialogConfirmButtonString "Confirm" +//#define OverWriteDialogCancelButtonString "Cancel" + +// DateTimeFormat +// see the strftime function for the available format specifiers +// "%Y/%m/%d %H:%M" gives 2021/01/22 11:47 +// "%Y/%m/%d %I:%M%p" gives 2021/01/22 11:45PM +//#define DateTimeFormat "%Y/%m/%d %I:%M%p" + +// these icons will appear in table headers +//#define USE_CUSTOM_SORTING_ICON +//#define tableHeaderAscendingIcon "A|" +//#define tableHeaderDescendingIcon "D|" +//#define tableHeaderFileNameString " File name" +//#define tableHeaderFileTypeString " Type" +//#define tableHeaderFileSizeString " Size" +//#define tableHeaderFileDateTimeString " Date" + +//#define USE_BOOKMARK +//#define bookmarkPaneWith 150.0f +//#define IMGUI_TOGGLE_BUTTON ToggleButton +//#define bookmarksButtonString "Bookmark" +//#define bookmarksButtonHelpString "Bookmark" +//#define addBookmarkButtonString "+" +//#define removeBookmarkButtonString "-" diff --git a/cpp-projects/3d-engine/imgui/extra/dirent/dirent.h b/cpp-projects/3d-engine/imgui/extra/dirent/dirent.h new file mode 100644 index 0000000..f7a46da --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/dirent/dirent.h @@ -0,0 +1,1160 @@ +/* + * Dirent interface for Microsoft Visual Studio + * + * Copyright (C) 1998-2019 Toni Ronkko + * This file is part of
dirent. Dirent may be freely distributed + * under the MIT license. For all details and documentation, see + * https://github.com/tronkko/dirent + */ +#ifndef DIRENT_H +#define DIRENT_H + +/* Hide warnings about unreferenced local functions */ +#if defined(__clang__) +# pragma clang diagnostic ignored "-Wunused-function" +#elif defined(_MSC_VER) +# pragma warning(disable:4505) +#elif defined(__GNUC__) +# pragma GCC diagnostic ignored "-Wunused-function" +#endif + +/* + * Include windows.h without Windows Sockets 1.1 to prevent conflicts with + * Windows Sockets 2.0. + */ +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Indicates that d_type field is available in dirent structure */ +#define _DIRENT_HAVE_D_TYPE + +/* Indicates that d_namlen field is available in dirent structure */ +#define _DIRENT_HAVE_D_NAMLEN + +/* Entries missing from MSVC 6.0 */ +#if !defined(FILE_ATTRIBUTE_DEVICE) +# define FILE_ATTRIBUTE_DEVICE 0x40 +#endif + +/* File type and permission flags for stat(), general mask */ +#if !defined(S_IFMT) +# define S_IFMT _S_IFMT +#endif + +/* Directory bit */ +#if !defined(S_IFDIR) +# define S_IFDIR _S_IFDIR +#endif + +/* Character device bit */ +#if !defined(S_IFCHR) +# define S_IFCHR _S_IFCHR +#endif + +/* Pipe bit */ +#if !defined(S_IFFIFO) +# define S_IFFIFO _S_IFFIFO +#endif + +/* Regular file bit */ +#if !defined(S_IFREG) +# define S_IFREG _S_IFREG +#endif + +/* Read permission */ +#if !defined(S_IREAD) +# define S_IREAD _S_IREAD +#endif + +/* Write permission */ +#if !defined(S_IWRITE) +# define S_IWRITE _S_IWRITE +#endif + +/* Execute permission */ +#if !defined(S_IEXEC) +# define S_IEXEC _S_IEXEC +#endif + +/* Pipe */ +#if !defined(S_IFIFO) +# define S_IFIFO _S_IFIFO +#endif + +/* Block device */ +#if !defined(S_IFBLK) +# define S_IFBLK 0 +#endif + +/* Link */ +#if !defined(S_IFLNK) +# define S_IFLNK 0 +#endif + +/* 
Socket */ +#if !defined(S_IFSOCK) +# define S_IFSOCK 0 +#endif + +/* Read user permission */ +#if !defined(S_IRUSR) +# define S_IRUSR S_IREAD +#endif + +/* Write user permission */ +#if !defined(S_IWUSR) +# define S_IWUSR S_IWRITE +#endif + +/* Execute user permission */ +#if !defined(S_IXUSR) +# define S_IXUSR 0 +#endif + +/* Read group permission */ +#if !defined(S_IRGRP) +# define S_IRGRP 0 +#endif + +/* Write group permission */ +#if !defined(S_IWGRP) +# define S_IWGRP 0 +#endif + +/* Execute group permission */ +#if !defined(S_IXGRP) +# define S_IXGRP 0 +#endif + +/* Read others permission */ +#if !defined(S_IROTH) +# define S_IROTH 0 +#endif + +/* Write others permission */ +#if !defined(S_IWOTH) +# define S_IWOTH 0 +#endif + +/* Execute others permission */ +#if !defined(S_IXOTH) +# define S_IXOTH 0 +#endif + +/* Maximum length of file name */ +#if !defined(PATH_MAX) +# define PATH_MAX MAX_PATH +#endif +#if !defined(FILENAME_MAX) +# define FILENAME_MAX MAX_PATH +#endif +#if !defined(NAME_MAX) +# define NAME_MAX FILENAME_MAX +#endif + +/* File type flags for d_type */ +#define DT_UNKNOWN 0 +#define DT_REG S_IFREG +#define DT_DIR S_IFDIR +#define DT_FIFO S_IFIFO +#define DT_SOCK S_IFSOCK +#define DT_CHR S_IFCHR +#define DT_BLK S_IFBLK +#define DT_LNK S_IFLNK + +/* Macros for converting between st_mode and d_type */ +#define IFTODT(mode) ((mode) & S_IFMT) +#define DTTOIF(type) (type) + +/* + * File type macros. Note that block devices, sockets and links cannot be + * distinguished on Windows and the macros S_ISBLK, S_ISSOCK and S_ISLNK are + * only defined for compatibility. These macros should always return false + * on Windows. 
+ */ +#if !defined(S_ISFIFO) +# define S_ISFIFO(mode) (((mode) & S_IFMT) == S_IFIFO) +#endif +#if !defined(S_ISDIR) +# define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) +#endif +#if !defined(S_ISREG) +# define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) +#endif +#if !defined(S_ISLNK) +# define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK) +#endif +#if !defined(S_ISSOCK) +# define S_ISSOCK(mode) (((mode) & S_IFMT) == S_IFSOCK) +#endif +#if !defined(S_ISCHR) +# define S_ISCHR(mode) (((mode) & S_IFMT) == S_IFCHR) +#endif +#if !defined(S_ISBLK) +# define S_ISBLK(mode) (((mode) & S_IFMT) == S_IFBLK) +#endif + +/* Return the exact length of the file name without zero terminator */ +#define _D_EXACT_NAMLEN(p) ((p)->d_namlen) + +/* Return the maximum size of a file name */ +#define _D_ALLOC_NAMLEN(p) ((PATH_MAX)+1) + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Wide-character version */ +struct _wdirent { + /* Always zero */ + long d_ino; + + /* File position within stream */ + long d_off; + + /* Structure size */ + unsigned short d_reclen; + + /* Length of name without \0 */ + size_t d_namlen; + + /* File type */ + int d_type; + + /* File name */ + wchar_t d_name[PATH_MAX+1]; +}; +typedef struct _wdirent _wdirent; + +struct _WDIR { + /* Current directory entry */ + struct _wdirent ent; + + /* Private file data */ + WIN32_FIND_DATAW data; + + /* True if data is valid */ + int cached; + + /* Win32 search handle */ + HANDLE handle; + + /* Initial directory name */ + wchar_t *patt; +}; +typedef struct _WDIR _WDIR; + +/* Multi-byte character version */ +struct dirent { + /* Always zero */ + long d_ino; + + /* File position within stream */ + long d_off; + + /* Structure size */ + unsigned short d_reclen; + + /* Length of name without \0 */ + size_t d_namlen; + + /* File type */ + int d_type; + + /* File name */ + char d_name[PATH_MAX+1]; +}; +typedef struct dirent dirent; + +struct DIR { + struct dirent ent; + struct _WDIR *wdirp; +}; +typedef struct DIR DIR; + + +/* 
Dirent functions */ +static DIR *opendir (const char *dirname); +static _WDIR *_wopendir (const wchar_t *dirname); + +static struct dirent *readdir (DIR *dirp); +static struct _wdirent *_wreaddir (_WDIR *dirp); + +static int readdir_r( + DIR *dirp, struct dirent *entry, struct dirent **result); +static int _wreaddir_r( + _WDIR *dirp, struct _wdirent *entry, struct _wdirent **result); + +static int closedir (DIR *dirp); +static int _wclosedir (_WDIR *dirp); + +static void rewinddir (DIR* dirp); +static void _wrewinddir (_WDIR* dirp); + +static int scandir (const char *dirname, struct dirent ***namelist, + int (*filter)(const struct dirent*), + int (*compare)(const struct dirent**, const struct dirent**)); + +static int alphasort (const struct dirent **a, const struct dirent **b); + +static int versionsort (const struct dirent **a, const struct dirent **b); + + +/* For compatibility with Symbian */ +#define wdirent _wdirent +#define WDIR _WDIR +#define wopendir _wopendir +#define wreaddir _wreaddir +#define wclosedir _wclosedir +#define wrewinddir _wrewinddir + + +/* Internal utility functions */ +static WIN32_FIND_DATAW *dirent_first (_WDIR *dirp); +static WIN32_FIND_DATAW *dirent_next (_WDIR *dirp); + +static int dirent_mbstowcs_s( + size_t *pReturnValue, + wchar_t *wcstr, + size_t sizeInWords, + const char *mbstr, + size_t count); + +static int dirent_wcstombs_s( + size_t *pReturnValue, + char *mbstr, + size_t sizeInBytes, + const wchar_t *wcstr, + size_t count); + +static void dirent_set_errno (int error); + + +/* + * Open directory stream DIRNAME for read and return a pointer to the + * internal working area that is used to retrieve individual directory + * entries. 
+ */ +static _WDIR* +_wopendir( + const wchar_t *dirname) +{ + _WDIR *dirp; + DWORD n; + wchar_t *p; + + /* Must have directory name */ + if (dirname == NULL || dirname[0] == '\0') { + dirent_set_errno (ENOENT); + return NULL; + } + + /* Allocate new _WDIR structure */ + dirp = (_WDIR*) malloc (sizeof (struct _WDIR)); + if (!dirp) { + return NULL; + } + + /* Reset _WDIR structure */ + dirp->handle = INVALID_HANDLE_VALUE; + dirp->patt = NULL; + dirp->cached = 0; + + /* + * Compute the length of full path plus zero terminator + * + * Note that on WinRT there's no way to convert relative paths + * into absolute paths, so just assume it is an absolute path. + */ +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + /* Desktop */ + n = GetFullPathNameW (dirname, 0, NULL, NULL); +#else + /* WinRT */ + n = wcslen (dirname); +#endif + + /* Allocate room for absolute directory name and search pattern */ + dirp->patt = (wchar_t*) malloc (sizeof (wchar_t) * n + 16); + if (dirp->patt == NULL) { + goto exit_closedir; + } + + /* + * Convert relative directory name to an absolute one. This + * allows rewinddir() to function correctly even when current + * working directory is changed between opendir() and rewinddir(). + * + * Note that on WinRT there's no way to convert relative paths + * into absolute paths, so just assume it is an absolute path. + */ +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + /* Desktop */ + n = GetFullPathNameW (dirname, n, dirp->patt, NULL); + if (n <= 0) { + goto exit_closedir; + } +#else + /* WinRT */ + wcsncpy_s (dirp->patt, n+1, dirname, n); +#endif + + /* Append search pattern \* to the directory name */ + p = dirp->patt + n; + switch (p[-1]) { + case '\\': + case '/': + case ':': + /* Directory ends in path separator, e.g. 
c:\temp\ */ + /*NOP*/; + break; + + default: + /* Directory name doesn't end in path separator */ + *p++ = '\\'; + } + *p++ = '*'; + *p = '\0'; + + /* Open directory stream and retrieve the first entry */ + if (!dirent_first (dirp)) { + goto exit_closedir; + } + + /* Success */ + return dirp; + + /* Failure */ +exit_closedir: + _wclosedir (dirp); + return NULL; +} + +/* + * Read next directory entry. + * + * Returns pointer to static directory entry which may be overwritten by + * subsequent calls to _wreaddir(). + */ +static struct _wdirent* +_wreaddir( + _WDIR *dirp) +{ + struct _wdirent *entry; + + /* + * Read directory entry to buffer. We can safely ignore the return value + * as entry will be set to NULL in case of error. + */ + (void) _wreaddir_r (dirp, &dirp->ent, &entry); + + /* Return pointer to statically allocated directory entry */ + return entry; +} + +/* + * Read next directory entry. + * + * Returns zero on success. If end of directory stream is reached, then sets + * result to NULL and returns zero. + */ +static int +_wreaddir_r( + _WDIR *dirp, + struct _wdirent *entry, + struct _wdirent **result) +{ + WIN32_FIND_DATAW *datap; + + /* Read next directory entry */ + datap = dirent_next (dirp); + if (datap) { + size_t n; + DWORD attr; + + /* + * Copy file name as wide-character string. If the file name is too + * long to fit in to the destination buffer, then truncate file name + * to PATH_MAX characters and zero-terminate the buffer. 
+ */ + n = 0; + while (n < PATH_MAX && datap->cFileName[n] != 0) { + entry->d_name[n] = datap->cFileName[n]; + n++; + } + entry->d_name[n] = 0; + + /* Length of file name excluding zero terminator */ + entry->d_namlen = n; + + /* File type */ + attr = datap->dwFileAttributes; + if ((attr & FILE_ATTRIBUTE_DEVICE) != 0) { + entry->d_type = DT_CHR; + } else if ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) { + entry->d_type = DT_DIR; + } else { + entry->d_type = DT_REG; + } + + /* Reset dummy fields */ + entry->d_ino = 0; + entry->d_off = 0; + entry->d_reclen = sizeof (struct _wdirent); + + /* Set result address */ + *result = entry; + + } else { + + /* Return NULL to indicate end of directory */ + *result = NULL; + + } + + return /*OK*/0; +} + +/* + * Close directory stream opened by opendir() function. This invalidates the + * DIR structure as well as any directory entry read previously by + * _wreaddir(). + */ +static int +_wclosedir( + _WDIR *dirp) +{ + int ok; + if (dirp) { + + /* Release search handle */ + if (dirp->handle != INVALID_HANDLE_VALUE) { + FindClose (dirp->handle); + } + + /* Release search pattern */ + free (dirp->patt); + + /* Release directory structure */ + free (dirp); + ok = /*success*/0; + + } else { + + /* Invalid directory stream */ + dirent_set_errno (EBADF); + ok = /*failure*/-1; + + } + return ok; +} + +/* + * Rewind directory stream such that _wreaddir() returns the very first + * file name again. 
+ */ +static void +_wrewinddir( + _WDIR* dirp) +{ + if (dirp) { + /* Release existing search handle */ + if (dirp->handle != INVALID_HANDLE_VALUE) { + FindClose (dirp->handle); + } + + /* Open new search handle */ + dirent_first (dirp); + } +} + +/* Get first directory entry (internal) */ +static WIN32_FIND_DATAW* +dirent_first( + _WDIR *dirp) +{ + WIN32_FIND_DATAW *datap; + DWORD error; + + /* Open directory and retrieve the first entry */ + dirp->handle = FindFirstFileExW( + dirp->patt, FindExInfoStandard, &dirp->data, + FindExSearchNameMatch, NULL, 0); + if (dirp->handle != INVALID_HANDLE_VALUE) { + + /* a directory entry is now waiting in memory */ + datap = &dirp->data; + dirp->cached = 1; + + } else { + + /* Failed to open directory: no directory entry in memory */ + dirp->cached = 0; + datap = NULL; + + /* Set error code */ + error = GetLastError (); + switch (error) { + case ERROR_ACCESS_DENIED: + /* No read access to directory */ + dirent_set_errno (EACCES); + break; + + case ERROR_DIRECTORY: + /* Directory name is invalid */ + dirent_set_errno (ENOTDIR); + break; + + case ERROR_PATH_NOT_FOUND: + default: + /* Cannot find the file */ + dirent_set_errno (ENOENT); + } + + } + return datap; +} + +/* + * Get next directory entry (internal). 
+ * + * Returns + */ +static WIN32_FIND_DATAW* +dirent_next( + _WDIR *dirp) +{ + WIN32_FIND_DATAW *p; + + /* Get next directory entry */ + if (dirp->cached != 0) { + + /* A valid directory entry already in memory */ + p = &dirp->data; + dirp->cached = 0; + + } else if (dirp->handle != INVALID_HANDLE_VALUE) { + + /* Get the next directory entry from stream */ + if (FindNextFileW (dirp->handle, &dirp->data) != FALSE) { + /* Got a file */ + p = &dirp->data; + } else { + /* The very last entry has been processed or an error occurred */ + FindClose (dirp->handle); + dirp->handle = INVALID_HANDLE_VALUE; + p = NULL; + } + + } else { + + /* End of directory stream reached */ + p = NULL; + + } + + return p; +} + +/* + * Open directory stream using plain old C-string. + */ +static DIR* +opendir( + const char *dirname) +{ + struct DIR *dirp; + + /* Must have directory name */ + if (dirname == NULL || dirname[0] == '\0') { + dirent_set_errno (ENOENT); + return NULL; + } + + /* Allocate memory for DIR structure */ + dirp = (DIR*) malloc (sizeof (struct DIR)); + if (!dirp) { + return NULL; + } + { + int error; + wchar_t wname[PATH_MAX + 1]; + size_t n; + + /* Convert directory name to wide-character string */ + error = dirent_mbstowcs_s( + &n, wname, PATH_MAX + 1, dirname, PATH_MAX + 1); + if (error) { + /* + * Cannot convert file name to wide-character string. This + * occurs if the string contains invalid multi-byte sequences or + * the output buffer is too small to contain the resulting + * string. + */ + goto exit_free; + } + + + /* Open directory stream using wide-character name */ + dirp->wdirp = _wopendir (wname); + if (!dirp->wdirp) { + goto exit_free; + } + + } + + /* Success */ + return dirp; + + /* Failure */ +exit_free: + free (dirp); + return NULL; +} + +/* + * Read next directory entry. + */ +static struct dirent* +readdir( + DIR *dirp) +{ + struct dirent *entry; + + /* + * Read directory entry to buffer. 
We can safely ignore the return value + * as entry will be set to NULL in case of error. + */ + (void) readdir_r (dirp, &dirp->ent, &entry); + + /* Return pointer to statically allocated directory entry */ + return entry; +} + +/* + * Read next directory entry into called-allocated buffer. + * + * Returns zero on success. If the end of directory stream is reached, then + * sets result to NULL and returns zero. + */ +static int +readdir_r( + DIR *dirp, + struct dirent *entry, + struct dirent **result) +{ + WIN32_FIND_DATAW *datap; + + /* Read next directory entry */ + datap = dirent_next (dirp->wdirp); + if (datap) { + size_t n; + int error; + + /* Attempt to convert file name to multi-byte string */ + error = dirent_wcstombs_s( + &n, entry->d_name, PATH_MAX + 1, datap->cFileName, PATH_MAX + 1); + + /* + * If the file name cannot be represented by a multi-byte string, + * then attempt to use old 8+3 file name. This allows traditional + * Unix-code to access some file names despite of unicode + * characters, although file names may seem unfamiliar to the user. + * + * Be ware that the code below cannot come up with a short file + * name unless the file system provides one. At least + * VirtualBox shared folders fail to do this. 
+ */ + if (error && datap->cAlternateFileName[0] != '\0') { + error = dirent_wcstombs_s( + &n, entry->d_name, PATH_MAX + 1, + datap->cAlternateFileName, PATH_MAX + 1); + } + + if (!error) { + DWORD attr; + + /* Length of file name excluding zero terminator */ + entry->d_namlen = n - 1; + + /* File attributes */ + attr = datap->dwFileAttributes; + if ((attr & FILE_ATTRIBUTE_DEVICE) != 0) { + entry->d_type = DT_CHR; + } else if ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) { + entry->d_type = DT_DIR; + } else { + entry->d_type = DT_REG; + } + + /* Reset dummy fields */ + entry->d_ino = 0; + entry->d_off = 0; + entry->d_reclen = sizeof (struct dirent); + + } else { + + /* + * Cannot convert file name to multi-byte string so construct + * an erroneous directory entry and return that. Note that + * we cannot return NULL as that would stop the processing + * of directory entries completely. + */ + entry->d_name[0] = '?'; + entry->d_name[1] = '\0'; + entry->d_namlen = 1; + entry->d_type = DT_UNKNOWN; + entry->d_ino = 0; + entry->d_off = -1; + entry->d_reclen = 0; + + } + + /* Return pointer to directory entry */ + *result = entry; + + } else { + + /* No more directory entries */ + *result = NULL; + + } + + return /*OK*/0; +} + +/* + * Close directory stream. + */ +static int +closedir( + DIR *dirp) +{ + int ok; + if (dirp) { + + /* Close wide-character directory stream */ + ok = _wclosedir (dirp->wdirp); + dirp->wdirp = NULL; + + /* Release multi-byte character version */ + free (dirp); + + } else { + + /* Invalid directory stream */ + dirent_set_errno (EBADF); + ok = /*failure*/-1; + + } + return ok; +} + +/* + * Rewind directory stream to beginning. + */ +static void +rewinddir( + DIR* dirp) +{ + /* Rewind wide-character string directory stream */ + _wrewinddir (dirp->wdirp); +} + +/* + * Scan directory for entries. 
+ */ +static int +scandir( + const char *dirname, + struct dirent ***namelist, + int (*filter)(const struct dirent*), + int (*compare)(const struct dirent**, const struct dirent**)) +{ + struct dirent **files = NULL; + size_t size = 0; + size_t allocated = 0; + const size_t init_size = 1; + DIR *dir = NULL; + struct dirent *entry; + struct dirent *tmp = NULL; + size_t i; + int result = 0; + + /* Open directory stream */ + dir = opendir (dirname); + if (dir) { + + /* Read directory entries to memory */ + while (1) { + + /* Enlarge pointer table to make room for another pointer */ + if (size >= allocated) { + void *p; + size_t num_entries; + + /* Compute number of entries in the enlarged pointer table */ + if (size < init_size) { + /* Allocate initial pointer table */ + num_entries = init_size; + } else { + /* Double the size */ + num_entries = size * 2; + } + + /* Allocate first pointer table or enlarge existing table */ + p = realloc (files, sizeof (void*) * num_entries); + if (p != NULL) { + /* Got the memory */ + files = (dirent**) p; + allocated = num_entries; + } else { + /* Out of memory */ + result = -1; + break; + } + + } + + /* Allocate room for temporary directory entry */ + if (tmp == NULL) { + tmp = (struct dirent*) malloc (sizeof (struct dirent)); + if (tmp == NULL) { + /* Cannot allocate temporary directory entry */ + result = -1; + break; + } + } + + /* Read directory entry to temporary area */ + if (readdir_r (dir, tmp, &entry) == /*OK*/0) { + + /* Did we get an entry? 
*/ + if (entry != NULL) { + int pass; + + /* Determine whether to include the entry in result */ + if (filter) { + /* Let the filter function decide */ + pass = filter (tmp); + } else { + /* No filter function, include everything */ + pass = 1; + } + + if (pass) { + /* Store the temporary entry to pointer table */ + files[size++] = tmp; + tmp = NULL; + + /* Keep up with the number of files */ + result++; + } + + } else { + + /* + * End of directory stream reached => sort entries and + * exit. + */ + qsort (files, size, sizeof (void*), + (int (*) (const void*, const void*)) compare); + break; + + } + + } else { + /* Error reading directory entry */ + result = /*Error*/ -1; + break; + } + + } + + } else { + /* Cannot open directory */ + result = /*Error*/ -1; + } + + /* Release temporary directory entry */ + free (tmp); + + /* Release allocated memory on error */ + if (result < 0) { + for (i = 0; i < size; i++) { + free (files[i]); + } + free (files); + files = NULL; + } + + /* Close directory stream */ + if (dir) { + closedir (dir); + } + + /* Pass pointer table to caller */ + if (namelist) { + *namelist = files; + } + return result; +} + +/* Alphabetical sorting */ +static int +alphasort( + const struct dirent **a, const struct dirent **b) +{ + return strcoll ((*a)->d_name, (*b)->d_name); +} + +/* Sort versions */ +static int +versionsort( + const struct dirent **a, const struct dirent **b) +{ + /* FIXME: implement strverscmp and use that */ + return alphasort (a, b); +} + +/* Convert multi-byte string to wide character string */ +static int +dirent_mbstowcs_s( + size_t *pReturnValue, + wchar_t *wcstr, + size_t sizeInWords, + const char *mbstr, + size_t count) +{ + int error; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + + /* Microsoft Visual Studio 2005 or later */ + error = mbstowcs_s (pReturnValue, wcstr, sizeInWords, mbstr, count); + +#else + + /* Older Visual Studio or non-Microsoft compiler */ + size_t n; + + /* Convert to wide-character string (or count 
characters) */ + n = mbstowcs (wcstr, mbstr, sizeInWords); + if (!wcstr || n < count) { + + /* Zero-terminate output buffer */ + if (wcstr && sizeInWords) { + if (n >= sizeInWords) { + n = sizeInWords - 1; + } + wcstr[n] = 0; + } + + /* Length of resulting multi-byte string WITH zero terminator */ + if (pReturnValue) { + *pReturnValue = n + 1; + } + + /* Success */ + error = 0; + + } else { + + /* Could not convert string */ + error = 1; + + } + +#endif + return error; +} + +/* Convert wide-character string to multi-byte string */ +static int +dirent_wcstombs_s( + size_t *pReturnValue, + char *mbstr, + size_t sizeInBytes, /* max size of mbstr */ + const wchar_t *wcstr, + size_t count) +{ + int error; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + + /* Microsoft Visual Studio 2005 or later */ + error = wcstombs_s (pReturnValue, mbstr, sizeInBytes, wcstr, count); + +#else + + /* Older Visual Studio or non-Microsoft compiler */ + size_t n; + + /* Convert to multi-byte string (or count the number of bytes needed) */ + n = wcstombs (mbstr, wcstr, sizeInBytes); + if (!mbstr || n < count) { + + /* Zero-terminate output buffer */ + if (mbstr && sizeInBytes) { + if (n >= sizeInBytes) { + n = sizeInBytes - 1; + } + mbstr[n] = '\0'; + } + + /* Length of resulting multi-bytes string WITH zero-terminator */ + if (pReturnValue) { + *pReturnValue = n + 1; + } + + /* Success */ + error = 0; + + } else { + + /* Cannot convert string */ + error = 1; + + } + +#endif + return error; +} + +/* Set errno variable */ +static void +dirent_set_errno( + int error) +{ +#if defined(_MSC_VER) && _MSC_VER >= 1400 + + /* Microsoft Visual Studio 2005 and later */ + _set_errno (error); + +#else + + /* Non-Microsoft compiler or older Microsoft compiler */ + errno = error; + +#endif +} + + +#ifdef __cplusplus +} +#endif +#endif /*DIRENT_H*/ diff --git a/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/crude_json.cpp b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/crude_json.cpp new file 
mode 100644 index 0000000..f2b3ba5 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/crude_json.cpp @@ -0,0 +1,814 @@ +// Crude implementation of JSON value object and parser. +// +// LICENSE +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// publish, and distribute this file as you see fit. +// +// CREDITS +// Written by Michal Cichon +# include "crude_json.h" +# include +# include +# include +# include +# include +# include + + +namespace crude_json { + +value::value(value&& other) + : m_Type(other.m_Type) +{ + switch (m_Type) + { + case type_t::object: construct(m_Storage, std::move( *object_ptr(other.m_Storage))); break; + case type_t::array: construct(m_Storage, std::move( *array_ptr(other.m_Storage))); break; + case type_t::string: construct(m_Storage, std::move( *string_ptr(other.m_Storage))); break; + case type_t::boolean: construct(m_Storage, std::move(*boolean_ptr(other.m_Storage))); break; + case type_t::number: construct(m_Storage, std::move( *number_ptr(other.m_Storage))); break; + default: break; + } + destruct(other.m_Storage, other.m_Type); + other.m_Type = type_t::null; +} + +value::value(const value& other) + : m_Type(other.m_Type) +{ + switch (m_Type) + { + case type_t::object: construct(m_Storage, *object_ptr(other.m_Storage)); break; + case type_t::array: construct(m_Storage, *array_ptr(other.m_Storage)); break; + case type_t::string: construct(m_Storage, *string_ptr(other.m_Storage)); break; + case type_t::boolean: construct(m_Storage, *boolean_ptr(other.m_Storage)); break; + case type_t::number: construct(m_Storage, *number_ptr(other.m_Storage)); break; + default: break; + } +} + +value& value::operator[](size_t index) +{ + if (is_null()) + m_Type = construct(m_Storage, type_t::array); + + if (is_array()) + { + auto& v = *array_ptr(m_Storage); + if (index >= v.size()) + v.insert(v.end(), index - v.size() + 1, 
value()); + + return v[index]; + } + + CRUDE_ASSERT(false && "operator[] on unsupported type"); + std::terminate(); +} + +const value& value::operator[](size_t index) const +{ + if (is_array()) + return (*array_ptr(m_Storage))[index]; + + CRUDE_ASSERT(false && "operator[] on unsupported type"); + std::terminate(); +} + +value& value::operator[](const string& key) +{ + if (is_null()) + m_Type = construct(m_Storage, type_t::object); + + if (is_object()) + return (*object_ptr(m_Storage))[key]; + + CRUDE_ASSERT(false && "operator[] on unsupported type"); + std::terminate(); +} + +const value& value::operator[](const string& key) const +{ + if (is_object()) + { + auto& o = *object_ptr(m_Storage); + auto it = o.find(key); + CRUDE_ASSERT(it != o.end()); + return it->second; + } + + CRUDE_ASSERT(false && "operator[] on unsupported type"); + std::terminate(); +} + +bool value::contains(const string& key) const +{ + if (is_object()) + { + auto& o = *object_ptr(m_Storage); + auto it = o.find(key); + return it != o.end(); + } + + return false; +} + +void value::push_back(const value& value) +{ + if (is_null()) + m_Type = construct(m_Storage, type_t::array); + + if (is_array()) + { + auto& v = *array_ptr(m_Storage); + v.push_back(value); + } + else + { + CRUDE_ASSERT(false && "operator[] on unsupported type"); + std::terminate(); + } +} + +void value::push_back(value&& value) +{ + if (is_null()) + m_Type = construct(m_Storage, type_t::array); + + if (is_array()) + { + auto& v = *array_ptr(m_Storage); + v.push_back(std::move(value)); + } + else + { + CRUDE_ASSERT(false && "operator[] on unsupported type"); + std::terminate(); + } +} + +void value::swap(value& other) +{ + using std::swap; + + if (m_Type == other.m_Type) + { + switch (m_Type) + { + case type_t::object: swap(*object_ptr(m_Storage), *object_ptr(other.m_Storage)); break; + case type_t::array: swap(*array_ptr(m_Storage), *array_ptr(other.m_Storage)); break; + case type_t::string: swap(*string_ptr(m_Storage), 
*string_ptr(other.m_Storage)); break; + case type_t::boolean: swap(*boolean_ptr(m_Storage), *boolean_ptr(other.m_Storage)); break; + case type_t::number: swap(*number_ptr(m_Storage), *number_ptr(other.m_Storage)); break; + default: break; + } + } + else + { + value tmp(std::move(other)); + other.~value(); + new (&other) value(std::move(*this)); + this->~value(); + new (this) value(std::move(tmp)); + } +} + +string value::dump(const int indent, const char indent_char) const +{ + dump_context_t context(indent, indent_char); + + context.out.precision(std::numeric_limits::max_digits10 + 1); + context.out << std::defaultfloat; + + dump(context, 0); + return context.out.str(); +} + +void value::dump_context_t::write_indent(int level) +{ + if (indent <= 0 || level == 0) + return; + + out.fill(indent_char); + out.width(indent * level); + out << indent_char; + out.width(0); +} + +void value::dump_context_t::write_separator() +{ + if (indent < 0) + return; + + out.put(' '); +} + +void value::dump_context_t::write_newline() +{ + if (indent < 0) + return; + + out.put('\n'); +} + +void value::dump(dump_context_t& context, int level) const +{ + context.write_indent(level); + + switch (m_Type) + { + case type_t::null: + context.out << "null"; + break; + + case type_t::object: + context.out << '{'; + { + context.write_newline(); + bool first = true; + for (auto& entry : *object_ptr(m_Storage)) + { + if (!first) { context.out << ','; context.write_newline(); } else first = false; + context.write_indent(level + 1); + context.out << '\"' << entry.first << "\":"; + if (!entry.second.is_structured()) + { + context.write_separator(); + entry.second.dump(context, 0); + } + else + { + context.write_newline(); + entry.second.dump(context, level + 1); + } + } + if (!first) + context.write_newline(); + } + context.write_indent(level); + context.out << '}'; + break; + + case type_t::array: + context.out << '['; + { + context.write_newline(); + bool first = true; + for (auto& entry : 
*array_ptr(m_Storage)) + { + if (!first) { context.out << ','; context.write_newline(); } else first = false; + if (!entry.is_structured()) + { + context.write_indent(level + 1); + entry.dump(context, 0); + } + else + { + entry.dump(context, level + 1); + } + } + if (!first) + context.write_newline(); + } + context.write_indent(level); + context.out << ']'; + break; + + case type_t::string: + context.out << '\"'; + + if (string_ptr(m_Storage)->find_first_of("\"\\/\b\f\n\r") != string::npos || string_ptr(m_Storage)->find('\0') != string::npos) + { + for (auto c : *string_ptr(m_Storage)) + { + if (c == '\"') context.out << "\\\""; + else if (c == '\\') context.out << "\\\\"; + else if (c == '/') context.out << "\\/"; + else if (c == '\b') context.out << "\\b"; + else if (c == '\f') context.out << "\\f"; + else if (c == '\n') context.out << "\\n"; + else if (c == '\r') context.out << "\\r"; + else if (c == '\t') context.out << "\\t"; + else if (c == 0) context.out << "\\u0000"; + else context.out << c; + } + } + else + context.out << *string_ptr(m_Storage); + context.out << '\"'; + break; + + + case type_t::boolean: + if (*boolean_ptr(m_Storage)) + context.out << "true"; + else + context.out << "false"; + break; + + case type_t::number: + context.out << *number_ptr(m_Storage); + break; + + default: + break; + } +} + +struct value::parser +{ + parser(const char* begin, const char* end) + : m_Cursor(begin) + , m_End(end) + { + } + + value parse() + { + value v; + + // Switch to C locale to make strtod and strtol work as expected + auto previous_locale = std::setlocale(LC_NUMERIC, "C"); + + // Accept single value only when end of the stream is reached. 
+ if (!accept_element(v) || !eof()) + v = value(type_t::discarded); + + if (previous_locale && strcmp(previous_locale, "C") != 0) + std::setlocale(LC_NUMERIC, previous_locale); + + return v; + } + +private: + struct cursor_state + { + cursor_state(parser* p) + : m_Owner(p) + , m_LastCursor(p->m_Cursor) + { + } + + void reset() + { + m_Owner->m_Cursor = m_LastCursor; + } + + bool operator()(bool accept) + { + if (!accept) + reset(); + else + m_LastCursor = m_Owner->m_Cursor; + return accept; + } + + private: + parser* m_Owner; + const char* m_LastCursor; + }; + + cursor_state state() + { + return cursor_state(this); + } + + bool accept_value(value& result) + { + return accept_object(result) + || accept_array(result) + || accept_string(result) + || accept_number(result) + || accept_boolean(result) + || accept_null(result); + } + + bool accept_object(value& result) + { + auto s = state(); + + object o; + if (s(accept('{') && accept_ws() && accept('}'))) + { + result = o; + return true; + } + else if (s(accept('{') && accept_members(o) && accept('}'))) + { + result = std::move(o); + return true; + } + + return false; + } + + bool accept_members(object& o) + { + if (!accept_member(o)) + return false; + + while (true) + { + auto s = state(); + if (!s(accept(',') && accept_member(o))) + break; + } + + return true; + } + + bool accept_member(object& o) + { + auto s = state(); + + value key; + value v; + if (s(accept_ws() && accept_string(key) && accept_ws() && accept(':') && accept_element(v))) + { + o.emplace(std::move(key.get()), std::move(v)); + return true; + } + + return false; + } + + bool accept_array(value& result) + { + auto s = state(); + + if (s(accept('[') && accept_ws() && accept(']'))) + { + result = array(); + return true; + } + + array a; + if (s(accept('[') && accept_elements(a) && accept(']'))) + { + result = std::move(a); + return true; + } + + return false; + } + + bool accept_elements(array& a) + { + value v; + if (!accept_element(v)) + return false; + 
+ a.emplace_back(std::move(v)); + while (true) + { + auto s = state(); + v = nullptr; + if (!s(accept(',') && accept_element(v))) + break; + a.emplace_back(std::move(v)); + } + + return true; + } + + bool accept_element(value& result) + { + auto s = state(); + return s(accept_ws() && accept_value(result) && accept_ws()); + } + + bool accept_string(value& result) + { + auto s = state(); + + string v; + if (s(accept('\"') && accept_characters(v) && accept('\"'))) + { + result = std::move(v); + return true; + } + else + return false; + } + + bool accept_characters(string& result) + { + int c; + while (accept_character(c)) + { + CRUDE_ASSERT(c < 128); // #todo: convert characters > 127 to UTF-8 + result.push_back(static_cast(c)); + } + + return true; + } + + bool accept_character(int& c) + { + auto s = state(); + + if (accept('\\')) + { + return accept_escape(c); + } + else if (expect('\"')) + return false; + + // #todo: Handle UTF-8 sequences. + return s((c = peek()) >= 0) && advance(); + } + + bool accept_escape(int& c) + { + if (accept('\"')) { c = '\"'; return true; } + if (accept('\\')) { c = '\\'; return true; } + if (accept('/')) { c = '/'; return true; } + if (accept('b')) { c = '\b'; return true; } + if (accept('f')) { c = '\f'; return true; } + if (accept('n')) { c = '\n'; return true; } + if (accept('r')) { c = '\r'; return true; } + if (accept('t')) { c = '\t'; return true; } + + auto s = state(); + + string hex; + hex.reserve(4); + if (s(accept('u') && accept_hex(hex) && accept_hex(hex) && accept_hex(hex) && accept_hex(hex))) + { + char* end = nullptr; + auto v = std::strtol(hex.c_str(), &end, 16); + if (end != hex.c_str() + hex.size()) + return false; + + c = v; + return true; + } + + return false; + } + + bool accept_hex(string& result) + { + if (accept_digit(result)) + return true; + + auto c = peek(); + if ((c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')) + { + advance(); + result.push_back(static_cast(c)); + return true; + } + + return false; + } + + 
bool accept_number(value& result) + { + auto s = state(); + + string n; + if (s(accept_int(n) && accept_frac(n) && accept_exp(n))) + { + char* end = nullptr; + auto v = std::strtod(n.c_str(), &end); + if (end != n.c_str() + n.size()) + return false; + + if (v != 0 && !std::isnormal(v)) + return false; + + result = v; + return true; + } + + return false; + } + + bool accept_int(string& result) + { + auto s = state(); + + string part; + if (s(accept_onenine(part) && accept_digits(part))) + { + result += std::move(part); + return true; + } + + part.resize(0); + if (accept_digit(part)) + { + result += std::move(part); + return true; + } + + part.resize(0); + if (s(accept('-') && accept_onenine(part) && accept_digits(part))) + { + result += '-'; + result += std::move(part); + return true; + } + + part.resize(0); + if (s(accept('-') && accept_digit(part))) + { + result += '-'; + result += std::move(part); + return true; + } + + return false; + } + + bool accept_digits(string& result) + { + string part; + if (!accept_digit(part)) + return false; + + while (accept_digit(part)) + ; + + result += std::move(part); + + return true; + } + + bool accept_digit(string& result) + { + if (accept('0')) + { + result.push_back('0'); + return true; + } + else if (accept_onenine(result)) + return true; + + return false; + } + + bool accept_onenine(string& result) + { + auto c = peek(); + if (c >= '1' && c <= '9') + { + result.push_back(static_cast(c)); + return advance(); + } + + return false; + } + + bool accept_frac(string& result) + { + auto s = state(); + + string part; + if (s(accept('.') && accept_digits(part))) + { + result += '.'; + result += std::move(part); + } + + return true; + } + + bool accept_exp(string& result) + { + auto s = state(); + + string part; + if (s(accept('e') && accept_sign(part) && accept_digits(part))) + { + result += 'e'; + result += std::move(part); + return true; + } + part.resize(0); + if (s(accept('E') && accept_sign(part) && accept_digits(part))) + { + 
result += 'E'; + result += std::move(part); + } + + return true; + } + + bool accept_sign(string& result) + { + if (accept('+')) + result.push_back('+'); + else if (accept('-')) + result.push_back('-'); + + return true; + } + + bool accept_ws() + { + while (expect('\x09') || expect('\x0A') || expect('\x0D') || expect('\x20')) + advance(); + return true; + } + + bool accept_boolean(value& result) + { + if (accept("true")) + { + result = true; + return true; + } + else if (accept("false")) + { + result = false; + return true; + } + + return false; + } + + bool accept_null(value& result) + { + if (accept("null")) + { + result = nullptr; + return true; + } + + return false; + } + + bool accept(char c) + { + if (expect(c)) + return advance(); + else + return false; + } + + bool accept(const char* str) + { + auto last = m_Cursor; + + while (*str) + { + if (eof() || *str != *m_Cursor) + { + m_Cursor = last; + return false; + } + + advance(); + ++str; + } + + return true; + } + + int peek() const + { + if (!eof()) + return *m_Cursor; + else + return -1; + } + + bool expect(char c) + { + return peek() == c; + } + + bool advance(int count = 1) + { + if (m_Cursor + count > m_End) + { + m_Cursor = m_End; + return false; + } + + m_Cursor += count; + + return true; + } + + bool eof() const + { + return m_Cursor == m_End; + } + + const char* m_Cursor; + const char* m_End; +}; + +value value::parse(const string& data) +{ + auto p = parser(data.c_str(), data.c_str() + data.size()); + + auto v = p.parse(); + + return v; +} + +} // namespace crude_json diff --git a/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/crude_json.h b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/crude_json.h new file mode 100644 index 0000000..06eedd7 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/crude_json.h @@ -0,0 +1,223 @@ +// Crude implementation of JSON value object and parser. 
+// +// LICENSE +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// publish, and distribute this file as you see fit. +// +// CREDITS +// Written by Michal Cichon +# ifndef __CRUDE_JSON_H__ +# define __CRUDE_JSON_H__ +# pragma once + +# include +# include +# include +# include +# include +# include +# include + +# ifndef CRUDE_ASSERT +# include +# define CRUDE_ASSERT(expr) assert(expr) +# endif + +namespace crude_json { + +struct value; + +using string = std::string; +using object = std::map; +using array = std::vector; +using number = double; +using boolean = bool; +using null = std::nullptr_t; + +enum class type_t +{ + null, + object, + array, + string, + boolean, + number, + discarded +}; + +struct value +{ + value(type_t type = type_t::null): m_Type(construct(m_Storage, type)) {} + value(value&& other); + value(const value& other); + + value( null) : m_Type(construct(m_Storage, null())) {} + value( object&& v): m_Type(construct(m_Storage, std::move(v))) {} + value(const object& v): m_Type(construct(m_Storage, v)) {} + value( array&& v): m_Type(construct(m_Storage, std::move(v))) {} + value(const array& v): m_Type(construct(m_Storage, v)) {} + value( string&& v): m_Type(construct(m_Storage, std::move(v))) {} + value(const string& v): m_Type(construct(m_Storage, v)) {} + value(const char* v): m_Type(construct(m_Storage, v)) {} + value( boolean v): m_Type(construct(m_Storage, v)) {} + value( number v): m_Type(construct(m_Storage, v)) {} + ~value() { destruct(m_Storage, m_Type); } + + value& operator=(value&& other) { if (this != &other) { value(std::move(other)).swap(*this); } return *this; } + value& operator=(const value& other) { if (this != &other) { value( other).swap(*this); } return *this; } + + value& operator=( null) { auto other = value( ); swap(other); return *this; } + value& operator=( object&& v) { auto other = value(std::move(v)); 
swap(other); return *this; } + value& operator=(const object& v) { auto other = value( v); swap(other); return *this; } + value& operator=( array&& v) { auto other = value(std::move(v)); swap(other); return *this; } + value& operator=(const array& v) { auto other = value( v); swap(other); return *this; } + value& operator=( string&& v) { auto other = value(std::move(v)); swap(other); return *this; } + value& operator=(const string& v) { auto other = value( v); swap(other); return *this; } + value& operator=(const char* v) { auto other = value( v); swap(other); return *this; } + value& operator=( boolean v) { auto other = value( v); swap(other); return *this; } + value& operator=( number v) { auto other = value( v); swap(other); return *this; } + + type_t type() const { return m_Type; } + + operator type_t() const { return m_Type; } + + value& operator[](size_t index); + const value& operator[](size_t index) const; + value& operator[](const string& key); + const value& operator[](const string& key) const; + + bool contains(const string& key) const; + + void push_back(const value& value); + void push_back(value&& value); + + bool is_primitive() const { return is_string() || is_number() || is_boolean() || is_null(); } + bool is_structured() const { return is_object() || is_array(); } + bool is_null() const { return m_Type == type_t::null; } + bool is_object() const { return m_Type == type_t::object; } + bool is_array() const { return m_Type == type_t::array; } + bool is_string() const { return m_Type == type_t::string; } + bool is_boolean() const { return m_Type == type_t::boolean; } + bool is_number() const { return m_Type == type_t::number; } + bool is_discarded() const { return m_Type == type_t::discarded; } + + template const T& get() const; + template T& get(); + + string dump(const int indent = -1, const char indent_char = ' ') const; + + void swap(value& other); + + inline friend void swap(value& lhs, value& rhs) { lhs.swap(rhs); } + + // Returns discarded 
value for invalid inputs. + static value parse(const string& data); + +private: + struct parser; + + // VS2015: std::max() is not constexpr yet. +# define CRUDE_MAX2(a, b) ((a) < (b) ? (b) : (a)) +# define CRUDE_MAX3(a, b, c) CRUDE_MAX2(CRUDE_MAX2(a, b), c) +# define CRUDE_MAX4(a, b, c, d) CRUDE_MAX2(CRUDE_MAX3(a, b, c), d) +# define CRUDE_MAX5(a, b, c, d, e) CRUDE_MAX2(CRUDE_MAX4(a, b, c, d), e) + enum + { + max_size = CRUDE_MAX5( sizeof(string), sizeof(object), sizeof(array), sizeof(number), sizeof(boolean)), + max_align = CRUDE_MAX5(alignof(string), alignof(object), alignof(array), alignof(number), alignof(boolean)) + }; +# undef CRUDE_MAX5 +# undef CRUDE_MAX4 +# undef CRUDE_MAX3 +# undef CRUDE_MAX2 + using storage_t = std::aligned_storage::type; + + static object* object_ptr( storage_t& storage) { return reinterpret_cast< object*>(&storage); } + static const object* object_ptr(const storage_t& storage) { return reinterpret_cast(&storage); } + static array* array_ptr( storage_t& storage) { return reinterpret_cast< array*>(&storage); } + static const array* array_ptr(const storage_t& storage) { return reinterpret_cast(&storage); } + static string* string_ptr( storage_t& storage) { return reinterpret_cast< string*>(&storage); } + static const string* string_ptr(const storage_t& storage) { return reinterpret_cast(&storage); } + static boolean* boolean_ptr( storage_t& storage) { return reinterpret_cast< boolean*>(&storage); } + static const boolean* boolean_ptr(const storage_t& storage) { return reinterpret_cast(&storage); } + static number* number_ptr( storage_t& storage) { return reinterpret_cast< number*>(&storage); } + static const number* number_ptr(const storage_t& storage) { return reinterpret_cast(&storage); } + + static type_t construct(storage_t& storage, type_t type) + { + switch (type) + { + case type_t::object: new (&storage) object(); break; + case type_t::array: new (&storage) array(); break; + case type_t::string: new (&storage) string(); break; + 
case type_t::boolean: new (&storage) boolean(); break; + case type_t::number: new (&storage) number(); break; + default: break; + } + + return type; + } + + static type_t construct(storage_t& storage, null) { (void)storage; return type_t::null; } + static type_t construct(storage_t& storage, object&& value) { new (&storage) object(std::forward(value)); return type_t::object; } + static type_t construct(storage_t& storage, const object& value) { new (&storage) object(value); return type_t::object; } + static type_t construct(storage_t& storage, array&& value) { new (&storage) array(std::forward(value)); return type_t::array; } + static type_t construct(storage_t& storage, const array& value) { new (&storage) array(value); return type_t::array; } + static type_t construct(storage_t& storage, string&& value) { new (&storage) string(std::forward(value)); return type_t::string; } + static type_t construct(storage_t& storage, const string& value) { new (&storage) string(value); return type_t::string; } + static type_t construct(storage_t& storage, const char* value) { new (&storage) string(value); return type_t::string; } + static type_t construct(storage_t& storage, boolean value) { new (&storage) boolean(value); return type_t::boolean; } + static type_t construct(storage_t& storage, number value) { new (&storage) number(value); return type_t::number; } + + static void destruct(storage_t& storage, type_t type) + { + switch (type) + { + case type_t::object: object_ptr(storage)->~object(); break; + case type_t::array: array_ptr(storage)->~array(); break; + case type_t::string: string_ptr(storage)->~string(); break; + default: break; + } + } + + struct dump_context_t + { + std::ostringstream out; + const int indent = -1; + const char indent_char = ' '; + + // VS2015: Aggregate initialization isn't a thing yet. 
+ dump_context_t(const int indent, const char indent_char) + : indent(indent) + , indent_char(indent_char) + { + } + + void write_indent(int level); + void write_separator(); + void write_newline(); + }; + + void dump(dump_context_t& context, int level) const; + + storage_t m_Storage; + type_t m_Type; +}; + +template <> inline const object& value::get() const { CRUDE_ASSERT(m_Type == type_t::object); return *object_ptr(m_Storage); } +template <> inline const array& value::get() const { CRUDE_ASSERT(m_Type == type_t::array); return *array_ptr(m_Storage); } +template <> inline const string& value::get() const { CRUDE_ASSERT(m_Type == type_t::string); return *string_ptr(m_Storage); } +template <> inline const boolean& value::get() const { CRUDE_ASSERT(m_Type == type_t::boolean); return *boolean_ptr(m_Storage); } +template <> inline const number& value::get() const { CRUDE_ASSERT(m_Type == type_t::number); return *number_ptr(m_Storage); } + +template <> inline object& value::get() { CRUDE_ASSERT(m_Type == type_t::object); return *object_ptr(m_Storage); } +template <> inline array& value::get() { CRUDE_ASSERT(m_Type == type_t::array); return *array_ptr(m_Storage); } +template <> inline string& value::get() { CRUDE_ASSERT(m_Type == type_t::string); return *string_ptr(m_Storage); } +template <> inline boolean& value::get() { CRUDE_ASSERT(m_Type == type_t::boolean); return *boolean_ptr(m_Storage); } +template <> inline number& value::get() { CRUDE_ASSERT(m_Type == type_t::number); return *number_ptr(m_Storage); } + + +} // namespace crude_json + +# endif // __CRUDE_JSON_H__ \ No newline at end of file diff --git a/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_bezier_math.h b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_bezier_math.h new file mode 100644 index 0000000..09b76e7 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_bezier_math.h @@ -0,0 +1,142 @@ 
+//------------------------------------------------------------------------------ +// LICENSE +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// publish, and distribute this file as you see fit. +// +// CREDITS +// Written by Michal Cichon +//------------------------------------------------------------------------------ +# ifndef __IMGUI_BEZIER_MATH_H__ +# define __IMGUI_BEZIER_MATH_H__ +# pragma once + + +//------------------------------------------------------------------------------ +# include "imgui_extra_math.h" + + +//------------------------------------------------------------------------------ +template +struct ImCubicBezierPointsT +{ + T P0; + T P1; + T P2; + T P3; +}; +using ImCubicBezierPoints = ImCubicBezierPointsT; + + +//------------------------------------------------------------------------------ +// Low-level Bezier curve sampling. +template inline T ImLinearBezier(const T& p0, const T& p1, float t); +template inline T ImLinearBezierDt(const T& p0, const T& p1, float t); +template inline T ImQuadraticBezier(const T& p0, const T& p1, const T& p2, float t); +template inline T ImQuadraticBezierDt(const T& p0, const T& p1, const T& p2, float t); +template inline T ImCubicBezier(const T& p0, const T& p1, const T& p2, const T& p3, float t); +template inline T ImCubicBezierDt(const T& p0, const T& p1, const T& p2, const T& p3, float t); + + +// High-level Bezier sampling, automatically collapse to lower level Bezier curves if control points overlap. 
+template inline T ImCubicBezierSample(const T& p0, const T& p1, const T& p2, const T& p3, float t); +template inline T ImCubicBezierSample(const ImCubicBezierPointsT& curve, float t); +template inline T ImCubicBezierTangent(const T& p0, const T& p1, const T& p2, const T& p3, float t); +template inline T ImCubicBezierTangent(const ImCubicBezierPointsT& curve, float t); + + +// Calculate approximate length of Cubic Bezier curve. +template inline float ImCubicBezierLength(const T& p0, const T& p1, const T& p2, const T& p3); +template inline float ImCubicBezierLength(const ImCubicBezierPointsT& curve); + + +// Splits Cubic Bezier curve into two curves. +template +struct ImCubicBezierSplitResultT +{ + ImCubicBezierPointsT Left; + ImCubicBezierPointsT Right; +}; +using ImCubicBezierSplitResult = ImCubicBezierSplitResultT; + +template inline ImCubicBezierSplitResultT ImCubicBezierSplit(const T& p0, const T& p1, const T& p2, const T& p3, float t); +template inline ImCubicBezierSplitResultT ImCubicBezierSplit(const ImCubicBezierPointsT& curve, float t); + + +// Returns bounding rectangle of Cubic Bezier curve. +inline ImRect ImCubicBezierBoundingRect(const ImVec2& p0, const ImVec2& p1, const ImVec2& p2, const ImVec2& p3); +inline ImRect ImCubicBezierBoundingRect(const ImCubicBezierPoints& curve); + + +// Project point on Cubic Bezier curve. +struct ImProjectResult +{ + ImVec2 Point; // Point on curve + float Time; // [0 - 1] + float Distance; // Distance to curve +}; + +inline ImProjectResult ImProjectOnCubicBezier(const ImVec2& p, const ImVec2& p0, const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const int subdivisions = 100); +inline ImProjectResult ImProjectOnCubicBezier(const ImVec2& p, const ImCubicBezierPoints& curve, const int subdivisions = 100); + + +// Calculate intersection between line and a Cubic Bezier curve. 
+struct ImCubicBezierIntersectResult +{ + int Count; + ImVec2 Points[3]; +}; + +inline ImCubicBezierIntersectResult ImCubicBezierLineIntersect(const ImVec2& p0, const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& a0, const ImVec2& a1); +inline ImCubicBezierIntersectResult ImCubicBezierLineIntersect(const ImCubicBezierPoints& curve, const ImLine& line); + + +// Adaptive Cubic Bezier subdivision. +enum ImCubicBezierSubdivideFlags +{ + ImCubicBezierSubdivide_None = 0, + ImCubicBezierSubdivide_SkipFirst = 1 +}; + +struct ImCubicBezierSubdivideSample +{ + ImVec2 Point; + ImVec2 Tangent; +}; + +using ImCubicBezierSubdivideCallback = void (*)(const ImCubicBezierSubdivideSample& p, void* user_pointer); + +inline void ImCubicBezierSubdivide(ImCubicBezierSubdivideCallback callback, void* user_pointer, const ImVec2& p0, const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, float tess_tol = -1.0f, ImCubicBezierSubdivideFlags flags = ImCubicBezierSubdivide_None); +inline void ImCubicBezierSubdivide(ImCubicBezierSubdivideCallback callback, void* user_pointer, const ImCubicBezierPoints& curve, float tess_tol = -1.0f, ImCubicBezierSubdivideFlags flags = ImCubicBezierSubdivide_None); + + +// F has signature void(const ImCubicBezierSubdivideSample& p) +template inline void ImCubicBezierSubdivide(F& callback, const ImVec2& p0, const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, float tess_tol = -1.0f, ImCubicBezierSubdivideFlags flags = ImCubicBezierSubdivide_None); +template inline void ImCubicBezierSubdivide(F& callback, const ImCubicBezierPoints& curve, float tess_tol = -1.0f, ImCubicBezierSubdivideFlags flags = ImCubicBezierSubdivide_None); + +// Fixed step Cubic Bezier subdivision. 
+struct ImCubicBezierFixedStepSample +{ + float T; + float Length; + ImVec2 Point; + bool BreakSearch; +}; + +using ImCubicBezierFixedStepCallback = void (*)(ImCubicBezierFixedStepSample& sample, void* user_pointer); + +inline void ImCubicBezierFixedStep(ImCubicBezierFixedStepCallback callback, void* user_pointer, const ImVec2& p0, const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, float step, bool overshoot = false, float max_value_error = 1e-3f, float max_t_error = 1e-5f); +inline void ImCubicBezierFixedStep(ImCubicBezierFixedStepCallback callback, void* user_pointer, const ImCubicBezierPoints& curve, float step, bool overshoot = false, float max_value_error = 1e-3f, float max_t_error = 1e-5f); + + +// F has signature void(const ImCubicBezierFixedStepSample& p) +template inline void ImCubicBezierFixedStep(F& callback, const ImVec2& p0, const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, float step, bool overshoot = false, float max_value_error = 1e-3f, float max_t_error = 1e-5f); +template inline void ImCubicBezierFixedStep(F& callback, const ImCubicBezierPoints& curve, float step, bool overshoot = false, float max_value_error = 1e-3f, float max_t_error = 1e-5f); + + +//------------------------------------------------------------------------------ +# include "imgui_bezier_math.inl" + + +//------------------------------------------------------------------------------ +# endif // __IMGUI_BEZIER_MATH_H__ diff --git a/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_bezier_math.inl b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_bezier_math.inl new file mode 100644 index 0000000..c2c7c43 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_bezier_math.inl @@ -0,0 +1,670 @@ +//------------------------------------------------------------------------------ +// LICENSE +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, 
modify, +// publish, and distribute this file as you see fit. +// +// CREDITS +// Written by Michal Cichon +//------------------------------------------------------------------------------ +# ifndef __IMGUI_BEZIER_MATH_INL__ +# define __IMGUI_BEZIER_MATH_INL__ +# pragma once + + +//------------------------------------------------------------------------------ +# include "imgui_bezier_math.h" +# include // used in ImCubicBezierFixedStep + + +//------------------------------------------------------------------------------ +template +inline T ImLinearBezier(const T& p0, const T& p1, float t) +{ + return p0 + t * (p1 - p0); +} + +template +inline T ImLinearBezierDt(const T& p0, const T& p1, float t) +{ + IM_UNUSED(t); + + return p1 - p0; +} + +template +inline T ImQuadraticBezier(const T& p0, const T& p1, const T& p2, float t) +{ + const auto a = 1 - t; + + return a * a * p0 + 2 * t * a * p1 + t * t * p2; +} + +template +inline T ImQuadraticBezierDt(const T& p0, const T& p1, const T& p2, float t) +{ + return 2 * (1 - t) * (p1 - p0) + 2 * t * (p2 - p1); +} + +template +inline T ImCubicBezier(const T& p0, const T& p1, const T& p2, const T& p3, float t) +{ + const auto a = 1 - t; + const auto b = a * a * a; + const auto c = t * t * t; + + return b * p0 + 3 * t * a * a * p1 + 3 * t * t * a * p2 + c * p3; +} + +template +inline T ImCubicBezierDt(const T& p0, const T& p1, const T& p2, const T& p3, float t) +{ + const auto a = 1 - t; + const auto b = a * a; + const auto c = t * t; + const auto d = 2 * t * a; + + return -3 * p0 * b + 3 * p1 * (b - d) + 3 * p2 * (d - c) + 3 * p3 * c; +} + +template +inline T ImCubicBezierSample(const T& p0, const T& p1, const T& p2, const T& p3, float t) +{ + const auto cp0_zero = ImLengthSqr(p1 - p0) < 1e-5f; + const auto cp1_zero = ImLengthSqr(p3 - p2) < 1e-5f; + + if (cp0_zero && cp1_zero) + return ImLinearBezier(p0, p3, t); + else if (cp0_zero) + return ImQuadraticBezier(p0, p2, p3, t); + else if (cp1_zero) + return ImQuadraticBezier(p0, 
p1, p3, t); + else + return ImCubicBezier(p0, p1, p2, p3, t); +} + +template +inline T ImCubicBezierSample(const ImCubicBezierPointsT& curve, float t) +{ + return ImCubicBezierSample(curve.P0, curve.P1, curve.P2, curve.P3, t); +} + +template +inline T ImCubicBezierTangent(const T& p0, const T& p1, const T& p2, const T& p3, float t) +{ + const auto cp0_zero = ImLengthSqr(p1 - p0) < 1e-5f; + const auto cp1_zero = ImLengthSqr(p3 - p2) < 1e-5f; + + if (cp0_zero && cp1_zero) + return ImLinearBezierDt(p0, p3, t); + else if (cp0_zero) + return ImQuadraticBezierDt(p0, p2, p3, t); + else if (cp1_zero) + return ImQuadraticBezierDt(p0, p1, p3, t); + else + return ImCubicBezierDt(p0, p1, p2, p3, t); +} + +template +inline T ImCubicBezierTangent(const ImCubicBezierPointsT& curve, float t) +{ + return ImCubicBezierTangent(curve.P0, curve.P1, curve.P2, curve.P3, t); +} + +template +inline float ImCubicBezierLength(const T& p0, const T& p1, const T& p2, const T& p3) +{ + // Legendre-Gauss abscissae with n=24 (x_i values, defined at i=n as the roots of the nth order Legendre polynomial Pn(x)) + static const float t_values[] = + { + -0.0640568928626056260850430826247450385909f, + 0.0640568928626056260850430826247450385909f, + -0.1911188674736163091586398207570696318404f, + 0.1911188674736163091586398207570696318404f, + -0.3150426796961633743867932913198102407864f, + 0.3150426796961633743867932913198102407864f, + -0.4337935076260451384870842319133497124524f, + 0.4337935076260451384870842319133497124524f, + -0.5454214713888395356583756172183723700107f, + 0.5454214713888395356583756172183723700107f, + -0.6480936519369755692524957869107476266696f, + 0.6480936519369755692524957869107476266696f, + -0.7401241915785543642438281030999784255232f, + 0.7401241915785543642438281030999784255232f, + -0.8200019859739029219539498726697452080761f, + 0.8200019859739029219539498726697452080761f, + -0.8864155270044010342131543419821967550873f, + 0.8864155270044010342131543419821967550873f, + 
-0.9382745520027327585236490017087214496548f, + 0.9382745520027327585236490017087214496548f, + -0.9747285559713094981983919930081690617411f, + 0.9747285559713094981983919930081690617411f, + -0.9951872199970213601799974097007368118745f, + 0.9951872199970213601799974097007368118745f + }; + + // Legendre-Gauss weights with n=24 (w_i values, defined by a function linked to in the Bezier primer article) + static const float c_values[] = + { + 0.1279381953467521569740561652246953718517f, + 0.1279381953467521569740561652246953718517f, + 0.1258374563468282961213753825111836887264f, + 0.1258374563468282961213753825111836887264f, + 0.1216704729278033912044631534762624256070f, + 0.1216704729278033912044631534762624256070f, + 0.1155056680537256013533444839067835598622f, + 0.1155056680537256013533444839067835598622f, + 0.1074442701159656347825773424466062227946f, + 0.1074442701159656347825773424466062227946f, + 0.0976186521041138882698806644642471544279f, + 0.0976186521041138882698806644642471544279f, + 0.0861901615319532759171852029837426671850f, + 0.0861901615319532759171852029837426671850f, + 0.0733464814110803057340336152531165181193f, + 0.0733464814110803057340336152531165181193f, + 0.0592985849154367807463677585001085845412f, + 0.0592985849154367807463677585001085845412f, + 0.0442774388174198061686027482113382288593f, + 0.0442774388174198061686027482113382288593f, + 0.0285313886289336631813078159518782864491f, + 0.0285313886289336631813078159518782864491f, + 0.0123412297999871995468056670700372915759f, + 0.0123412297999871995468056670700372915759f + }; + + static_assert(sizeof(t_values) / sizeof(*t_values) == sizeof(c_values) / sizeof(*c_values), ""); + + auto arc = [p0, p1, p2, p3](float t) + { + const auto p = ImCubicBezierDt(p0, p1, p2, p3, t); + const auto l = ImLength(p); + return l; + }; + + const auto z = 0.5f; + const auto n = sizeof(t_values) / sizeof(*t_values); + + auto accumulator = 0.0f; + for (size_t i = 0; i < n; ++i) + { + const auto t = z * t_values[i] + 
z; + accumulator += c_values[i] * arc(t); + } + + return z * accumulator; +} + +template +inline float ImCubicBezierLength(const ImCubicBezierPointsT& curve) +{ + return ImCubicBezierLength(curve.P0, curve.P1, curve.P2, curve.P3); +} + +template +inline ImCubicBezierSplitResultT ImCubicBezierSplit(const T& p0, const T& p1, const T& p2, const T& p3, float t) +{ + const auto z1 = t; + const auto z2 = z1 * z1; + const auto z3 = z1 * z1 * z1; + const auto s1 = z1 - 1; + const auto s2 = s1 * s1; + const auto s3 = s1 * s1 * s1; + + return ImCubicBezierSplitResultT + { + ImCubicBezierPointsT + { + p0, + z1 * p1 - s1 * p0, + z2 * p2 - 2 * z1 * s1 * p1 + s2 * p0, + z3 * p3 - 3 * z2 * s1 * p2 + 3 * z1 * s2 * p1 - s3 * p0 + }, + ImCubicBezierPointsT + { + z3 * p0 - 3 * z2 * s1 * p1 + 3 * z1 * s2 * p2 - s3 * p3, + z2 * p1 - 2 * z1 * s1 * p2 + s2 * p3, + z1 * p2 - s1 * p3, + p3, + } + }; +} + +template +inline ImCubicBezierSplitResultT ImCubicBezierSplit(const ImCubicBezierPointsT& curve, float t) +{ + return ImCubicBezierSplit(curve.P0, curve.P1, curve.P2, curve.P3, t); +} + +inline ImRect ImCubicBezierBoundingRect(const ImVec2& p0, const ImVec2& p1, const ImVec2& p2, const ImVec2& p3) +{ + auto a = 3 * p3 - 9 * p2 + 9 * p1 - 3 * p0; + auto b = 6 * p0 - 12 * p1 + 6 * p2; + auto c = 3 * p1 - 3 * p0; + auto delta_squared = ImMul(b, b) - 4 * ImMul(a, c); + + auto tl = ImMin(p0, p3); + auto rb = ImMax(p0, p3); + +# define IM_VEC2_INDEX(v, i) *(&v.x + i) + + for (int i = 0; i < 2; ++i) + { + if (IM_VEC2_INDEX(delta_squared, i) >= 0) + { + auto delta = ImSqrt(IM_VEC2_INDEX(delta_squared, i)); + + auto t0 = (-IM_VEC2_INDEX(b, i) + delta) / (2 * IM_VEC2_INDEX(a, i)); + if (t0 > 0 && t0 < 1) + { + auto p = ImCubicBezier(IM_VEC2_INDEX(p0, i), IM_VEC2_INDEX(p1, i), IM_VEC2_INDEX(p2, i), IM_VEC2_INDEX(p3, i), t0); + IM_VEC2_INDEX(tl, i) = ImMin(IM_VEC2_INDEX(tl, i), p); + IM_VEC2_INDEX(rb, i) = ImMax(IM_VEC2_INDEX(rb, i), p); + } + + auto t1 = (-IM_VEC2_INDEX(b, i) - delta) / (2 * 
IM_VEC2_INDEX(a, i)); + if (t1 > 0 && t1 < 1) + { + auto p = ImCubicBezier(IM_VEC2_INDEX(p0, i), IM_VEC2_INDEX(p1, i), IM_VEC2_INDEX(p2, i), IM_VEC2_INDEX(p3, i), t1); + IM_VEC2_INDEX(tl, i) = ImMin(IM_VEC2_INDEX(tl, i), p); + IM_VEC2_INDEX(rb, i) = ImMax(IM_VEC2_INDEX(rb, i), p); + } + } + } + +# undef IM_VEC2_INDEX + + return ImRect(tl, rb); +} + +inline ImRect ImCubicBezierBoundingRect(const ImCubicBezierPoints& curve) +{ + return ImCubicBezierBoundingRect(curve.P0, curve.P1, curve.P2, curve.P3); +} + +inline ImProjectResult ImProjectOnCubicBezier(const ImVec2& point, const ImVec2& p0, const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const int subdivisions) +{ + // http://pomax.github.io/bezierinfo/#projections + + const float epsilon = 1e-5f; + const float fixed_step = 1.0f / static_cast(subdivisions - 1); + + ImProjectResult result; + result.Point = point; + result.Time = 0.0f; + result.Distance = FLT_MAX; + + // Step 1: Coarse check + for (int i = 0; i < subdivisions; ++i) + { + auto t = i * fixed_step; + auto p = ImCubicBezier(p0, p1, p2, p3, t); + auto s = point - p; + auto d = ImDot(s, s); + + if (d < result.Distance) + { + result.Point = p; + result.Time = t; + result.Distance = d; + } + } + + if (result.Time == 0.0f || ImFabs(result.Time - 1.0f) <= epsilon) + { + result.Distance = ImSqrt(result.Distance); + return result; + } + + // Step 2: Fine check + auto left = result.Time - fixed_step; + auto right = result.Time + fixed_step; + auto step = fixed_step * 0.1f; + + for (auto t = left; t < right + step; t += step) + { + auto p = ImCubicBezier(p0, p1, p2, p3, t); + auto s = point - p; + auto d = ImDot(s, s); + + if (d < result.Distance) + { + result.Point = p; + result.Time = t; + result.Distance = d; + } + } + + result.Distance = ImSqrt(result.Distance); + + return result; +} + +inline ImProjectResult ImProjectOnCubicBezier(const ImVec2& p, const ImCubicBezierPoints& curve, const int subdivisions) +{ + return ImProjectOnCubicBezier(p, curve.P0, 
curve.P1, curve.P2, curve.P3, subdivisions); +} + +inline ImCubicBezierIntersectResult ImCubicBezierLineIntersect(const ImVec2& p0, const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& a0, const ImVec2& a1) +{ + auto cubic_roots = [](float a, float b, float c, float d, float* roots) -> int + { + int count = 0; + + auto sign = [](float x) -> float { return x < 0 ? -1.0f : 1.0f; }; + + auto A = b / a; + auto B = c / a; + auto C = d / a; + + auto Q = (3 * B - ImPow(A, 2)) / 9; + auto R = (9 * A * B - 27 * C - 2 * ImPow(A, 3)) / 54; + auto D = ImPow(Q, 3) + ImPow(R, 2); // polynomial discriminant + + if (D >= 0) // complex or duplicate roots + { + auto S = sign(R + ImSqrt(D)) * ImPow(ImFabs(R + ImSqrt(D)), (1.0f / 3.0f)); + auto T = sign(R - ImSqrt(D)) * ImPow(ImFabs(R - ImSqrt(D)), (1.0f / 3.0f)); + + roots[0] = -A / 3 + (S + T); // real root + roots[1] = -A / 3 - (S + T) / 2; // real part of complex root + roots[2] = -A / 3 - (S + T) / 2; // real part of complex root + auto Im = ImFabs(ImSqrt(3) * (S - T) / 2); // complex part of root pair + + // discard complex roots + if (Im != 0) + count = 1; + else + count = 3; + } + else // distinct real roots + { + auto th = ImAcos(R / ImSqrt(-ImPow(Q, 3))); + + roots[0] = 2 * ImSqrt(-Q) * ImCos(th / 3) - A / 3; + roots[1] = 2 * ImSqrt(-Q) * ImCos((th + 2 * IM_PI) / 3) - A / 3; + roots[2] = 2 * ImSqrt(-Q) * ImCos((th + 4 * IM_PI) / 3) - A / 3; + + count = 3; + } + + return count; + }; + + // https://github.com/kaishiqi/Geometric-Bezier/blob/master/GeometricBezier/src/kaishiqi/geometric/intersection/Intersection.as + // + // Start with Bezier using Bernstein polynomials for weighting functions: + // (1-t^3)P0 + 3t(1-t)^2P1 + 3t^2(1-t)P2 + t^3P3 + // + // Expand and collect terms to form linear combinations of original Bezier + // controls. 
This ends up with a vector cubic in t: + // (-P0+3P1-3P2+P3)t^3 + (3P0-6P1+3P2)t^2 + (-3P0+3P1)t + P0 + // /\ /\ /\ /\ + // || || || || + // c3 c2 c1 c0 + + // Calculate the coefficients + auto c3 = -p0 + 3 * p1 - 3 * p2 + p3; + auto c2 = 3 * p0 - 6 * p1 + 3 * p2; + auto c1 = -3 * p0 + 3 * p1; + auto c0 = p0; + + // Convert line to normal form: ax + by + c = 0 + auto a = a1.y - a0.y; + auto b = a0.x - a1.x; + auto c = a0.x * (a0.y - a1.y) + a0.y * (a1.x - a0.x); + + // Rotate each cubic coefficient using line for new coordinate system? + // Find roots of rotated cubic + float roots[3]; + auto rootCount = cubic_roots( + a * c3.x + b * c3.y, + a * c2.x + b * c2.y, + a * c1.x + b * c1.y, + a * c0.x + b * c0.y + c, + roots); + + // Any roots in closed interval [0,1] are intersections on Bezier, but + // might not be on the line segment. + // Find intersections and calculate point coordinates + + auto min = ImMin(a0, a1); + auto max = ImMax(a0, a1); + + ImCubicBezierIntersectResult result; + auto points = result.Points; + + for (int i = 0; i < rootCount; ++i) + { + auto root = roots[i]; + + if (0 <= root && root <= 1) + { + // We're within the Bezier curve + // Find point on Bezier + auto p = ImCubicBezier(p0, p1, p2, p3, root); + + // See if point is on line segment + // Had to make special cases for vertical and horizontal lines due + // to slight errors in calculation of p00 + if (a0.x == a1.x) + { + if (min.y <= p.y && p.y <= max.y) + *points++ = p; + } + else if (a0.y == a1.y) + { + if (min.x <= p.x && p.x <= max.x) + *points++ = p; + } + else if (p.x >= min.x && p.y >= min.y && p.x <= max.x && p.y <= max.y) + { + *points++ = p; + } + } + } + + result.Count = static_cast(points - result.Points); + + return result; +} + +inline ImCubicBezierIntersectResult ImCubicBezierLineIntersect(const ImCubicBezierPoints& curve, const ImLine& line) +{ + return ImCubicBezierLineIntersect(curve.P0, curve.P1, curve.P2, curve.P3, line.A, line.B); +} + +inline void 
ImCubicBezierSubdivide(ImCubicBezierSubdivideCallback callback, void* user_pointer, const ImVec2& p0, const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, float tess_tol, ImCubicBezierSubdivideFlags flags) +{ + return ImCubicBezierSubdivide(callback, user_pointer, ImCubicBezierPoints{ p0, p1, p2, p3 }, tess_tol, flags); +} + +inline void ImCubicBezierSubdivide(ImCubicBezierSubdivideCallback callback, void* user_pointer, const ImCubicBezierPoints& curve, float tess_tol, ImCubicBezierSubdivideFlags flags) +{ + struct Tesselator + { + ImCubicBezierSubdivideCallback Callback; + void* UserPointer; + float TesselationTollerance; + ImCubicBezierSubdivideFlags Flags; + + void Commit(const ImVec2& p, const ImVec2& t) + { + ImCubicBezierSubdivideSample sample; + sample.Point = p; + sample.Tangent = t; + Callback(sample, UserPointer); + } + + void Subdivide(const ImCubicBezierPoints& curve, int level = 0) + { + float dx = curve.P3.x - curve.P0.x; + float dy = curve.P3.y - curve.P0.y; + float d2 = ((curve.P1.x - curve.P3.x) * dy - (curve.P1.y - curve.P3.y) * dx); + float d3 = ((curve.P2.x - curve.P3.x) * dy - (curve.P2.y - curve.P3.y) * dx); + d2 = (d2 >= 0) ? d2 : -d2; + d3 = (d3 >= 0) ? 
d3 : -d3; + if ((d2 + d3) * (d2 + d3) < TesselationTollerance * (dx * dx + dy * dy)) + { + Commit(curve.P3, ImCubicBezierTangent(curve, 1.0f)); + } + else if (level < 10) + { + const auto p12 = (curve.P0 + curve.P1) * 0.5f; + const auto p23 = (curve.P1 + curve.P2) * 0.5f; + const auto p34 = (curve.P2 + curve.P3) * 0.5f; + const auto p123 = (p12 + p23) * 0.5f; + const auto p234 = (p23 + p34) * 0.5f; + const auto p1234 = (p123 + p234) * 0.5f; + + Subdivide(ImCubicBezierPoints { curve.P0, p12, p123, p1234 }, level + 1); + Subdivide(ImCubicBezierPoints { p1234, p234, p34, curve.P3 }, level + 1); + } + } + }; + + if (tess_tol < 0) + tess_tol = 1.118f; // sqrtf(1.25f) + + Tesselator tesselator; + tesselator.Callback = callback; + tesselator.UserPointer = user_pointer; + tesselator.TesselationTollerance = tess_tol * tess_tol; + tesselator.Flags = flags; + + if (!(tesselator.Flags & ImCubicBezierSubdivide_SkipFirst)) + tesselator.Commit(curve.P0, ImCubicBezierTangent(curve, 0.0f)); + + tesselator.Subdivide(curve, 0); +} + +template inline void ImCubicBezierSubdivide(F& callback, const ImVec2& p0, const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, float tess_tol, ImCubicBezierSubdivideFlags flags) +{ + auto handler = [](const ImCubicBezierSubdivideSample& p, void* user_pointer) + { + auto& callback = *reinterpret_cast(user_pointer); + callback(p); + }; + + ImCubicBezierSubdivide(handler, &callback, ImCubicBezierPoints{ p0, p1, p2, p3 }, tess_tol, flags); +} + +template inline void ImCubicBezierSubdivide(F& callback, const ImCubicBezierPoints& curve, float tess_tol, ImCubicBezierSubdivideFlags flags) +{ + auto handler = [](const ImCubicBezierSubdivideSample& p, void* user_pointer) + { + auto& callback = *reinterpret_cast(user_pointer); + callback(p); + }; + + ImCubicBezierSubdivide(handler, &callback, curve, tess_tol, flags); +} + +inline void ImCubicBezierFixedStep(ImCubicBezierFixedStepCallback callback, void* user_pointer, const ImVec2& p0, const ImVec2& p1, const 
ImVec2& p2, const ImVec2& p3, float step, bool overshoot, float max_value_error, float max_t_error) +{ + if (step <= 0.0f || !callback || max_value_error <= 0 || max_t_error <= 0) + return; + + ImCubicBezierFixedStepSample sample; + sample.T = 0.0f; + sample.Length = 0.0f; + sample.Point = p0; + sample.BreakSearch = false; + + callback(sample, user_pointer); + if (sample.BreakSearch) + return; + + const auto total_length = ImCubicBezierLength(p0, p1, p2, p3); + const auto point_count = static_cast(total_length / step) + (overshoot ? 2 : 1); + const auto t_min = 0.0f; + const auto t_max = step * point_count / total_length; + const auto t_0 = (t_min + t_max) * 0.5f; + + // #todo: replace map with ImVector + binary search + std::map cache; + for (int point_index = 1; point_index < point_count; ++point_index) + { + const auto targetLength = point_index * step; + + float t_start = t_min; + float t_end = t_max; + float t = t_0; + + float t_best = t; + float error_best = total_length; + + while (true) + { + auto cacheIt = cache.find(t); + if (cacheIt == cache.end()) + { + const auto front = ImCubicBezierSplit(p0, p1, p2, p3, t).Left; + const auto split_length = ImCubicBezierLength(front); + + cacheIt = cache.emplace(t, split_length).first; + } + + const auto length = cacheIt->second; + const auto error = targetLength - length; + + if (error < error_best) + { + error_best = error; + t_best = t; + } + + if (ImFabs(error) <= max_value_error || ImFabs(t_start - t_end) <= max_t_error) + { + sample.T = t; + sample.Length = length; + sample.Point = ImCubicBezier(p0, p1, p2, p3, t); + + callback(sample, user_pointer); + if (sample.BreakSearch) + return; + + break; + } + else if (error < 0.0f) + t_end = t; + else // if (error > 0.0f) + t_start = t; + + t = (t_start + t_end) * 0.5f; + } + } +} + +inline void ImCubicBezierFixedStep(ImCubicBezierFixedStepCallback callback, void* user_pointer, const ImCubicBezierPoints& curve, float step, bool overshoot, float max_value_error, float 
max_t_error) +{ + ImCubicBezierFixedStep(callback, user_pointer, curve.P0, curve.P1, curve.P2, curve.P3, step, overshoot, max_value_error, max_t_error); +} + +// F has signature void(const ImCubicBezierFixedStepSample& p) +template +inline void ImCubicBezierFixedStep(F& callback, const ImVec2& p0, const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, float step, bool overshoot, float max_value_error, float max_t_error) +{ + auto handler = [](ImCubicBezierFixedStepSample& sample, void* user_pointer) + { + auto& callback = *reinterpret_cast(user_pointer); + callback(sample); + }; + + ImCubicBezierFixedStep(handler, &callback, p0, p1, p2, p3, step, overshoot, max_value_error, max_t_error); +} + +template +inline void ImCubicBezierFixedStep(F& callback, const ImCubicBezierPoints& curve, float step, bool overshoot, float max_value_error, float max_t_error) +{ + auto handler = [](ImCubicBezierFixedStepSample& sample, void* user_pointer) + { + auto& callback = *reinterpret_cast(user_pointer); + callback(sample); + }; + + ImCubicBezierFixedStep(handler, &callback, curve.P0, curve.P1, curve.P2, curve.P3, step, overshoot, max_value_error, max_t_error); +} + + +//------------------------------------------------------------------------------ +# endif // __IMGUI_BEZIER_MATH_INL__ diff --git a/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_canvas.cpp b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_canvas.cpp new file mode 100644 index 0000000..df63a40 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_canvas.cpp @@ -0,0 +1,513 @@ +# define IMGUI_DEFINE_MATH_OPERATORS +# include "imgui_canvas.h" +# include + +// https://stackoverflow.com/a/36079786 +# define DECLARE_HAS_MEMBER(__trait_name__, __member_name__) \ + \ + template \ + class __trait_name__ \ + { \ + using check_type = ::std::remove_const_t<__boost_has_member_T__>; \ + struct no_type {char x[2];}; \ + using yes_type = char; \ + \ + struct base { void __member_name__() 
{}}; \ + struct mixin : public base, public check_type {}; \ + \ + template struct aux {}; \ + \ + template static no_type test(aux<&U::__member_name__>*); \ + template static yes_type test(...); \ + \ + public: \ + \ + static constexpr bool value = (sizeof(yes_type) == sizeof(test(0))); \ + } + +namespace ImCanvasDetails { + +DECLARE_HAS_MEMBER(HasFringeScale, _FringeScale); + +struct FringeScaleRef +{ + // Overload is present when ImDrawList does have _FringeScale member variable. + template + static float& Get(typename std::enable_if::value, T>::type* drawList) + { + return drawList->_FringeScale; + } + + // Overload is present when ImDrawList does not have _FringeScale member variable. + template + static float& Get(typename std::enable_if::value, T>::type*) + { + static float placeholder = 1.0f; + return placeholder; + } +}; + +DECLARE_HAS_MEMBER(HasVtxCurrentOffset, _VtxCurrentOffset); + +struct VtxCurrentOffsetRef +{ + // Overload is present when ImDrawList does have _FringeScale member variable. + template + static unsigned int& Get(typename std::enable_if::value, T>::type* drawList) + { + return drawList->_VtxCurrentOffset; + } + + // Overload is present when ImDrawList does not have _FringeScale member variable. + template + static unsigned int& Get(typename std::enable_if::value, T>::type* drawList) + { + return drawList->_CmdHeader.VtxOffset; + } +}; + +} // namespace ImCanvasDetails + +// Returns a reference to _FringeScale extension to ImDrawList +// +// If ImDrawList does not have _FringeScale a placeholder is returned. +static inline float& ImFringeScaleRef(ImDrawList* drawList) +{ + using namespace ImCanvasDetails; + return FringeScaleRef::Get(drawList); +} + +static inline unsigned int& ImVtxOffsetRef(ImDrawList* drawList) +{ + using namespace ImCanvasDetails; + return VtxCurrentOffsetRef::Get(drawList); +} + +static inline ImVec2 ImSelectPositive(const ImVec2& lhs, const ImVec2& rhs) { return ImVec2(lhs.x > 0.0f ? lhs.x : rhs.x, lhs.y > 0.0f ? 
lhs.y : rhs.y); } + +bool ImGuiEx::Canvas::Begin(const char* id, const ImVec2& size) +{ + return Begin(ImGui::GetID(id), size); +} + +bool ImGuiEx::Canvas::Begin(ImGuiID id, const ImVec2& size) +{ + IM_ASSERT(m_InBeginEnd == false); + + m_WidgetPosition = ImGui::GetCursorScreenPos(); + m_WidgetSize = ImSelectPositive(size, ImGui::GetContentRegionAvail()); + m_WidgetRect = ImRect(m_WidgetPosition, m_WidgetPosition + m_WidgetSize); + m_DrawList = ImGui::GetWindowDrawList(); + + UpdateViewTransformPosition(); + + if (ImGui::IsClippedEx(m_WidgetRect, id, false)) + return false; + + // Save current channel, so we can assert when user + // call canvas API with different one. + m_ExpectedChannel = m_DrawList->_Splitter._Current; + + // #debug: Canvas content. + //m_DrawList->AddRectFilled(m_StartPos, m_StartPos + m_CurrentSize, IM_COL32(0, 0, 0, 64)); + m_DrawList->AddRect(m_WidgetRect.Min, m_WidgetRect.Max, IM_COL32(255, 0, 255, 64)); + + ImGui::SetCursorScreenPos(ImVec2(0.0f, 0.0f)); + +# if IMGUI_EX_CANVAS_DEFERED() + m_Ranges.resize(0); +# endif + + SaveInputState(); + SaveViewportState(); + + EnterLocalSpace(); + + // Emit dummy widget matching bounds of the canvas. + ImGui::SetCursorScreenPos(m_ViewRect.Min); + ImGui::Dummy(m_ViewRect.GetSize()); + + ImGui::SetCursorScreenPos(ImVec2(0.0f, 0.0f)); + + m_InBeginEnd = true; + + return true; +} + +void ImGuiEx::Canvas::End() +{ + // If you're here your call to Begin() returned false, + // or Begin() wasn't called at all. + IM_ASSERT(m_InBeginEnd == true); + + // If you're here, please make sure you do not interleave + // channel splitter with canvas. + // Always call canvas function with using same channel. + IM_ASSERT(m_DrawList->_Splitter._Current == m_ExpectedChannel); + + //auto& io = ImGui::GetIO(); + + // Check: Unmatched calls to Suspend() / Resume(). Please check your code. + IM_ASSERT(m_SuspendCounter == 0); + + LeaveLocalSpace(); + + // Emit dummy widget matching bounds of the canvas. 
+ ImGui::SetCursorScreenPos(m_WidgetPosition); + ImGui::Dummy(m_WidgetSize); + + // #debug: Rect around canvas. Content should be inside these bounds. + //m_DrawList->AddRect(m_WidgetPosition - ImVec2(1.0f, 1.0f), m_WidgetPosition + m_WidgetSize + ImVec2(1.0f, 1.0f), IM_COL32(196, 0, 0, 255)); + + m_InBeginEnd = false; +} + +void ImGuiEx::Canvas::SetView(const ImVec2& origin, float scale) +{ + SetView(CanvasView(origin, scale)); +} + +void ImGuiEx::Canvas::SetView(const CanvasView& view) +{ + if (m_InBeginEnd) + LeaveLocalSpace(); + + if (m_View.Origin.x != view.Origin.x || m_View.Origin.y != view.Origin.y) + { + m_View.Origin = view.Origin; + + UpdateViewTransformPosition(); + } + + if (m_View.Scale != view.Scale) + { + m_View.Scale = view.Scale; + m_View.InvScale = view.InvScale; + } + + if (m_InBeginEnd) + EnterLocalSpace(); +} + +void ImGuiEx::Canvas::CenterView(const ImVec2& canvasPoint) +{ + auto view = CalcCenterView(canvasPoint); + SetView(view); +} + +ImGuiEx::CanvasView ImGuiEx::Canvas::CalcCenterView(const ImVec2& canvasPoint) const +{ + auto localCenter = ToLocal(m_WidgetPosition + m_WidgetSize * 0.5f); + auto localOffset = canvasPoint - localCenter; + auto offset = FromLocalV(localOffset); + + return CanvasView{ m_View.Origin - offset, m_View.Scale }; +} + +void ImGuiEx::Canvas::CenterView(const ImRect& canvasRect) +{ + auto view = CalcCenterView(canvasRect); + + SetView(view); +} + +ImGuiEx::CanvasView ImGuiEx::Canvas::CalcCenterView(const ImRect& canvasRect) const +{ + auto canvasRectSize = canvasRect.GetSize(); + + if (canvasRectSize.x <= 0.0f || canvasRectSize.y <= 0.0f) + return View(); + + auto widgetAspectRatio = m_WidgetSize.y > 0.0f ? m_WidgetSize.x / m_WidgetSize.y : 0.0f; + auto canvasRectAspectRatio = canvasRectSize.y > 0.0f ? 
canvasRectSize.x / canvasRectSize.y : 0.0f; + + if (widgetAspectRatio <= 0.0f || canvasRectAspectRatio <= 0.0f) + return View(); + + auto newOrigin = m_View.Origin; + auto newScale = m_View.Scale; + if (canvasRectAspectRatio > widgetAspectRatio) + { + // width span across view + newScale = m_WidgetSize.x / canvasRectSize.x; + newOrigin = canvasRect.Min * -newScale; + newOrigin.y += (m_WidgetSize.y - canvasRectSize.y * newScale) * 0.5f; + } + else + { + // height span across view + newScale = m_WidgetSize.y / canvasRectSize.y; + newOrigin = canvasRect.Min * -newScale; + newOrigin.x += (m_WidgetSize.x - canvasRectSize.x * newScale) * 0.5f; + } + + return CanvasView{ newOrigin, newScale }; +} + +void ImGuiEx::Canvas::Suspend() +{ + // If you're here, please make sure you do not interleave + // channel splitter with canvas. + // Always call canvas function with using same channel. + IM_ASSERT(m_DrawList->_Splitter._Current == m_ExpectedChannel); + + if (m_SuspendCounter == 0) + LeaveLocalSpace(); + + ++m_SuspendCounter; +} + +void ImGuiEx::Canvas::Resume() +{ + // If you're here, please make sure you do not interleave + // channel splitter with canvas. + // Always call canvas function with using same channel. + IM_ASSERT(m_DrawList->_Splitter._Current == m_ExpectedChannel); + + // Check: Number of calls to Resume() do not match calls to Suspend(). Please check your code. 
+ IM_ASSERT(m_SuspendCounter > 0); + if (--m_SuspendCounter == 0) + EnterLocalSpace(); +} + +ImVec2 ImGuiEx::Canvas::FromLocal(const ImVec2& point) const +{ + return point * m_View.Scale + m_ViewTransformPosition; +} + +ImVec2 ImGuiEx::Canvas::FromLocal(const ImVec2& point, const CanvasView& view) const +{ + return point * view.Scale + view.Origin + m_WidgetPosition; +} + +ImVec2 ImGuiEx::Canvas::FromLocalV(const ImVec2& vector) const +{ + return vector * m_View.Scale; +} + +ImVec2 ImGuiEx::Canvas::FromLocalV(const ImVec2& vector, const CanvasView& view) const +{ + return vector * view.Scale; +} + +ImVec2 ImGuiEx::Canvas::ToLocal(const ImVec2& point) const +{ + return (point - m_ViewTransformPosition) * m_View.InvScale; +} + +ImVec2 ImGuiEx::Canvas::ToLocal(const ImVec2& point, const CanvasView& view) const +{ + return (point - view.Origin - m_WidgetPosition) * view.InvScale; +} + +ImVec2 ImGuiEx::Canvas::ToLocalV(const ImVec2& vector) const +{ + return vector * m_View.InvScale; +} + +ImVec2 ImGuiEx::Canvas::ToLocalV(const ImVec2& vector, const CanvasView& view) const +{ + return vector * view.InvScale; +} + +ImRect ImGuiEx::Canvas::CalcViewRect(const CanvasView& view) const +{ + ImRect result; + result.Min = ImVec2(-view.Origin.x, -view.Origin.y) * view.InvScale; + result.Max = (m_WidgetSize - view.Origin) * view.InvScale; + return result; +} + +void ImGuiEx::Canvas::UpdateViewTransformPosition() +{ + m_ViewTransformPosition = m_View.Origin + m_WidgetPosition; +} + +void ImGuiEx::Canvas::SaveInputState() +{ + auto& io = ImGui::GetIO(); + m_MousePosBackup = io.MousePos; + m_MousePosPrevBackup = io.MousePosPrev; + for (auto i = 0; i < IM_ARRAYSIZE(m_MouseClickedPosBackup); ++i) + m_MouseClickedPosBackup[i] = io.MouseClickedPos[i]; + + // Record cursor max to prevent scrollbars from appearing. 
+ m_WindowCursorMaxBackup = ImGui::GetCurrentWindow()->DC.CursorMaxPos; +} + +void ImGuiEx::Canvas::RestoreInputState() +{ + auto& io = ImGui::GetIO(); + io.MousePos = m_MousePosBackup; + io.MousePosPrev = m_MousePosPrevBackup; + for (auto i = 0; i < IM_ARRAYSIZE(m_MouseClickedPosBackup); ++i) + io.MouseClickedPos[i] = m_MouseClickedPosBackup[i]; + ImGui::GetCurrentWindow()->DC.CursorMaxPos = m_WindowCursorMaxBackup; +} + +void ImGuiEx::Canvas::SaveViewportState() +{ +# if defined(IMGUI_HAS_VIEWPORT) + auto viewport = ImGui::GetWindowViewport(); + + m_ViewportPosBackup = viewport->Pos; + m_ViewportSizeBackup = viewport->Size; +# endif +} + +void ImGuiEx::Canvas::RestoreViewportState() +{ +# if defined(IMGUI_HAS_VIEWPORT) + auto viewport = ImGui::GetWindowViewport(); + + viewport->Pos = m_ViewportPosBackup; + viewport->Size = m_ViewportSizeBackup; +# endif +} + +void ImGuiEx::Canvas::EnterLocalSpace() +{ + // Prepare ImDrawList for drawing in local coordinate system: + // - determine visible part of the canvas + // - start unique draw command + // - add clip rect matching canvas size + // - record current command index + // - record current vertex write index + + // Determine visible part of the canvas. Make it before + // adding new command, to avoid round rip where command + // is removed in PopClipRect() and added again next PushClipRect(). + ImGui::PushClipRect(m_WidgetPosition, m_WidgetPosition + m_WidgetSize, true); + auto clipped_clip_rect = m_DrawList->_ClipRectStack.back(); + ImGui::PopClipRect(); + + // Make sure we do not share draw command with anyone. We don't want to mess + // with someones clip rectangle. + + // #FIXME: + // This condition is not enough to avoid when user choose + // to use channel splitter. + // + // To deal with Suspend()/Resume() calls empty draw command + // is always added then splitter is active. Otherwise + // channel merger will collapse our draw command one with + // different clip rectangle. 
+ // + // More investigation is needed. To get to the bottom of this. + if ((!m_DrawList->CmdBuffer.empty() && m_DrawList->CmdBuffer.back().ElemCount > 0) || m_DrawList->_Splitter._Count > 1) + m_DrawList->AddDrawCmd(); + +# if IMGUI_EX_CANVAS_DEFERED() + m_Ranges.resize(m_Ranges.Size + 1); + m_CurrentRange = &m_Ranges.back(); + m_CurrentRange->BeginComandIndex = ImMax(m_DrawList->CmdBuffer.Size - 1, 0); + m_CurrentRange->BeginVertexIndex = m_DrawList->_VtxCurrentIdx + ImVtxOffsetRef(m_DrawList); +# endif + m_DrawListCommadBufferSize = ImMax(m_DrawList->CmdBuffer.Size - 1, 0); + m_DrawListStartVertexIndex = m_DrawList->_VtxCurrentIdx + ImVtxOffsetRef(m_DrawList); + +# if defined(IMGUI_HAS_VIEWPORT) + auto viewport_min = m_ViewportPosBackup; + auto viewport_max = m_ViewportPosBackup + m_ViewportSizeBackup; + + viewport_min.x = (viewport_min.x - m_ViewTransformPosition.x) * m_View.InvScale; + viewport_min.y = (viewport_min.y - m_ViewTransformPosition.y) * m_View.InvScale; + viewport_max.x = (viewport_max.x - m_ViewTransformPosition.x) * m_View.InvScale; + viewport_max.y = (viewport_max.y - m_ViewTransformPosition.y) * m_View.InvScale; + + auto viewport = ImGui::GetWindowViewport(); + viewport->Pos = viewport_min; + viewport->Size = viewport_max - viewport_min; +# endif + + // Clip rectangle in parent canvas space and move it to local space. + clipped_clip_rect.x = (clipped_clip_rect.x - m_ViewTransformPosition.x) * m_View.InvScale; + clipped_clip_rect.y = (clipped_clip_rect.y - m_ViewTransformPosition.y) * m_View.InvScale; + clipped_clip_rect.z = (clipped_clip_rect.z - m_ViewTransformPosition.x) * m_View.InvScale; + clipped_clip_rect.w = (clipped_clip_rect.w - m_ViewTransformPosition.y) * m_View.InvScale; + ImGui::PushClipRect(ImVec2(clipped_clip_rect.x, clipped_clip_rect.y), ImVec2(clipped_clip_rect.z, clipped_clip_rect.w), false); + + // Transform mouse position to local space. 
+ auto& io = ImGui::GetIO(); + io.MousePos = (m_MousePosBackup - m_ViewTransformPosition) * m_View.InvScale; + io.MousePosPrev = (m_MousePosPrevBackup - m_ViewTransformPosition) * m_View.InvScale; + for (auto i = 0; i < IM_ARRAYSIZE(m_MouseClickedPosBackup); ++i) + io.MouseClickedPos[i] = (m_MouseClickedPosBackup[i] - m_ViewTransformPosition) * m_View.InvScale; + + m_ViewRect = CalcViewRect(m_View);; + + auto& fringeScale = ImFringeScaleRef(m_DrawList); + m_LastFringeScale = fringeScale; + fringeScale *= m_View.InvScale; +} + +void ImGuiEx::Canvas::LeaveLocalSpace() +{ + IM_ASSERT(m_DrawList->_Splitter._Current == m_ExpectedChannel); + +# if IMGUI_EX_CANVAS_DEFERED() + IM_ASSERT(m_CurrentRange != nullptr); + + m_CurrentRange->EndVertexIndex = m_DrawList->_VtxCurrentIdx + ImVtxOffsetRef(m_DrawList); + m_CurrentRange->EndCommandIndex = m_DrawList->CmdBuffer.size(); + if (m_CurrentRange->BeginVertexIndex == m_CurrentRange->EndVertexIndex) + { + // Drop empty range + m_Ranges.resize(m_Ranges.Size - 1); + } + m_CurrentRange = nullptr; +# endif + + // Move vertices to screen space. + auto vertex = m_DrawList->VtxBuffer.Data + m_DrawListStartVertexIndex; + auto vertexEnd = m_DrawList->VtxBuffer.Data + m_DrawList->_VtxCurrentIdx + ImVtxOffsetRef(m_DrawList); + + // If canvas view is not scaled take a faster path. + if (m_View.Scale != 1.0f) + { + while (vertex < vertexEnd) + { + vertex->pos.x = vertex->pos.x * m_View.Scale + m_ViewTransformPosition.x; + vertex->pos.y = vertex->pos.y * m_View.Scale + m_ViewTransformPosition.y; + ++vertex; + } + + // Move clip rectangles to screen space. 
+ for (int i = m_DrawListCommadBufferSize; i < m_DrawList->CmdBuffer.size(); ++i) + { + auto& command = m_DrawList->CmdBuffer[i]; + command.ClipRect.x = command.ClipRect.x * m_View.Scale + m_ViewTransformPosition.x; + command.ClipRect.y = command.ClipRect.y * m_View.Scale + m_ViewTransformPosition.y; + command.ClipRect.z = command.ClipRect.z * m_View.Scale + m_ViewTransformPosition.x; + command.ClipRect.w = command.ClipRect.w * m_View.Scale + m_ViewTransformPosition.y; + } + } + else + { + while (vertex < vertexEnd) + { + vertex->pos.x = vertex->pos.x + m_ViewTransformPosition.x; + vertex->pos.y = vertex->pos.y + m_ViewTransformPosition.y; + ++vertex; + } + + // Move clip rectangles to screen space. + for (int i = m_DrawListCommadBufferSize; i < m_DrawList->CmdBuffer.size(); ++i) + { + auto& command = m_DrawList->CmdBuffer[i]; + command.ClipRect.x = command.ClipRect.x + m_ViewTransformPosition.x; + command.ClipRect.y = command.ClipRect.y + m_ViewTransformPosition.y; + command.ClipRect.z = command.ClipRect.z + m_ViewTransformPosition.x; + command.ClipRect.w = command.ClipRect.w + m_ViewTransformPosition.y; + } + } + + auto& fringeScale = ImFringeScaleRef(m_DrawList); + fringeScale = m_LastFringeScale; + + // And pop \o/ + ImGui::PopClipRect(); + + RestoreInputState(); + RestoreViewportState(); +} diff --git a/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_canvas.h b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_canvas.h new file mode 100644 index 0000000..44f4347 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_canvas.h @@ -0,0 +1,258 @@ +// Canvas widget - view over infinite virtual space. +// +// Canvas allows you to draw your widgets anywhere over infinite space and provide +// view over it with support for panning and scaling. 
+// +// When you enter a canvas ImGui is moved to virtual space which means: +// - ImGui::GetCursorScreenPos() returns (0, 0), which corresponds to the top left corner +// of the canvas on the screen (this can be changed using CanvasView()). +// - Mouse input is brought to canvas space, so widgets work as usual. +// - Everything you draw with ImDrawList will be in virtual space. +// +// By default the origin point is at the top left corner of the canvas widget. It can be +// changed with a call to CanvasView() where you can specify what part of space +// should be viewed by setting viewport origin point and scale. Current state +// can be queried with CanvasViewOrigin() and CanvasViewScale(). +// +// Viewport size is controlled by the 'size' parameter in BeginCanvas(). You can query +// it using CanvasContentMin/Max/Size functions. They are useful if you do not specify +// canvas size, in which case all free space is used. +// +// Bounds of the visible region of infinite space can be queried using CanvasViewMin/Max/Size +// functions. Everything that is drawn outside of this region will be clipped +// as usual in ImGui. +// +// While drawing inside canvas you can translate position from world (usual ImGui space) +// to virtual space and back using CanvasFromWorld()/CanvasToWorld(). +// +// Canvases can be nested in each other (they are regular widgets after all). There +// is a way to transform position between current and parent canvas with +// CanvasFromParent()/CanvasToParent(). +// +// Sometimes in more elaborate scenarios you want to move out of canvas virtual space, +// do something and come back. You can do that with SuspendCanvas() and ResumeCanvas(). +// +// Note: +// It is not valid to call canvas API outside of BeginCanvas() / EndCanvas() scope. +// +// LICENSE +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// publish, and distribute this file as you see fit. 
+// +// CREDITS +// Written by Michal Cichon +# ifndef __IMGUI_EX_CANVAS_H__ +# define __IMGUI_EX_CANVAS_H__ +# pragma once + +# include +# include // ImRect, ImFloor + +namespace ImGuiEx { + +struct CanvasView +{ + ImVec2 Origin; + float Scale = 1.0f; + float InvScale = 1.0f; + + CanvasView() = default; + CanvasView(const ImVec2& origin, float scale) + : Origin(origin) + , Scale(scale) + , InvScale(scale ? 1.0f / scale : 0.0f) + { + } + + void Set(const ImVec2& origin, float scale) + { + *this = CanvasView(origin, scale); + } +}; + +// Canvas widget represents a view over an infinite plane. +// +// It acts like a child window without scroll bars with +// ability to zoom to specific part of canvas plane. +// +// Widgets are clipped according to current view exactly +// same way ImGui does. To avoid `missing widgets` artifacts first +// setup visible region with SetView() then draw content. +// +// Everything drawn with ImDrawList between calls to Begin()/End() +// will be drawn on canvas plane. This behavior can be suspended +// by calling Suspend() and resumed by calling Resume(). +// +// Warning: +// Please do not interleave canvas with use of channel splitter. +// Keep channel splitter contained inside canvas or always +// call canvas functions from same channel. +struct Canvas +{ + // Begins drawing content of canvas plane. + // + // When false is returned that means canvas is not visible to the + // user and drawing should be skipped and End() not called. + // When true is returned drawing must be ended with call to End(). + // + // If any size component is equal to zero or less canvas will + // automatically expand to all available area on that axis. + // So (0, 300) will take horizontal space and have height + // of 300 points. (0, 0) will take all remaining space of + // the window. + // + // You can query size of the canvas while it is being drawn + // by calling Rect(). 
+ bool Begin(const char* id, const ImVec2& size); + bool Begin(ImGuiID id, const ImVec2& size); + + // Ends interaction with canvas plane. + // + // Must be called only when Begin() returned true. + void End(); + + // Sets visible region of canvas plane. + // + // Origin is an offset of infinite plane origin from top left + // corner of the canvas. + // + // Scale greater than 1 makes canvas content bigger, less than 1 smaller. + void SetView(const ImVec2& origin, float scale); + void SetView(const CanvasView& view); + + // Centers view over specific point on canvas plane. + // + // View will be centered on specific point by changing origin + // but not scale. + void CenterView(const ImVec2& canvasPoint); + + // Calculates view over specific point on canvas plane. + CanvasView CalcCenterView(const ImVec2& canvasPoint) const; + + // Centers view over specific rectangle on canvas plane. + // + // Whole rectangle will fit in canvas view. This will affect both + // origin and scale. + void CenterView(const ImRect& canvasRect); + + // Calculates view over specific rectangle on canvas plane. + CanvasView CalcCenterView(const ImRect& canvasRect) const; + + // Suspends canvas by returning to normal ImGui transformation space. + // While suspended UI will not be drawn on canvas plane. + // + // Calls to Suspend()/Resume() are symmetrical. Each call to Suspend() + // must be matched with call to Resume(). + void Suspend(); + void Resume(); + + // Transforms point from canvas plane to ImGui. + ImVec2 FromLocal(const ImVec2& point) const; + ImVec2 FromLocal(const ImVec2& point, const CanvasView& view) const; + + // Transforms vector from canvas plane to ImGui. + ImVec2 FromLocalV(const ImVec2& vector) const; + ImVec2 FromLocalV(const ImVec2& vector, const CanvasView& view) const; + + // Transforms point from ImGui to canvas plane. 
+ ImVec2 ToLocal(const ImVec2& point) const; + ImVec2 ToLocal(const ImVec2& point, const CanvasView& view) const; + + // Transforms vector from ImGui to canvas plane. + ImVec2 ToLocalV(const ImVec2& vector) const; + ImVec2 ToLocalV(const ImVec2& vector, const CanvasView& view) const; + + // Returns widget bounds. + // + // Note: + // Rect is valid after call to Begin(). + const ImRect& Rect() const { return m_WidgetRect; } + + // Returns visible region on canvas plane (in canvas plane coordinates). + const ImRect& ViewRect() const { return m_ViewRect; } + + // Calculates visible region for view. + ImRect CalcViewRect(const CanvasView& view) const; + + // Returns current view. + const CanvasView& View() const { return m_View; } + + // Returns origin of the view. + // + // Origin is an offset of infinite plane origin from top left + // corner of the canvas. + const ImVec2& ViewOrigin() const { return m_View.Origin; } + + // Returns scale of the view. + float ViewScale() const { return m_View.Scale; } + + // Returns true if canvas is suspended. 
+ // + // See: Suspend()/Resume() + bool IsSuspended() const { return m_SuspendCounter > 0; } + +private: +# define IMGUI_EX_CANVAS_DEFERED() 0 + +# if IMGUI_EX_CANVAS_DEFERED() + struct Range + { + int BeginVertexIndex = 0; + int EndVertexIndex = 0; + int BeginComandIndex = 0; + int EndCommandIndex = 0; + }; +# endif + + void UpdateViewTransformPosition(); + + void SaveInputState(); + void RestoreInputState(); + + void SaveViewportState(); + void RestoreViewportState(); + + void EnterLocalSpace(); + void LeaveLocalSpace(); + + bool m_InBeginEnd = false; + + ImVec2 m_WidgetPosition; + ImVec2 m_WidgetSize; + ImRect m_WidgetRect; + + ImDrawList* m_DrawList = nullptr; + int m_ExpectedChannel = 0; + +# if IMGUI_EX_CANVAS_DEFERED() + ImVector m_Ranges; + Range* m_CurrentRange = nullptr; +# endif + + int m_DrawListCommadBufferSize = 0; + int m_DrawListStartVertexIndex = 0; + + CanvasView m_View; + ImRect m_ViewRect; + + ImVec2 m_ViewTransformPosition; + + int m_SuspendCounter = 0; + + float m_LastFringeScale = 1.0f; + + ImVec2 m_MousePosBackup; + ImVec2 m_MousePosPrevBackup; + ImVec2 m_MouseClickedPosBackup[IM_ARRAYSIZE(ImGuiIO::MouseClickedPos)]; + ImVec2 m_WindowCursorMaxBackup; + +# if defined(IMGUI_HAS_VIEWPORT) + ImVec2 m_ViewportPosBackup; + ImVec2 m_ViewportSizeBackup; +# endif +}; + +} // namespace ImGuiEx + +# endif // __IMGUI_EX_CANVAS_H__ \ No newline at end of file diff --git a/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_extra_math.h b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_extra_math.h new file mode 100644 index 0000000..2a3a2fe --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_extra_math.h @@ -0,0 +1,71 @@ +//------------------------------------------------------------------------------ +// LICENSE +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// publish, and distribute this file as 
//------------------------------------------------------------------------------
namespace ImEasing {

// Quadratic ease-out.
//
//   b - value returned for t == 0
//   c - total change; the result is b + c for t == 1
//   t - interpolation parameter
//
// Evaluates b - c * t * (t - 2), which is the same as b + c * (1 - (1 - t)^2).
// V is the value type being interpolated, T the type of the parameter.
template <typename V, typename T>
inline V EaseOutQuad(V b, V c, T t)
{
    return b - c * (t * (t - 2));
}

} // namespace ImEasing
+//------------------------------------------------------------------------------ +# include "imgui_extra_math.inl" + + +//------------------------------------------------------------------------------ +# endif // __IMGUI_EXTRA_MATH_H__ diff --git a/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_extra_math.inl b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_extra_math.inl new file mode 100644 index 0000000..18fb25e --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_extra_math.inl @@ -0,0 +1,187 @@ +//------------------------------------------------------------------------------ +// LICENSE +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// publish, and distribute this file as you see fit. +// +// CREDITS +// Written by Michal Cichon +//------------------------------------------------------------------------------ +# ifndef __IMGUI_EXTRA_MATH_INL__ +# define __IMGUI_EXTRA_MATH_INL__ +# pragma once + + +//------------------------------------------------------------------------------ +# include "imgui_extra_math.h" + + +//------------------------------------------------------------------------------ +inline bool operator==(const ImVec2& lhs, const ImVec2& rhs) +{ + return lhs.x == rhs.x && lhs.y == rhs.y; +} + +inline bool operator!=(const ImVec2& lhs, const ImVec2& rhs) +{ + return lhs.x != rhs.x || lhs.y != rhs.y; +} + +inline ImVec2 operator*(const float lhs, const ImVec2& rhs) +{ + return ImVec2(lhs * rhs.x, lhs * rhs.y); +} + +inline ImVec2 operator-(const ImVec2& lhs) +{ + return ImVec2(-lhs.x, -lhs.y); +} + + +//------------------------------------------------------------------------------ +inline float ImLength(float v) +{ + return v; +} + +inline float ImLength(const ImVec2& v) +{ + return ImSqrt(ImLengthSqr(v)); +} + +inline float ImLengthSqr(float v) +{ + return v * v; +} + +inline ImVec2 
ImNormalized(const ImVec2& v) +{ + return v * ImInvLength(v, 0.0f); +} + + + + +//------------------------------------------------------------------------------ +inline bool ImRect_IsEmpty(const ImRect& rect) +{ + return rect.Min.x >= rect.Max.x + || rect.Min.y >= rect.Max.y; +} + +inline ImVec2 ImRect_ClosestPoint(const ImRect& rect, const ImVec2& p, bool snap_to_edge) +{ + if (!snap_to_edge && rect.Contains(p)) + return p; + + return ImVec2( + (p.x > rect.Max.x) ? rect.Max.x : (p.x < rect.Min.x ? rect.Min.x : p.x), + (p.y > rect.Max.y) ? rect.Max.y : (p.y < rect.Min.y ? rect.Min.y : p.y) + ); +} + +inline ImVec2 ImRect_ClosestPoint(const ImRect& rect, const ImVec2& p, bool snap_to_edge, float radius) +{ + auto point = ImRect_ClosestPoint(rect, p, snap_to_edge); + + const auto offset = p - point; + const auto distance_sq = offset.x * offset.x + offset.y * offset.y; + if (distance_sq <= 0) + return point; + + const auto distance = ImSqrt(distance_sq); + + return point + offset * (ImMin(distance, radius) * (1.0f / distance)); +} + +inline ImVec2 ImRect_ClosestPoint(const ImRect& rect, const ImRect& other) +{ + ImVec2 result; + if (other.Min.x >= rect.Max.x) + result.x = rect.Max.x; + else if (other.Max.x <= rect.Min.x) + result.x = rect.Min.x; + else + result.x = (ImMax(rect.Min.x, other.Min.x) + ImMin(rect.Max.x, other.Max.x)) / 2; + + if (other.Min.y >= rect.Max.y) + result.y = rect.Max.y; + else if (other.Max.y <= rect.Min.y) + result.y = rect.Min.y; + else + result.y = (ImMax(rect.Min.y, other.Min.y) + ImMin(rect.Max.y, other.Max.y)) / 2; + + return result; +} + +inline ImLine ImRect_ClosestLine(const ImRect& rect_a, const ImRect& rect_b) +{ + ImLine result; + result.A = ImRect_ClosestPoint(rect_a, rect_b); + result.B = ImRect_ClosestPoint(rect_b, rect_a); + + auto distribute = [](float& a, float& b, float a0, float a1, float b0, float b1) + { + if (a0 >= b1 || a1 <= b0) + return; + + const auto aw = a1 - a0; + const auto bw = b1 - b0; + + if (aw > bw) + { + b = 
b0 + bw - bw * (a - a0) / aw; + a = b; + } + else if (aw < bw) + { + a = a0 + aw - aw * (b - b0) / bw; + b = a; + } + }; + + distribute(result.A.x, result.B.x, rect_a.Min.x, rect_a.Max.x, rect_b.Min.x, rect_b.Max.x); + distribute(result.A.y, result.B.y, rect_a.Min.y, rect_a.Max.y, rect_b.Min.y, rect_b.Max.y); + + return result; +} + +inline ImLine ImRect_ClosestLine(const ImRect& rect_a, const ImRect& rect_b, float radius_a, float radius_b) +{ + auto line = ImRect_ClosestLine(rect_a, rect_b); + if (radius_a < 0) + radius_a = 0; + if (radius_b < 0) + radius_b = 0; + + if (radius_a == 0 && radius_b == 0) + return line; + + const auto offset = line.B - line.A; + const auto length_sq = offset.x * offset.x + offset.y * offset.y; + const auto radius_a_sq = radius_a * radius_a; + const auto radius_b_sq = radius_b * radius_b; + + if (length_sq <= 0) + return line; + + const auto length = ImSqrt(length_sq); + const auto direction = ImVec2(offset.x / length, offset.y / length); + + const auto total_radius_sq = radius_a_sq + radius_b_sq; + if (total_radius_sq > length_sq) + { + const auto scale = length / (radius_a + radius_b); + radius_a *= scale; + radius_b *= scale; + } + + line.A = line.A + (direction * radius_a); + line.B = line.B - (direction * radius_b); + + return line; +} + + +//------------------------------------------------------------------------------ +# endif // __IMGUI_EXTRA_MATH_INL__ diff --git a/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_node_editor.cpp b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_node_editor.cpp new file mode 100644 index 0000000..c1911c3 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_node_editor.cpp @@ -0,0 +1,5316 @@ +//------------------------------------------------------------------------------ +// LICENSE +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// 
publish, and distribute this file as you see fit. +// +// CREDITS +// Written by Michal Cichon +//------------------------------------------------------------------------------ +# include "imgui_node_editor_internal.h" +# include // snprintf +# include +# include +# include +# include +# include +# include +# include +# include + +// https://stackoverflow.com/a/8597498 +# define DECLARE_HAS_NESTED(Name, Member) \ + \ + template \ + struct has_nested_ ## Name \ + { \ + typedef char yes; \ + typedef yes(&no)[2]; \ + \ + template static yes test(decltype(U::Member)*); \ + template static no test(...); \ + \ + static bool const value = sizeof(test(0)) == sizeof(yes); \ + }; + + +namespace ax { +namespace NodeEditor { +namespace Detail { + +# define DECLARE_KEY_TESTER(Key) \ + DECLARE_HAS_NESTED(Key, Key) \ + struct KeyTester_ ## Key \ + { \ + template \ + static int Get(typename std::enable_if::value, T>::type*) \ + { \ + return ImGui::GetKeyIndex(T::Key); \ + } \ + \ + template \ + static int Get(typename std::enable_if::value, T>::type*) \ + { \ + return -1; \ + } \ + } + +DECLARE_KEY_TESTER(ImGuiKey_F); +DECLARE_KEY_TESTER(ImGuiKey_D); + +static inline int GetKeyIndexForF() +{ + return KeyTester_ImGuiKey_F::Get(nullptr); +} + +static inline int GetKeyIndexForD() +{ + return KeyTester_ImGuiKey_D::Get(nullptr); +} + +} // namespace Detail +} // namespace NodeEditor +} // namespace ax + + +//------------------------------------------------------------------------------ +namespace ed = ax::NodeEditor::Detail; + + +//------------------------------------------------------------------------------ +static const int c_BackgroundChannelCount = 1; +static const int c_LinkChannelCount = 4; +static const int c_UserLayersCount = 5; + +static const int c_UserLayerChannelStart = 0; +static const int c_BackgroundChannelStart = c_UserLayerChannelStart + c_UserLayersCount; +static const int c_LinkStartChannel = c_BackgroundChannelStart + c_BackgroundChannelCount; +static const int 
c_NodeStartChannel = c_LinkStartChannel + c_LinkChannelCount; + +static const int c_BackgroundChannel_SelectionRect = c_BackgroundChannelStart + 0; + +static const int c_UserChannel_Content = c_UserLayerChannelStart + 1; +static const int c_UserChannel_Grid = c_UserLayerChannelStart + 2; +static const int c_UserChannel_HintsBackground = c_UserLayerChannelStart + 3; +static const int c_UserChannel_Hints = c_UserLayerChannelStart + 4; + +static const int c_LinkChannel_Selection = c_LinkStartChannel + 0; +static const int c_LinkChannel_Links = c_LinkStartChannel + 1; +static const int c_LinkChannel_Flow = c_LinkStartChannel + 2; +static const int c_LinkChannel_NewLink = c_LinkStartChannel + 3; + +static const int c_ChannelsPerNode = 5; +static const int c_NodeBaseChannel = 0; +static const int c_NodeBackgroundChannel = 1; +static const int c_NodeUserBackgroundChannel = 2; +static const int c_NodePinChannel = 3; +static const int c_NodeContentChannel = 4; + +static const float c_GroupSelectThickness = 6.0f; // canvas pixels +static const float c_LinkSelectThickness = 5.0f; // canvas pixels +static const float c_NavigationZoomMargin = 0.1f; // percentage of visible bounds +static const float c_MouseZoomDuration = 0.15f; // seconds +static const float c_SelectionFadeOutDuration = 0.15f; // seconds +static const auto c_ScrollButtonIndex = 1; + + +//------------------------------------------------------------------------------ +# if defined(_DEBUG) && defined(_WIN32) +extern "C" __declspec(dllimport) void __stdcall OutputDebugStringA(const char* string); + +static void LogV(const char* fmt, va_list args) +{ + const int buffer_size = 1024; + static char buffer[1024]; + + vsnprintf(buffer, buffer_size - 1, fmt, args); + buffer[buffer_size - 1] = 0; + + ImGui::LogText("\nNode Editor: %s", buffer); + + OutputDebugStringA("NodeEditor: "); + OutputDebugStringA(buffer); + OutputDebugStringA("\n"); +} +# endif + +void ed::Log(const char* fmt, ...) 
+{ +# if defined(_DEBUG) && defined(_WIN32) + va_list args; + va_start(args, fmt); + LogV(fmt, args); + va_end(args); +# endif +} + + +//------------------------------------------------------------------------------ +static bool IsGroup(const ed::Node* node) +{ + if (node && node->m_Type == ed::NodeType::Group) + return true; + else + return false; +} + + +//------------------------------------------------------------------------------ +static void ImDrawListSplitter_Grow(ImDrawList* draw_list, ImDrawListSplitter* splitter, int channels_count) +{ + IM_ASSERT(splitter != nullptr); + IM_ASSERT(splitter->_Count <= channels_count); + + if (splitter->_Count == 1) + { + splitter->Split(draw_list, channels_count); + return; + } + + int old_channels_count = splitter->_Channels.Size; + if (old_channels_count < channels_count) + splitter->_Channels.resize(channels_count); + int old_used_channels_count = splitter->_Count; + splitter->_Count = channels_count; + + for (int i = old_used_channels_count; i < channels_count; i++) + { + if (i >= old_channels_count) + { + IM_PLACEMENT_NEW(&splitter->_Channels[i]) ImDrawChannel(); + } + else + { + splitter->_Channels[i]._CmdBuffer.resize(0); + splitter->_Channels[i]._IdxBuffer.resize(0); + } + if (splitter->_Channels[i]._CmdBuffer.Size == 0) + { + ImDrawCmd draw_cmd; + draw_cmd.ClipRect = draw_list->_ClipRectStack.back(); + draw_cmd.TextureId = draw_list->_TextureIdStack.back(); + splitter->_Channels[i]._CmdBuffer.push_back(draw_cmd); + } + } +} + +static void ImDrawList_ChannelsGrow(ImDrawList* draw_list, int channels_count) +{ + ImDrawListSplitter_Grow(draw_list, &draw_list->_Splitter, channels_count); +} + +static void ImDrawListSplitter_SwapChannels(ImDrawListSplitter* splitter, int left, int right) +{ + IM_ASSERT(left < splitter->_Count && right < splitter->_Count); + if (left == right) + return; + + auto currentChannel = splitter->_Current; + + auto* leftCmdBuffer = &splitter->_Channels[left]._CmdBuffer; + auto* leftIdxBuffer = 
&splitter->_Channels[left]._IdxBuffer; + auto* rightCmdBuffer = &splitter->_Channels[right]._CmdBuffer; + auto* rightIdxBuffer = &splitter->_Channels[right]._IdxBuffer; + + leftCmdBuffer->swap(*rightCmdBuffer); + leftIdxBuffer->swap(*rightIdxBuffer); + + if (currentChannel == left) + splitter->_Current = right; + else if (currentChannel == right) + splitter->_Current = left; +} + +static void ImDrawList_SwapChannels(ImDrawList* drawList, int left, int right) +{ + ImDrawListSplitter_SwapChannels(&drawList->_Splitter, left, right); +} + +static void ImDrawList_SwapSplitter(ImDrawList* drawList, ImDrawListSplitter& splitter) +{ + auto& currentSplitter = drawList->_Splitter; + + std::swap(currentSplitter._Current, splitter._Current); + std::swap(currentSplitter._Count, splitter._Count); + currentSplitter._Channels.swap(splitter._Channels); +} + +//static void ImDrawList_TransformChannel_Inner(ImVector& vtxBuffer, const ImVector& idxBuffer, const ImVector& cmdBuffer, const ImVec2& preOffset, const ImVec2& scale, const ImVec2& postOffset) +//{ +// auto idxRead = idxBuffer.Data; +// +// int indexOffset = 0; +// for (auto& cmd : cmdBuffer) +// { +// auto idxCount = cmd.ElemCount; +// +// if (idxCount == 0) continue; +// +// auto minIndex = idxRead[indexOffset]; +// auto maxIndex = idxRead[indexOffset]; +// +// for (auto i = 1u; i < idxCount; ++i) +// { +// auto idx = idxRead[indexOffset + i]; +// minIndex = std::min(minIndex, idx); +// maxIndex = ImMax(maxIndex, idx); +// } +// +// for (auto vtx = vtxBuffer.Data + minIndex, vtxEnd = vtxBuffer.Data + maxIndex + 1; vtx < vtxEnd; ++vtx) +// { +// vtx->pos.x = (vtx->pos.x + preOffset.x) * scale.x + postOffset.x; +// vtx->pos.y = (vtx->pos.y + preOffset.y) * scale.y + postOffset.y; +// } +// +// indexOffset += idxCount; +// } +//} + +//static void ImDrawList_TransformChannels(ImDrawList* drawList, int begin, int end, const ImVec2& preOffset, const ImVec2& scale, const ImVec2& postOffset) +//{ +// int lastCurrentChannel = 
drawList->_ChannelsCurrent; +// if (lastCurrentChannel != 0) +// drawList->ChannelsSetCurrent(0); +// +// auto& vtxBuffer = drawList->VtxBuffer; +// +// if (begin == 0 && begin != end) +// { +// ImDrawList_TransformChannel_Inner(vtxBuffer, drawList->IdxBuffer, drawList->CmdBuffer, preOffset, scale, postOffset); +// ++begin; +// } +// +// for (int channelIndex = begin; channelIndex < end; ++channelIndex) +// { +// auto& channel = drawList->_Channels[channelIndex]; +// ImDrawList_TransformChannel_Inner(vtxBuffer, channel.IdxBuffer, channel.CmdBuffer, preOffset, scale, postOffset); +// } +// +// if (lastCurrentChannel != 0) +// drawList->ChannelsSetCurrent(lastCurrentChannel); +//} + +//static void ImDrawList_ClampClipRects_Inner(ImVector& cmdBuffer, const ImVec4& clipRect, const ImVec2& offset) +//{ +// for (auto& cmd : cmdBuffer) +// { +// cmd.ClipRect.x = ImMax(cmd.ClipRect.x + offset.x, clipRect.x); +// cmd.ClipRect.y = ImMax(cmd.ClipRect.y + offset.y, clipRect.y); +// cmd.ClipRect.z = std::min(cmd.ClipRect.z + offset.x, clipRect.z); +// cmd.ClipRect.w = std::min(cmd.ClipRect.w + offset.y, clipRect.w); +// } +//} + +//static void ImDrawList_TranslateAndClampClipRects(ImDrawList* drawList, int begin, int end, const ImVec2& offset) +//{ +// int lastCurrentChannel = drawList->_ChannelsCurrent; +// if (lastCurrentChannel != 0) +// drawList->ChannelsSetCurrent(0); +// +// auto clipRect = drawList->_ClipRectStack.back(); +// +// if (begin == 0 && begin != end) +// { +// ImDrawList_ClampClipRects_Inner(drawList->CmdBuffer, clipRect, offset); +// ++begin; +// } +// +// for (int channelIndex = begin; channelIndex < end; ++channelIndex) +// { +// auto& channel = drawList->_Channels[channelIndex]; +// ImDrawList_ClampClipRects_Inner(channel.CmdBuffer, clipRect, offset); +// } +// +// if (lastCurrentChannel != 0) +// drawList->ChannelsSetCurrent(lastCurrentChannel); +//} + +static void ImDrawList_PathBezierOffset(ImDrawList* drawList, float offset, const ImVec2& p0, const 
ImVec2& p1, const ImVec2& p2, const ImVec2& p3) +{ + using namespace ed; + + auto acceptPoint = [drawList, offset](const ImCubicBezierSubdivideSample& r) + { + drawList->PathLineTo(r.Point + ImNormalized(ImVec2(-r.Tangent.y, r.Tangent.x)) * offset); + }; + + ImCubicBezierSubdivide(acceptPoint, p0, p1, p2, p3); +} + +/* +static void ImDrawList_PolyFillScanFlood(ImDrawList *draw, std::vector* poly, ImColor color, int gap = 1, float strokeWidth = 1.0f) +{ + std::vector scanHits; + ImVec2 min, max; // polygon min/max points + auto io = ImGui::GetIO(); + float y; + bool isMinMaxDone = false; + unsigned int polysize = poly->size(); + + // find the orthagonal bounding box + // probably can put this as a predefined + if (!isMinMaxDone) + { + min.x = min.y = FLT_MAX; + max.x = max.y = FLT_MIN; + for (auto p : *poly) + { + if (p.x < min.x) min.x = p.x; + if (p.y < min.y) min.y = p.y; + if (p.x > max.x) max.x = p.x; + if (p.y > max.y) max.y = p.y; + } + isMinMaxDone = true; + } + + // Bounds check + if ((max.x < 0) || (min.x > io.DisplaySize.x) || (max.y < 0) || (min.y > io.DisplaySize.y)) return; + + // Vertically clip + if (min.y < 0) min.y = 0; + if (max.y > io.DisplaySize.y) max.y = io.DisplaySize.y; + + // so we know we start on the outside of the object we step out by 1. 
+ min.x -= 1; + max.x += 1; + + // Initialise our starting conditions + y = min.y; + + // Go through each scan line iteratively, jumping by 'gap' pixels each time + while (y < max.y) + { + scanHits.clear(); + + { + int jump = 1; + ImVec2 fp = poly->at(0); + + for (size_t i = 0; i < polysize - 1; i++) + { + ImVec2 pa = poly->at(i); + ImVec2 pb = poly->at(i + 1); + + // jump double/dud points + if (pa.x == pb.x && pa.y == pb.y) continue; + + // if we encounter our hull/poly start point, then we've now created the + // closed + // hull, jump the next segment and reset the first-point + if ((!jump) && (fp.x == pb.x) && (fp.y == pb.y)) + { + if (i < polysize - 2) + { + fp = poly->at(i + 2); + jump = 1; + i++; + } + } + else + { + jump = 0; + } + + // test to see if this segment makes the scan-cut. + if ((pa.y > pb.y && y < pa.y && y > pb.y) || (pa.y < pb.y && y > pa.y && y < pb.y)) + { + ImVec2 intersect; + + intersect.y = y; + if (pa.x == pb.x) + { + intersect.x = pa.x; + } + else + { + intersect.x = (pb.x - pa.x) / (pb.y - pa.y) * (y - pa.y) + pa.x; + } + scanHits.push_back(intersect); + } + } + + // Sort the scan hits by X, so we have a proper left->right ordering + sort(scanHits.begin(), scanHits.end(), [](ImVec2 const &a, ImVec2 const &b) { return a.x < b.x; }); + + // generate the line segments. + { + int i = 0; + int l = scanHits.size() - 1; // we need pairs of points, this prevents segfault. 
+ for (i = 0; i < l; i += 2) + { + draw->AddLine(scanHits[i], scanHits[i + 1], color, strokeWidth); + } + } + } + y += gap; + } // for each scan line + scanHits.clear(); +} +*/ + +static void ImDrawList_AddBezierWithArrows(ImDrawList* drawList, const ImCubicBezierPoints& curve, float thickness, + float startArrowSize, float startArrowWidth, float endArrowSize, float endArrowWidth, + bool fill, ImU32 color, float strokeThickness) +{ + using namespace ax; + + if ((color >> 24) == 0) + return; + + const auto half_thickness = thickness * 0.5f; + + if (fill) + { + drawList->AddBezierCurve(curve.P0, curve.P1, curve.P2, curve.P3, color, thickness); + + if (startArrowSize > 0.0f) + { + const auto start_dir = ImNormalized(ImCubicBezierTangent(curve.P0, curve.P1, curve.P2, curve.P3, 0.0f)); + const auto start_n = ImVec2(-start_dir.y, start_dir.x); + const auto half_width = startArrowWidth * 0.5f; + const auto tip = curve.P0 - start_dir * startArrowSize; + + drawList->PathLineTo(curve.P0 - start_n * ImMax(half_width, half_thickness)); + drawList->PathLineTo(curve.P0 + start_n * ImMax(half_width, half_thickness)); + drawList->PathLineTo(tip); + drawList->PathFillConvex(color); + } + + if (endArrowSize > 0.0f) + { + const auto end_dir = ImNormalized(ImCubicBezierTangent(curve.P0, curve.P1, curve.P2, curve.P3, 1.0f)); + const auto end_n = ImVec2( -end_dir.y, end_dir.x); + const auto half_width = endArrowWidth * 0.5f; + const auto tip = curve.P3 + end_dir * endArrowSize; + + drawList->PathLineTo(curve.P3 + end_n * ImMax(half_width, half_thickness)); + drawList->PathLineTo(curve.P3 - end_n * ImMax(half_width, half_thickness)); + drawList->PathLineTo(tip); + drawList->PathFillConvex(color); + } + } + else + { + if (startArrowSize > 0.0f) + { + const auto start_dir = ImNormalized(ImCubicBezierTangent(curve.P0, curve.P1, curve.P2, curve.P3, 0.0f)); + const auto start_n = ImVec2(-start_dir.y, start_dir.x); + const auto half_width = startArrowWidth * 0.5f; + const auto tip = curve.P0 - 
start_dir * startArrowSize; + + if (half_width > half_thickness) + drawList->PathLineTo(curve.P0 - start_n * half_width); + drawList->PathLineTo(tip); + if (half_width > half_thickness) + drawList->PathLineTo(curve.P0 + start_n * half_width); + } + + ImDrawList_PathBezierOffset(drawList, half_thickness, curve.P0, curve.P1, curve.P2, curve.P3); + + if (endArrowSize > 0.0f) + { + const auto end_dir = ImNormalized(ImCubicBezierTangent(curve.P0, curve.P1, curve.P2, curve.P3, 1.0f)); + const auto end_n = ImVec2( -end_dir.y, end_dir.x); + const auto half_width = endArrowWidth * 0.5f; + const auto tip = curve.P3 + end_dir * endArrowSize; + + if (half_width > half_thickness) + drawList->PathLineTo(curve.P3 + end_n * half_width); + drawList->PathLineTo(tip); + if (half_width > half_thickness) + drawList->PathLineTo(curve.P3 - end_n * half_width); + } + + ImDrawList_PathBezierOffset(drawList, half_thickness, curve.P3, curve.P2, curve.P1, curve.P0); + + drawList->PathStroke(color, true, strokeThickness); + } +} + + + + +//------------------------------------------------------------------------------ +// +// Pin +// +//------------------------------------------------------------------------------ +void ed::Pin::Draw(ImDrawList* drawList, DrawFlags flags) +{ + if (flags & Hovered) + { + drawList->ChannelsSetCurrent(m_Node->m_Channel + c_NodePinChannel); + + drawList->AddRectFilled(m_Bounds.Min, m_Bounds.Max, + m_Color, m_Rounding, m_Corners); + + if (m_BorderWidth > 0.0f) + { + FringeScaleScope fringe(1.0f); + drawList->AddRect(m_Bounds.Min, m_Bounds.Max, + m_BorderColor, m_Rounding, m_Corners, m_BorderWidth); + } + + if (!Editor->IsSelected(m_Node)) + m_Node->Draw(drawList, flags); + } +} + +ImVec2 ed::Pin::GetClosestPoint(const ImVec2& p) const +{ + return ImRect_ClosestPoint(m_Pivot, p, true, m_Radius + m_ArrowSize); +} + +ImLine ed::Pin::GetClosestLine(const Pin* pin) const +{ + return ImRect_ClosestLine(m_Pivot, pin->m_Pivot, m_Radius + m_ArrowSize, pin->m_Radius + 
pin->m_ArrowSize); +} + + + + +//------------------------------------------------------------------------------ +// +// Node +// +//------------------------------------------------------------------------------ +bool ed::Node::AcceptDrag() +{ + m_DragStart = m_Bounds.Min; + return true; +} + +void ed::Node::UpdateDrag(const ImVec2& offset) +{ + auto size = m_Bounds.GetSize(); + m_Bounds.Min = ImFloor(m_DragStart + offset); + m_Bounds.Max = m_Bounds.Min + size; +} + +bool ed::Node::EndDrag() +{ + return m_Bounds.Min != m_DragStart; +} + +void ed::Node::Draw(ImDrawList* drawList, DrawFlags flags) +{ + if (flags == Detail::Object::None) + { + drawList->ChannelsSetCurrent(m_Channel + c_NodeBackgroundChannel); + + drawList->AddRectFilled( + m_Bounds.Min, + m_Bounds.Max, + m_Color, m_Rounding); + + if (IsGroup(this)) + { + drawList->AddRectFilled( + m_GroupBounds.Min, + m_GroupBounds.Max, + m_GroupColor, m_GroupRounding); + + if (m_GroupBorderWidth > 0.0f) + { + FringeScaleScope fringe(1.0f); + + drawList->AddRect( + m_GroupBounds.Min, + m_GroupBounds.Max, + m_GroupBorderColor, m_GroupRounding, 15, m_GroupBorderWidth); + } + } + +# if 0 + // #debug: highlight group regions + auto drawRect = [drawList](const ImRect& rect, ImU32 color) + { + if (ImRect_IsEmpty(rect)) return; + drawList->AddRectFilled(rect.Min, rect.Max, color); + }; + + drawRect(GetRegionBounds(NodeRegion::Top), IM_COL32(255, 0, 0, 64)); + drawRect(GetRegionBounds(NodeRegion::Bottom), IM_COL32(255, 0, 0, 64)); + drawRect(GetRegionBounds(NodeRegion::Left), IM_COL32(0, 255, 0, 64)); + drawRect(GetRegionBounds(NodeRegion::Right), IM_COL32(0, 255, 0, 64)); + drawRect(GetRegionBounds(NodeRegion::TopLeft), IM_COL32(255, 0, 255, 64)); + drawRect(GetRegionBounds(NodeRegion::TopRight), IM_COL32(255, 0, 255, 64)); + drawRect(GetRegionBounds(NodeRegion::BottomLeft), IM_COL32(255, 0, 255, 64)); + drawRect(GetRegionBounds(NodeRegion::BottomRight), IM_COL32(255, 0, 255, 64)); + 
drawRect(GetRegionBounds(NodeRegion::Center), IM_COL32(0, 0, 255, 64)); + drawRect(GetRegionBounds(NodeRegion::Header), IM_COL32(0, 255, 255, 64)); +# endif + + DrawBorder(drawList, m_BorderColor, m_BorderWidth); + } + else if (flags & Selected) + { + const auto borderColor = Editor->GetColor(StyleColor_SelNodeBorder); + const auto& editorStyle = Editor->GetStyle(); + + drawList->ChannelsSetCurrent(m_Channel + c_NodeBaseChannel); + + DrawBorder(drawList, borderColor, editorStyle.SelectedNodeBorderWidth); + } + else if (!IsGroup(this) && (flags & Hovered)) + { + const auto borderColor = Editor->GetColor(StyleColor_HovNodeBorder); + const auto& editorStyle = Editor->GetStyle(); + + drawList->ChannelsSetCurrent(m_Channel + c_NodeBaseChannel); + + DrawBorder(drawList, borderColor, editorStyle.HoveredNodeBorderWidth); + } +} + +void ed::Node::DrawBorder(ImDrawList* drawList, ImU32 color, float thickness) +{ + if (thickness > 0.0f) + { + drawList->AddRect(m_Bounds.Min, m_Bounds.Max, + color, m_Rounding, 15, thickness); + } +} + +void ed::Node::GetGroupedNodes(std::vector& result, bool append) +{ + if (!append) + result.resize(0); + + if (!IsGroup(this)) + return; + + const auto firstNodeIndex = result.size(); + Editor->FindNodesInRect(m_GroupBounds, result, true, false); + + for (auto index = firstNodeIndex; index < result.size(); ++index) + result[index]->GetGroupedNodes(result, true); +} + +ImRect ed::Node::GetRegionBounds(NodeRegion region) const +{ + if (m_Type == NodeType::Node) + { + if (region == NodeRegion::Header) + return m_Bounds; + } + else if (m_Type == NodeType::Group) + { + const float activeAreaMinimumSize = ImMax(ImMax( + Editor->GetView().InvScale * c_GroupSelectThickness, + m_GroupBorderWidth), c_GroupSelectThickness); + const float minimumSize = activeAreaMinimumSize * 5; + + auto bounds = m_Bounds; + if (bounds.GetWidth() < minimumSize) + bounds.Expand(ImVec2(minimumSize - bounds.GetWidth(), 0.0f)); + if (bounds.GetHeight() < minimumSize) + 
bounds.Expand(ImVec2(0.0f, minimumSize - bounds.GetHeight())); + + if (region == NodeRegion::Top) + { + bounds.Max.y = bounds.Min.y + activeAreaMinimumSize; + bounds.Min.x += activeAreaMinimumSize; + bounds.Max.x -= activeAreaMinimumSize; + return bounds; + } + else if (region == NodeRegion::Bottom) + { + bounds.Min.y = bounds.Max.y - activeAreaMinimumSize; + bounds.Min.x += activeAreaMinimumSize; + bounds.Max.x -= activeAreaMinimumSize; + return bounds; + } + else if (region == NodeRegion::Left) + { + bounds.Max.x = bounds.Min.x + activeAreaMinimumSize; + bounds.Min.y += activeAreaMinimumSize; + bounds.Max.y -= activeAreaMinimumSize; + return bounds; + } + else if (region == NodeRegion::Right) + { + bounds.Min.x = bounds.Max.x - activeAreaMinimumSize; + bounds.Min.y += activeAreaMinimumSize; + bounds.Max.y -= activeAreaMinimumSize; + return bounds; + } + else if (region == NodeRegion::TopLeft) + { + bounds.Max.x = bounds.Min.x + activeAreaMinimumSize * 2; + bounds.Max.y = bounds.Min.y + activeAreaMinimumSize * 2; + return bounds; + } + else if (region == NodeRegion::TopRight) + { + bounds.Min.x = bounds.Max.x - activeAreaMinimumSize * 2; + bounds.Max.y = bounds.Min.y + activeAreaMinimumSize * 2; + return bounds; + } + else if (region == NodeRegion::BottomRight) + { + bounds.Min.x = bounds.Max.x - activeAreaMinimumSize * 2; + bounds.Min.y = bounds.Max.y - activeAreaMinimumSize * 2; + return bounds; + } + else if (region == NodeRegion::BottomLeft) + { + bounds.Max.x = bounds.Min.x + activeAreaMinimumSize * 2; + bounds.Min.y = bounds.Max.y - activeAreaMinimumSize * 2; + return bounds; + } + else if (region == NodeRegion::Header) + { + bounds.Min.x += activeAreaMinimumSize; + bounds.Max.x -= activeAreaMinimumSize; + bounds.Min.y += activeAreaMinimumSize; + bounds.Max.y = ImMax(bounds.Min.y + activeAreaMinimumSize, m_GroupBounds.Min.y); + return bounds; + } + else if (region == NodeRegion::Center) + { + bounds.Max.x -= activeAreaMinimumSize; + bounds.Min.y = 
ImMax(bounds.Min.y + activeAreaMinimumSize, m_GroupBounds.Min.y); + bounds.Min.x += activeAreaMinimumSize; + bounds.Max.y -= activeAreaMinimumSize; + return bounds; + } + } + + return ImRect(); +} + +ed::NodeRegion ed::Node::GetRegion(const ImVec2& point) const +{ + if (m_Type == NodeType::Node) + { + if (m_Bounds.Contains(point)) + return NodeRegion::Header; + else + return NodeRegion::None; + } + else if (m_Type == NodeType::Group) + { + static const NodeRegion c_Regions[] = + { + // Corners first, they may overlap other regions. + NodeRegion::TopLeft, + NodeRegion::TopRight, + NodeRegion::BottomLeft, + NodeRegion::BottomRight, + NodeRegion::Header, + NodeRegion::Top, + NodeRegion::Bottom, + NodeRegion::Left, + NodeRegion::Right, + NodeRegion::Center + }; + + for (auto region : c_Regions) + { + auto bounds = GetRegionBounds(region); + if (bounds.Contains(point)) + return region; + } + } + + return NodeRegion::None; +} + + + + +//------------------------------------------------------------------------------ +// +// Link +// +//------------------------------------------------------------------------------ +void ed::Link::Draw(ImDrawList* drawList, DrawFlags flags) +{ + if (flags == None) + { + drawList->ChannelsSetCurrent(c_LinkChannel_Links); + + Draw(drawList, m_Color, 0.0f); + } + else if (flags & Selected) + { + const auto borderColor = Editor->GetColor(StyleColor_SelLinkBorder); + + drawList->ChannelsSetCurrent(c_LinkChannel_Selection); + + Draw(drawList, borderColor, 4.5f); + } + else if (flags & Hovered) + { + const auto borderColor = Editor->GetColor(StyleColor_HovLinkBorder); + + drawList->ChannelsSetCurrent(c_LinkChannel_Selection); + + Draw(drawList, borderColor, 2.0f); + } +} + +void ed::Link::Draw(ImDrawList* drawList, ImU32 color, float extraThickness) const +{ + if (!m_IsLive) + return; + + const auto curve = GetCurve(); + + ImDrawList_AddBezierWithArrows(drawList, curve, m_Thickness + extraThickness, + m_StartPin && m_StartPin->m_ArrowSize > 0.0f ? 
m_StartPin->m_ArrowSize + extraThickness : 0.0f, + m_StartPin && m_StartPin->m_ArrowWidth > 0.0f ? m_StartPin->m_ArrowWidth + extraThickness : 0.0f, + m_EndPin && m_EndPin->m_ArrowSize > 0.0f ? m_EndPin->m_ArrowSize + extraThickness : 0.0f, + m_EndPin && m_EndPin->m_ArrowWidth > 0.0f ? m_EndPin->m_ArrowWidth + extraThickness : 0.0f, + true, color, 1.0f); +} + +void ed::Link::UpdateEndpoints() +{ + const auto line = m_StartPin->GetClosestLine(m_EndPin); + m_Start = line.A; + m_End = line.B; +} + +ImCubicBezierPoints ed::Link::GetCurve() const +{ + auto easeLinkStrength = [](const ImVec2& a, const ImVec2& b, float strength) + { + const auto distanceX = b.x - a.x; + const auto distanceY = b.y - a.y; + const auto distance = ImSqrt(distanceX * distanceX + distanceY * distanceY); + const auto halfDistance = distance * 0.5f; + + if (halfDistance < strength) + strength = strength * ImSin(IM_PI * 0.5f * halfDistance / strength); + + return strength; + }; + + const auto startStrength = easeLinkStrength(m_Start, m_End, m_StartPin->m_Strength); + const auto endStrength = easeLinkStrength(m_Start, m_End, m_EndPin->m_Strength); + const auto cp0 = m_Start + m_StartPin->m_Dir * startStrength; + const auto cp1 = m_End + m_EndPin->m_Dir * endStrength; + + ImCubicBezierPoints result; + result.P0 = m_Start; + result.P1 = cp0; + result.P2 = cp1; + result.P3 = m_End; + + return result; +} + +bool ed::Link::TestHit(const ImVec2& point, float extraThickness) const +{ + if (!m_IsLive) + return false; + + auto bounds = GetBounds(); + if (extraThickness > 0.0f) + bounds.Expand(extraThickness); + + if (!bounds.Contains(point)) + return false; + + const auto bezier = GetCurve(); + const auto result = ImProjectOnCubicBezier(point, bezier.P0, bezier.P1, bezier.P2, bezier.P3, 50); + + return result.Distance <= m_Thickness + extraThickness; +} + +bool ed::Link::TestHit(const ImRect& rect, bool allowIntersect) const +{ + if (!m_IsLive) + return false; + + const auto bounds = GetBounds(); + + if 
(rect.Contains(bounds)) + return true; + + if (!allowIntersect || !rect.Overlaps(bounds)) + return false; + + const auto bezier = GetCurve(); + + const auto p0 = rect.GetTL(); + const auto p1 = rect.GetTR(); + const auto p2 = rect.GetBR(); + const auto p3 = rect.GetBL(); + + if (ImCubicBezierLineIntersect(bezier.P0, bezier.P1, bezier.P2, bezier.P3, p0, p1).Count > 0) + return true; + if (ImCubicBezierLineIntersect(bezier.P0, bezier.P1, bezier.P2, bezier.P3, p1, p2).Count > 0) + return true; + if (ImCubicBezierLineIntersect(bezier.P0, bezier.P1, bezier.P2, bezier.P3, p2, p3).Count > 0) + return true; + if (ImCubicBezierLineIntersect(bezier.P0, bezier.P1, bezier.P2, bezier.P3, p3, p0).Count > 0) + return true; + + return false; +} + +ImRect ed::Link::GetBounds() const +{ + if (m_IsLive) + { + const auto curve = GetCurve(); + auto bounds = ImCubicBezierBoundingRect(curve.P0, curve.P1, curve.P2, curve.P3); + + if (bounds.GetWidth() == 0.0f) + { + bounds.Min.x -= 0.5f; + bounds.Max.x += 0.5f; + } + + if (bounds.GetHeight() == 0.0f) + { + bounds.Min.y -= 0.5f; + bounds.Max.y += 0.5f; + } + + if (m_StartPin->m_ArrowSize) + { + const auto start_dir = ImNormalized(ImCubicBezierTangent(curve.P0, curve.P1, curve.P2, curve.P3, 0.0f)); + const auto p0 = curve.P0; + const auto p1 = curve.P0 - start_dir * m_StartPin->m_ArrowSize; + const auto min = ImMin(p0, p1); + const auto max = ImMax(p0, p1); + auto arrowBounds = ImRect(min, ImMax(max, min + ImVec2(1, 1))); + bounds.Add(arrowBounds); + } + + if (m_EndPin->m_ArrowSize) + { + const auto end_dir = ImNormalized(ImCubicBezierTangent(curve.P0, curve.P1, curve.P2, curve.P3, 1.0f)); + const auto p0 = curve.P3; + const auto p1 = curve.P3 + end_dir * m_EndPin->m_ArrowSize; + const auto min = ImMin(p0, p1); + const auto max = ImMax(p0, p1); + auto arrowBounds = ImRect(min, ImMax(max, min + ImVec2(1, 1))); + bounds.Add(arrowBounds); + } + + return bounds; + } + else + return ImRect(); +} + + + + 
+//------------------------------------------------------------------------------ +// +// Editor Context +// +//------------------------------------------------------------------------------ +ed::EditorContext::EditorContext(const ax::NodeEditor::Config* config) + : m_IsFirstFrame(true) + , m_IsWindowActive(false) + , m_ShortcutsEnabled(true) + , m_Style() + , m_Nodes() + , m_Pins() + , m_Links() + , m_SelectionId(1) + , m_LastActiveLink(nullptr) + , m_Canvas() + , m_IsCanvasVisible(false) + , m_NodeBuilder(this) + , m_HintBuilder(this) + , m_CurrentAction(nullptr) + , m_NavigateAction(this, m_Canvas) + , m_SizeAction(this) + , m_DragAction(this) + , m_SelectAction(this) + , m_ContextMenuAction(this) + , m_ShortcutAction(this) + , m_CreateItemAction(this) + , m_DeleteItemsAction(this) + , m_AnimationControllers{ &m_FlowAnimationController } + , m_FlowAnimationController(this) + , m_DoubleClickedNode(0) + , m_DoubleClickedPin(0) + , m_DoubleClickedLink(0) + , m_BackgroundClicked(false) + , m_BackgroundDoubleClicked(false) + , m_IsInitialized(false) + , m_Settings() + , m_Config(config) + , m_ExternalChannel(0) +{ +} + +ed::EditorContext::~EditorContext() +{ + if (m_IsInitialized) + SaveSettings(); + + for (auto link : m_Links) delete link.m_Object; + for (auto pin : m_Pins) delete pin.m_Object; + for (auto node : m_Nodes) delete node.m_Object; + + m_Splitter.ClearFreeMemory(); +} + +void ed::EditorContext::Begin(const char* id, const ImVec2& size) +{ + if (!m_IsInitialized) + { + LoadSettings(); + m_IsInitialized = true; + } + + //ImGui::LogToClipboard(); + //Log("---- begin ----"); + + for (auto node : m_Nodes) node->Reset(); + for (auto pin : m_Pins) pin->Reset(); + for (auto link : m_Links) link->Reset(); + + auto drawList = ImGui::GetWindowDrawList(); + + ImDrawList_SwapSplitter(drawList, m_Splitter); + m_ExternalChannel = drawList->_Splitter._Current; + + ImGui::PushID(id); + + auto availableContentSize = ImGui::GetContentRegionAvail(); + ImVec2 canvasSize = 
ImFloor(size); + if (canvasSize.x <= 0.0f) + canvasSize.x = ImMax(4.0f, availableContentSize.x); + if (canvasSize.y <= 0.0f) + canvasSize.y = ImMax(4.0f, availableContentSize.y); + + m_IsCanvasVisible = m_Canvas.Begin(id, canvasSize); + + //ImGui::PushStyleColor(ImGuiCol_ChildBg, ImVec4(0, 0, 0, 0)); + //ImGui::BeginChild(id, size, false, + // ImGuiWindowFlags_NoMove | + // ImGuiWindowFlags_NoScrollbar | + // ImGuiWindowFlags_NoScrollWithMouse); + + ImGui::CaptureKeyboardFromApp(); + + m_IsWindowActive = ImGui::IsWindowFocused(); + + // + m_NavigateAction.SetWindow(m_Canvas.ViewRect().Min, m_Canvas.ViewRect().GetSize()); + + if (m_CurrentAction && m_CurrentAction->IsDragging() && m_NavigateAction.MoveOverEdge()) + { + auto& io = ImGui::GetIO(); + auto offset = m_NavigateAction.GetMoveOffset(); + for (int i = 0; i < 5; ++i) + io.MouseClickedPos[i] = io.MouseClickedPos[i] - offset; + } + else + m_NavigateAction.StopMoveOverEdge(); + + m_Canvas.SetView(m_NavigateAction.GetView()); + + // #debug #clip + //ImGui::Text("CLIP = { x=%g y=%g w=%g h=%g r=%g b=%g }", + // clipMin.x, clipMin.y, clipMax.x - clipMin.x, clipMax.y - clipMin.y, clipMax.x, clipMax.y); + + // Reserve channels for background and links + ImDrawList_ChannelsGrow(drawList, c_NodeStartChannel); + + if (HasSelectionChanged()) + ++m_SelectionId; + + m_LastSelectedObjects = m_SelectedObjects; +} + +void ed::EditorContext::End() +{ + //auto& io = ImGui::GetIO(); + auto control = BuildControl(m_CurrentAction && m_CurrentAction->IsDragging()); // NavigateAction.IsMovingOverEdge() + auto drawList = ImGui::GetWindowDrawList(); + //auto& editorStyle = GetStyle(); + + m_DoubleClickedNode = control.DoubleClickedNode ? control.DoubleClickedNode->m_ID : 0; + m_DoubleClickedPin = control.DoubleClickedPin ? control.DoubleClickedPin->m_ID : 0; + m_DoubleClickedLink = control.DoubleClickedLink ? 
control.DoubleClickedLink->m_ID : 0; + m_BackgroundClicked = control.BackgroundClicked; + m_BackgroundDoubleClicked = control.BackgroundDoubleClicked; + + //if (DoubleClickedNode) LOG_TRACE(0, "DOUBLE CLICK NODE: %d", DoubleClickedNode); + //if (DoubleClickedPin) LOG_TRACE(0, "DOUBLE CLICK PIN: %d", DoubleClickedPin); + //if (DoubleClickedLink) LOG_TRACE(0, "DOUBLE CLICK LINK: %d", DoubleClickedLink); + //if (BackgroundDoubleClicked) LOG_TRACE(0, "DOUBLE CLICK BACKGROUND", DoubleClickedLink); + + const bool isSelecting = m_CurrentAction && m_CurrentAction->AsSelect() != nullptr; + const bool isDragging = m_CurrentAction && m_CurrentAction->AsDrag() != nullptr; + //const bool isSizing = CurrentAction && CurrentAction->AsSize() != nullptr; + + // Draw nodes + for (auto node : m_Nodes) + if (node->m_IsLive && node->IsVisible()) + node->Draw(drawList); + + // Draw links + for (auto link : m_Links) + if (link->m_IsLive && link->IsVisible()) + link->Draw(drawList); + + // Highlight selected objects + { + auto selectedObjects = &m_SelectedObjects; + if (auto selectAction = m_CurrentAction ? m_CurrentAction->AsSelect() : nullptr) + selectedObjects = &selectAction->m_CandidateObjects; + + for (auto selectedObject : *selectedObjects) + if (selectedObject->IsVisible()) + selectedObject->Draw(drawList, Object::Selected); + } + + if (!isSelecting) + { + auto hoveredObject = control.HotObject; + if (auto dragAction = m_CurrentAction ? m_CurrentAction->AsDrag() : nullptr) + hoveredObject = dragAction->m_DraggedObject; + if (auto sizeAction = m_CurrentAction ? 
m_CurrentAction->AsSize() : nullptr) + hoveredObject = sizeAction->m_SizedNode; + + if (hoveredObject && !IsSelected(hoveredObject) && hoveredObject->IsVisible()) + hoveredObject->Draw(drawList, Object::Hovered); + } + + // Draw animations + for (auto controller : m_AnimationControllers) + controller->Draw(drawList); + + if (m_CurrentAction && !m_CurrentAction->Process(control)) + m_CurrentAction = nullptr; + + if (m_NavigateAction.m_IsActive) + m_NavigateAction.Process(control); + else + m_NavigateAction.Accept(control); + + if (nullptr == m_CurrentAction) + { + EditorAction* possibleAction = nullptr; + + auto accept = [&possibleAction, &control](EditorAction& action) + { + auto result = action.Accept(control); + + if (result == EditorAction::True) + return true; + else if (/*!possibleAction &&*/ result == EditorAction::Possible) + possibleAction = &action; + else if (result == EditorAction::Possible) + action.Reject(); + + return false; + }; + + if (accept(m_ContextMenuAction)) + m_CurrentAction = &m_ContextMenuAction; + else if (accept(m_ShortcutAction)) + m_CurrentAction = &m_ShortcutAction; + else if (accept(m_SizeAction)) + m_CurrentAction = &m_SizeAction; + else if (accept(m_DragAction)) + m_CurrentAction = &m_DragAction; + else if (accept(m_SelectAction)) + m_CurrentAction = &m_SelectAction; + else if (accept(m_CreateItemAction)) + m_CurrentAction = &m_CreateItemAction; + else if (accept(m_DeleteItemsAction)) + m_CurrentAction = &m_DeleteItemsAction; + + if (possibleAction) + ImGui::SetMouseCursor(possibleAction->GetCursor()); + + if (m_CurrentAction && possibleAction) + possibleAction->Reject(); + } + + if (m_CurrentAction) + ImGui::SetMouseCursor(m_CurrentAction->GetCursor()); + + // Draw selection rectangle + m_SelectAction.Draw(drawList); + + bool sortGroups = false; + if (control.ActiveNode) + { + if (!IsGroup(control.ActiveNode)) + { + // Bring active node to front + auto activeNodeIt = std::find(m_Nodes.begin(), m_Nodes.end(), control.ActiveNode); + 
std::rotate(activeNodeIt, activeNodeIt + 1, m_Nodes.end()); + } + else if (!isDragging && m_CurrentAction && m_CurrentAction->AsDrag()) + { + // Bring content of dragged group to front + std::vector nodes; + control.ActiveNode->GetGroupedNodes(nodes); + + std::stable_partition(m_Nodes.begin(), m_Nodes.end(), [&nodes](Node* node) + { + return std::find(nodes.begin(), nodes.end(), node) == nodes.end(); + }); + + sortGroups = true; + } + } + + // Sort nodes if bounds of node changed + if (sortGroups || ((m_Settings.m_DirtyReason & (SaveReasonFlags::Position | SaveReasonFlags::Size)) != SaveReasonFlags::None)) + { + // Bring all groups before regular nodes + auto groupsItEnd = std::stable_partition(m_Nodes.begin(), m_Nodes.end(), IsGroup); + + // Sort groups by area + std::sort(m_Nodes.begin(), groupsItEnd, [this](Node* lhs, Node* rhs) + { + const auto& lhsSize = lhs == m_SizeAction.m_SizedNode ? m_SizeAction.GetStartGroupBounds().GetSize() : lhs->m_GroupBounds.GetSize(); + const auto& rhsSize = rhs == m_SizeAction.m_SizedNode ? m_SizeAction.GetStartGroupBounds().GetSize() : rhs->m_GroupBounds.GetSize(); + + const auto lhsArea = lhsSize.x * lhsSize.y; + const auto rhsArea = rhsSize.x * rhsSize.y; + + return lhsArea > rhsArea; + }); + } + +# if 1 + // Every node has few channels assigned. Grow channel list + // to hold twice as much of channels and place them in + // node drawing order. 
+ { + // Copy group nodes + auto liveNodeCount = static_cast(std::count_if(m_Nodes.begin(), m_Nodes.end(), [](Node* node) { return node->m_IsLive; })); + + // Reserve two additional channels for sorted list of channels + auto nodeChannelCount = drawList->_Splitter._Count; + ImDrawList_ChannelsGrow(drawList, drawList->_Splitter._Count + c_ChannelsPerNode * liveNodeCount + c_LinkChannelCount); + + int targetChannel = nodeChannelCount; + + auto copyNode = [&targetChannel, drawList](Node* node) + { + if (!node->m_IsLive) + return; + + for (int i = 0; i < c_ChannelsPerNode; ++i) + ImDrawList_SwapChannels(drawList, node->m_Channel + i, targetChannel + i); + + node->m_Channel = targetChannel; + targetChannel += c_ChannelsPerNode; + }; + + auto groupsItEnd = std::find_if(m_Nodes.begin(), m_Nodes.end(), [](Node* node) { return !IsGroup(node); }); + + // Copy group nodes + std::for_each(m_Nodes.begin(), groupsItEnd, copyNode); + + // Copy links + for (int i = 0; i < c_LinkChannelCount; ++i, ++targetChannel) + ImDrawList_SwapChannels(drawList, c_LinkStartChannel + i, targetChannel); + + // Copy normal nodes + std::for_each(groupsItEnd, m_Nodes.end(), copyNode); + } +# endif + + // ImGui::PopClipRect(); + + // Draw grid +# if 1 // #FIXME + { + //auto& style = ImGui::GetStyle(); + + drawList->ChannelsSetCurrent(c_UserChannel_Grid); + + ImVec2 offset = m_Canvas.ViewOrigin() * (1.0f / m_Canvas.ViewScale()); + ImU32 GRID_COLOR = GetColor(StyleColor_Grid, ImClamp(m_Canvas.ViewScale() * m_Canvas.ViewScale(), 0.0f, 1.0f)); + float GRID_SX = 32.0f;// * m_Canvas.ViewScale(); + float GRID_SY = 32.0f;// * m_Canvas.ViewScale(); + ImVec2 VIEW_POS = m_Canvas.ViewRect().Min; + ImVec2 VIEW_SIZE = m_Canvas.ViewRect().GetSize(); + + drawList->AddRectFilled(VIEW_POS, VIEW_POS + VIEW_SIZE, GetColor(StyleColor_Bg)); + + for (float x = fmodf(offset.x, GRID_SX); x < VIEW_SIZE.x; x += GRID_SX) + drawList->AddLine(ImVec2(x, 0.0f) + VIEW_POS, ImVec2(x, VIEW_SIZE.y) + VIEW_POS, GRID_COLOR); + for (float 
y = fmodf(offset.y, GRID_SY); y < VIEW_SIZE.y; y += GRID_SY) + drawList->AddLine(ImVec2(0.0f, y) + VIEW_POS, ImVec2(VIEW_SIZE.x, y) + VIEW_POS, GRID_COLOR); + } +# endif + +# if 0 + { + auto userChannel = drawList->_Splitter._Count; + auto channelsToCopy = c_UserLayersCount; + ImDrawList_ChannelsGrow(drawList, userChannel + channelsToCopy); + for (int i = 0; i < channelsToCopy; ++i) + ImDrawList_SwapChannels(drawList, userChannel + i, c_UserLayerChannelStart + i); + } +# endif + +# if 0 + { + auto preOffset = ImVec2(0, 0); + auto postOffset = m_OldCanvas.WindowScreenPos + m_OldCanvas.ClientOrigin; + auto scale = m_OldCanvas.Zoom; + + ImDrawList_TransformChannels(drawList, 0, 1, preOffset, scale, postOffset); + ImDrawList_TransformChannels(drawList, c_BackgroundChannelStart, drawList->_ChannelsCount - 1, preOffset, scale, postOffset); + + auto clipTranslation = m_OldCanvas.WindowScreenPos - m_OldCanvas.FromScreen(m_OldCanvas.WindowScreenPos); + ImGui::PushClipRect(m_OldCanvas.WindowScreenPos + ImVec2(1, 1), m_OldCanvas.WindowScreenPos + m_OldCanvas.WindowScreenSize - ImVec2(1, 1), false); + ImDrawList_TranslateAndClampClipRects(drawList, 0, 1, clipTranslation); + ImDrawList_TranslateAndClampClipRects(drawList, c_BackgroundChannelStart, drawList->_ChannelsCount - 1, clipTranslation); + ImGui::PopClipRect(); + + // #debug: Static grid in local space + //for (float x = 0; x < Canvas.WindowScreenSize.x; x += 100) + // drawList->AddLine(ImVec2(x, 0.0f) + Canvas.WindowScreenPos, ImVec2(x, Canvas.WindowScreenSize.y) + Canvas.WindowScreenPos, IM_COL32(255, 0, 0, 128)); + //for (float y = 0; y < Canvas.WindowScreenSize.y; y += 100) + // drawList->AddLine(ImVec2(0.0f, y) + Canvas.WindowScreenPos, ImVec2(Canvas.WindowScreenSize.x, y) + Canvas.WindowScreenPos, IM_COL32(255, 0, 0, 128)); + } +# endif + +# if 1 + // Move user and hint channels to top + { + // Clip plane is transformed to global space. 
+ // These channels already have clip planes in global space, so + // we move them to clip plane. Batch transformation in canvas + // will bring them back to global space. + auto preTransformClipRect = [this, drawList](int channelIndex) + { + ImDrawChannel& channel = drawList->_Splitter._Channels[channelIndex]; + for (ImDrawCmd& cmd : channel._CmdBuffer) + { + auto a = ToCanvas(ImVec2(cmd.ClipRect.x, cmd.ClipRect.y)); + auto b = ToCanvas(ImVec2(cmd.ClipRect.z, cmd.ClipRect.w)); + cmd.ClipRect = ImVec4(a.x, a.y, b.x, b.y); + } + }; + + drawList->ChannelsSetCurrent(0); + + auto channelCount = drawList->_Splitter._Count; + ImDrawList_ChannelsGrow(drawList, channelCount + 3); + ImDrawList_SwapChannels(drawList, c_UserChannel_HintsBackground, channelCount + 0); + ImDrawList_SwapChannels(drawList, c_UserChannel_Hints, channelCount + 1); + ImDrawList_SwapChannels(drawList, c_UserChannel_Content, channelCount + 2); + + preTransformClipRect(channelCount + 0); + preTransformClipRect(channelCount + 1); + preTransformClipRect(channelCount + 2); + } +# endif + + UpdateAnimations(); + + drawList->ChannelsMerge(); + + // #debug + // drawList->AddRectFilled(ImVec2(-10.0f, -10.0f), ImVec2(10.0f, 10.0f), IM_COL32(255, 0, 255, 255)); + + // ImGui::EndChild(); + // ImGui::PopStyleColor(); + if (m_IsCanvasVisible) + m_Canvas.End(); + + ImDrawList_SwapSplitter(drawList, m_Splitter); + + // Draw border + { + auto& style = ImGui::GetStyle(); + auto borderShadoColor = style.Colors[ImGuiCol_BorderShadow]; + auto borderColor = style.Colors[ImGuiCol_Border]; + drawList->AddRect(m_Canvas.Rect().Min + ImVec2(1, 1), m_Canvas.Rect().Max - ImVec2(1, 1), ImColor(borderShadoColor)); + drawList->AddRect(m_Canvas.Rect().Min, m_Canvas.Rect().Max, ImColor(borderColor)); + } + + // ShowMetrics(control); + + ImGui::PopID(); + + if (!m_CurrentAction && m_IsFirstFrame && !m_Settings.m_Selection.empty()) + { + ClearSelection(); + for (auto id : m_Settings.m_Selection) + if (auto object = FindObject(id)) + 
SelectObject(object); + } + + if (HasSelectionChanged()) + MakeDirty(SaveReasonFlags::Selection); + + if (m_Settings.m_IsDirty && !m_CurrentAction) + SaveSettings(); + + m_IsFirstFrame = false; +} + +bool ed::EditorContext::DoLink(LinkId id, PinId startPinId, PinId endPinId, ImU32 color, float thickness) +{ + //auto& editorStyle = GetStyle(); + + auto startPin = FindPin(startPinId); + auto endPin = FindPin(endPinId); + + if (!startPin || !startPin->m_IsLive || !endPin || !endPin->m_IsLive) + return false; + + startPin->m_HasConnection = true; + endPin->m_HasConnection = true; + + auto link = GetLink(id); + link->m_StartPin = startPin; + link->m_EndPin = endPin; + link->m_Color = color; + link->m_Thickness = thickness; + link->m_IsLive = true; + + link->UpdateEndpoints(); + + return true; +} + +void ed::EditorContext::SetNodePosition(NodeId nodeId, const ImVec2& position) +{ + auto node = FindNode(nodeId); + if (!node) + { + node = CreateNode(nodeId); + node->m_IsLive = false; + } + + if (node->m_Bounds.Min != position) + { + node->m_Bounds.Translate(position - node->m_Bounds.Min); + node->m_Bounds.Floor(); + MakeDirty(NodeEditor::SaveReasonFlags::Position, node); + } +} + +ImVec2 ed::EditorContext::GetNodePosition(NodeId nodeId) +{ + auto node = FindNode(nodeId); + if (!node) + return ImVec2(FLT_MAX, FLT_MAX); + + return node->m_Bounds.Min; +} + +ImVec2 ed::EditorContext::GetNodeSize(NodeId nodeId) +{ + auto node = FindNode(nodeId); + if (!node) + return ImVec2(0, 0); + + return node->m_Bounds.GetSize(); +} + +void ed::EditorContext::MarkNodeToRestoreState(Node* node) +{ + node->m_RestoreState = true; +} + +void ed::EditorContext::RestoreNodeState(Node* node) +{ + auto settings = m_Settings.FindNode(node->m_ID); + if (!settings) + return; + + // Load state from config (if possible) + if (!NodeSettings::Parse(m_Config.LoadNode(node->m_ID), *settings)) + return; + + node->m_Bounds.Min = settings->m_Location; + node->m_Bounds.Max = node->m_Bounds.Min + 
settings->m_Size; + node->m_Bounds.Floor(); + node->m_GroupBounds.Min = settings->m_Location; + node->m_GroupBounds.Max = node->m_GroupBounds.Min + settings->m_GroupSize; + node->m_GroupBounds.Floor(); +} + +void ed::EditorContext::ClearSelection() +{ + m_SelectedObjects.clear(); +} + +void ed::EditorContext::SelectObject(Object* object) +{ + m_SelectedObjects.push_back(object); +} + +void ed::EditorContext::DeselectObject(Object* object) +{ + auto objectIt = std::find(m_SelectedObjects.begin(), m_SelectedObjects.end(), object); + if (objectIt != m_SelectedObjects.end()) + m_SelectedObjects.erase(objectIt); +} + +void ed::EditorContext::SetSelectedObject(Object* object) +{ + ClearSelection(); + SelectObject(object); +} + +void ed::EditorContext::ToggleObjectSelection(Object* object) +{ + if (IsSelected(object)) + DeselectObject(object); + else + SelectObject(object); +} + +bool ed::EditorContext::IsSelected(Object* object) +{ + return std::find(m_SelectedObjects.begin(), m_SelectedObjects.end(), object) != m_SelectedObjects.end(); +} + +const ed::vector& ed::EditorContext::GetSelectedObjects() +{ + return m_SelectedObjects; +} + +bool ed::EditorContext::IsAnyNodeSelected() +{ + for (auto object : m_SelectedObjects) + if (object->AsNode()) + return true; + + return false; +} + +bool ed::EditorContext::IsAnyLinkSelected() +{ + for (auto object : m_SelectedObjects) + if (object->AsLink()) + return true; + + return false; +} + +bool ed::EditorContext::HasSelectionChanged() +{ + return m_LastSelectedObjects != m_SelectedObjects; +} + +ed::Node* ed::EditorContext::FindNodeAt(const ImVec2& p) +{ + for (auto node : m_Nodes) + if (node->TestHit(p)) + return node; + + return nullptr; +} + +void ed::EditorContext::FindNodesInRect(const ImRect& r, vector& result, bool append, bool includeIntersecting) +{ + if (!append) + result.resize(0); + + if (ImRect_IsEmpty(r)) + return; + + for (auto node : m_Nodes) + if (node->TestHit(r, includeIntersecting)) + result.push_back(node); +} 
+ +void ed::EditorContext::FindLinksInRect(const ImRect& r, vector& result, bool append) +{ + if (!append) + result.resize(0); + + if (ImRect_IsEmpty(r)) + return; + + for (auto link : m_Links) + if (link->TestHit(r)) + result.push_back(link); +} + +void ed::EditorContext::FindLinksForNode(NodeId nodeId, vector& result, bool add) +{ + if (!add) + result.clear(); + + for (auto link : m_Links) + { + if (!link->m_IsLive) + continue; + + if (link->m_StartPin->m_Node->m_ID == nodeId || link->m_EndPin->m_Node->m_ID == nodeId) + result.push_back(link); + } +} + +bool ed::EditorContext::PinHadAnyLinks(PinId pinId) +{ + auto pin = FindPin(pinId); + if (!pin || !pin->m_IsLive) + return false; + + return pin->m_HasConnection || pin->m_HadConnection; +} + +void ed::EditorContext::NotifyLinkDeleted(Link* link) +{ + if (m_LastActiveLink == link) + m_LastActiveLink = nullptr; +} + +void ed::EditorContext::Suspend(SuspendFlags flags) +{ + auto drawList = ImGui::GetWindowDrawList(); + auto lastChannel = drawList->_Splitter._Current; + drawList->ChannelsSetCurrent(m_ExternalChannel); + m_Canvas.Suspend(); + drawList->ChannelsSetCurrent(lastChannel); + if ((flags & SuspendFlags::KeepSplitter) != SuspendFlags::KeepSplitter) + ImDrawList_SwapSplitter(drawList, m_Splitter); +} + +void ed::EditorContext::Resume(SuspendFlags flags) +{ + auto drawList = ImGui::GetWindowDrawList(); + if ((flags & SuspendFlags::KeepSplitter) != SuspendFlags::KeepSplitter) + ImDrawList_SwapSplitter(drawList, m_Splitter); + auto lastChannel = drawList->_Splitter._Current; + drawList->ChannelsSetCurrent(m_ExternalChannel); + m_Canvas.Resume(); + drawList->ChannelsSetCurrent(lastChannel); +} + +bool ed::EditorContext::IsSuspended() +{ + return m_Canvas.IsSuspended(); +} + +bool ed::EditorContext::IsActive() +{ + return m_IsWindowActive; +} + +ed::Pin* ed::EditorContext::CreatePin(PinId id, PinKind kind) +{ + IM_ASSERT(nullptr == FindObject(id)); + auto pin = new Pin(this, id, kind); + m_Pins.push_back({id, 
pin}); + std::sort(m_Pins.begin(), m_Pins.end()); + return pin; +} + +ed::Node* ed::EditorContext::CreateNode(NodeId id) +{ + IM_ASSERT(nullptr == FindObject(id)); + auto node = new Node(this, id); + m_Nodes.push_back({id, node}); + //std::sort(Nodes.begin(), Nodes.end()); + + auto settings = m_Settings.FindNode(id); + if (!settings) + settings = m_Settings.AddNode(id); + + if (!settings->m_WasUsed) + { + settings->m_WasUsed = true; + RestoreNodeState(node); + } + + node->m_Bounds.Min = settings->m_Location; + node->m_Bounds.Max = node->m_Bounds.Min; + node->m_Bounds.Floor(); + + if (settings->m_GroupSize.x > 0 || settings->m_GroupSize.y > 0) + { + node->m_Type = NodeType::Group; + node->m_GroupBounds.Min = settings->m_Location; + node->m_GroupBounds.Max = node->m_GroupBounds.Min + settings->m_GroupSize; + node->m_GroupBounds.Floor(); + } + + node->m_IsLive = false; + + return node; +} + +ed::Link* ed::EditorContext::CreateLink(LinkId id) +{ + IM_ASSERT(nullptr == FindObject(id)); + auto link = new Link(this, id); + m_Links.push_back({id, link}); + std::sort(m_Links.begin(), m_Links.end()); + + return link; +} + +template +static inline auto FindItemInLinear(C& container, Id id) +{ +# if defined(_DEBUG) + auto start = container.data(); + auto end = container.data() + container.size(); + for (auto it = start; it < end; ++it) + if ((*it).m_ID == id) + return it->m_Object; +# else + for (auto item : container) + if (item.m_ID == id) + return item.m_Object; +# endif + + return static_cast(nullptr); +} + +template +static inline auto FindItemIn(C& container, Id id) +{ +//# if defined(_DEBUG) +// auto start = container.data(); +// auto end = container.data() + container.size(); +// for (auto it = start; it < end; ++it) +// if ((*it)->ID == id) +// return *it; +//# else +// for (auto item : container) +// if (item->ID == id) +// return item; +//# endif + auto key = typename C::value_type{ id, nullptr }; + auto first = container.cbegin(); + auto last = container.cend(); + 
auto it = std::lower_bound(first, last, key); + if (it != last && (key.m_ID == it->m_ID)) + return it->m_Object; + else + return static_castm_Object)>(nullptr); +} + +ed::Node* ed::EditorContext::FindNode(NodeId id) +{ + return FindItemInLinear(m_Nodes, id); +} + +ed::Pin* ed::EditorContext::FindPin(PinId id) +{ + return FindItemIn(m_Pins, id); +} + +ed::Link* ed::EditorContext::FindLink(LinkId id) +{ + return FindItemIn(m_Links, id); +} + +ed::Object* ed::EditorContext::FindObject(ObjectId id) +{ + if (id.IsNodeId()) + return FindNode(id.AsNodeId()); + else if (id.IsLinkId()) + return FindLink(id.AsLinkId()); + else if (id.IsPinId()) + return FindPin(id.AsPinId()); + else + return nullptr; +} + +ed::Node* ed::EditorContext::GetNode(NodeId id) +{ + auto node = FindNode(id); + if (!node) + node = CreateNode(id); + return node; +} + +ed::Pin* ed::EditorContext::GetPin(PinId id, PinKind kind) +{ + if (auto pin = FindPin(id)) + { + pin->m_Kind = kind; + return pin; + } + else + return CreatePin(id, kind); +} + +ed::Link* ed::EditorContext::GetLink(LinkId id) +{ + if (auto link = FindLink(id)) + return link; + else + return CreateLink(id); +} + +void ed::EditorContext::LoadSettings() +{ + ed::Settings::Parse(m_Config.Load(), m_Settings); + + m_NavigateAction.m_Scroll = m_Settings.m_ViewScroll; + m_NavigateAction.m_Zoom = m_Settings.m_ViewZoom; +} + +void ed::EditorContext::SaveSettings() +{ + m_Config.BeginSave(); + + for (auto& node : m_Nodes) + { + auto settings = m_Settings.FindNode(node->m_ID); + settings->m_Location = node->m_Bounds.Min; + settings->m_Size = node->m_Bounds.GetSize(); + if (IsGroup(node)) + settings->m_GroupSize = node->m_GroupBounds.GetSize(); + + if (!node->m_RestoreState && settings->m_IsDirty && m_Config.SaveNodeSettings) + { + if (m_Config.SaveNode(node->m_ID, settings->Serialize().dump(), settings->m_DirtyReason)) + settings->ClearDirty(); + } + } + + m_Settings.m_Selection.resize(0); + for (auto& object : m_SelectedObjects) + 
m_Settings.m_Selection.push_back(object->ID()); + + m_Settings.m_ViewScroll = m_NavigateAction.m_Scroll; + m_Settings.m_ViewZoom = m_NavigateAction.m_Zoom; + + if (m_Config.Save(m_Settings.Serialize(), m_Settings.m_DirtyReason)) + m_Settings.ClearDirty(); + + m_Config.EndSave(); +} + +void ed::EditorContext::MakeDirty(SaveReasonFlags reason) +{ + m_Settings.MakeDirty(reason); +} + +void ed::EditorContext::MakeDirty(SaveReasonFlags reason, Node* node) +{ + m_Settings.MakeDirty(reason, node); +} + +ed::Link* ed::EditorContext::FindLinkAt(const ImVec2& p) +{ + for (auto& link : m_Links) + if (link->TestHit(p, c_LinkSelectThickness)) + return link; + + return nullptr; +} + +ImU32 ed::EditorContext::GetColor(StyleColor colorIndex) const +{ + return ImColor(m_Style.Colors[colorIndex]); +} + +ImU32 ed::EditorContext::GetColor(StyleColor colorIndex, float alpha) const +{ + auto color = m_Style.Colors[colorIndex]; + return ImColor(color.x, color.y, color.z, color.w * alpha); +} + +void ed::EditorContext::RegisterAnimation(Animation* animation) +{ + m_LiveAnimations.push_back(animation); +} + +void ed::EditorContext::UnregisterAnimation(Animation* animation) +{ + auto it = std::find(m_LiveAnimations.begin(), m_LiveAnimations.end(), animation); + if (it != m_LiveAnimations.end()) + m_LiveAnimations.erase(it); +} + +void ed::EditorContext::UpdateAnimations() +{ + m_LastLiveAnimations = m_LiveAnimations; + + for (auto animation : m_LastLiveAnimations) + { + const bool isLive = (std::find(m_LiveAnimations.begin(), m_LiveAnimations.end(), animation) != m_LiveAnimations.end()); + + if (isLive) + animation->Update(); + } +} + +void ed::EditorContext::Flow(Link* link) +{ + m_FlowAnimationController.Flow(link); +} + +void ed::EditorContext::SetUserContext(bool globalSpace) +{ + const auto mousePos = ImGui::GetMousePos(); + + // Move drawing cursor to mouse location and prepare layer for + // content added by user. 
+ if (globalSpace) + ImGui::SetCursorScreenPos(m_Canvas.FromLocal(mousePos)); + else + ImGui::SetCursorScreenPos(m_Canvas.FromLocal(mousePos)); + //ImGui::SetCursorScreenPos(ImFloor(mousePos)); + //ImGui::SetCursorScreenPos(ImVec2(floorf(mousePos.x), floorf(mousePos.y))); + + if (!IsSuspended()) + { + auto drawList = ImGui::GetWindowDrawList(); + drawList->ChannelsSetCurrent(c_UserChannel_Content); + } + + // #debug + // drawList->AddCircleFilled(ImGui::GetMousePos(), 4, IM_COL32(0, 255, 0, 255)); +} + +void ed::EditorContext::EnableShortcuts(bool enable) +{ + m_ShortcutsEnabled = enable; +} + +bool ed::EditorContext::AreShortcutsEnabled() +{ + return m_ShortcutsEnabled; +} + +ed::Control ed::EditorContext::BuildControl(bool allowOffscreen) +{ + if (!allowOffscreen && !ImGui::IsWindowHovered(ImGuiHoveredFlags_AllowWhenBlockedByActiveItem)) + return Control(nullptr, nullptr, nullptr, nullptr, false, false, false, false); + + const auto mousePos = ImGui::GetMousePos(); + + // Expand clip rectangle to always contain cursor + auto editorRect = m_Canvas.ViewRect(); + auto isMouseOffscreen = allowOffscreen && !editorRect.Contains(mousePos); + if (isMouseOffscreen) + { + // Extend clip rect to capture off-screen mouse cursor + editorRect.Add(ImFloor(mousePos)); + editorRect.Add(ImVec2(ImCeil(mousePos.x), ImCeil(mousePos.y))); + + ImGui::PushClipRect(editorRect.Min, editorRect.Max, false); + } + + Object* hotObject = nullptr; + Object* activeObject = nullptr; + Object* clickedObject = nullptr; + Object* doubleClickedObject = nullptr; + + // Emits invisible button and returns true if it is clicked. 
+ auto emitInteractiveArea = [](ObjectId id, const ImRect& rect) + { + char idString[33] = { 0 }; // itoa can output 33 bytes maximum + snprintf(idString, 32, "%p", id.AsPointer()); + ImGui::SetCursorScreenPos(rect.Min); + + // debug + //if (id < 0) return ImGui::Button(idString, to_imvec(rect.size)); + + auto result = ImGui::InvisibleButton(idString, rect.GetSize()); + + // #debug + //ImGui::GetWindowDrawList()->AddRectFilled(ImGui::GetItemRectMin(), ImGui::GetItemRectMax(), IM_COL32(0, 255, 0, 64)); + + return result; + }; + + // Check input interactions over area. + auto checkInteractionsInArea = [&emitInteractiveArea, &hotObject, &activeObject, &clickedObject, &doubleClickedObject](ObjectId id, const ImRect& rect, Object* object) + { + if (emitInteractiveArea(id, rect)) + clickedObject = object; + if (!doubleClickedObject && ImGui::IsMouseDoubleClicked(0) && ImGui::IsItemHovered()) + doubleClickedObject = object; + + if (!hotObject && ImGui::IsItemHovered(ImGuiHoveredFlags_AllowWhenBlockedByActiveItem)) + hotObject = object; + + if (ImGui::IsItemActive()) + activeObject = object; + }; + + // Process live nodes and pins. + for (auto nodeIt = m_Nodes.rbegin(), nodeItEnd = m_Nodes.rend(); nodeIt != nodeItEnd; ++nodeIt) + { + auto node = *nodeIt; + + if (!node->m_IsLive) continue; + + // Check for interactions with live pins in node before + // processing node itself. Pins does not overlap each other + // and all are within node bounds. + for (auto pin = node->m_LastPin; pin; pin = pin->m_PreviousPin) + { + if (!pin->m_IsLive) continue; + + checkInteractionsInArea(pin->m_ID, pin->m_Bounds, pin); + } + + // Check for interactions with node. 
+ if (node->m_Type == NodeType::Group) + { + // Node with a hole + ImGui::PushID(node->m_ID.AsPointer()); + + static const NodeRegion c_Regions[] = + { + NodeRegion::TopLeft, + NodeRegion::TopRight, + NodeRegion::BottomLeft, + NodeRegion::BottomRight, + NodeRegion::Top, + NodeRegion::Bottom, + NodeRegion::Left, + NodeRegion::Right, + NodeRegion::Header, + }; + + for (auto region : c_Regions) + { + auto bounds = node->GetRegionBounds(region); + if (ImRect_IsEmpty(bounds)) + continue; + checkInteractionsInArea(NodeId(static_cast(region)), bounds, node); + } + + ImGui::PopID(); + } + else + checkInteractionsInArea(node->m_ID, node->m_Bounds, node); + } + + // Links are not regular widgets and must be done manually since + // ImGui does not support interactive elements with custom hit maps. + // + // Links can steal input from background. + + // Links are just over background. So if anything else + // is hovered we can skip them. + if (nullptr == hotObject) + hotObject = FindLinkAt(mousePos); + + // Check for interaction with background. + auto backgroundClicked = emitInteractiveArea(NodeId(0), editorRect); + auto backgroundDoubleClicked = !doubleClickedObject && ImGui::IsItemHovered() ? ImGui::IsMouseDoubleClicked(0) : false; + auto isBackgroundActive = ImGui::IsItemActive(); + auto isBackgroundHot = !hotObject; + auto isDragging = ImGui::IsMouseDragging(0, 1) || ImGui::IsMouseDragging(1, 1) || ImGui::IsMouseDragging(2, 1); + + if (backgroundDoubleClicked) + backgroundClicked = false; + + if (isMouseOffscreen) + ImGui::PopClipRect(); + + // Process link input using background interactions. + auto hotLink = hotObject ? hotObject->AsLink() : nullptr; + + // ImGui take care of tracking active items. With link + // we must do this ourself. 
+ if (!isDragging && isBackgroundActive && hotLink && !m_LastActiveLink) + m_LastActiveLink = hotLink; + if (isBackgroundActive && m_LastActiveLink) + { + activeObject = m_LastActiveLink; + isBackgroundActive = false; + } + else if (!isBackgroundActive && m_LastActiveLink) + m_LastActiveLink = nullptr; + + // Steal click from backgrounds if link is hovered. + if (!isDragging && backgroundClicked && hotLink) + { + clickedObject = hotLink; + backgroundClicked = false; + } + + // Steal double-click from backgrounds if link is hovered. + if (!isDragging && backgroundDoubleClicked && hotLink) + { + doubleClickedObject = hotLink; + backgroundDoubleClicked = false; + } + + return Control(hotObject, activeObject, clickedObject, doubleClickedObject, + isBackgroundHot, isBackgroundActive, backgroundClicked, backgroundDoubleClicked); +} + +void ed::EditorContext::ShowMetrics(const Control& control) +{ + auto& io = ImGui::GetIO(); + + auto getObjectName = [](Object* object) + { + if (!object) return ""; + else if (object->AsNode()) return "Node"; + else if (object->AsPin()) return "Pin"; + else if (object->AsLink()) return "Link"; + else return ""; + }; + + auto getHotObjectName = [&control, &getObjectName]() + { + if (control.HotObject) + return getObjectName(control.HotObject); + else if (control.BackgroundHot) + return "Background"; + else + return ""; + }; + + auto getActiveObjectName = [&control, &getObjectName]() + { + if (control.ActiveObject) + return getObjectName(control.ActiveObject); + else if (control.BackgroundActive) + return "Background"; + else + return ""; + }; + + auto liveNodeCount = (int)std::count_if(m_Nodes.begin(), m_Nodes.end(), [](Node* node) { return node->m_IsLive; }); + auto livePinCount = (int)std::count_if(m_Pins.begin(), m_Pins.end(), [](Pin* pin) { return pin->m_IsLive; }); + auto liveLinkCount = (int)std::count_if(m_Links.begin(), m_Links.end(), [](Link* link) { return link->m_IsLive; }); + + auto canvasRect = m_Canvas.Rect(); + auto viewRect 
= m_Canvas.ViewRect(); + auto localMousePos = m_Canvas.ToLocal(io.MousePos); + auto globalMousePos = io.MousePos; + + ImGui::SetCursorScreenPos(canvasRect.Min + ImVec2(5, 5)); + ImGui::BeginGroup(); + ImGui::Text("Is Editor Active: %s", ImGui::IsWindowHovered() ? "true" : "false"); + ImGui::Text("View Position: { x=%g y=%g }", viewRect.Min.x, viewRect.Min.y); + ImGui::Text("View Size: { w=%g h=%g }", viewRect.GetWidth(), viewRect.GetHeight()); + ImGui::Text("Canvas Size: { w=%g h=%g }", canvasRect.GetWidth(), canvasRect.GetHeight()); + ImGui::Text("Mouse: { x=%.0f y=%.0f } global: { x=%g y=%g }", localMousePos.x, localMousePos.y, globalMousePos.x, globalMousePos.y); + ImGui::Text("Live Nodes: %d", liveNodeCount); + ImGui::Text("Live Pins: %d", livePinCount); + ImGui::Text("Live Links: %d", liveLinkCount); + ImGui::Text("Hot Object: %s (%p)", getHotObjectName(), control.HotObject ? control.HotObject->ID().AsPointer() : nullptr); + if (auto node = control.HotObject ? control.HotObject->AsNode() : nullptr) + { + ImGui::SameLine(); + ImGui::Text("{ x=%g y=%g w=%g h=%g }", node->m_Bounds.Min.x, node->m_Bounds.Min.y, node->m_Bounds.GetWidth(), node->m_Bounds.GetHeight()); + } + ImGui::Text("Active Object: %s (%p)", getActiveObjectName(), control.ActiveObject ? control.ActiveObject->ID().AsPointer() : nullptr); + if (auto node = control.ActiveObject ? control.ActiveObject->AsNode() : nullptr) + { + ImGui::SameLine(); + ImGui::Text("{ x=%g y=%g w=%g h=%g }", node->m_Bounds.Min.x, node->m_Bounds.Min.y, node->m_Bounds.GetWidth(), node->m_Bounds.GetHeight()); + } + ImGui::Text("Action: %s", m_CurrentAction ? m_CurrentAction->GetName() : ""); + ImGui::Text("Action Is Dragging: %s", m_CurrentAction && m_CurrentAction->IsDragging() ? 
"Yes" : "No"); + m_NavigateAction.ShowMetrics(); + m_SizeAction.ShowMetrics(); + m_DragAction.ShowMetrics(); + m_SelectAction.ShowMetrics(); + m_ContextMenuAction.ShowMetrics(); + m_CreateItemAction.ShowMetrics(); + m_DeleteItemsAction.ShowMetrics(); + ImGui::EndGroup(); +} + + + + +//------------------------------------------------------------------------------ +// +// Node Settings +// +//------------------------------------------------------------------------------ +void ed::NodeSettings::ClearDirty() +{ + m_IsDirty = false; + m_DirtyReason = SaveReasonFlags::None; +} + +void ed::NodeSettings::MakeDirty(SaveReasonFlags reason) +{ + m_IsDirty = true; + m_DirtyReason = m_DirtyReason | reason; +} + +ed::json::value ed::NodeSettings::Serialize() +{ + json::value result; + result["location"]["x"] = m_Location.x; + result["location"]["y"] = m_Location.y; + + if (m_GroupSize.x > 0 || m_GroupSize.y > 0) + { + result["group_size"]["x"] = m_GroupSize.x; + result["group_size"]["y"] = m_GroupSize.y; + } + + return result; +} + +bool ed::NodeSettings::Parse(const std::string& string, NodeSettings& settings) +{ + auto settingsValue = json::value::parse(string); + if (settingsValue.is_discarded()) + return false; + + return Parse(settingsValue, settings); +} + +bool ed::NodeSettings::Parse(const json::value& data, NodeSettings& result) +{ + if (!data.is_object()) + return false; + + auto tryParseVector = [](const json::value& v, ImVec2& result) -> bool + { + if (v.is_object()) + { + auto xValue = v["x"]; + auto yValue = v["y"]; + + if (xValue.is_number() && yValue.is_number()) + { + result.x = static_cast(xValue.get()); + result.y = static_cast(yValue.get()); + + return true; + } + } + + return false; + }; + + if (!tryParseVector(data["location"], result.m_Location)) + return false; + + if (data.contains("group_size") && !tryParseVector(data["group_size"], result.m_GroupSize)) + return false; + + return true; +} + + + + 
+//------------------------------------------------------------------------------ +// +// Settings +// +//------------------------------------------------------------------------------ +ed::NodeSettings* ed::Settings::AddNode(NodeId id) +{ + m_Nodes.push_back(NodeSettings(id)); + return &m_Nodes.back(); +} + +ed::NodeSettings* ed::Settings::FindNode(NodeId id) +{ + for (auto& settings : m_Nodes) + if (settings.m_ID == id) + return &settings; + + return nullptr; +} + +void ed::Settings::ClearDirty(Node* node) +{ + if (node) + { + auto settings = FindNode(node->m_ID); + IM_ASSERT(settings); + settings->ClearDirty(); + } + else + { + m_IsDirty = false; + m_DirtyReason = SaveReasonFlags::None; + + for (auto& knownNode : m_Nodes) + knownNode.ClearDirty(); + } +} + +void ed::Settings::MakeDirty(SaveReasonFlags reason, Node* node) +{ + m_IsDirty = true; + m_DirtyReason = m_DirtyReason | reason; + + if (node) + { + auto settings = FindNode(node->m_ID); + IM_ASSERT(settings); + + settings->MakeDirty(reason); + } +} + +std::string ed::Settings::Serialize() +{ + json::value result; + + auto serializeObjectId = [](ObjectId id) + { + auto value = std::to_string(reinterpret_cast(id.AsPointer())); + switch (id.Type()) + { + default: + case NodeEditor::Detail::ObjectType::None: return value; + case NodeEditor::Detail::ObjectType::Node: return "node:" + value; + case NodeEditor::Detail::ObjectType::Link: return "link:" + value; + case NodeEditor::Detail::ObjectType::Pin: return "pin:" + value; + } + }; + + auto& nodes = result["nodes"]; + for (auto& node : m_Nodes) + { + if (node.m_WasUsed) + nodes[serializeObjectId(node.m_ID)] = node.Serialize(); + } + + auto& selection = result["selection"]; + for (auto& id : m_Selection) + selection.push_back(serializeObjectId(id)); + + auto& view = result["view"]; + view["scroll"]["x"] = m_ViewScroll.x; + view["scroll"]["y"] = m_ViewScroll.y; + view["zoom"] = m_ViewZoom; + + return result.dump(); +} + +bool ed::Settings::Parse(const 
std::string& string, Settings& settings) +{ + Settings result = settings; + + auto settingsValue = json::value::parse(string); + if (settingsValue.is_discarded()) + return false; + + if (!settingsValue.is_object()) + return false; + + auto tryParseVector = [](const json::value& v, ImVec2& result) -> bool + { + if (v.is_object() && v.contains("x") && v.contains("y")) + { + auto xValue = v["x"]; + auto yValue = v["y"]; + + if (xValue.is_number() && yValue.is_number()) + { + result.x = static_cast(xValue.get()); + result.y = static_cast(yValue.get()); + + return true; + } + } + + return false; + }; + + auto deserializeObjectId = [](const std::string& str) + { + auto separator = str.find_first_of(':'); + auto idStart = str.c_str() + ((separator != std::string::npos) ? separator + 1 : 0); + auto id = reinterpret_cast(strtoull(idStart, nullptr, 10)); + if (str.compare(0, separator, "node") == 0) + return ObjectId(NodeId(id)); + else if (str.compare(0, separator, "link") == 0) + return ObjectId(LinkId(id)); + else if (str.compare(0, separator, "pin") == 0) + return ObjectId(PinId(id)); + else + // fallback to old format + return ObjectId(NodeId(id)); //return ObjectId(); + }; + + //auto& settingsObject = settingsValue.get(); + + auto& nodesValue = settingsValue["nodes"]; + if (nodesValue.is_object()) + { + for (auto& node : nodesValue.get()) + { + auto id = deserializeObjectId(node.first.c_str()).AsNodeId(); + + auto nodeSettings = result.FindNode(id); + if (!nodeSettings) + nodeSettings = result.AddNode(id); + + NodeSettings::Parse(node.second, *nodeSettings); + } + } + + auto& selectionValue = settingsValue["selection"]; + if (selectionValue.is_array()) + { + const auto selectionArray = selectionValue.get(); + + result.m_Selection.reserve(selectionArray.size()); + result.m_Selection.resize(0); + for (auto& selection : selectionArray) + { + if (selection.is_string()) + result.m_Selection.push_back(deserializeObjectId(selection.get())); + } + } + + auto& viewValue = 
settingsValue["view"]; + if (viewValue.is_object()) + { + auto& viewScrollValue = viewValue["scroll"]; + auto& viewZoomValue = viewValue["zoom"]; + + if (!tryParseVector(viewScrollValue, result.m_ViewScroll)) + result.m_ViewScroll = ImVec2(0, 0); + + result.m_ViewZoom = viewZoomValue.is_number() ? static_cast(viewZoomValue.get()) : 1.0f; + } + + settings = std::move(result); + + return true; +} + + + +//------------------------------------------------------------------------------ +// +// Animation +// +//------------------------------------------------------------------------------ +ed::Animation::Animation(EditorContext* editor): + Editor(editor), + m_State(Stopped), + m_Time(0.0f), + m_Duration(0.0f) +{ +} + +ed::Animation::~Animation() +{ + Stop(); +} + +void ed::Animation::Play(float duration) +{ + if (IsPlaying()) + Stop(); + + m_State = Playing; + if (duration < 0) + duration = 0.0f; + + m_Time = 0.0f; + m_Duration = duration; + + OnPlay(); + + Editor->RegisterAnimation(this); + + if (duration == 0.0f) + Stop(); +} + +void ed::Animation::Stop() +{ + if (!IsPlaying()) + return; + + m_State = Stopped; + + Editor->UnregisterAnimation(this); + + OnStop(); +} + +void ed::Animation::Finish() +{ + if (!IsPlaying()) + return; + + OnFinish(); + + Stop(); +} + +void ed::Animation::Update() +{ + if (!IsPlaying()) + return; + + m_Time += ImMax(0.0f, ImGui::GetIO().DeltaTime); + if (m_Time < m_Duration) + { + const float progress = GetProgress(); + OnUpdate(progress); + } + else + { + OnFinish(); + Stop(); + } +} + + + + +//------------------------------------------------------------------------------ +// +// Navigate Animation +// +//------------------------------------------------------------------------------ +ed::NavigateAnimation::NavigateAnimation(EditorContext* editor, NavigateAction& scrollAction): + Animation(editor), + Action(scrollAction) +{ +} + +void ed::NavigateAnimation::NavigateTo(const ImRect& target, float duration) +{ + Stop(); + + m_Start = 
Action.GetViewRect(); + m_Target = target; + + // Skip tiny animations + auto minoffset = m_Target.Min - m_Start.Min; + auto maxOffset = m_Target.Max - m_Start.Max; + auto epsilon = 1e-4f; + if (ImFabs(minoffset.x) < epsilon && ImFabs(minoffset.y) < epsilon && + ImFabs(maxOffset.x) < epsilon && ImFabs(maxOffset.y) < epsilon) + { + duration = 0; + } + + Play(duration); +} + +void ed::NavigateAnimation::OnUpdate(float progress) +{ + ImRect current; + current.Min = ImEasing::EaseOutQuad(m_Start.Min, m_Target.Min - m_Start.Min, progress); + current.Max = ImEasing::EaseOutQuad(m_Start.Max, m_Target.Max - m_Start.Max, progress); + Action.SetViewRect(current); +} + +void ed::NavigateAnimation::OnStop() +{ + Editor->MakeDirty(SaveReasonFlags::Navigation); +} + +void ed::NavigateAnimation::OnFinish() +{ + Action.SetViewRect(m_Target); + + Editor->MakeDirty(SaveReasonFlags::Navigation); +} + + + + +//------------------------------------------------------------------------------ +// +// Flow Animation +// +//------------------------------------------------------------------------------ +ed::FlowAnimation::FlowAnimation(FlowAnimationController* controller): + Animation(controller->Editor), + Controller(controller), + m_Link(nullptr), + m_Offset(0.0f), + m_PathLength(0.0f) +{ +} + +void ed::FlowAnimation::Flow(ed::Link* link, float markerDistance, float speed, float duration) +{ + Stop(); + + if (m_Link != link) + { + m_Offset = 0.0f; + ClearPath(); + } + + if (m_MarkerDistance != markerDistance) + ClearPath(); + + m_MarkerDistance = markerDistance; + m_Speed = speed; + m_Link = link; + + Play(duration); +} + +void ed::FlowAnimation::Draw(ImDrawList* drawList) +{ + if (!IsPlaying() || !IsLinkValid() || !m_Link->IsVisible()) + return; + + if (!IsPathValid()) + UpdatePath(); + + m_Offset = fmodf(m_Offset, m_MarkerDistance); + + const auto progress = GetProgress(); + + const auto flowAlpha = 1.0f - progress * progress; + const auto flowColor = Editor->GetColor(StyleColor_Flow, 
flowAlpha); + //const auto flowPath = Link->GetCurve(); + + m_Link->Draw(drawList, flowColor, 2.0f); + + if (IsPathValid()) + { + //Offset = 0; + + const auto markerAlpha = powf(1.0f - progress, 0.35f); + const auto markerRadius = 4.0f * (1.0f - progress) + 2.0f; + const auto markerColor = Editor->GetColor(StyleColor_FlowMarker, markerAlpha); + + for (float d = m_Offset; d < m_PathLength; d += m_MarkerDistance) + drawList->AddCircleFilled(SamplePath(d), markerRadius, markerColor); + } +} + +bool ed::FlowAnimation::IsLinkValid() const +{ + return m_Link && m_Link->m_IsLive; +} + +bool ed::FlowAnimation::IsPathValid() const +{ + return m_Path.size() > 1 && m_PathLength > 0.0f && m_Link->m_Start == m_LastStart && m_Link->m_End == m_LastEnd; +} + +void ed::FlowAnimation::UpdatePath() +{ + if (!IsLinkValid()) + { + ClearPath(); + return; + } + + const auto curve = m_Link->GetCurve(); + + m_LastStart = m_Link->m_Start; + m_LastEnd = m_Link->m_End; + m_PathLength = ImCubicBezierLength(curve.P0, curve.P1, curve.P2, curve.P3); + + auto collectPointsCallback = [this](ImCubicBezierFixedStepSample& result) + { + m_Path.push_back(CurvePoint{ result.Length, result.Point }); + }; + + const auto step = ImMax(m_MarkerDistance * 0.5f, 15.0f); + + m_Path.resize(0); + ImCubicBezierFixedStep(collectPointsCallback, curve, step, false, 0.5f, 0.001f); +} + +void ed::FlowAnimation::ClearPath() +{ + vector().swap(m_Path); + m_PathLength = 0.0f; +} + +ImVec2 ed::FlowAnimation::SamplePath(float distance) +{ + //distance = ImMax(0.0f, std::min(distance, PathLength)); + + auto endPointIt = std::find_if(m_Path.begin(), m_Path.end(), [distance](const CurvePoint& p) { return distance < p.Distance; }); + if (endPointIt == m_Path.end()) + endPointIt = m_Path.end() - 1; + else if (endPointIt == m_Path.begin()) + endPointIt = m_Path.begin() + 1; + + const auto& start = endPointIt[-1]; + const auto& end = *endPointIt; + const auto t = (distance - start.Distance) / (end.Distance - start.Distance); + + 
return start.Point + (end.Point - start.Point) * t; +} + +void ed::FlowAnimation::OnUpdate(float progress) +{ + IM_UNUSED(progress); + + m_Offset += m_Speed * ImGui::GetIO().DeltaTime; +} + +void ed::FlowAnimation::OnStop() +{ + Controller->Release(this); +} + + + + +//------------------------------------------------------------------------------ +// +// Flow Animation Controller +// +//------------------------------------------------------------------------------ +ed::FlowAnimationController::FlowAnimationController(EditorContext* editor): + AnimationController(editor) +{ +} + +ed::FlowAnimationController::~FlowAnimationController() +{ + for (auto animation : m_Animations) + delete animation; +} + +void ed::FlowAnimationController::Flow(Link* link) +{ + if (!link || !link->m_IsLive) + return; + + auto& editorStyle = GetStyle(); + + auto animation = GetOrCreate(link); + + animation->Flow(link, editorStyle.FlowMarkerDistance, editorStyle.FlowSpeed, editorStyle.FlowDuration); +} + +void ed::FlowAnimationController::Draw(ImDrawList* drawList) +{ + if (m_Animations.empty()) + return; + + drawList->ChannelsSetCurrent(c_LinkChannel_Flow); + + for (auto animation : m_Animations) + animation->Draw(drawList); +} + +ed::FlowAnimation* ed::FlowAnimationController::GetOrCreate(Link* link) +{ + // Return live animation which match target link + { + auto animationIt = std::find_if(m_Animations.begin(), m_Animations.end(), [link](FlowAnimation* animation) { return animation->m_Link == link; }); + if (animationIt != m_Animations.end()) + return *animationIt; + } + + // There are no live animations for target link, try to reuse inactive old one + if (!m_FreePool.empty()) + { + auto animation = m_FreePool.back(); + m_FreePool.pop_back(); + return animation; + } + + // Cache miss, allocate new one + auto animation = new FlowAnimation(this); + m_Animations.push_back(animation); + + return animation; +} + +void ed::FlowAnimationController::Release(FlowAnimation* animation) +{ + 
IM_UNUSED(animation); +} + + + +//------------------------------------------------------------------------------ +// +// Navigate Action +// +//------------------------------------------------------------------------------ +const float ed::NavigateAction::s_ZoomLevels[] = +{ + 0.1f, 0.15f, 0.20f, 0.25f, 0.33f, 0.5f, 0.75f, 1.0f, 1.25f, 1.50f, 2.0f, 2.5f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f +}; + +const int ed::NavigateAction::s_ZoomLevelCount = sizeof(s_ZoomLevels) / sizeof(*s_ZoomLevels); + +ed::NavigateAction::NavigateAction(EditorContext* editor, ImGuiEx::Canvas& canvas): + EditorAction(editor), + m_IsActive(false), + m_Zoom(1), + m_Scroll(0, 0), + m_ScrollStart(0, 0), + m_ScrollDelta(0, 0), + m_Canvas(canvas), + m_WindowScreenPos(0, 0), + m_WindowScreenSize(0, 0), + m_Animation(editor, *this), + m_Reason(NavigationReason::Unknown), + m_LastSelectionId(0), + m_LastObject(nullptr), + m_MovingOverEdge(false), + m_MoveOffset(0, 0) +{ +} + +ed::EditorAction::AcceptResult ed::NavigateAction::Accept(const Control& control) +{ + IM_ASSERT(!m_IsActive); + + if (m_IsActive) + return False; + + if (ImGui::IsWindowHovered() /*&& !ImGui::IsAnyItemActive()*/ && ImGui::IsMouseDragging(c_ScrollButtonIndex, 0.0f)) + { + m_IsActive = true; + m_ScrollStart = m_Scroll; + m_ScrollDelta = ImGui::GetMouseDragDelta(c_ScrollButtonIndex); + m_Scroll = m_ScrollStart - m_ScrollDelta * m_Zoom; + } + + auto& io = ImGui::GetIO(); + + if (ImGui::IsWindowFocused() && ImGui::IsKeyPressed(GetKeyIndexForF()) && Editor->AreShortcutsEnabled()) + { + const auto allowZoomIn = io.KeyShift; + + auto findHotObjectToZoom = [this, &control, &io]() -> Object* + { + if (control.HotObject) + { + if (auto pin = control.HotObject->AsPin()) + return pin->m_Node; + else + return control.HotObject; + } + else if (control.BackgroundHot) + { + auto node = Editor->FindNodeAt(io.MousePos); + if (IsGroup(node)) + return node; + } + + return nullptr; + }; + + bool navigateToContent = false; + if 
(!Editor->GetSelectedObjects().empty()) + { + if (m_Reason != NavigationReason::Selection || m_LastSelectionId != Editor->GetSelectionId() || allowZoomIn) + { + m_LastSelectionId = Editor->GetSelectionId(); + NavigateTo(Editor->GetSelectionBounds(), allowZoomIn, -1.0f, NavigationReason::Selection); + } + else + navigateToContent = true; + } + else if(auto hotObject = findHotObjectToZoom()) + { + if (m_Reason != NavigationReason::Object || m_LastObject != hotObject || allowZoomIn) + { + m_LastObject = hotObject; + auto bounds = hotObject->GetBounds(); + NavigateTo(bounds, allowZoomIn, -1.0f, NavigationReason::Object); + } + else + navigateToContent = true; + } + else + navigateToContent = true; + + if (navigateToContent) + NavigateTo(Editor->GetContentBounds(), true, -1.0f, NavigationReason::Content); + } + + // // #debug + // if (auto drawList = ImGui::GetWindowDrawList()) + // drawList->AddCircleFilled(io.MousePos, 4.0f, IM_COL32(255, 0, 255, 255)); + + if (HandleZoom(control)) + return True; + + return m_IsActive ? 
True : False; +} + +bool ed::NavigateAction::Process(const Control& control) +{ + IM_UNUSED(control); + + if (!m_IsActive) + return false; + + if (ImGui::IsMouseDragging(c_ScrollButtonIndex, 0.0f)) + { + m_ScrollDelta = ImGui::GetMouseDragDelta(c_ScrollButtonIndex); + m_Scroll = m_ScrollStart - m_ScrollDelta * m_Zoom; + +// if (IsActive && Animation.IsPlaying()) +// Animation.Target = Animation.Target - ScrollDelta * Animation.TargetZoom; + } + else + { + if (m_Scroll != m_ScrollStart) + Editor->MakeDirty(SaveReasonFlags::Navigation); + + m_IsActive = false; + } + + // #TODO: Handle zoom while scrolling + // HandleZoom(control); + + return m_IsActive; +} + +bool ed::NavigateAction::HandleZoom(const Control& control) +{ + IM_UNUSED(control); + + const auto currentAction = Editor->GetCurrentAction(); + const auto allowOffscreen = currentAction && currentAction->IsDragging(); + + auto& io = ImGui::GetIO(); + + if (!io.MouseWheel || (!allowOffscreen && !ImGui::IsWindowHovered()))// && !ImGui::IsAnyItemActive()) + return false; + + auto savedScroll = m_Scroll; + auto savedZoom = m_Zoom; + + m_Animation.Finish(); + + auto mousePos = io.MousePos; + auto steps = (int)io.MouseWheel; + auto newZoom = MatchZoom(steps, s_ZoomLevels[steps < 0 ? 
0 : s_ZoomLevelCount - 1]); + + auto oldView = GetView(); + m_Zoom = newZoom; + auto newView = GetView(); + + auto screenPos = m_Canvas.FromLocal(mousePos, oldView); + auto canvasPos = m_Canvas.ToLocal(screenPos, newView); + + auto offset = (canvasPos - mousePos) * m_Zoom; + auto targetScroll = m_Scroll - offset; + + if (m_Scroll != savedScroll || m_Zoom != savedZoom) + { + m_Scroll = savedScroll; + m_Zoom = savedZoom; + + Editor->MakeDirty(SaveReasonFlags::Navigation); + } + + auto targetRect = m_Canvas.CalcViewRect(ImGuiEx::CanvasView(-targetScroll, newZoom)); + + NavigateTo(targetRect, c_MouseZoomDuration, NavigationReason::MouseZoom); + + return true; +} + +void ed::NavigateAction::ShowMetrics() +{ + EditorAction::ShowMetrics(); + + ImGui::Text("%s:", GetName()); + ImGui::Text(" Active: %s", m_IsActive ? "yes" : "no"); + ImGui::Text(" Scroll: { x=%g y=%g }", m_Scroll.x, m_Scroll.y); + ImGui::Text(" Zoom: %g", m_Zoom); +} + +void ed::NavigateAction::NavigateTo(const ImRect& bounds, bool zoomIn, float duration, NavigationReason reason) +{ + if (ImRect_IsEmpty(bounds)) + return; + + if (duration < 0.0f) + duration = GetStyle().ScrollDuration; + + if (!zoomIn) + { + auto viewRect = m_Canvas.ViewRect(); + auto viewRectCenter = viewRect.GetCenter(); + auto targetCenter = bounds.GetCenter(); + + viewRect.Translate(targetCenter - viewRectCenter); + + NavigateTo(viewRect, duration, reason); + } + else + { + // Grow rect by 5% to leave some reasonable margin + // from the edges of the canvas. 
+ auto rect = bounds; + auto extend = ImMax(rect.GetWidth(), rect.GetHeight()); + rect.Expand(extend * c_NavigationZoomMargin * 0.5f); + + NavigateTo(rect, duration, reason); + } +} + +void ed::NavigateAction::NavigateTo(const ImRect& target, float duration, NavigationReason reason) +{ + m_Reason = reason; + + m_Animation.NavigateTo(target, duration); +} + +void ed::NavigateAction::StopNavigation() +{ + m_Animation.Stop(); +} + +void ed::NavigateAction::FinishNavigation() +{ + m_Animation.Finish(); +} + +bool ed::NavigateAction::MoveOverEdge() +{ + // Don't interrupt non-edge animations + if (m_Animation.IsPlaying()) + return false; + + auto& io = ImGui::GetIO(); + const auto screenRect = m_Canvas.ViewRect(); + const auto screenMousePos = io.MousePos; + + // Mouse is over screen, do nothing + if (screenRect.Contains(screenMousePos)) + return false; + + const auto screenPointOnEdge = ImRect_ClosestPoint(screenRect, screenMousePos, true); + const auto direction = screenPointOnEdge - screenMousePos; + const auto offset = -direction * io.DeltaTime * 10.0f; + + m_Scroll = m_Scroll + offset; + + m_MoveOffset = offset; + m_MovingOverEdge = true; + + return true; +} + +void ed::NavigateAction::StopMoveOverEdge() +{ + if (m_MovingOverEdge) + { + Editor->MakeDirty(SaveReasonFlags::Navigation); + + m_MoveOffset = ImVec2(0, 0); + m_MovingOverEdge = false; + } +} + +void ed::NavigateAction::SetWindow(ImVec2 position, ImVec2 size) +{ + m_WindowScreenPos = position; + m_WindowScreenSize = size; +} + +ImGuiEx::CanvasView ed::NavigateAction::GetView() const +{ + return ImGuiEx::CanvasView(-m_Scroll, m_Zoom); +} + +ImVec2 ed::NavigateAction::GetViewOrigin() const +{ + return -m_Scroll; +} + +float ed::NavigateAction::GetViewScale() const +{ + return m_Zoom; +} + +void ed::NavigateAction::SetViewRect(const ImRect& rect) +{ + auto view = m_Canvas.CalcCenterView(rect); + m_Scroll = -view.Origin; + m_Zoom = view.Scale; +} + +ImRect ed::NavigateAction::GetViewRect() const +{ + return 
m_Canvas.CalcViewRect(GetView());
+}
+
+// Picks the zoom level reached by moving `steps` levels from the current zoom.
+// Returns `fallbackZoom` when no level matches or the step leaves the table.
+float ed::NavigateAction::MatchZoom(int steps, float fallbackZoom)
+{
+    const auto matchedIndex = MatchZoomIndex(steps);
+    if (matchedIndex < 0)
+        return fallbackZoom;
+
+    // If the matched level differs from the current zoom, snap to it first.
+    const auto matchedZoom = s_ZoomLevels[matchedIndex];
+    if (fabsf(matchedZoom - m_Zoom) > 0.001f)
+        return matchedZoom;
+
+    const auto steppedIndex = matchedIndex + steps;
+    const bool steppedIndexValid = (steppedIndex >= 0 && steppedIndex < s_ZoomLevelCount);
+    return steppedIndexValid ? s_ZoomLevels[steppedIndex] : fallbackZoom;
+}
+
+// Finds the index of the zoom level closest to the current zoom. When the
+// closest level is not an exact match (difference > 0.001), the index is moved
+// one level in `direction`, clamped to the table. Returns -1 for an empty table.
+int ed::NavigateAction::MatchZoomIndex(int direction)
+{
+    int nearestIndex = -1;
+    float nearestGap = 0.0f;
+
+    for (int level = 0; level < s_ZoomLevelCount; ++level)
+    {
+        const auto gap = fabsf(s_ZoomLevels[level] - m_Zoom);
+        if (nearestIndex < 0 || gap < nearestGap)
+        {
+            nearestIndex = level;
+            nearestGap = gap;
+        }
+    }
+
+    // Exact match (or empty table): nothing to adjust.
+    if (!(nearestGap > 0.001f))
+        return nearestIndex;
+
+    if (direction > 0)
+    {
+        // Step up unless already at the last level.
+        if (nearestIndex + 1 < s_ZoomLevelCount)
+            ++nearestIndex;
+    }
+    else if (direction < 0)
+    {
+        // Step down unless already at the first level.
+        if (nearestIndex > 0)
+            --nearestIndex;
+    }
+
+    return nearestIndex;
+}
+
+
+
+
+//------------------------------------------------------------------------------
+//
+// Size Action
+//
+//------------------------------------------------------------------------------
+ed::SizeAction::SizeAction(EditorContext* editor):
+    EditorAction(editor),
+    m_IsActive(false),
+    m_Clean(false),
+    m_SizedNode(nullptr),
+    m_Pivot(NodeRegion::None),
+    m_Cursor(ImGuiMouseCursor_Arrow)
+{
+}
+
+ed::EditorAction::AcceptResult ed::SizeAction::Accept(const Control& control)
+{
+    IM_ASSERT(!m_IsActive);
+
+    if (m_IsActive)
+        return False;
+
+    if (control.ActiveNode && IsGroup(control.ActiveNode) && ImGui::IsMouseDragging(0, 0))
+    {
+        //const auto mousePos = to_point(ImGui::GetMousePos());
+        //const auto closestPoint = control.ActiveNode->Bounds.get_closest_point_hollow(mousePos, static_cast(control.ActiveNode->Rounding));
+
+        auto pivot = GetRegion(control.ActiveNode);
+        if (pivot !=
NodeRegion::Header && pivot != NodeRegion::Center)
+        {
+            // Snapshot the node's geometry at drag start; Process() derives
+            // every new size from these values.
+            m_StartBounds = control.ActiveNode->m_Bounds;
+            m_StartGroupBounds = control.ActiveNode->m_GroupBounds;
+            m_LastSize = control.ActiveNode->m_Bounds.GetSize();
+            m_MinimumSize = ImVec2(0, 0);
+            m_LastDragOffset = ImVec2(0, 0);
+            m_Pivot = pivot;
+            m_Cursor = ChooseCursor(m_Pivot);
+            m_SizedNode = control.ActiveNode;
+            m_IsActive = true;
+        }
+    }
+    else if (control.HotNode && IsGroup(control.HotNode))
+    {
+        // Hovering a group: only update the cursor and report Possible.
+        m_Cursor = ChooseCursor(GetRegion(control.HotNode));
+        return Possible;
+    }
+
+    return m_IsActive ? True : False;
+}
+
+// Drives an active resize. Returns true while the action still owns the
+// interaction (including the frame on which the resize ends).
+bool ed::SizeAction::Process(const Control& control)
+{
+    // Deferred clean-up requested when the resize ended: report position and/or
+    // size changes relative to the start snapshot, then release the node.
+    if (m_Clean)
+    {
+        m_Clean = false;
+
+        if (m_SizedNode->m_Bounds.Min != m_StartBounds.Min || m_SizedNode->m_GroupBounds.Min != m_StartGroupBounds.Min)
+            Editor->MakeDirty(SaveReasonFlags::Position | SaveReasonFlags::User, m_SizedNode);
+
+        if (m_SizedNode->m_Bounds.GetSize() != m_StartBounds.GetSize() || m_SizedNode->m_GroupBounds.GetSize() != m_StartGroupBounds.GetSize())
+            Editor->MakeDirty(SaveReasonFlags::Size | SaveReasonFlags::User, m_SizedNode);
+
+        m_SizedNode = nullptr;
+    }
+
+    if (!m_IsActive)
+        return false;
+
+    if (control.ActiveNode == m_SizedNode)
+    {
+        // NOTE(review): this condition repeats the enclosing `if`, so the
+        // m_LastDragOffset alternative looks unreachable here.
+        const auto dragOffset = (control.ActiveNode == m_SizedNode) ?
ImGui::GetMouseDragDelta(0, 0.0f) : m_LastDragOffset; + m_LastDragOffset = dragOffset; + + if (m_MinimumSize.x == 0.0f && m_LastSize.x != m_SizedNode->m_Bounds.GetWidth()) + m_MinimumSize.x = m_SizedNode->m_Bounds.GetWidth(); + if (m_MinimumSize.y == 0.0f && m_LastSize.y != m_SizedNode->m_Bounds.GetHeight()) + m_MinimumSize.y = m_SizedNode->m_Bounds.GetHeight(); + + auto minimumSize = ImMax(m_MinimumSize, m_StartBounds.GetSize() - m_StartGroupBounds.GetSize()); + + + auto newBounds = m_StartBounds; + + if ((m_Pivot & NodeRegion::Top) == NodeRegion::Top) + newBounds.Min.y = ImMin(newBounds.Max.y - minimumSize.y, Editor->AlignPointToGrid(newBounds.Min.y + dragOffset.y)); + if ((m_Pivot & NodeRegion::Bottom) == NodeRegion::Bottom) + newBounds.Max.y = ImMax(newBounds.Min.y + minimumSize.y, Editor->AlignPointToGrid(newBounds.Max.y + dragOffset.y)); + if ((m_Pivot & NodeRegion::Left) == NodeRegion::Left) + newBounds.Min.x = ImMin(newBounds.Max.x - minimumSize.x, Editor->AlignPointToGrid(newBounds.Min.x + dragOffset.x)); + if ((m_Pivot & NodeRegion::Right) == NodeRegion::Right) + newBounds.Max.x = ImMax(newBounds.Min.x + minimumSize.x, Editor->AlignPointToGrid(newBounds.Max.x + dragOffset.x)); + + newBounds.Floor(); + + m_LastSize = newBounds.GetSize(); + + m_SizedNode->m_Bounds = newBounds; + m_SizedNode->m_GroupBounds = newBounds; + m_SizedNode->m_GroupBounds.Min.x -= m_StartBounds.Min.x - m_StartGroupBounds.Min.x; + m_SizedNode->m_GroupBounds.Min.y -= m_StartBounds.Min.y - m_StartGroupBounds.Min.y; + m_SizedNode->m_GroupBounds.Max.x -= m_StartBounds.Max.x - m_StartGroupBounds.Max.x; + m_SizedNode->m_GroupBounds.Max.y -= m_StartBounds.Max.y - m_StartGroupBounds.Max.y; + } + else if (!control.ActiveNode) + { + m_Clean = true; + m_IsActive = false; + return true; + } + + return m_IsActive; +} + +void ed::SizeAction::ShowMetrics() +{ + EditorAction::ShowMetrics(); + + auto getObjectName = [](Object* object) + { + if (!object) return ""; + else if (object->AsNode()) return 
"Node"; + else if (object->AsPin()) return "Pin"; + else if (object->AsLink()) return "Link"; + else return ""; + }; + + ImGui::Text("%s:", GetName()); + ImGui::Text(" Active: %s", m_IsActive ? "yes" : "no"); + ImGui::Text(" Node: %s (%p)", getObjectName(m_SizedNode), m_SizedNode ? m_SizedNode->m_ID.AsPointer() : nullptr); + if (m_SizedNode && m_IsActive) + { + ImGui::Text(" Bounds: { x=%g y=%g w=%g h=%g }", m_SizedNode->m_Bounds.Min.x, m_SizedNode->m_Bounds.Min.y, m_SizedNode->m_Bounds.GetWidth(), m_SizedNode->m_Bounds.GetHeight()); + ImGui::Text(" Group Bounds: { x=%g y=%g w=%g h=%g }", m_SizedNode->m_GroupBounds.Min.x, m_SizedNode->m_GroupBounds.Min.y, m_SizedNode->m_GroupBounds.GetWidth(), m_SizedNode->m_GroupBounds.GetHeight()); + ImGui::Text(" Start Bounds: { x=%g y=%g w=%g h=%g }", m_StartBounds.Min.x, m_StartBounds.Min.y, m_StartBounds.GetWidth(), m_StartBounds.GetHeight()); + ImGui::Text(" Start Group Bounds: { x=%g y=%g w=%g h=%g }", m_StartGroupBounds.Min.x, m_StartGroupBounds.Min.y, m_StartGroupBounds.GetWidth(), m_StartGroupBounds.GetHeight()); + ImGui::Text(" Minimum Size: { w=%g h=%g }", m_MinimumSize.x, m_MinimumSize.y); + ImGui::Text(" Last Size: { w=%g h=%g }", m_LastSize.x, m_LastSize.y); + } +} + +ed::NodeRegion ed::SizeAction::GetRegion(Node* node) +{ + return node->GetRegion(ImGui::GetMousePos()); +} + +ImGuiMouseCursor ed::SizeAction::ChooseCursor(NodeRegion region) +{ + switch (region) + { + default: + case NodeRegion::Center: + return ImGuiMouseCursor_Arrow; + + case NodeRegion::Top: + case NodeRegion::Bottom: + return ImGuiMouseCursor_ResizeNS; + + case NodeRegion::Left: + case NodeRegion::Right: + return ImGuiMouseCursor_ResizeEW; + + case NodeRegion::TopLeft: + case NodeRegion::BottomRight: + return ImGuiMouseCursor_ResizeNWSE; + + case NodeRegion::TopRight: + case NodeRegion::BottomLeft: + return ImGuiMouseCursor_ResizeNESW; + } +} + + + + +//------------------------------------------------------------------------------ +// +// Drag 
Action +// +//------------------------------------------------------------------------------ +ed::DragAction::DragAction(EditorContext* editor): + EditorAction(editor), + m_IsActive(false), + m_Clear(false), + m_DraggedObject(nullptr) +{ +} + +ed::EditorAction::AcceptResult ed::DragAction::Accept(const Control& control) +{ + IM_ASSERT(!m_IsActive); + + if (m_IsActive) + return False; + + if (control.ActiveObject && ImGui::IsMouseDragging(0)) + { + if (!control.ActiveObject->AcceptDrag()) + return False; + + m_DraggedObject = control.ActiveObject; + + m_Objects.resize(0); + m_Objects.push_back(m_DraggedObject); + + if (Editor->IsSelected(m_DraggedObject)) + { + for (auto selectedObject : Editor->GetSelectedObjects()) + if (auto selectedNode = selectedObject->AsNode()) + if (selectedNode != m_DraggedObject && selectedNode->AcceptDrag()) + m_Objects.push_back(selectedNode); + } + + auto& io = ImGui::GetIO(); + if (!io.KeyShift) + { + std::vector groupedNodes; + for (auto object : m_Objects) + if (auto node = object->AsNode()) + node->GetGroupedNodes(groupedNodes, true); + + auto isAlreadyPicked = [this](Node* node) + { + return std::find(m_Objects.begin(), m_Objects.end(), node) != m_Objects.end(); + }; + + for (auto candidate : groupedNodes) + if (!isAlreadyPicked(candidate) && candidate->AcceptDrag()) + m_Objects.push_back(candidate); + } + + m_IsActive = true; + } + else if (control.HotNode && IsGroup(control.HotNode) && control.HotNode->GetRegion(ImGui::GetMousePos()) == NodeRegion::Header) + { + return Possible; + } + + return m_IsActive ? 
True : False;
+}
+
+// Drives an active drag. Returns true while the action still owns the
+// interaction (including the frame on which the drag ends).
+bool ed::DragAction::Process(const Control& control)
+{
+    // Deferred clean-up requested when the drag ended: finish the drag on every
+    // object and mark the ones whose EndDrag() returned true as dirty.
+    if (m_Clear)
+    {
+        m_Clear = false;
+
+        for (auto object : m_Objects)
+        {
+            if (object->EndDrag())
+                Editor->MakeDirty(SaveReasonFlags::Position | SaveReasonFlags::User, object->AsNode());
+        }
+
+        m_Objects.resize(0);
+
+        m_DraggedObject = nullptr;
+    }
+
+    if (!m_IsActive)
+        return false;
+
+    if (control.ActiveObject == m_DraggedObject)
+    {
+        auto dragOffset = ImGui::GetMouseDragDelta(0, 0.0f);
+
+        auto draggedOrigin = m_DraggedObject->DragStartLocation();
+        auto alignPivot = ImVec2(0, 0);
+
+        // TODO: Move this experimental alignment to closest pivot out of internals to node API
+        if (auto draggedNode = m_DraggedObject->AsNode())
+        {
+            // Smallest grid-snapped offset found so far, tracked per axis.
+            float x = FLT_MAX;
+            float y = FLT_MAX;
+
+            // For one pivot, compute the offset that would snap it to the grid
+            // and keep it (per axis) if it is smaller than the best so far.
+            auto testPivot = [this, &x, &y, &draggedOrigin, &dragOffset, &alignPivot](const ImVec2& pivot)
+            {
+                auto initial = draggedOrigin + dragOffset + pivot;
+                auto candidate = Editor->AlignPointToGrid(initial) - draggedOrigin - pivot;
+
+                if (ImFabs(candidate.x) < ImFabs(ImMin(x, FLT_MAX)))
+                {
+                    x = candidate.x;
+                    alignPivot.x = pivot.x;
+                }
+
+                if (ImFabs(candidate.y) < ImFabs(ImMin(y, FLT_MAX)))
+                {
+                    y = candidate.y;
+                    alignPivot.y = pivot.y;
+                }
+            };
+
+            // Walk the node's pins from last to first, testing each pin's
+            // pivot centre relative to the node's top-left corner.
+            for (auto pin = draggedNode->m_LastPin; pin; pin = pin->m_PreviousPin)
+            {
+                auto pivot = pin->m_Pivot.GetCenter() - draggedNode->m_Bounds.Min;
+                testPivot(pivot);
+            }
+
+            //testPivot(point(0, 0));
+        }
+
+        auto alignedOffset = Editor->AlignPointToGrid(draggedOrigin + dragOffset + alignPivot) - draggedOrigin - alignPivot;
+
+        // Holding Alt keeps the raw, unaligned drag offset.
+        if (!ImGui::GetIO().KeyAlt)
+            dragOffset = alignedOffset;
+
+        for (auto object : m_Objects)
+            object->UpdateDrag(dragOffset);
+    }
+    else if (!control.ActiveObject)
+    {
+        // Nothing is active any more: schedule clean-up for the next call.
+        m_Clear = true;
+
+        m_IsActive = false;
+        return true;
+    }
+
+    return m_IsActive;
+}
+
+void ed::DragAction::ShowMetrics()
+{
+    EditorAction::ShowMetrics();
+
+    auto getObjectName = [](Object* object)
+    {
+        if (!object) return "";
+        else if (object->AsNode()) return "Node";
+
else if (object->AsPin()) return "Pin";
+        else if (object->AsLink()) return "Link";
+        else return "";
+    };
+
+    ImGui::Text("%s:", GetName());
+    ImGui::Text(" Active: %s", m_IsActive ? "yes" : "no");
+    ImGui::Text(" Node: %s (%p)", getObjectName(m_DraggedObject), m_DraggedObject ? m_DraggedObject->ID().AsPointer() : nullptr);
+}
+
+
+
+
+//------------------------------------------------------------------------------
+//
+// Select Action
+//
+//------------------------------------------------------------------------------
+// Starts inactive, with all selection modes off and a default start point.
+ed::SelectAction::SelectAction(EditorContext* editor):
+    EditorAction(editor),
+    m_IsActive(false),
+    m_SelectGroups(false),
+    m_SelectLinkMode(false),
+    m_CommitSelection(false),
+    m_StartPoint(),
+    m_Animation(editor)
+{
+}
+
+// Handles selection input for this frame: starts a rectangle selection when
+// the background is dragged, clears the selection on a background click, or
+// selects/toggles a clicked node or link.
+ed::EditorAction::AcceptResult ed::SelectAction::Accept(const Control& control)
+{
+    IM_ASSERT(!m_IsActive);
+
+    if (m_IsActive)
+        return False;
+
+    // Modifier keys are sampled once, here: Shift -> groups, Alt -> links.
+    auto& io = ImGui::GetIO();
+    m_SelectGroups = io.KeyShift;
+    m_SelectLinkMode = io.KeyAlt;
+
+    m_SelectedObjectsAtStart.clear();
+
+    if (control.BackgroundActive && ImGui::IsMouseDragging(0, 1))
+    {
+        m_IsActive = true;
+        m_StartPoint = ImGui::GetMousePos();
+        m_EndPoint = m_StartPoint;
+
+        // Links and nodes cannot be selected together
+        if ((m_SelectLinkMode && Editor->IsAnyNodeSelected()) ||
+           (!m_SelectLinkMode && Editor->IsAnyLinkSelected()))
+        {
+            Editor->ClearSelection();
+        }
+
+        // With Ctrl held, the existing selection is remembered so Process()
+        // can merge it into the candidates.
+        if (io.KeyCtrl)
+            m_SelectedObjectsAtStart = Editor->GetSelectedObjects();
+    }
+    else if (control.BackgroundClicked)
+    {
+        Editor->ClearSelection();
+    }
+    else
+    {
+        Object* clickedObject = control.ClickedNode ?
static_cast(control.ClickedNode) : static_cast(control.ClickedLink); + + if (clickedObject) + { + // Links and nodes cannot be selected together + if ((clickedObject->AsLink() && Editor->IsAnyNodeSelected()) || + (clickedObject->AsNode() && Editor->IsAnyLinkSelected())) + { + Editor->ClearSelection(); + } + + if (io.KeyCtrl) + Editor->ToggleObjectSelection(clickedObject); + else + Editor->SetSelectedObject(clickedObject); + } + } + + if (m_IsActive) + m_Animation.Stop(); + + return m_IsActive ? True : False; +} + +bool ed::SelectAction::Process(const Control& control) +{ + IM_UNUSED(control); + + if (m_CommitSelection) + { + Editor->ClearSelection(); + for (auto object : m_CandidateObjects) + Editor->SelectObject(object); + + m_CandidateObjects.clear(); + + m_CommitSelection = false; + } + + if (!m_IsActive) + return false; + + if (ImGui::IsMouseDragging(0, 0)) + { + m_EndPoint = ImGui::GetMousePos(); + + auto topLeft = ImVec2(std::min(m_StartPoint.x, m_EndPoint.x), std::min(m_StartPoint.y, m_EndPoint.y)); + auto bottomRight = ImVec2(ImMax(m_StartPoint.x, m_EndPoint.x), ImMax(m_StartPoint.y, m_EndPoint.y)); + auto rect = ImRect(topLeft, bottomRight); + if (rect.GetWidth() <= 0) + rect.Max.x = rect.Min.x + 1; + if (rect.GetHeight() <= 0) + rect.Max.y = rect.Min.y + 1; + + vector nodes; + vector links; + + if (m_SelectLinkMode) + { + Editor->FindLinksInRect(rect, links); + m_CandidateObjects.assign(links.begin(), links.end()); + } + else + { + Editor->FindNodesInRect(rect, nodes); + m_CandidateObjects.assign(nodes.begin(), nodes.end()); + + if (m_SelectGroups) + { + auto endIt = std::remove_if(m_CandidateObjects.begin(), m_CandidateObjects.end(), [](Object* object) { return !IsGroup(object->AsNode()); }); + m_CandidateObjects.erase(endIt, m_CandidateObjects.end()); + } + else + { + auto endIt = std::remove_if(m_CandidateObjects.begin(), m_CandidateObjects.end(), [](Object* object) { return IsGroup(object->AsNode()); }); + m_CandidateObjects.erase(endIt, 
m_CandidateObjects.end());
+            }
+        }
+
+        // Merge in the selection captured at drag start, then sort and drop duplicates.
+        m_CandidateObjects.insert(m_CandidateObjects.end(), m_SelectedObjectsAtStart.begin(), m_SelectedObjectsAtStart.end());
+        std::sort(m_CandidateObjects.begin(), m_CandidateObjects.end());
+        m_CandidateObjects.erase(std::unique(m_CandidateObjects.begin(), m_CandidateObjects.end()), m_CandidateObjects.end());
+    }
+    else
+    {
+        // Drag ended: start the fade-out and schedule the commit for the next call.
+        m_IsActive = false;
+
+        m_Animation.Play(c_SelectionFadeOutDuration);
+
+        m_CommitSelection = true;
+
+        return true;
+    }
+
+    return m_IsActive;
+}
+
+// Prints the action name and active flag after the base class metrics.
+void ed::SelectAction::ShowMetrics()
+{
+    EditorAction::ShowMetrics();
+
+    ImGui::Text("%s:", GetName());
+    ImGui::Text(" Active: %s", m_IsActive ? "yes" : "no");
+}
+
+// Draws the selection rectangle while selecting or while the fade-out
+// animation is still playing.
+void ed::SelectAction::Draw(ImDrawList* drawList)
+{
+    if (!m_IsActive && !m_Animation.IsPlaying())
+        return;
+
+    // Alpha is 1 while selecting and eased by the animation progress afterwards.
+    const auto alpha = m_Animation.IsPlaying() ? ImEasing::EaseOutQuad(1.0f, -1.0f, m_Animation.GetProgress()) : 1.0f;
+
+    const auto fillColor = Editor->GetColor(m_SelectLinkMode ? StyleColor_LinkSelRect : StyleColor_NodeSelRect, alpha);
+    const auto outlineColor = Editor->GetColor(m_SelectLinkMode ?
StyleColor_LinkSelRectBorder : StyleColor_NodeSelRectBorder, alpha); + + drawList->ChannelsSetCurrent(c_BackgroundChannel_SelectionRect); + + auto min = ImVec2(std::min(m_StartPoint.x, m_EndPoint.x), std::min(m_StartPoint.y, m_EndPoint.y)); + auto max = ImVec2(ImMax(m_StartPoint.x, m_EndPoint.x), ImMax(m_StartPoint.y, m_EndPoint.y)); + + drawList->AddRectFilled(min, max, fillColor); + FringeScaleScope fringe(1.0f); + drawList->AddRect(min, max, outlineColor); +} + + + + +//------------------------------------------------------------------------------ +// +// Context Menu Action +// +//------------------------------------------------------------------------------ +ed::ContextMenuAction::ContextMenuAction(EditorContext* editor): + EditorAction(editor), + m_CandidateMenu(Menu::None), + m_CurrentMenu(Menu::None), + m_ContextId() +{ +} + +ed::EditorAction::AcceptResult ed::ContextMenuAction::Accept(const Control& control) +{ + const auto isPressed = ImGui::IsMouseClicked(1); + const auto isReleased = ImGui::IsMouseReleased(1); + const auto isDragging = ImGui::IsMouseDragging(1); + + if (isPressed || isReleased || isDragging) + { + Menu candidateMenu = ContextMenuAction::None; + ObjectId contextId; + + if (auto hotObejct = control.HotObject) + { + if (hotObejct->AsNode()) + candidateMenu = Node; + else if (hotObejct->AsPin()) + candidateMenu = Pin; + else if (hotObejct->AsLink()) + candidateMenu = Link; + + if (candidateMenu != None) + contextId = hotObejct->ID(); + } + else if (control.BackgroundHot) + candidateMenu = Background; + + if (isPressed) + { + m_CandidateMenu = candidateMenu; + m_ContextId = contextId; + return Possible; + } + else if (isReleased && m_CandidateMenu == candidateMenu && m_ContextId == contextId) + { + m_CurrentMenu = m_CandidateMenu; + m_CandidateMenu = ContextMenuAction::None; + return True; + } + else + { + m_CandidateMenu = None; + m_CurrentMenu = None; + m_ContextId = ObjectId(); + return False; + } + } + + return False; +} + +bool 
ed::ContextMenuAction::Process(const Control& control)
+{
+    IM_UNUSED(control);
+
+    // Resets the menu state; this action never stays active.
+    m_ContextId = ObjectId();
+    m_CurrentMenu = None;
+    m_CandidateMenu = None;
+    return false;
+}
+
+// Resets the menu state.
+void ed::ContextMenuAction::Reject()
+{
+    m_ContextId = ObjectId();
+    m_CurrentMenu = None;
+    m_CandidateMenu = None;
+}
+
+// Prints the action name and the current menu after the base class metrics.
+void ed::ContextMenuAction::ShowMetrics()
+{
+    EditorAction::ShowMetrics();
+
+    auto getMenuName = [](Menu menu)
+    {
+        switch (menu)
+        {
+            default:
+            case None: return "None";
+            case Node: return "Node";
+            case Pin: return "Pin";
+            case Link: return "Link";
+            case Background: return "Background";
+        }
+    };
+
+
+    ImGui::Text("%s:", GetName());
+    ImGui::Text(" Menu: %s", getMenuName(m_CurrentMenu));
+}
+
+// When the node menu is current, writes the node id to `nodeId`, switches to
+// the user context and returns true; otherwise returns false.
+bool ed::ContextMenuAction::ShowNodeContextMenu(NodeId* nodeId)
+{
+    const bool nodeMenuCurrent = (m_CurrentMenu == Node);
+    if (nodeMenuCurrent)
+    {
+        *nodeId = m_ContextId.AsNodeId();
+        Editor->SetUserContext();
+    }
+    return nodeMenuCurrent;
+}
+
+// Same as above for the pin menu.
+bool ed::ContextMenuAction::ShowPinContextMenu(PinId* pinId)
+{
+    const bool pinMenuCurrent = (m_CurrentMenu == Pin);
+    if (pinMenuCurrent)
+    {
+        *pinId = m_ContextId.AsPinId();
+        Editor->SetUserContext();
+    }
+    return pinMenuCurrent;
+}
+
+// Same as above for the link menu.
+bool ed::ContextMenuAction::ShowLinkContextMenu(LinkId* linkId)
+{
+    const bool linkMenuCurrent = (m_CurrentMenu == Link);
+    if (linkMenuCurrent)
+    {
+        *linkId = m_ContextId.AsLinkId();
+        Editor->SetUserContext();
+    }
+    return linkMenuCurrent;
+}
+
+// When the background menu is current, switches to the user context and
+// returns true; otherwise returns false.
+bool ed::ContextMenuAction::ShowBackgroundContextMenu()
+{
+    const bool backgroundMenuCurrent = (m_CurrentMenu == Background);
+    if (backgroundMenuCurrent)
+        Editor->SetUserContext();
+    return backgroundMenuCurrent;
+}
+
+
+
+
+//------------------------------------------------------------------------------
+//
+// Cut/Copy/Paste Action
+//
+//------------------------------------------------------------------------------
+ed::ShortcutAction::ShortcutAction(EditorContext* editor):
+    EditorAction(editor),
+    m_IsActive(false),
+    m_InAction(false),
+    m_CurrentAction(Action::None),
+    m_Context()
+{
+}
+
+ed::EditorAction::AcceptResult ed::ShortcutAction::Accept(const Control& control)
+{
+    if (!Editor->IsActive() || !Editor->AreShortcutsEnabled())
+
return False;
+
+    Action candidateAction = None;
+
+    // Each shortcut is tested independently; if several match in the same
+    // frame, the last matching test wins.
+    auto& io = ImGui::GetIO();
+    if (io.KeyCtrl && !io.KeyShift && !io.KeyAlt && ImGui::IsKeyPressed(ImGui::GetKeyIndex(ImGuiKey_X)))
+        candidateAction = Cut;
+    if (io.KeyCtrl && !io.KeyShift && !io.KeyAlt && ImGui::IsKeyPressed(ImGui::GetKeyIndex(ImGuiKey_C)))
+        candidateAction = Copy;
+    if (io.KeyCtrl && !io.KeyShift && !io.KeyAlt && ImGui::IsKeyPressed(ImGui::GetKeyIndex(ImGuiKey_V)))
+        candidateAction = Paste;
+    if (io.KeyCtrl && !io.KeyShift && !io.KeyAlt && ImGui::IsKeyPressed(GetKeyIndexForD()))
+        candidateAction = Duplicate;
+    if (!io.KeyCtrl && !io.KeyShift && !io.KeyAlt && ImGui::IsKeyPressed(ImGui::GetKeyIndex(ImGuiKey_Space)))
+        candidateAction = CreateNode;
+
+    if (candidateAction != None)
+    {
+        // Cut/Copy/Duplicate need a context (the objects acted upon);
+        // Paste and CreateNode do not.
+        if (candidateAction != Paste && candidateAction != CreateNode)
+        {
+            auto& selection = Editor->GetSelectedObjects();
+            if (!selection.empty())
+            {
+                // #TODO: Find a way to simplify logic.
+
+                m_Context.assign(selection.begin(), selection.end());
+
+                // Expand groups
+                // Element type restored: the template argument was missing
+                // from this declaration.
+                vector<Node*> extra;
+                for (auto object : m_Context)
+                {
+                    auto node = object->AsNode();
+                    if (IsGroup(node))
+                        node->GetGroupedNodes(extra, true);
+                }
+
+                // Apply groups and remove duplicates
+                if (!extra.empty())
+                {
+                    m_Context.insert(m_Context.end(), extra.begin(), extra.end());
+                    std::sort(m_Context.begin(), m_Context.end());
+                    m_Context.erase(std::unique(m_Context.begin(), m_Context.end()), m_Context.end());
+                }
+            }
+            else if (control.HotObject && control.HotObject->IsSelectable() && !IsGroup(control.HotObject->AsNode()))
+            {
+                // Nothing selected: fall back to the selectable, non-group
+                // object under the cursor.
+                m_Context.push_back(control.HotObject);
+            }
+
+            if (m_Context.empty())
+                return False;
+
+            // Does copying only links make sense?
+ //const auto hasOnlyLinks = std::all_of(Context.begin(), Context.end(), [](Object* object) { return object->AsLink() != nullptr; }); + //if (hasOnlyLinks) + // return False; + + // If no links are selected, pick all links between nodes within context + const auto hasAnyLinks = std::any_of(m_Context.begin(), m_Context.end(), [](Object* object) { return object->AsLink() != nullptr; }); + if (!hasAnyLinks && m_Context.size() > 1) // one node cannot make connection to anything + { + // Collect nodes in sorted vector viable for binary search + std::vector> nodes; + + nodes.reserve(m_Context.size()); + std::for_each(m_Context.begin(), m_Context.end(), [&nodes](Object* object) { if (auto node = object->AsNode()) nodes.push_back({node->m_ID, node}); }); + + std::sort(nodes.begin(), nodes.end()); + + auto isNodeInContext = [&nodes](NodeId nodeId) + { + return std::binary_search(nodes.begin(), nodes.end(), ObjectWrapper{nodeId, nullptr}); + }; + + // Collect links connected to nodes and drop those reaching out of context + std::vector links; + + for (auto node : nodes) + Editor->FindLinksForNode(node.m_ID, links, true); + + // Remove duplicates + std::sort(links.begin(), links.end()); + links.erase(std::unique(links.begin(), links.end()), links.end()); + + // Drop out of context links + links.erase(std::remove_if(links.begin(), links.end(), [&isNodeInContext](Link* link) + { + return !isNodeInContext(link->m_StartPin->m_Node->m_ID) || !isNodeInContext(link->m_EndPin->m_Node->m_ID); + }), links.end()); + + // Append links and remove duplicates + m_Context.insert(m_Context.end(), links.begin(), links.end()); + } + } + else + m_Context.resize(0); + + m_IsActive = true; + m_CurrentAction = candidateAction; + + return True; + } + + return False; +} + +bool ed::ShortcutAction::Process(const Control& control) +{ + IM_UNUSED(control); + + m_IsActive = false; + m_CurrentAction = None; + m_Context.resize(0); + return false; +} + +void ed::ShortcutAction::Reject() +{ + m_IsActive = 
false; + m_CurrentAction = None; + m_Context.resize(0); +} + +void ed::ShortcutAction::ShowMetrics() +{ + EditorAction::ShowMetrics(); + + auto getActionName = [](Action action) + { + switch (action) + { + default: + case None: return "None"; + case Cut: return "Cut"; + case Copy: return "Copy"; + case Paste: return "Paste"; + case Duplicate: return "Duplicate"; + case CreateNode: return "CreateNode"; + } + }; + + ImGui::Text("%s:", GetName()); + ImGui::Text(" Action: %s", getActionName(m_CurrentAction)); +} + +bool ed::ShortcutAction::Begin() +{ + if (m_IsActive) + m_InAction = true; + return m_IsActive; +} + +void ed::ShortcutAction::End() +{ + if (m_IsActive) + m_InAction = false; +} + +bool ed::ShortcutAction::AcceptCut() +{ + IM_ASSERT(m_InAction); + return m_CurrentAction == Cut; +} + +bool ed::ShortcutAction::AcceptCopy() +{ + IM_ASSERT(m_InAction); + return m_CurrentAction == Copy; +} + +bool ed::ShortcutAction::AcceptPaste() +{ + IM_ASSERT(m_InAction); + return m_CurrentAction == Paste; +} + +bool ed::ShortcutAction::AcceptDuplicate() +{ + IM_ASSERT(m_InAction); + return m_CurrentAction == Duplicate; +} + +bool ed::ShortcutAction::AcceptCreateNode() +{ + IM_ASSERT(m_InAction); + return m_CurrentAction == CreateNode; +} + + + + +//------------------------------------------------------------------------------ +// +// Create Item Action +// +//------------------------------------------------------------------------------ +ed::CreateItemAction::CreateItemAction(EditorContext* editor): + EditorAction(editor), + m_InActive(false), + m_NextStage(None), + m_CurrentStage(None), + m_ItemType(NoItem), + m_UserAction(Unknown), + m_LinkColor(IM_COL32_WHITE), + m_LinkThickness(1.0f), + m_LinkStart(nullptr), + m_LinkEnd(nullptr), + + m_IsActive(false), + m_DraggedPin(nullptr), + + m_IsInGlobalSpace(false) +{ +} + +ed::EditorAction::AcceptResult ed::CreateItemAction::Accept(const Control& control) +{ + IM_ASSERT(!m_IsActive); + + if (m_IsActive) + return 
EditorAction::False;
+
+    if (control.ActivePin && ImGui::IsMouseDragging(0))
+    {
+        // Dragging from a pin: start a new link and drop the current selection.
+        m_DraggedPin = control.ActivePin;
+        DragStart(m_DraggedPin);
+
+        Editor->ClearSelection();
+    }
+    else if (control.HotPin)
+    {
+        return EditorAction::Possible;
+    }
+    else
+        return EditorAction::False;
+
+    m_IsActive = true;
+
+    return EditorAction::True;
+}
+
+// Drives an active link drag: while the pin is still held, draws a candidate
+// link between the dragged pin and the cursor (or a hot pin); otherwise ends
+// the drag. Returns whether the action is still active.
+bool ed::CreateItemAction::Process(const Control& control)
+{
+    IM_ASSERT(m_IsActive);
+
+    if (!m_IsActive)
+        return false;
+
+    if (m_DraggedPin && control.ActivePin == m_DraggedPin && (m_CurrentStage == Possible))
+    {
+        const auto draggingFromSource = (m_DraggedPin->m_Kind == PinKind::Output);
+
+        // Temporary pin at the mouse position, of the opposite kind and
+        // direction to the dragged pin.
+        ed::Pin cursorPin(Editor, 0, draggingFromSource ? PinKind::Input : PinKind::Output);
+        cursorPin.m_Pivot = ImRect(ImGui::GetMousePos(), ImGui::GetMousePos());
+        cursorPin.m_Dir = -m_DraggedPin->m_Dir;
+        cursorPin.m_Strength = m_DraggedPin->m_Strength;
+
+        // Temporary link used only for drawing this frame.
+        ed::Link candidate(Editor, 0);
+        candidate.m_Color = m_LinkColor;
+        candidate.m_StartPin = draggingFromSource ? m_DraggedPin : &cursorPin;
+        candidate.m_EndPin = draggingFromSource ? &cursorPin : m_DraggedPin;
+
+        // Reference to whichever end of the candidate is the cursor pin.
+        ed::Pin*& freePin = draggingFromSource ?
candidate.m_EndPin : candidate.m_StartPin;
+
+        if (control.HotPin)
+        {
+            DropPin(control.HotPin);
+
+            // If the user accepted the link, attach the free end to the hot
+            // pin instead of the cursor.
+            if (m_UserAction == UserAccept)
+                freePin = control.HotPin;
+        }
+        else if (control.BackgroundHot)
+            DropNode();
+        else
+            DropNothing();
+
+        auto drawList = ImGui::GetWindowDrawList();
+        drawList->ChannelsSetCurrent(c_LinkChannel_NewLink);
+
+        candidate.UpdateEndpoints();
+        candidate.Draw(drawList, m_LinkColor, m_LinkThickness);
+    }
+    else if (m_CurrentStage == Possible || !control.ActivePin)
+    {
+        // Released outside the hovered window: nothing can be created.
+        if (!ImGui::IsWindowHovered())
+        {
+            m_DraggedPin = nullptr;
+            DropNothing();
+        }
+
+        DragEnd();
+        m_IsActive = false;
+    }
+
+    return m_IsActive;
+}
+
+// Prints the current stage, user action and item type after the base class
+// metrics.
+void ed::CreateItemAction::ShowMetrics()
+{
+    EditorAction::ShowMetrics();
+
+    auto getStageName = [](Stage stage)
+    {
+        switch (stage)
+        {
+            case None: return "None";
+            case Possible: return "Possible";
+            case Create: return "Create";
+            default: return "";
+        }
+    };
+
+    auto getActionName = [](Action action)
+    {
+        switch (action)
+        {
+            default:
+            case Unknown: return "Unknown";
+            case UserReject: return "Reject";
+            case UserAccept: return "Accept";
+        }
+    };
+
+    auto getItemName = [](Type item)
+    {
+        switch (item)
+        {
+            default:
+            case NoItem: return "None";
+            case Node: return "Node";
+            case Link: return "Link";
+        }
+    };
+
+    ImGui::Text("%s:", GetName());
+    ImGui::Text(" Stage: %s", getStageName(m_CurrentStage));
+    ImGui::Text(" User Action: %s", getActionName(m_UserAction));
+    ImGui::Text(" Item Type: %s", getItemName(m_ItemType));
+}
+
+// Sets the colour and thickness used to draw the candidate link.
+void ed::CreateItemAction::SetStyle(ImU32 color, float thickness)
+{
+    m_LinkColor = color;
+    m_LinkThickness = thickness;
+}
+
+// Opens the query scope: promotes the next stage to current and resets the
+// user action and link style to defaults. Returns false when the current
+// stage is None, i.e. there is nothing to query.
+bool ed::CreateItemAction::Begin()
+{
+    IM_ASSERT(false == m_InActive);
+
+    m_InActive = true;
+    m_CurrentStage = m_NextStage;
+    m_UserAction = Unknown;
+    m_LinkColor = IM_COL32_WHITE;
+    m_LinkThickness = 1.0f;
+
+    if (m_CurrentStage == None)
+        return false;
+
+    // Remember the draw-list channel so End() can restore it.
+    m_LastChannel = ImGui::GetWindowDrawList()->_Splitter._Current;
+
+    return true;
+}
+
+void
ed::CreateItemAction::End()
+{
+    IM_ASSERT(m_InActive);
+
+    // Undo the clip rect / suspend done by a Query*() call, and restore the
+    // draw-list channel recorded by Begin() if it changed.
+    if (m_IsInGlobalSpace)
+    {
+        ImGui::PopClipRect();
+        Editor->Resume(SuspendFlags::KeepSplitter);
+
+        const auto channelNow = ImGui::GetWindowDrawList()->_Splitter._Current;
+        if (channelNow != m_LastChannel)
+            ImGui::GetWindowDrawList()->ChannelsSetCurrent(m_LastChannel);
+
+        m_IsInGlobalSpace = false;
+    }
+
+    m_InActive = false;
+}
+
+// Marks the start of a link drag from `startPin`.
+void ed::CreateItemAction::DragStart(Pin* startPin)
+{
+    IM_ASSERT(!m_InActive);
+
+    m_LinkEnd = nullptr;
+    m_LinkStart = startPin;
+    m_NextStage = Possible;
+}
+
+// Ends the drag: moves on to the Create stage if the user accepted during
+// the Possible stage, otherwise resets the action.
+void ed::CreateItemAction::DragEnd()
+{
+    IM_ASSERT(!m_InActive);
+
+    const bool userAcceptedCandidate = (m_CurrentStage == Possible && m_UserAction == UserAccept);
+    if (userAcceptedCandidate)
+    {
+        m_NextStage = Create;
+        return;
+    }
+
+    m_NextStage = None;
+    m_ItemType = NoItem;
+    m_LinkStart = nullptr;
+    m_LinkEnd = nullptr;
+}
+
+// The drag is over a pin: the item would be a link ending at `endPin`.
+void ed::CreateItemAction::DropPin(Pin* endPin)
+{
+    IM_ASSERT(!m_InActive);
+
+    m_LinkEnd = endPin;
+    m_ItemType = Link;
+}
+
+// The drag is over the background: the item would be a new node.
+void ed::CreateItemAction::DropNode()
+{
+    IM_ASSERT(!m_InActive);
+
+    m_LinkEnd = nullptr;
+    m_ItemType = Node;
+}
+
+// The drag is over nothing usable: no item.
+void ed::CreateItemAction::DropNothing()
+{
+    IM_ASSERT(!m_InActive);
+
+    m_LinkEnd = nullptr;
+    m_ItemType = NoItem;
+}
+
+// Records that the user rejected the item. Returns Indeterminate when there
+// is nothing to reject.
+ed::CreateItemAction::Result ed::CreateItemAction::RejectItem()
+{
+    IM_ASSERT(m_InActive);
+
+    const bool nothingToDecide = (!m_InActive || m_CurrentStage == None || m_ItemType == NoItem);
+    if (nothingToDecide)
+        return Indeterminate;
+
+    m_UserAction = UserReject;
+
+    return True;
+}
+
+// Records that the user accepted the item. Returns True only in the Create
+// stage (and resets the action), False in earlier stages, Indeterminate when
+// there is nothing to accept.
+ed::CreateItemAction::Result ed::CreateItemAction::AcceptItem()
+{
+    IM_ASSERT(m_InActive);
+
+    const bool nothingToDecide = (!m_InActive || m_CurrentStage == None || m_ItemType == NoItem);
+    if (nothingToDecide)
+        return Indeterminate;
+
+    m_UserAction = UserAccept;
+
+    if (m_CurrentStage != Create)
+        return False;
+
+    m_NextStage = None;
+    m_ItemType = NoItem;
+    m_LinkStart = nullptr;
+    m_LinkEnd = nullptr;
+    return True;
+}
+
+ed::CreateItemAction::Result ed::CreateItemAction::QueryLink(PinId* startId, PinId* endId)
+{
+    IM_ASSERT(m_InActive);
+
+    if (!m_InActive ||
m_CurrentStage == None || m_ItemType != Link) + return Indeterminate; + + auto linkStartId = m_LinkStart->m_ID; + auto linkEndId = m_LinkEnd->m_ID; + + *startId = linkStartId; + *endId = linkEndId; + + Editor->SetUserContext(true); + + if (!m_IsInGlobalSpace) + { + Editor->Suspend(SuspendFlags::KeepSplitter); + + auto rect = Editor->GetRect(); + ImGui::PushClipRect(rect.Min + ImVec2(1, 1), rect.Max - ImVec2(1, 1), false); + m_IsInGlobalSpace = true; + } + + return True; +} + +ed::CreateItemAction::Result ed::CreateItemAction::QueryNode(PinId* pinId) +{ + IM_ASSERT(m_InActive); + + if (!m_InActive || m_CurrentStage == None || m_ItemType != Node) + return Indeterminate; + + *pinId = m_LinkStart ? m_LinkStart->m_ID : 0; + + Editor->SetUserContext(true); + + if (!m_IsInGlobalSpace) + { + Editor->Suspend(SuspendFlags::KeepSplitter); + + auto rect = Editor->GetRect(); + ImGui::PushClipRect(rect.Min + ImVec2(1, 1), rect.Max - ImVec2(1, 1), false); + m_IsInGlobalSpace = true; + } + + return True; +} + + + + +//------------------------------------------------------------------------------ +// +// Delete Items Action +// +//------------------------------------------------------------------------------ +ed::DeleteItemsAction::DeleteItemsAction(EditorContext* editor): + EditorAction(editor), + m_IsActive(false), + m_InInteraction(false), + m_CurrentItemType(Unknown), + m_UserAction(Undetermined) +{ +} + +ed::EditorAction::AcceptResult ed::DeleteItemsAction::Accept(const Control& control) +{ + IM_ASSERT(!m_IsActive); + + if (m_IsActive) + return False; + + auto addDeadLinks = [this]() + { + vector links; + for (auto object : m_CandidateObjects) + { + auto node = object->AsNode(); + if (!node) + continue; + + Editor->FindLinksForNode(node->m_ID, links, true); + } + if (!links.empty()) + { + std::sort(links.begin(), links.end()); + links.erase(std::unique(links.begin(), links.end()), links.end()); + m_CandidateObjects.insert(m_CandidateObjects.end(), links.begin(), links.end()); 
+ } + }; + + auto& io = ImGui::GetIO(); + if (ImGui::IsWindowFocused() && ImGui::IsKeyPressed(ImGui::GetKeyIndex(ImGuiKey_Delete)) && Editor->AreShortcutsEnabled()) + { + auto& selection = Editor->GetSelectedObjects(); + if (!selection.empty()) + { + m_CandidateObjects = selection; + addDeadLinks(); + m_IsActive = true; + return True; + } + } + else if (control.ClickedLink && io.KeyAlt) + { + m_CandidateObjects.clear(); + m_CandidateObjects.push_back(control.ClickedLink); + m_IsActive = true; + return True; + } + + else if (!m_ManuallyDeletedObjects.empty()) + { + m_CandidateObjects = m_ManuallyDeletedObjects; + m_ManuallyDeletedObjects.clear(); + addDeadLinks(); + m_IsActive = true; + return True; + } + + return m_IsActive ? True : False; +} + +bool ed::DeleteItemsAction::Process(const Control& control) +{ + IM_UNUSED(control); + + if (!m_IsActive) + return false; + + m_IsActive = false; + return true; +} + +void ed::DeleteItemsAction::ShowMetrics() +{ + EditorAction::ShowMetrics(); + + //auto getObjectName = [](Object* object) + //{ + // if (!object) return ""; + // else if (object->AsNode()) return "Node"; + // else if (object->AsPin()) return "Pin"; + // else if (object->AsLink()) return "Link"; + // else return ""; + //}; + + ImGui::Text("%s:", GetName()); + ImGui::Text(" Active: %s", m_IsActive ? "yes" : "no"); + //ImGui::Text(" Node: %s (%d)", getObjectName(DeleteItemsgedNode), DeleteItemsgedNode ? 
DeleteItemsgedNode->ID : 0); +} + +bool ed::DeleteItemsAction::Add(Object* object) +{ + if (Editor->GetCurrentAction() != nullptr) + return false; + + m_ManuallyDeletedObjects.push_back(object); + + return true; +} + +bool ed::DeleteItemsAction::Begin() +{ + if (!m_IsActive) + return false; + + IM_ASSERT(!m_InInteraction); + m_InInteraction = true; + + m_CurrentItemType = Unknown; + m_UserAction = Undetermined; + + return m_IsActive; +} + +void ed::DeleteItemsAction::End() +{ + if (!m_IsActive) + return; + + IM_ASSERT(m_InInteraction); + m_InInteraction = false; +} + +bool ed::DeleteItemsAction::QueryLink(LinkId* linkId, PinId* startId, PinId* endId) +{ + ObjectId objectId; + if (!QueryItem(&objectId, Link)) + return false; + + if (auto id = objectId.AsLinkId()) + *linkId = id; + else + return false; + + if (startId || endId) + { + auto link = Editor->FindLink(*linkId); + if (startId) + *startId = link->m_StartPin->m_ID; + if (endId) + *endId = link->m_EndPin->m_ID; + } + + return true; +} + +bool ed::DeleteItemsAction::QueryNode(NodeId* nodeId) +{ + ObjectId objectId; + if (!QueryItem(&objectId, Node)) + return false; + + if (auto id = objectId.AsNodeId()) + *nodeId = id; + else + return false; + + return true; +} + +bool ed::DeleteItemsAction::QueryItem(ObjectId* itemId, IteratorType itemType) +{ + if (!m_InInteraction) + return false; + + if (m_CurrentItemType != itemType) + { + m_CurrentItemType = itemType; + m_CandidateItemIndex = 0; + } + else if (m_UserAction == Undetermined) + { + RejectItem(); + } + + m_UserAction = Undetermined; + + auto itemCount = (int)m_CandidateObjects.size(); + while (m_CandidateItemIndex < itemCount) + { + auto item = m_CandidateObjects[m_CandidateItemIndex]; + if (itemType == Node) + { + if (auto node = item->AsNode()) + { + *itemId = node->m_ID; + return true; + } + } + else if (itemType == Link) + { + if (auto link = item->AsLink()) + { + *itemId = link->m_ID; + return true; + } + } + + ++m_CandidateItemIndex; + } + + if 
(m_CandidateItemIndex == itemCount) + m_CurrentItemType = Unknown; + + return false; +} + +bool ed::DeleteItemsAction::AcceptItem() +{ + if (!m_InInteraction) + return false; + + m_UserAction = Accepted; + + RemoveItem(); + + return true; +} + +void ed::DeleteItemsAction::RejectItem() +{ + if (!m_InInteraction) + return; + + m_UserAction = Rejected; + + RemoveItem(); +} + +void ed::DeleteItemsAction::RemoveItem() +{ + auto item = m_CandidateObjects[m_CandidateItemIndex]; + m_CandidateObjects.erase(m_CandidateObjects.begin() + m_CandidateItemIndex); + + Editor->DeselectObject(item); + + if (m_CurrentItemType == Link) + Editor->NotifyLinkDeleted(item->AsLink()); +} + + + + +//------------------------------------------------------------------------------ +// +// Node Builder +// +//------------------------------------------------------------------------------ +ed::NodeBuilder::NodeBuilder(EditorContext* editor): + Editor(editor), + m_CurrentNode(nullptr), + m_CurrentPin(nullptr) +{ +} + +ed::NodeBuilder::~NodeBuilder() +{ + m_Splitter.ClearFreeMemory(); + m_PinSplitter.ClearFreeMemory(); +} + +void ed::NodeBuilder::Begin(NodeId nodeId) +{ + IM_ASSERT(nullptr == m_CurrentNode); + + m_CurrentNode = Editor->GetNode(nodeId); + + if (m_CurrentNode->m_RestoreState) + { + Editor->RestoreNodeState(m_CurrentNode); + m_CurrentNode->m_RestoreState = false; + } + + if (m_CurrentNode->m_CenterOnScreen) + { + auto bounds = Editor->GetViewRect(); + auto offset = bounds.GetCenter() - m_CurrentNode->m_Bounds.GetCenter(); + + if (ImLengthSqr(offset) > 0) + { + if (::IsGroup(m_CurrentNode)) + { + std::vector groupedNodes; + m_CurrentNode->GetGroupedNodes(groupedNodes); + groupedNodes.push_back(m_CurrentNode); + + for (auto node : groupedNodes) + { + node->m_Bounds.Translate(ImFloor(offset)); + node->m_GroupBounds.Translate(ImFloor(offset)); + Editor->MakeDirty(SaveReasonFlags::Position | SaveReasonFlags::User, node); + } + } + else + { + 
m_CurrentNode->m_Bounds.Translate(ImFloor(offset)); + m_CurrentNode->m_GroupBounds.Translate(ImFloor(offset)); + Editor->MakeDirty(SaveReasonFlags::Position | SaveReasonFlags::User, m_CurrentNode); + } + } + + m_CurrentNode->m_CenterOnScreen = false; + } + + // Position node on screen + ImGui::SetCursorScreenPos(m_CurrentNode->m_Bounds.Min); + + auto& editorStyle = Editor->GetStyle(); + + const auto alpha = ImGui::GetStyle().Alpha; + + m_CurrentNode->m_IsLive = true; + m_CurrentNode->m_LastPin = nullptr; + m_CurrentNode->m_Color = Editor->GetColor(StyleColor_NodeBg, alpha); + m_CurrentNode->m_BorderColor = Editor->GetColor(StyleColor_NodeBorder, alpha); + m_CurrentNode->m_BorderWidth = editorStyle.NodeBorderWidth; + m_CurrentNode->m_Rounding = editorStyle.NodeRounding; + m_CurrentNode->m_GroupColor = Editor->GetColor(StyleColor_GroupBg, alpha); + m_CurrentNode->m_GroupBorderColor = Editor->GetColor(StyleColor_GroupBorder, alpha); + m_CurrentNode->m_GroupBorderWidth = editorStyle.GroupBorderWidth; + m_CurrentNode->m_GroupRounding = editorStyle.GroupRounding; + + m_IsGroup = false; + + // Grow channel list and select user channel + if (auto drawList = ImGui::GetWindowDrawList()) + { + m_CurrentNode->m_Channel = drawList->_Splitter._Count; + ImDrawList_ChannelsGrow(drawList, drawList->_Splitter._Count + c_ChannelsPerNode); + drawList->ChannelsSetCurrent(m_CurrentNode->m_Channel + c_NodeContentChannel); + + m_Splitter.Clear(); + ImDrawList_SwapSplitter(drawList, m_Splitter); + } + + // Begin outer group + ImGui::BeginGroup(); + + // Apply frame padding. Begin inner group if necessary. 
+ if (editorStyle.NodePadding.x != 0 || editorStyle.NodePadding.y != 0 || editorStyle.NodePadding.z != 0 || editorStyle.NodePadding.w != 0) + { + ImGui::SetCursorPos(ImGui::GetCursorPos() + ImVec2(editorStyle.NodePadding.x, editorStyle.NodePadding.y)); + ImGui::BeginGroup(); + } +} + +void ed::NodeBuilder::End() +{ + IM_ASSERT(nullptr != m_CurrentNode); + + if (auto drawList = ImGui::GetWindowDrawList()) + { + IM_ASSERT(drawList->_Splitter._Count == 1); // Did you forget to call drawList->ChannelsMerge()? + ImDrawList_SwapSplitter(drawList, m_Splitter); + } + + // Apply frame padding. This must be done in this convoluted way if outer group + // size must contain inner group padding. + auto& editorStyle = Editor->GetStyle(); + if (editorStyle.NodePadding.x != 0 || editorStyle.NodePadding.y != 0 || editorStyle.NodePadding.z != 0 || editorStyle.NodePadding.w != 0) + { + ImGui::EndGroup(); + ImGui::SameLine(0, editorStyle.NodePadding.z); + ImGui::Dummy(ImVec2(0, 0)); + ImGui::SetCursorPosY(ImGui::GetCursorPosY() + editorStyle.NodePadding.w); + } + + // End outer group. + ImGui::EndGroup(); + + m_NodeRect = ImGui_GetItemRect(); + m_NodeRect.Floor(); + + if (m_CurrentNode->m_Bounds.GetSize() != m_NodeRect.GetSize()) + { + m_CurrentNode->m_Bounds.Max = m_CurrentNode->m_Bounds.Min + m_NodeRect.GetSize(); + Editor->MakeDirty(SaveReasonFlags::Size, m_CurrentNode); + } + + if (m_IsGroup) + { + // Groups cannot have pins. Discard them.
+ for (auto pin = m_CurrentNode->m_LastPin; pin; pin = pin->m_PreviousPin) + pin->Reset(); + + m_CurrentNode->m_Type = NodeType::Group; + m_CurrentNode->m_GroupBounds = m_GroupBounds; + m_CurrentNode->m_LastPin = nullptr; + } + else + m_CurrentNode->m_Type = NodeType::Node; + + m_CurrentNode = nullptr; +} + +void ed::NodeBuilder::BeginPin(PinId pinId, PinKind kind) +{ + IM_ASSERT(nullptr != m_CurrentNode); + IM_ASSERT(nullptr == m_CurrentPin); + IM_ASSERT(false == m_IsGroup); + + auto& editorStyle = Editor->GetStyle(); + + m_CurrentPin = Editor->GetPin(pinId, kind); + m_CurrentPin->m_Node = m_CurrentNode; + + m_CurrentPin->m_IsLive = true; + m_CurrentPin->m_Color = Editor->GetColor(StyleColor_PinRect); + m_CurrentPin->m_BorderColor = Editor->GetColor(StyleColor_PinRectBorder); + m_CurrentPin->m_BorderWidth = editorStyle.PinBorderWidth; + m_CurrentPin->m_Rounding = editorStyle.PinRounding; + m_CurrentPin->m_Corners = static_cast(editorStyle.PinCorners); + m_CurrentPin->m_Radius = editorStyle.PinRadius; + m_CurrentPin->m_ArrowSize = editorStyle.PinArrowSize; + m_CurrentPin->m_ArrowWidth = editorStyle.PinArrowWidth; + m_CurrentPin->m_Dir = kind == PinKind::Output ? editorStyle.SourceDirection : editorStyle.TargetDirection; + m_CurrentPin->m_Strength = editorStyle.LinkStrength; + + m_CurrentPin->m_PreviousPin = m_CurrentNode->m_LastPin; + m_CurrentNode->m_LastPin = m_CurrentPin; + + m_PivotAlignment = editorStyle.PivotAlignment; + m_PivotSize = editorStyle.PivotSize; + m_PivotScale = editorStyle.PivotScale; + m_ResolvePinRect = true; + m_ResolvePivot = true; + + if (auto drawList = ImGui::GetWindowDrawList()) + { + m_PinSplitter.Clear(); + ImDrawList_SwapSplitter(drawList, m_PinSplitter); + } + + ImGui::BeginGroup(); +} + +void ed::NodeBuilder::EndPin() +{ + IM_ASSERT(nullptr != m_CurrentPin); + + if (auto drawList = ImGui::GetWindowDrawList()) + { + IM_ASSERT(drawList->_Splitter._Count == 1); // Did you forget to call drawList->ChannelsMerge()?
+ ImDrawList_SwapSplitter(drawList, m_PinSplitter); + } + + ImGui::EndGroup(); + + if (m_ResolvePinRect) + m_CurrentPin->m_Bounds = ImGui_GetItemRect(); + + if (m_ResolvePivot) + { + auto& pinRect = m_CurrentPin->m_Bounds; + + if (m_PivotSize.x < 0) + m_PivotSize.x = pinRect.GetWidth(); + if (m_PivotSize.y < 0) + m_PivotSize.y = pinRect.GetHeight(); + + m_CurrentPin->m_Pivot.Min = pinRect.Min + ImMul(pinRect.GetSize(), m_PivotAlignment); + m_CurrentPin->m_Pivot.Max = m_CurrentPin->m_Pivot.Min + ImMul(m_PivotSize, m_PivotScale); + } + + // #debug: Draw pin bounds + //ImGui::GetWindowDrawList()->AddRect(m_CurrentPin->m_Bounds.Min, m_CurrentPin->m_Bounds.Max, IM_COL32(255, 255, 0, 255)); + + // #debug: Draw pin pivot rectangle + //ImGui::GetWindowDrawList()->AddRect(m_CurrentPin->m_Pivot.Min, m_CurrentPin->m_Pivot.Max, IM_COL32(255, 0, 255, 255)); + + m_CurrentPin = nullptr; +} + +void ed::NodeBuilder::PinRect(const ImVec2& a, const ImVec2& b) +{ + IM_ASSERT(nullptr != m_CurrentPin); + + m_CurrentPin->m_Bounds = ImRect(a, b); + m_CurrentPin->m_Bounds.Floor(); + m_ResolvePinRect = false; +} + +void ed::NodeBuilder::PinPivotRect(const ImVec2& a, const ImVec2& b) +{ + IM_ASSERT(nullptr != m_CurrentPin); + + m_CurrentPin->m_Pivot = ImRect(a, b); + m_ResolvePivot = false; +} + +void ed::NodeBuilder::PinPivotSize(const ImVec2& size) +{ + IM_ASSERT(nullptr != m_CurrentPin); + + m_PivotSize = size; + m_ResolvePivot = true; +} + +void ed::NodeBuilder::PinPivotScale(const ImVec2& scale) +{ + IM_ASSERT(nullptr != m_CurrentPin); + + m_PivotScale = scale; + m_ResolvePivot = true; +} + +void ed::NodeBuilder::PinPivotAlignment(const ImVec2& alignment) +{ + IM_ASSERT(nullptr != m_CurrentPin); + + m_PivotAlignment = alignment; + m_ResolvePivot = true; +} + +void ed::NodeBuilder::Group(const ImVec2& size) +{ + IM_ASSERT(nullptr != m_CurrentNode); + IM_ASSERT(nullptr == m_CurrentPin); + IM_ASSERT(false == m_IsGroup); + + m_IsGroup = true; + + if (IsGroup(m_CurrentNode)) + 
+ ImGui::Dummy(m_CurrentNode->m_GroupBounds.GetSize()); + else + ImGui::Dummy(size); + + m_GroupBounds = ImGui_GetItemRect(); + m_GroupBounds.Floor(); +} + +ImDrawList* ed::NodeBuilder::GetUserBackgroundDrawList() const +{ + return GetUserBackgroundDrawList(m_CurrentNode); +} + +ImDrawList* ed::NodeBuilder::GetUserBackgroundDrawList(Node* node) const +{ + if (node && node->m_IsLive) + { + auto drawList = ImGui::GetWindowDrawList(); + drawList->ChannelsSetCurrent(node->m_Channel + c_NodeUserBackgroundChannel); + return drawList; + } + else + return nullptr; +} + + + + +//------------------------------------------------------------------------------ +// +// Hint Builder +// +//------------------------------------------------------------------------------ +ed::HintBuilder::HintBuilder(EditorContext* editor): + Editor(editor), + m_IsActive(false), + m_CurrentNode(nullptr) +{ +} + +bool ed::HintBuilder::Begin(NodeId nodeId) +{ + IM_ASSERT(nullptr == m_CurrentNode); + + auto& view = Editor->GetView(); + auto& rect = Editor->GetRect(); + + const float c_min_zoom = 0.75f; + const float c_max_zoom = 0.50f; + + if (view.Scale > 0.75f) + return false; + + auto node = Editor->FindNode(nodeId); + if (!IsGroup(node)) + return false; + + m_CurrentNode = node; + + m_LastChannel = ImGui::GetWindowDrawList()->_Splitter._Current; + + Editor->Suspend(SuspendFlags::KeepSplitter); + + const auto alpha = ImMax(0.0f, std::min(1.0f, (view.Scale - c_min_zoom) / (c_max_zoom - c_min_zoom))); + + ImGui::GetWindowDrawList()->ChannelsSetCurrent(c_UserChannel_HintsBackground); + ImGui::PushClipRect(rect.Min + ImVec2(1, 1), rect.Max - ImVec2(1, 1), false); + + ImGui::GetWindowDrawList()->ChannelsSetCurrent(c_UserChannel_Hints); + ImGui::PushClipRect(rect.Min + ImVec2(1, 1), rect.Max - ImVec2(1, 1), false); + + ImGui::PushStyleVar(ImGuiStyleVar_Alpha, alpha); + + m_IsActive = true; + + return true; +} + +void ed::HintBuilder::End() +{ + if (!m_IsActive) + return; + + ImGui::PopStyleVar(); + +
ImGui::GetWindowDrawList()->ChannelsSetCurrent(c_UserChannel_Hints); + ImGui::PopClipRect(); + + ImGui::GetWindowDrawList()->ChannelsSetCurrent(c_UserChannel_HintsBackground); + ImGui::PopClipRect(); + + ImGui::GetWindowDrawList()->ChannelsSetCurrent(m_LastChannel); + + Editor->Resume(SuspendFlags::KeepSplitter); + + m_IsActive = false; + m_CurrentNode = nullptr; +} + +ImVec2 ed::HintBuilder::GetGroupMin() +{ + IM_ASSERT(nullptr != m_CurrentNode); + + return Editor->ToScreen(m_CurrentNode->m_Bounds.Min); +} + +ImVec2 ed::HintBuilder::GetGroupMax() +{ + IM_ASSERT(nullptr != m_CurrentNode); + + return Editor->ToScreen(m_CurrentNode->m_Bounds.Max); +} + +ImDrawList* ed::HintBuilder::GetForegroundDrawList() +{ + IM_ASSERT(nullptr != m_CurrentNode); + + auto drawList = ImGui::GetWindowDrawList(); + + drawList->ChannelsSetCurrent(c_UserChannel_Hints); + + return drawList; +} + +ImDrawList* ed::HintBuilder::GetBackgroundDrawList() +{ + IM_ASSERT(nullptr != m_CurrentNode); + + auto drawList = ImGui::GetWindowDrawList(); + + drawList->ChannelsSetCurrent(c_UserChannel_HintsBackground); + + return drawList; +} + + + + +//------------------------------------------------------------------------------ +// +// Style +// +//------------------------------------------------------------------------------ +void ed::Style::PushColor(StyleColor colorIndex, const ImVec4& color) +{ + ColorModifier modifier; + modifier.Index = colorIndex; + modifier.Value = Colors[colorIndex]; + m_ColorStack.push_back(modifier); + Colors[colorIndex] = color; +} + +void ed::Style::PopColor(int count) // Restores the `count` most recently pushed colors, newest first; pops beyond the pushed depth are ignored. +{ + while (count > 0 && !m_ColorStack.empty()) // stop once the stack is exhausted instead of calling back() on an empty stack + { + auto& modifier = m_ColorStack.back(); + Colors[modifier.Index] = modifier.Value; + m_ColorStack.pop_back(); + --count; + } +} + +void ed::Style::PushVar(StyleVar varIndex, float value) +{ + auto* var = GetVarFloatAddr(varIndex); + IM_ASSERT(var != nullptr); + VarModifier modifier; + modifier.Index = varIndex; + modifier.Value = ImVec4(*var, 0, 0, 0); + *var = value; +
m_VarStack.push_back(modifier); +} + +void ed::Style::PushVar(StyleVar varIndex, const ImVec2& value) +{ + auto* var = GetVarVec2Addr(varIndex); + IM_ASSERT(var != nullptr); + VarModifier modifier; + modifier.Index = varIndex; + modifier.Value = ImVec4(var->x, var->y, 0, 0); + *var = value; + m_VarStack.push_back(modifier); +} + +void ed::Style::PushVar(StyleVar varIndex, const ImVec4& value) +{ + auto* var = GetVarVec4Addr(varIndex); + IM_ASSERT(var != nullptr); + VarModifier modifier; + modifier.Index = varIndex; + modifier.Value = *var; + *var = value; + m_VarStack.push_back(modifier); +} + +void ed::Style::PopVar(int count) // Restores the `count` most recently pushed style variables, newest first; pops beyond the pushed depth are ignored. +{ + while (count > 0 && !m_VarStack.empty()) // stop once the stack is exhausted instead of calling back() on an empty stack + { + auto& modifier = m_VarStack.back(); + if (auto floatValue = GetVarFloatAddr(modifier.Index)) + *floatValue = modifier.Value.x; + else if (auto vec2Value = GetVarVec2Addr(modifier.Index)) + *vec2Value = ImVec2(modifier.Value.x, modifier.Value.y); + else if (auto vec4Value = GetVarVec4Addr(modifier.Index)) + *vec4Value = modifier.Value; + m_VarStack.pop_back(); + --count; + } +} + +const char* ed::Style::GetColorName(StyleColor colorIndex) const +{ + switch (colorIndex) + { + case StyleColor_Bg: return "Bg"; + case StyleColor_Grid: return "Grid"; + case StyleColor_NodeBg: return "NodeBg"; + case StyleColor_NodeBorder: return "NodeBorder"; + case StyleColor_HovNodeBorder: return "HoveredNodeBorder"; + case StyleColor_SelNodeBorder: return "SelNodeBorder"; + case StyleColor_NodeSelRect: return "NodeSelRect"; + case StyleColor_NodeSelRectBorder: return "NodeSelRectBorder"; + case StyleColor_HovLinkBorder: return "HoveredLinkBorder"; + case StyleColor_SelLinkBorder: return "SelLinkBorder"; + case StyleColor_LinkSelRect: return "LinkSelRect"; + case StyleColor_LinkSelRectBorder: return "LinkSelRectBorder"; + case StyleColor_PinRect: return "PinRect"; + case StyleColor_PinRectBorder: return "PinRectBorder"; + case StyleColor_Flow: return "Flow"; + case StyleColor_FlowMarker: return "FlowMarker"; + case
StyleColor_GroupBg: return "GroupBg"; + case StyleColor_GroupBorder: return "GroupBorder"; + case StyleColor_Count: break; + } + + IM_ASSERT(0); + return "Unknown"; +} + +float* ed::Style::GetVarFloatAddr(StyleVar idx) +{ + switch (idx) + { + case StyleVar_NodeRounding: return &NodeRounding; + case StyleVar_NodeBorderWidth: return &NodeBorderWidth; + case StyleVar_HoveredNodeBorderWidth: return &HoveredNodeBorderWidth; + case StyleVar_SelectedNodeBorderWidth: return &SelectedNodeBorderWidth; + case StyleVar_PinRounding: return &PinRounding; + case StyleVar_PinBorderWidth: return &PinBorderWidth; + case StyleVar_LinkStrength: return &LinkStrength; + case StyleVar_ScrollDuration: return &ScrollDuration; + case StyleVar_FlowMarkerDistance: return &FlowMarkerDistance; + case StyleVar_FlowSpeed: return &FlowSpeed; + case StyleVar_FlowDuration: return &FlowDuration; + case StyleVar_PinCorners: return &PinCorners; + case StyleVar_PinRadius: return &PinRadius; + case StyleVar_PinArrowSize: return &PinArrowSize; + case StyleVar_PinArrowWidth: return &PinArrowWidth; + case StyleVar_GroupRounding: return &GroupRounding; + case StyleVar_GroupBorderWidth: return &GroupBorderWidth; + default: return nullptr; + } +} + +ImVec2* ed::Style::GetVarVec2Addr(StyleVar idx) +{ + switch (idx) + { + case StyleVar_SourceDirection: return &SourceDirection; + case StyleVar_TargetDirection: return &TargetDirection; + case StyleVar_PivotAlignment: return &PivotAlignment; + case StyleVar_PivotSize: return &PivotSize; + case StyleVar_PivotScale: return &PivotScale; + default: return nullptr; + } +} + +ImVec4* ed::Style::GetVarVec4Addr(StyleVar idx) +{ + switch (idx) + { + case StyleVar_NodePadding: return &NodePadding; + default: return nullptr; + } +} + + + + +//------------------------------------------------------------------------------ +// +// Config +// +//------------------------------------------------------------------------------ +ed::Config::Config(const ax::NodeEditor::Config* config) 
+{ + if (config) + *static_cast(this) = *config; +} + +std::string ed::Config::Load() +{ + std::string data; + + if (LoadSettings) + { + const auto size = LoadSettings(nullptr, UserPointer); + if (size > 0) + { + data.resize(size); + LoadSettings(const_cast(data.data()), UserPointer); + } + } + else if (SettingsFile) + { + std::ifstream file(SettingsFile); + if (file) + { + file.seekg(0, std::ios_base::end); + auto size = static_cast(file.tellg()); + file.seekg(0, std::ios_base::beg); + + data.reserve(size); + data.assign(std::istreambuf_iterator(file), std::istreambuf_iterator()); + } + } + + return data; +} + +std::string ed::Config::LoadNode(NodeId nodeId) +{ + std::string data; + + if (LoadNodeSettings) + { + const auto size = LoadNodeSettings(nodeId, nullptr, UserPointer); + if (size > 0) + { + data.resize(size); + LoadNodeSettings(nodeId, const_cast(data.data()), UserPointer); + } + } + + return data; +} + +void ed::Config::BeginSave() +{ + if (BeginSaveSession) + BeginSaveSession(UserPointer); +} + +bool ed::Config::Save(const std::string& data, SaveReasonFlags flags) +{ + if (SaveSettings) + { + return SaveSettings(data.c_str(), data.size(), flags, UserPointer); + } + else if (SettingsFile) + { + std::ofstream settingsFile(SettingsFile); + if (settingsFile) + settingsFile << data; + + return !!settingsFile; + } + + return false; +} + +bool ed::Config::SaveNode(NodeId nodeId, const std::string& data, SaveReasonFlags flags) +{ + if (SaveNodeSettings) + return SaveNodeSettings(nodeId, data.c_str(), data.size(), flags, UserPointer); + + return false; +} + +void ed::Config::EndSave() +{ + if (EndSaveSession) + EndSaveSession(UserPointer); +} diff --git a/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_node_editor.h b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_node_editor.h new file mode 100644 index 0000000..02282a1 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_node_editor.h @@ -0,0 +1,444 @@ 
+//------------------------------------------------------------------------------ +// LICENSE +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// publish, and distribute this file as you see fit. +// +// CREDITS +// Written by Michal Cichon +//------------------------------------------------------------------------------ +# ifndef __IMGUI_NODE_EDITOR_H__ +# define __IMGUI_NODE_EDITOR_H__ +# pragma once + + +//------------------------------------------------------------------------------ +# include +# include // std::uintXX_t +# include // std::move + + +//------------------------------------------------------------------------------ +namespace ax { +namespace NodeEditor { + + +//------------------------------------------------------------------------------ +struct NodeId; +struct LinkId; +struct PinId; + + +//------------------------------------------------------------------------------ +enum class SaveReasonFlags: uint32_t +{ + None = 0x00000000, + Navigation = 0x00000001, + Position = 0x00000002, + Size = 0x00000004, + Selection = 0x00000008, + User = 0x00000010 +}; + +inline SaveReasonFlags operator |(SaveReasonFlags lhs, SaveReasonFlags rhs) { return static_cast(static_cast(lhs) | static_cast(rhs)); } +inline SaveReasonFlags operator &(SaveReasonFlags lhs, SaveReasonFlags rhs) { return static_cast(static_cast(lhs) & static_cast(rhs)); } + +using ConfigSaveSettings = bool (*)(const char* data, size_t size, SaveReasonFlags reason, void* userPointer); +using ConfigLoadSettings = size_t (*)(char* data, void* userPointer); + +using ConfigSaveNodeSettings = bool (*)(NodeId nodeId, const char* data, size_t size, SaveReasonFlags reason, void* userPointer); +using ConfigLoadNodeSettings = size_t (*)(NodeId nodeId, char* data, void* userPointer); + +using ConfigSession = void (*)(void* userPointer); + +struct Config +{ + const char* SettingsFile; + ConfigSession 
BeginSaveSession; + ConfigSession EndSaveSession; + ConfigSaveSettings SaveSettings; + ConfigLoadSettings LoadSettings; + ConfigSaveNodeSettings SaveNodeSettings; + ConfigLoadNodeSettings LoadNodeSettings; + void* UserPointer; + + Config() + : SettingsFile("NodeEditor.json") + , BeginSaveSession(nullptr) + , EndSaveSession(nullptr) + , SaveSettings(nullptr) + , LoadSettings(nullptr) + , SaveNodeSettings(nullptr) + , LoadNodeSettings(nullptr) + , UserPointer(nullptr) + { + } +}; + + +//------------------------------------------------------------------------------ +enum class PinKind +{ + Input, + Output +}; + + +//------------------------------------------------------------------------------ +enum StyleColor +{ + StyleColor_Bg, + StyleColor_Grid, + StyleColor_NodeBg, + StyleColor_NodeBorder, + StyleColor_HovNodeBorder, + StyleColor_SelNodeBorder, + StyleColor_NodeSelRect, + StyleColor_NodeSelRectBorder, + StyleColor_HovLinkBorder, + StyleColor_SelLinkBorder, + StyleColor_LinkSelRect, + StyleColor_LinkSelRectBorder, + StyleColor_PinRect, + StyleColor_PinRectBorder, + StyleColor_Flow, + StyleColor_FlowMarker, + StyleColor_GroupBg, + StyleColor_GroupBorder, + + StyleColor_Count +}; + +enum StyleVar +{ + StyleVar_NodePadding, + StyleVar_NodeRounding, + StyleVar_NodeBorderWidth, + StyleVar_HoveredNodeBorderWidth, + StyleVar_SelectedNodeBorderWidth, + StyleVar_PinRounding, + StyleVar_PinBorderWidth, + StyleVar_LinkStrength, + StyleVar_SourceDirection, + StyleVar_TargetDirection, + StyleVar_ScrollDuration, + StyleVar_FlowMarkerDistance, + StyleVar_FlowSpeed, + StyleVar_FlowDuration, + StyleVar_PivotAlignment, + StyleVar_PivotSize, + StyleVar_PivotScale, + StyleVar_PinCorners, + StyleVar_PinRadius, + StyleVar_PinArrowSize, + StyleVar_PinArrowWidth, + StyleVar_GroupRounding, + StyleVar_GroupBorderWidth, + + StyleVar_Count +}; + +struct Style +{ + ImVec4 NodePadding; + float NodeRounding; + float NodeBorderWidth; + float HoveredNodeBorderWidth; + float 
SelectedNodeBorderWidth; + float PinRounding; + float PinBorderWidth; + float LinkStrength; + ImVec2 SourceDirection; + ImVec2 TargetDirection; + float ScrollDuration; + float FlowMarkerDistance; + float FlowSpeed; + float FlowDuration; + ImVec2 PivotAlignment; + ImVec2 PivotSize; + ImVec2 PivotScale; + float PinCorners; + float PinRadius; + float PinArrowSize; + float PinArrowWidth; + float GroupRounding; + float GroupBorderWidth; + ImVec4 Colors[StyleColor_Count]; + + Style() + { + NodePadding = ImVec4(8, 8, 8, 8); + NodeRounding = 12.0f; + NodeBorderWidth = 1.5f; + HoveredNodeBorderWidth = 3.5f; + SelectedNodeBorderWidth = 3.5f; + PinRounding = 4.0f; + PinBorderWidth = 0.0f; + LinkStrength = 100.0f; + SourceDirection = ImVec2(1.0f, 0.0f); + TargetDirection = ImVec2(-1.0f, 0.0f); + ScrollDuration = 0.35f; + FlowMarkerDistance = 30.0f; + FlowSpeed = 150.0f; + FlowDuration = 2.0f; + PivotAlignment = ImVec2(0.5f, 0.5f); + PivotSize = ImVec2(0.0f, 0.0f); + PivotScale = ImVec2(1, 1); + PinCorners = ImDrawCornerFlags_All; + PinRadius = 0.0f; + PinArrowSize = 0.0f; + PinArrowWidth = 0.0f; + GroupRounding = 6.0f; + GroupBorderWidth = 1.0f; + + Colors[StyleColor_Bg] = ImColor( 60, 60, 70, 200); + Colors[StyleColor_Grid] = ImColor(120, 120, 120, 40); + Colors[StyleColor_NodeBg] = ImColor( 32, 32, 32, 200); + Colors[StyleColor_NodeBorder] = ImColor(255, 255, 255, 96); + Colors[StyleColor_HovNodeBorder] = ImColor( 50, 176, 255, 255); + Colors[StyleColor_SelNodeBorder] = ImColor(255, 176, 50, 255); + Colors[StyleColor_NodeSelRect] = ImColor( 5, 130, 255, 64); + Colors[StyleColor_NodeSelRectBorder] = ImColor( 5, 130, 255, 128); + Colors[StyleColor_HovLinkBorder] = ImColor( 50, 176, 255, 255); + Colors[StyleColor_SelLinkBorder] = ImColor(255, 176, 50, 255); + Colors[StyleColor_LinkSelRect] = ImColor( 5, 130, 255, 64); + Colors[StyleColor_LinkSelRectBorder] = ImColor( 5, 130, 255, 128); + Colors[StyleColor_PinRect] = ImColor( 60, 180, 255, 100); + 
Colors[StyleColor_PinRectBorder] = ImColor( 60, 180, 255, 128); + Colors[StyleColor_Flow] = ImColor(255, 128, 64, 255); + Colors[StyleColor_FlowMarker] = ImColor(255, 128, 64, 255); + Colors[StyleColor_GroupBg] = ImColor( 0, 0, 0, 160); + Colors[StyleColor_GroupBorder] = ImColor(255, 255, 255, 32); + } +}; + + +//------------------------------------------------------------------------------ +struct EditorContext; + + +//------------------------------------------------------------------------------ +void SetCurrentEditor(EditorContext* ctx); +EditorContext* GetCurrentEditor(); +EditorContext* CreateEditor(const Config* config = nullptr); +void DestroyEditor(EditorContext* ctx); + +Style& GetStyle(); +const char* GetStyleColorName(StyleColor colorIndex); + +void PushStyleColor(StyleColor colorIndex, const ImVec4& color); +void PopStyleColor(int count = 1); + +void PushStyleVar(StyleVar varIndex, float value); +void PushStyleVar(StyleVar varIndex, const ImVec2& value); +void PushStyleVar(StyleVar varIndex, const ImVec4& value); +void PopStyleVar(int count = 1); + +void Begin(const char* id, const ImVec2& size = ImVec2(0, 0)); +void End(); + +void BeginNode(NodeId id); +void BeginPin(PinId id, PinKind kind); +void PinRect(const ImVec2& a, const ImVec2& b); +void PinPivotRect(const ImVec2& a, const ImVec2& b); +void PinPivotSize(const ImVec2& size); +void PinPivotScale(const ImVec2& scale); +void PinPivotAlignment(const ImVec2& alignment); +void EndPin(); +void Group(const ImVec2& size); +void EndNode(); + +bool BeginGroupHint(NodeId nodeId); +ImVec2 GetGroupMin(); +ImVec2 GetGroupMax(); +ImDrawList* GetHintForegroundDrawList(); +ImDrawList* GetHintBackgroundDrawList(); +void EndGroupHint(); + +// TODO: Add a way to manage node background channels +ImDrawList* GetNodeBackgroundDrawList(NodeId nodeId); + +bool Link(LinkId id, PinId startPinId, PinId endPinId, const ImVec4& color = ImVec4(1, 1, 1, 1), float thickness = 1.0f); + +void Flow(LinkId linkId); + +bool 
BeginCreate(const ImVec4& color = ImVec4(1, 1, 1, 1), float thickness = 1.0f); +bool QueryNewLink(PinId* startId, PinId* endId); +bool QueryNewLink(PinId* startId, PinId* endId, const ImVec4& color, float thickness = 1.0f); +bool QueryNewNode(PinId* pinId); +bool QueryNewNode(PinId* pinId, const ImVec4& color, float thickness = 1.0f); +bool AcceptNewItem(); +bool AcceptNewItem(const ImVec4& color, float thickness = 1.0f); +void RejectNewItem(); +void RejectNewItem(const ImVec4& color, float thickness = 1.0f); +void EndCreate(); + +bool BeginDelete(); +bool QueryDeletedLink(LinkId* linkId, PinId* startId = nullptr, PinId* endId = nullptr); +bool QueryDeletedNode(NodeId* nodeId); +bool AcceptDeletedItem(); +void RejectDeletedItem(); +void EndDelete(); + +void SetNodePosition(NodeId nodeId, const ImVec2& editorPosition); +ImVec2 GetNodePosition(NodeId nodeId); +ImVec2 GetNodeSize(NodeId nodeId); +void CenterNodeOnScreen(NodeId nodeId); + +void RestoreNodeState(NodeId nodeId); + +void Suspend(); +void Resume(); +bool IsSuspended(); + +bool IsActive(); + +bool HasSelectionChanged(); +int GetSelectedObjectCount(); +int GetSelectedNodes(NodeId* nodes, int size); +int GetSelectedLinks(LinkId* links, int size); +void ClearSelection(); +void SelectNode(NodeId nodeId, bool append = false); +void SelectLink(LinkId linkId, bool append = false); +void DeselectNode(NodeId nodeId); +void DeselectLink(LinkId linkId); + +bool DeleteNode(NodeId nodeId); +bool DeleteLink(LinkId linkId); + +void NavigateToContent(float duration = -1); +void NavigateToSelection(bool zoomIn = false, float duration = -1); + +bool ShowNodeContextMenu(NodeId* nodeId); +bool ShowPinContextMenu(PinId* pinId); +bool ShowLinkContextMenu(LinkId* linkId); +bool ShowBackgroundContextMenu(); + +void EnableShortcuts(bool enable); +bool AreShortcutsEnabled(); + +bool BeginShortcut(); +bool AcceptCut(); +bool AcceptCopy(); +bool AcceptPaste(); +bool AcceptDuplicate(); +bool AcceptCreateNode(); +int 
GetActionContextSize(); +int GetActionContextNodes(NodeId* nodes, int size); +int GetActionContextLinks(LinkId* links, int size); +void EndShortcut(); + +float GetCurrentZoom(); + +NodeId GetDoubleClickedNode(); +PinId GetDoubleClickedPin(); +LinkId GetDoubleClickedLink(); +bool IsBackgroundClicked(); +bool IsBackgroundDoubleClicked(); + +bool PinHadAnyLinks(PinId pinId); + +ImVec2 GetScreenSize(); +ImVec2 ScreenToCanvas(const ImVec2& pos); +ImVec2 CanvasToScreen(const ImVec2& pos); + + + + + + + + + + +//------------------------------------------------------------------------------ +namespace Details { + +template +struct SafeType +{ + SafeType(T t) + : m_Value(std::move(t)) + { + } + + SafeType(const SafeType&) = default; + + template + SafeType( + const SafeType + < + typename std::enable_if::value, T2>::type, + typename std::enable_if::value, Tag2>::type + >&) = delete; + + SafeType& operator=(const SafeType&) = default; + + explicit operator T() const { return Get(); } + + T Get() const { return m_Value; } + +private: + T m_Value; +}; + + +template +struct SafePointerType + : SafeType +{ + static const Tag Invalid; + + using SafeType::SafeType; + + SafePointerType() + : SafePointerType(Invalid) + { + } + + template explicit SafePointerType(T* ptr): SafePointerType(reinterpret_cast(ptr)) {} + template T* AsPointer() const { return reinterpret_cast(this->Get()); } + + explicit operator bool() const { return *this != Invalid; } +}; + +template +const Tag SafePointerType::Invalid = { 0 }; + +template +inline bool operator==(const SafePointerType& lhs, const SafePointerType& rhs) +{ + return lhs.Get() == rhs.Get(); +} + +template +inline bool operator!=(const SafePointerType& lhs, const SafePointerType& rhs) +{ + return lhs.Get() != rhs.Get(); +} + +} // namespace Details + +struct NodeId final: Details::SafePointerType +{ + using SafePointerType::SafePointerType; +}; + +struct LinkId final: Details::SafePointerType +{ + using SafePointerType::SafePointerType; +}; 
+ +struct PinId final: Details::SafePointerType +{ + using SafePointerType::SafePointerType; +}; + + +//------------------------------------------------------------------------------ +} // namespace Editor +} // namespace ax + + +//------------------------------------------------------------------------------ +# endif // __IMGUI_NODE_EDITOR_H__ \ No newline at end of file diff --git a/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_node_editor_api.cpp b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_node_editor_api.cpp new file mode 100644 index 0000000..d468b4e --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_node_editor_api.cpp @@ -0,0 +1,637 @@ +//------------------------------------------------------------------------------ +// LICENSE +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// publish, and distribute this file as you see fit. 
+// +// CREDITS +// Written by Michal Cichon +//------------------------------------------------------------------------------ +# include "imgui_node_editor_internal.h" +# include + + +//------------------------------------------------------------------------------ +static ax::NodeEditor::Detail::EditorContext* s_Editor = nullptr; + + +//------------------------------------------------------------------------------ +template +static int BuildIdList(C& container, I* list, int listSize, F&& accept) +{ + if (list != nullptr) + { + int count = 0; + for (auto object : container) + { + if (listSize <= 0) + break; + + if (accept(object)) + { + list[count] = I(object->ID().AsPointer()); + ++count; + --listSize; + } + } + + return count; + } + else + return static_cast(std::count_if(container.begin(), container.end(), accept)); +} + + +//------------------------------------------------------------------------------ +ax::NodeEditor::EditorContext* ax::NodeEditor::CreateEditor(const Config* config) +{ + return reinterpret_cast(new ax::NodeEditor::Detail::EditorContext(config)); +} + +void ax::NodeEditor::DestroyEditor(EditorContext* ctx) +{ + if (GetCurrentEditor() == ctx) + SetCurrentEditor(nullptr); + + auto editor = reinterpret_cast(ctx); + + delete editor; +} + +void ax::NodeEditor::SetCurrentEditor(EditorContext* ctx) +{ + s_Editor = reinterpret_cast(ctx); +} + +ax::NodeEditor::EditorContext* ax::NodeEditor::GetCurrentEditor() +{ + return reinterpret_cast(s_Editor); +} + +ax::NodeEditor::Style& ax::NodeEditor::GetStyle() +{ + return s_Editor->GetStyle(); +} + +const char* ax::NodeEditor::GetStyleColorName(StyleColor colorIndex) +{ + return s_Editor->GetStyle().GetColorName(colorIndex); +} + +void ax::NodeEditor::PushStyleColor(StyleColor colorIndex, const ImVec4& color) +{ + s_Editor->GetStyle().PushColor(colorIndex, color); +} + +void ax::NodeEditor::PopStyleColor(int count) +{ + s_Editor->GetStyle().PopColor(count); +} + +void ax::NodeEditor::PushStyleVar(StyleVar 
varIndex, float value) +{ + s_Editor->GetStyle().PushVar(varIndex, value); +} + +void ax::NodeEditor::PushStyleVar(StyleVar varIndex, const ImVec2& value) +{ + s_Editor->GetStyle().PushVar(varIndex, value); +} + +void ax::NodeEditor::PushStyleVar(StyleVar varIndex, const ImVec4& value) +{ + s_Editor->GetStyle().PushVar(varIndex, value); +} + +void ax::NodeEditor::PopStyleVar(int count) +{ + s_Editor->GetStyle().PopVar(count); +} + +void ax::NodeEditor::Begin(const char* id, const ImVec2& size) +{ + s_Editor->Begin(id, size); +} + +void ax::NodeEditor::End() +{ + s_Editor->End(); +} + +void ax::NodeEditor::BeginNode(NodeId id) +{ + s_Editor->GetNodeBuilder().Begin(id); +} + +void ax::NodeEditor::BeginPin(PinId id, PinKind kind) +{ + s_Editor->GetNodeBuilder().BeginPin(id, kind); +} + +void ax::NodeEditor::PinRect(const ImVec2& a, const ImVec2& b) +{ + s_Editor->GetNodeBuilder().PinRect(a, b); +} + +void ax::NodeEditor::PinPivotRect(const ImVec2& a, const ImVec2& b) +{ + s_Editor->GetNodeBuilder().PinPivotRect(a, b); +} + +void ax::NodeEditor::PinPivotSize(const ImVec2& size) +{ + s_Editor->GetNodeBuilder().PinPivotSize(size); +} + +void ax::NodeEditor::PinPivotScale(const ImVec2& scale) +{ + s_Editor->GetNodeBuilder().PinPivotScale(scale); +} + +void ax::NodeEditor::PinPivotAlignment(const ImVec2& alignment) +{ + s_Editor->GetNodeBuilder().PinPivotAlignment(alignment); +} + +void ax::NodeEditor::EndPin() +{ + s_Editor->GetNodeBuilder().EndPin(); +} + +void ax::NodeEditor::Group(const ImVec2& size) +{ + s_Editor->GetNodeBuilder().Group(size); +} + +void ax::NodeEditor::EndNode() +{ + s_Editor->GetNodeBuilder().End(); +} + +bool ax::NodeEditor::BeginGroupHint(NodeId nodeId) +{ + return s_Editor->GetHintBuilder().Begin(nodeId); +} + +ImVec2 ax::NodeEditor::GetGroupMin() +{ + return s_Editor->GetHintBuilder().GetGroupMin(); +} + +ImVec2 ax::NodeEditor::GetGroupMax() +{ + return s_Editor->GetHintBuilder().GetGroupMax(); +} + +ImDrawList* 
ax::NodeEditor::GetHintForegroundDrawList() +{ + return s_Editor->GetHintBuilder().GetForegroundDrawList(); +} + +ImDrawList* ax::NodeEditor::GetHintBackgroundDrawList() +{ + return s_Editor->GetHintBuilder().GetBackgroundDrawList(); +} + +void ax::NodeEditor::EndGroupHint() +{ + s_Editor->GetHintBuilder().End(); +} + +ImDrawList* ax::NodeEditor::GetNodeBackgroundDrawList(NodeId nodeId) +{ + if (auto node = s_Editor->FindNode(nodeId)) + return s_Editor->GetNodeBuilder().GetUserBackgroundDrawList(node); + else + return nullptr; +} + +bool ax::NodeEditor::Link(LinkId id, PinId startPinId, PinId endPinId, const ImVec4& color/* = ImVec4(1, 1, 1, 1)*/, float thickness/* = 1.0f*/) +{ + return s_Editor->DoLink(id, startPinId, endPinId, ImColor(color), thickness); +} + +void ax::NodeEditor::Flow(LinkId linkId) +{ + if (auto link = s_Editor->FindLink(linkId)) + s_Editor->Flow(link); +} + +bool ax::NodeEditor::BeginCreate(const ImVec4& color, float thickness) +{ + auto& context = s_Editor->GetItemCreator(); + + if (context.Begin()) + { + context.SetStyle(ImColor(color), thickness); + return true; + } + else + return false; +} + +bool ax::NodeEditor::QueryNewLink(PinId* startId, PinId* endId) +{ + using Result = ax::NodeEditor::Detail::CreateItemAction::Result; + + auto& context = s_Editor->GetItemCreator(); + + return context.QueryLink(startId, endId) == Result::True; +} + +bool ax::NodeEditor::QueryNewLink(PinId* startId, PinId* endId, const ImVec4& color, float thickness) +{ + using Result = ax::NodeEditor::Detail::CreateItemAction::Result; + + auto& context = s_Editor->GetItemCreator(); + + auto result = context.QueryLink(startId, endId); + if (result != Result::Indeterminate) + context.SetStyle(ImColor(color), thickness); + + return result == Result::True; +} + +bool ax::NodeEditor::QueryNewNode(PinId* pinId) +{ + using Result = ax::NodeEditor::Detail::CreateItemAction::Result; + + auto& context = s_Editor->GetItemCreator(); + + return context.QueryNode(pinId) == 
Result::True; +} + +bool ax::NodeEditor::QueryNewNode(PinId* pinId, const ImVec4& color, float thickness) +{ + using Result = ax::NodeEditor::Detail::CreateItemAction::Result; + + auto& context = s_Editor->GetItemCreator(); + + auto result = context.QueryNode(pinId); + if (result != Result::Indeterminate) + context.SetStyle(ImColor(color), thickness); + + return result == Result::True; +} + +bool ax::NodeEditor::AcceptNewItem() +{ + using Result = ax::NodeEditor::Detail::CreateItemAction::Result; + + auto& context = s_Editor->GetItemCreator(); + + return context.AcceptItem() == Result::True; +} + +bool ax::NodeEditor::AcceptNewItem(const ImVec4& color, float thickness) +{ + using Result = ax::NodeEditor::Detail::CreateItemAction::Result; + + auto& context = s_Editor->GetItemCreator(); + + auto result = context.AcceptItem(); + if (result != Result::Indeterminate) + context.SetStyle(ImColor(color), thickness); + + return result == Result::True; +} + +void ax::NodeEditor::RejectNewItem() +{ + auto& context = s_Editor->GetItemCreator(); + + context.RejectItem(); +} + +void ax::NodeEditor::RejectNewItem(const ImVec4& color, float thickness) +{ + using Result = ax::NodeEditor::Detail::CreateItemAction::Result; + + auto& context = s_Editor->GetItemCreator(); + + if (context.RejectItem() != Result::Indeterminate) + context.SetStyle(ImColor(color), thickness); +} + +void ax::NodeEditor::EndCreate() +{ + auto& context = s_Editor->GetItemCreator(); + + context.End(); +} + +bool ax::NodeEditor::BeginDelete() +{ + auto& context = s_Editor->GetItemDeleter(); + + return context.Begin(); +} + +bool ax::NodeEditor::QueryDeletedLink(LinkId* linkId, PinId* startId, PinId* endId) +{ + auto& context = s_Editor->GetItemDeleter(); + + return context.QueryLink(linkId, startId, endId); +} + +bool ax::NodeEditor::QueryDeletedNode(NodeId* nodeId) +{ + auto& context = s_Editor->GetItemDeleter(); + + return context.QueryNode(nodeId); +} + +bool ax::NodeEditor::AcceptDeletedItem() +{ + auto& 
context = s_Editor->GetItemDeleter(); + + return context.AcceptItem(); +} + +void ax::NodeEditor::RejectDeletedItem() +{ + auto& context = s_Editor->GetItemDeleter(); + + context.RejectItem(); +} + +void ax::NodeEditor::EndDelete() +{ + auto& context = s_Editor->GetItemDeleter(); + + context.End(); +} + +void ax::NodeEditor::SetNodePosition(NodeId nodeId, const ImVec2& position) +{ + s_Editor->SetNodePosition(nodeId, position); +} + +ImVec2 ax::NodeEditor::GetNodePosition(NodeId nodeId) +{ + return s_Editor->GetNodePosition(nodeId); +} + +ImVec2 ax::NodeEditor::GetNodeSize(NodeId nodeId) +{ + return s_Editor->GetNodeSize(nodeId); +} + +void ax::NodeEditor::CenterNodeOnScreen(NodeId nodeId) +{ + if (auto node = s_Editor->FindNode(nodeId)) + node->CenterOnScreenInNextFrame(); +} + +void ax::NodeEditor::RestoreNodeState(NodeId nodeId) +{ + if (auto node = s_Editor->FindNode(nodeId)) + s_Editor->MarkNodeToRestoreState(node); +} + +void ax::NodeEditor::Suspend() +{ + s_Editor->Suspend(); +} + +void ax::NodeEditor::Resume() +{ + s_Editor->Resume(); +} + +bool ax::NodeEditor::IsSuspended() +{ + return s_Editor->IsSuspended(); +} + +bool ax::NodeEditor::IsActive() +{ + return s_Editor->IsActive(); +} + +bool ax::NodeEditor::HasSelectionChanged() +{ + return s_Editor->HasSelectionChanged(); +} + +int ax::NodeEditor::GetSelectedObjectCount() +{ + return (int)s_Editor->GetSelectedObjects().size(); +} + +int ax::NodeEditor::GetSelectedNodes(NodeId* nodes, int size) +{ + return BuildIdList(s_Editor->GetSelectedObjects(), nodes, size, [](auto object) + { + return object->AsNode() != nullptr; + }); +} + +int ax::NodeEditor::GetSelectedLinks(LinkId* links, int size) +{ + return BuildIdList(s_Editor->GetSelectedObjects(), links, size, [](auto object) + { + return object->AsLink() != nullptr; + }); +} + +void ax::NodeEditor::ClearSelection() +{ + s_Editor->ClearSelection(); +} + +void ax::NodeEditor::SelectNode(NodeId nodeId, bool append) +{ + if (auto node = 
s_Editor->FindNode(nodeId)) + { + if (append) + s_Editor->SelectObject(node); + else + s_Editor->SetSelectedObject(node); + } +} + +void ax::NodeEditor::SelectLink(LinkId linkId, bool append) +{ + if (auto link = s_Editor->FindLink(linkId)) + { + if (append) + s_Editor->SelectObject(link); + else + s_Editor->SetSelectedObject(link); + } +} + +void ax::NodeEditor::DeselectNode(NodeId nodeId) +{ + if (auto node = s_Editor->FindNode(nodeId)) + s_Editor->DeselectObject(node); +} + +void ax::NodeEditor::DeselectLink(LinkId linkId) +{ + if (auto link = s_Editor->FindLink(linkId)) + s_Editor->DeselectObject(link); +} + +bool ax::NodeEditor::DeleteNode(NodeId nodeId) +{ + if (auto node = s_Editor->FindNode(nodeId)) + return s_Editor->GetItemDeleter().Add(node); + else + return false; +} + +bool ax::NodeEditor::DeleteLink(LinkId linkId) +{ + if (auto link = s_Editor->FindLink(linkId)) + return s_Editor->GetItemDeleter().Add(link); + else + return false; +} + +void ax::NodeEditor::NavigateToContent(float duration) +{ + s_Editor->NavigateTo(s_Editor->GetContentBounds(), true, duration); +} + +void ax::NodeEditor::NavigateToSelection(bool zoomIn, float duration) +{ + s_Editor->NavigateTo(s_Editor->GetSelectionBounds(), zoomIn, duration); +} + +bool ax::NodeEditor::ShowNodeContextMenu(NodeId* nodeId) +{ + return s_Editor->GetContextMenu().ShowNodeContextMenu(nodeId); +} + +bool ax::NodeEditor::ShowPinContextMenu(PinId* pinId) +{ + return s_Editor->GetContextMenu().ShowPinContextMenu(pinId); +} + +bool ax::NodeEditor::ShowLinkContextMenu(LinkId* linkId) +{ + return s_Editor->GetContextMenu().ShowLinkContextMenu(linkId); +} + +bool ax::NodeEditor::ShowBackgroundContextMenu() +{ + return s_Editor->GetContextMenu().ShowBackgroundContextMenu(); +} + +void ax::NodeEditor::EnableShortcuts(bool enable) +{ + s_Editor->EnableShortcuts(enable); +} + +bool ax::NodeEditor::AreShortcutsEnabled() +{ + return s_Editor->AreShortcutsEnabled(); +} + +bool ax::NodeEditor::BeginShortcut() +{ + 
return s_Editor->GetShortcut().Begin(); +} + +bool ax::NodeEditor::AcceptCut() +{ + return s_Editor->GetShortcut().AcceptCut(); +} + +bool ax::NodeEditor::AcceptCopy() +{ + return s_Editor->GetShortcut().AcceptCopy(); +} + +bool ax::NodeEditor::AcceptPaste() +{ + return s_Editor->GetShortcut().AcceptPaste(); +} + +bool ax::NodeEditor::AcceptDuplicate() +{ + return s_Editor->GetShortcut().AcceptDuplicate(); +} + +bool ax::NodeEditor::AcceptCreateNode() +{ + return s_Editor->GetShortcut().AcceptCreateNode(); +} + +int ax::NodeEditor::GetActionContextSize() +{ + return static_cast(s_Editor->GetShortcut().m_Context.size()); +} + +int ax::NodeEditor::GetActionContextNodes(NodeId* nodes, int size) +{ + return BuildIdList(s_Editor->GetSelectedObjects(), nodes, size, [](auto object) + { + return object->AsNode() != nullptr; + }); +} + +int ax::NodeEditor::GetActionContextLinks(LinkId* links, int size) +{ + return BuildIdList(s_Editor->GetSelectedObjects(), links, size, [](auto object) + { + return object->AsLink() != nullptr; + }); +} + +void ax::NodeEditor::EndShortcut() +{ + return s_Editor->GetShortcut().End(); +} + +float ax::NodeEditor::GetCurrentZoom() +{ + return s_Editor->GetView().InvScale; +} + +ax::NodeEditor::NodeId ax::NodeEditor::GetDoubleClickedNode() +{ + return s_Editor->GetDoubleClickedNode(); +} + +ax::NodeEditor::PinId ax::NodeEditor::GetDoubleClickedPin() +{ + return s_Editor->GetDoubleClickedPin(); +} + +ax::NodeEditor::LinkId ax::NodeEditor::GetDoubleClickedLink() +{ + return s_Editor->GetDoubleClickedLink(); +} + +bool ax::NodeEditor::IsBackgroundClicked() +{ + return s_Editor->IsBackgroundClicked(); +} + +bool ax::NodeEditor::IsBackgroundDoubleClicked() +{ + return s_Editor->IsBackgroundDoubleClicked(); +} + +bool ax::NodeEditor::PinHadAnyLinks(PinId pinId) +{ + return s_Editor->PinHadAnyLinks(pinId); +} + +ImVec2 ax::NodeEditor::GetScreenSize() +{ + return s_Editor->GetRect().GetSize(); +} + +ImVec2 ax::NodeEditor::ScreenToCanvas(const ImVec2& 
pos) +{ + return s_Editor->ToCanvas(pos); +} + +ImVec2 ax::NodeEditor::CanvasToScreen(const ImVec2& pos) +{ + return s_Editor->ToScreen(pos); +} diff --git a/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_node_editor_internal.h b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_node_editor_internal.h new file mode 100644 index 0000000..cef1bd5 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_node_editor_internal.h @@ -0,0 +1,1474 @@ +//------------------------------------------------------------------------------ +// LICENSE +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// publish, and distribute this file as you see fit. +// +// CREDITS +// Written by Michal Cichon +//------------------------------------------------------------------------------ +# ifndef __IMGUI_NODE_EDITOR_INTERNAL_H__ +# define __IMGUI_NODE_EDITOR_INTERNAL_H__ +# pragma once + + +//------------------------------------------------------------------------------ +# include "imgui_node_editor.h" + + +//------------------------------------------------------------------------------ +# include +# define IMGUI_DEFINE_MATH_OPERATORS +# include +# include "imgui_extra_math.h" +# include "imgui_bezier_math.h" +# include "imgui_canvas.h" + +# include "crude_json.h" + +# include +# include + + +//------------------------------------------------------------------------------ +namespace ax { +namespace NodeEditor { +namespace Detail { + + +//------------------------------------------------------------------------------ +namespace ed = ax::NodeEditor::Detail; +namespace json = crude_json; + + +//------------------------------------------------------------------------------ +using std::vector; +using std::string; + + +//------------------------------------------------------------------------------ +void Log(const char* fmt, ...); + + 
+//------------------------------------------------------------------------------ +//inline ImRect ToRect(const ax::rectf& rect); +//inline ImRect ToRect(const ax::rect& rect); +inline ImRect ImGui_GetItemRect(); + + +//------------------------------------------------------------------------------ +// https://stackoverflow.com/a/36079786 +# define DECLARE_HAS_MEMBER(__trait_name__, __member_name__) \ + \ + template \ + class __trait_name__ \ + { \ + using check_type = ::std::remove_const_t<__boost_has_member_T__>; \ + struct no_type {char x[2];}; \ + using yes_type = char; \ + \ + struct base { void __member_name__() {}}; \ + struct mixin : public base, public check_type {}; \ + \ + template struct aux {}; \ + \ + template static no_type test(aux<&U::__member_name__>*); \ + template static yes_type test(...); \ + \ + public: \ + \ + static constexpr bool value = (sizeof(yes_type) == sizeof(test(0))); \ + } + +DECLARE_HAS_MEMBER(HasFringeScale, _FringeScale); + +# undef DECLARE_HAS_MEMBER + +struct FringeScaleRef +{ + // Overload is present when ImDrawList does have _FringeScale member variable. + template + static float& Get(typename std::enable_if::value, T>::type* drawList) + { + return drawList->_FringeScale; + } + + // Overload is present when ImDrawList does not have _FringeScale member variable. 
+ template + static float& Get(typename std::enable_if::value, T>::type*) + { + static float placeholder = 1.0f; + return placeholder; + } +}; + +static inline float& ImFringeScaleRef(ImDrawList* drawList) +{ + return FringeScaleRef::Get(drawList); +} + +struct FringeScaleScope +{ + + FringeScaleScope(float scale) + : m_LastFringeScale(ImFringeScaleRef(ImGui::GetWindowDrawList())) + { + ImFringeScaleRef(ImGui::GetWindowDrawList()) = scale; + } + + ~FringeScaleScope() + { + ImFringeScaleRef(ImGui::GetWindowDrawList()) = m_LastFringeScale; + } + +private: + float m_LastFringeScale; +}; + + +//------------------------------------------------------------------------------ +enum class ObjectType +{ + None, + Node, + Link, + Pin +}; + +using ax::NodeEditor::PinKind; +using ax::NodeEditor::StyleColor; +using ax::NodeEditor::StyleVar; +using ax::NodeEditor::SaveReasonFlags; + +using ax::NodeEditor::NodeId; +using ax::NodeEditor::PinId; +using ax::NodeEditor::LinkId; + +struct ObjectId final: Details::SafePointerType +{ + using Super = Details::SafePointerType; + using Super::Super; + + ObjectId(): Super(Invalid), m_Type(ObjectType::None) {} + ObjectId(PinId pinId): Super(pinId.AsPointer()), m_Type(ObjectType::Pin) {} + ObjectId(NodeId nodeId): Super(nodeId.AsPointer()), m_Type(ObjectType::Node) {} + ObjectId(LinkId linkId): Super(linkId.AsPointer()), m_Type(ObjectType::Link) {} + + explicit operator PinId() const { return AsPinId(); } + explicit operator NodeId() const { return AsNodeId(); } + explicit operator LinkId() const { return AsLinkId(); } + + PinId AsPinId() const { IM_ASSERT(IsPinId()); return PinId(AsPointer()); } + NodeId AsNodeId() const { IM_ASSERT(IsNodeId()); return NodeId(AsPointer()); } + LinkId AsLinkId() const { IM_ASSERT(IsLinkId()); return LinkId(AsPointer()); } + + bool IsPinId() const { return m_Type == ObjectType::Pin; } + bool IsNodeId() const { return m_Type == ObjectType::Node; } + bool IsLinkId() const { return m_Type == ObjectType::Link; } + 
+ ObjectType Type() const { return m_Type; } + +private: + ObjectType m_Type; +}; + +struct EditorContext; + +struct Node; +struct Pin; +struct Link; + +template +struct ObjectWrapper +{ + Id m_ID; + T* m_Object; + + T* operator->() { return m_Object; } + const T* operator->() const { return m_Object; } + + operator T*() { return m_Object; } + operator const T*() const { return m_Object; } + + bool operator<(const ObjectWrapper& rhs) const + { + return m_ID.AsPointer() < rhs.m_ID.AsPointer(); + } +}; + +struct Object +{ + enum DrawFlags + { + None = 0, + Hovered = 1, + Selected = 2 + }; + + inline friend DrawFlags operator|(DrawFlags lhs, DrawFlags rhs) { return static_cast(static_cast(lhs) | static_cast(rhs)); } + inline friend DrawFlags operator&(DrawFlags lhs, DrawFlags rhs) { return static_cast(static_cast(lhs) & static_cast(rhs)); } + inline friend DrawFlags& operator|=(DrawFlags& lhs, DrawFlags rhs) { lhs = lhs | rhs; return lhs; } + inline friend DrawFlags& operator&=(DrawFlags& lhs, DrawFlags rhs) { lhs = lhs & rhs; return lhs; } + + EditorContext* const Editor; + + bool m_IsLive; + + Object(EditorContext* editor) + : Editor(editor) + , m_IsLive(true) + { + } + + virtual ~Object() = default; + + virtual ObjectId ID() = 0; + + bool IsVisible() const + { + if (!m_IsLive) + return false; + + const auto bounds = GetBounds(); + + return ImGui::IsRectVisible(bounds.Min, bounds.Max); + } + + virtual void Reset() { m_IsLive = false; } + + virtual void Draw(ImDrawList* drawList, DrawFlags flags = None) = 0; + + virtual bool AcceptDrag() { return false; } + virtual void UpdateDrag(const ImVec2& offset) { IM_UNUSED(offset); } + virtual bool EndDrag() { return false; } + virtual ImVec2 DragStartLocation() { return GetBounds().Min; } + + virtual bool IsDraggable() { bool result = AcceptDrag(); EndDrag(); return result; } + virtual bool IsSelectable() { return false; } + + virtual bool TestHit(const ImVec2& point, float extraThickness = 0.0f) const + { + if (!m_IsLive) + 
return false; + + auto bounds = GetBounds(); + if (extraThickness > 0) + bounds.Expand(extraThickness); + + return bounds.Contains(point); + } + + virtual bool TestHit(const ImRect& rect, bool allowIntersect = true) const + { + if (!m_IsLive) + return false; + + const auto bounds = GetBounds(); + + return !ImRect_IsEmpty(bounds) && (allowIntersect ? bounds.Overlaps(rect) : rect.Contains(bounds)); + } + + virtual ImRect GetBounds() const = 0; + + virtual Node* AsNode() { return nullptr; } + virtual Pin* AsPin() { return nullptr; } + virtual Link* AsLink() { return nullptr; } +}; + +struct Pin final: Object +{ + using IdType = PinId; + + PinId m_ID; + PinKind m_Kind; + Node* m_Node; + ImRect m_Bounds; + ImRect m_Pivot; + Pin* m_PreviousPin; + ImU32 m_Color; + ImU32 m_BorderColor; + float m_BorderWidth; + float m_Rounding; + int m_Corners; + ImVec2 m_Dir; + float m_Strength; + float m_Radius; + float m_ArrowSize; + float m_ArrowWidth; + bool m_HasConnection; + bool m_HadConnection; + + Pin(EditorContext* editor, PinId id, PinKind kind) + : Object(editor) + , m_ID(id) + , m_Kind(kind) + , m_Node(nullptr) + , m_Bounds() + , m_PreviousPin(nullptr) + , m_Color(IM_COL32_WHITE) + , m_BorderColor(IM_COL32_BLACK) + , m_BorderWidth(0) + , m_Rounding(0) + , m_Corners(0) + , m_Dir(0, 0) + , m_Strength(0) + , m_Radius(0) + , m_ArrowSize(0) + , m_ArrowWidth(0) + , m_HasConnection(false) + , m_HadConnection(false) + { + } + + virtual ObjectId ID() override { return m_ID; } + + virtual void Reset() override final + { + m_HadConnection = m_HasConnection && m_IsLive; + m_HasConnection = false; + + Object::Reset(); + } + + virtual void Draw(ImDrawList* drawList, DrawFlags flags = None) override final; + + ImVec2 GetClosestPoint(const ImVec2& p) const; + ImLine GetClosestLine(const Pin* pin) const; + + virtual ImRect GetBounds() const override final { return m_Bounds; } + + virtual Pin* AsPin() override final { return this; } +}; + +enum class NodeType +{ + Node, + Group +}; + +enum 
class NodeRegion : uint8_t +{ + None = 0x00, + Top = 0x01, + Bottom = 0x02, + Left = 0x04, + Right = 0x08, + Center = 0x10, + Header = 0x20, + TopLeft = Top | Left, + TopRight = Top | Right, + BottomLeft = Bottom | Left, + BottomRight = Bottom | Right, +}; + +inline NodeRegion operator |(NodeRegion lhs, NodeRegion rhs) { return static_cast(static_cast(lhs) | static_cast(rhs)); } +inline NodeRegion operator &(NodeRegion lhs, NodeRegion rhs) { return static_cast(static_cast(lhs) & static_cast(rhs)); } + + +struct Node final: Object +{ + using IdType = NodeId; + + NodeId m_ID; + NodeType m_Type; + ImRect m_Bounds; + int m_Channel; + Pin* m_LastPin; + ImVec2 m_DragStart; + + ImU32 m_Color; + ImU32 m_BorderColor; + float m_BorderWidth; + float m_Rounding; + + ImU32 m_GroupColor; + ImU32 m_GroupBorderColor; + float m_GroupBorderWidth; + float m_GroupRounding; + ImRect m_GroupBounds; + + bool m_RestoreState; + bool m_CenterOnScreen; + + Node(EditorContext* editor, NodeId id) + : Object(editor) + , m_ID(id) + , m_Type(NodeType::Node) + , m_Bounds() + , m_Channel(0) + , m_LastPin(nullptr) + , m_DragStart() + , m_Color(IM_COL32_WHITE) + , m_BorderColor(IM_COL32_BLACK) + , m_BorderWidth(0) + , m_Rounding(0) + , m_GroupBounds() + , m_RestoreState(false) + , m_CenterOnScreen(false) + { + } + + virtual ObjectId ID() override { return m_ID; } + + bool AcceptDrag() override; + void UpdateDrag(const ImVec2& offset) override; + bool EndDrag() override; // return true, when changed + ImVec2 DragStartLocation() override { return m_DragStart; } + + virtual bool IsSelectable() override { return true; } + + virtual void Draw(ImDrawList* drawList, DrawFlags flags = None) override final; + void DrawBorder(ImDrawList* drawList, ImU32 color, float thickness = 1.0f); + + void GetGroupedNodes(std::vector& result, bool append = false); + + void CenterOnScreenInNextFrame() { m_CenterOnScreen = true; } + + ImRect GetRegionBounds(NodeRegion region) const; + NodeRegion GetRegion(const ImVec2& 
point) const; + + virtual ImRect GetBounds() const override final { return m_Bounds; } + + virtual Node* AsNode() override final { return this; } +}; + +struct Link final: Object +{ + using IdType = LinkId; + + LinkId m_ID; + Pin* m_StartPin; + Pin* m_EndPin; + ImU32 m_Color; + float m_Thickness; + ImVec2 m_Start; + ImVec2 m_End; + + Link(EditorContext* editor, LinkId id) + : Object(editor) + , m_ID(id) + , m_StartPin(nullptr) + , m_EndPin(nullptr) + , m_Color(IM_COL32_WHITE) + , m_Thickness(1.0f) + { + } + + virtual ObjectId ID() override { return m_ID; } + + virtual bool IsSelectable() override { return true; } + + virtual void Draw(ImDrawList* drawList, DrawFlags flags = None) override final; + void Draw(ImDrawList* drawList, ImU32 color, float extraThickness = 0.0f) const; + + void UpdateEndpoints(); + + ImCubicBezierPoints GetCurve() const; + + virtual bool TestHit(const ImVec2& point, float extraThickness = 0.0f) const override final; + virtual bool TestHit(const ImRect& rect, bool allowIntersect = true) const override final; + + virtual ImRect GetBounds() const override final; + + virtual Link* AsLink() override final { return this; } +}; + +struct NodeSettings +{ + NodeId m_ID; + ImVec2 m_Location; + ImVec2 m_Size; + ImVec2 m_GroupSize; + bool m_WasUsed; + + bool m_Saved; + bool m_IsDirty; + SaveReasonFlags m_DirtyReason; + + NodeSettings(NodeId id) + : m_ID(id) + , m_Location(0, 0) + , m_Size(0, 0) + , m_GroupSize(0, 0) + , m_WasUsed(false) + , m_Saved(false) + , m_IsDirty(false) + , m_DirtyReason(SaveReasonFlags::None) + { + } + + void ClearDirty(); + void MakeDirty(SaveReasonFlags reason); + + json::value Serialize(); + + static bool Parse(const std::string& string, NodeSettings& settings); + static bool Parse(const json::value& data, NodeSettings& result); +}; + +struct Settings +{ + bool m_IsDirty; + SaveReasonFlags m_DirtyReason; + + vector m_Nodes; + vector m_Selection; + ImVec2 m_ViewScroll; + float m_ViewZoom; + + Settings() + : m_IsDirty(false) + 
, m_DirtyReason(SaveReasonFlags::None) + , m_ViewScroll(0, 0) + , m_ViewZoom(1.0f) + { + } + + NodeSettings* AddNode(NodeId id); + NodeSettings* FindNode(NodeId id); + + void ClearDirty(Node* node = nullptr); + void MakeDirty(SaveReasonFlags reason, Node* node = nullptr); + + std::string Serialize(); + + static bool Parse(const std::string& string, Settings& settings); +}; + +struct Control +{ + Object* HotObject; + Object* ActiveObject; + Object* ClickedObject; + Object* DoubleClickedObject; + Node* HotNode; + Node* ActiveNode; + Node* ClickedNode; + Node* DoubleClickedNode; + Pin* HotPin; + Pin* ActivePin; + Pin* ClickedPin; + Pin* DoubleClickedPin; + Link* HotLink; + Link* ActiveLink; + Link* ClickedLink; + Link* DoubleClickedLink; + bool BackgroundHot; + bool BackgroundActive; + bool BackgroundClicked; + bool BackgroundDoubleClicked; + + Control(Object* hotObject, Object* activeObject, Object* clickedObject, Object* doubleClickedObject, + bool backgroundHot, bool backgroundActive, bool backgroundClicked, bool backgroundDoubleClicked) + : HotObject(hotObject) + , ActiveObject(activeObject) + , ClickedObject(clickedObject) + , DoubleClickedObject(doubleClickedObject) + , HotNode(nullptr) + , ActiveNode(nullptr) + , ClickedNode(nullptr) + , DoubleClickedNode(nullptr) + , HotPin(nullptr) + , ActivePin(nullptr) + , ClickedPin(nullptr) + , DoubleClickedPin(nullptr) + , HotLink(nullptr) + , ActiveLink(nullptr) + , ClickedLink(nullptr) + , DoubleClickedLink(nullptr) + , BackgroundHot(backgroundHot) + , BackgroundActive(backgroundActive) + , BackgroundClicked(backgroundClicked) + , BackgroundDoubleClicked(backgroundDoubleClicked) + { + if (hotObject) + { + HotNode = hotObject->AsNode(); + HotPin = hotObject->AsPin(); + HotLink = hotObject->AsLink(); + + if (HotPin) + HotNode = HotPin->m_Node; + } + + if (activeObject) + { + ActiveNode = activeObject->AsNode(); + ActivePin = activeObject->AsPin(); + ActiveLink = activeObject->AsLink(); + } + + if (clickedObject) + { + 
ClickedNode = clickedObject->AsNode(); + ClickedPin = clickedObject->AsPin(); + ClickedLink = clickedObject->AsLink(); + } + + if (doubleClickedObject) + { + DoubleClickedNode = doubleClickedObject->AsNode(); + DoubleClickedPin = doubleClickedObject->AsPin(); + DoubleClickedLink = doubleClickedObject->AsLink(); + } + } +}; + +struct NavigateAction; +struct SizeAction; +struct DragAction; +struct SelectAction; +struct CreateItemAction; +struct DeleteItemsAction; +struct ContextMenuAction; +struct ShortcutAction; + +struct AnimationController; +struct FlowAnimationController; + +struct Animation +{ + enum State + { + Playing, + Stopped + }; + + EditorContext* Editor; + State m_State; + float m_Time; + float m_Duration; + + Animation(EditorContext* editor); + virtual ~Animation(); + + void Play(float duration); + void Stop(); + void Finish(); + void Update(); + + bool IsPlaying() const { return m_State == Playing; } + + float GetProgress() const { return m_Time / m_Duration; } + +protected: + virtual void OnPlay() {} + virtual void OnFinish() {} + virtual void OnStop() {} + + virtual void OnUpdate(float progress) { IM_UNUSED(progress); } +}; + +struct NavigateAnimation final: Animation +{ + NavigateAction& Action; + ImRect m_Start; + ImRect m_Target; + + NavigateAnimation(EditorContext* editor, NavigateAction& scrollAction); + + void NavigateTo(const ImRect& target, float duration); + +private: + void OnUpdate(float progress) override final; + void OnStop() override final; + void OnFinish() override final; +}; + +struct FlowAnimation final: Animation +{ + FlowAnimationController* Controller; + Link* m_Link; + float m_Speed; + float m_MarkerDistance; + float m_Offset; + + FlowAnimation(FlowAnimationController* controller); + + void Flow(Link* link, float markerDistance, float speed, float duration); + + void Draw(ImDrawList* drawList); + +private: + struct CurvePoint + { + float Distance; + ImVec2 Point; + }; + + ImVec2 m_LastStart; + ImVec2 m_LastEnd; + float 
m_PathLength; + vector m_Path; + + bool IsLinkValid() const; + bool IsPathValid() const; + void UpdatePath(); + void ClearPath(); + + ImVec2 SamplePath(float distance); + + void OnUpdate(float progress) override final; + void OnStop() override final; +}; + +struct AnimationController +{ + EditorContext* Editor; + + AnimationController(EditorContext* editor) + : Editor(editor) + { + } + + virtual ~AnimationController() + { + } + + virtual void Draw(ImDrawList* drawList) + { + IM_UNUSED(drawList); + } +}; + +struct FlowAnimationController final : AnimationController +{ + FlowAnimationController(EditorContext* editor); + virtual ~FlowAnimationController(); + + void Flow(Link* link); + + virtual void Draw(ImDrawList* drawList) override final; + + void Release(FlowAnimation* animation); + +private: + FlowAnimation* GetOrCreate(Link* link); + + vector m_Animations; + vector m_FreePool; +}; + +struct EditorAction +{ + enum AcceptResult { False, True, Possible }; + + EditorAction(EditorContext* editor) + : Editor(editor) + { + } + + virtual ~EditorAction() {} + + virtual const char* GetName() const = 0; + + virtual AcceptResult Accept(const Control& control) = 0; + virtual bool Process(const Control& control) = 0; + virtual void Reject() {} // called when Accept returns 'Possible' and was rejected + + virtual ImGuiMouseCursor GetCursor() { return ImGuiMouseCursor_Arrow; } + + virtual bool IsDragging() { return false; } + + virtual void ShowMetrics() {} + + virtual NavigateAction* AsNavigate() { return nullptr; } + virtual SizeAction* AsSize() { return nullptr; } + virtual DragAction* AsDrag() { return nullptr; } + virtual SelectAction* AsSelect() { return nullptr; } + virtual CreateItemAction* AsCreateItem() { return nullptr; } + virtual DeleteItemsAction* AsDeleteItems() { return nullptr; } + virtual ContextMenuAction* AsContextMenu() { return nullptr; } + virtual ShortcutAction* AsCutCopyPaste() { return nullptr; } + + EditorContext* Editor; +}; + +struct NavigateAction
final: EditorAction +{ + enum class NavigationReason + { + Unknown, + MouseZoom, + Selection, + Object, + Content, + Edge + }; + + bool m_IsActive; + float m_Zoom; + ImVec2 m_Scroll; + ImVec2 m_ScrollStart; + ImVec2 m_ScrollDelta; + + NavigateAction(EditorContext* editor, ImGuiEx::Canvas& canvas); + + virtual const char* GetName() const override final { return "Navigate"; } + + virtual AcceptResult Accept(const Control& control) override final; + virtual bool Process(const Control& control) override final; + + virtual void ShowMetrics() override final; + + virtual NavigateAction* AsNavigate() override final { return this; } + + void NavigateTo(const ImRect& bounds, bool zoomIn, float duration = -1.0f, NavigationReason reason = NavigationReason::Unknown); + void StopNavigation(); + void FinishNavigation(); + + bool MoveOverEdge(); + void StopMoveOverEdge(); + bool IsMovingOverEdge() const { return m_MovingOverEdge; } + ImVec2 GetMoveOffset() const { return m_MoveOffset; } + + void SetWindow(ImVec2 position, ImVec2 size); + + ImGuiEx::CanvasView GetView() const; + ImVec2 GetViewOrigin() const; + float GetViewScale() const; + + void SetViewRect(const ImRect& rect); + ImRect GetViewRect() const; + +private: + ImGuiEx::Canvas& m_Canvas; + ImVec2 m_WindowScreenPos; + ImVec2 m_WindowScreenSize; + + NavigateAnimation m_Animation; + NavigationReason m_Reason; + uint64_t m_LastSelectionId; + Object* m_LastObject; + bool m_MovingOverEdge; + ImVec2 m_MoveOffset; + + bool HandleZoom(const Control& control); + + void NavigateTo(const ImRect& target, float duration = -1.0f, NavigationReason reason = NavigationReason::Unknown); + + float MatchZoom(int steps, float fallbackZoom); + int MatchZoomIndex(int direction); + + static const float s_ZoomLevels[]; + static const int s_ZoomLevelCount; +}; + +struct SizeAction final: EditorAction +{ + bool m_IsActive; + bool m_Clean; + Node* m_SizedNode; + + SizeAction(EditorContext* editor); + + virtual const char* GetName() const override 
final { return "Size"; } + + virtual AcceptResult Accept(const Control& control) override final; + virtual bool Process(const Control& control) override final; + + virtual ImGuiMouseCursor GetCursor() override final { return m_Cursor; } + + virtual void ShowMetrics() override final; + + virtual SizeAction* AsSize() override final { return this; } + + virtual bool IsDragging() override final { return m_IsActive; } + + const ImRect& GetStartGroupBounds() const { return m_StartGroupBounds; } + +private: + NodeRegion GetRegion(Node* node); + ImGuiMouseCursor ChooseCursor(NodeRegion region); + + ImRect m_StartBounds; + ImRect m_StartGroupBounds; + ImVec2 m_LastSize; + ImVec2 m_MinimumSize; + ImVec2 m_LastDragOffset; + ed::NodeRegion m_Pivot; + ImGuiMouseCursor m_Cursor; +}; + +struct DragAction final: EditorAction +{ + bool m_IsActive; + bool m_Clear; + Object* m_DraggedObject; + vector m_Objects; + + DragAction(EditorContext* editor); + + virtual const char* GetName() const override final { return "Drag"; } + + virtual AcceptResult Accept(const Control& control) override final; + virtual bool Process(const Control& control) override final; + + virtual ImGuiMouseCursor GetCursor() override final { return ImGuiMouseCursor_ResizeAll; } + + virtual bool IsDragging() override final { return m_IsActive; } + + virtual void ShowMetrics() override final; + + virtual DragAction* AsDrag() override final { return this; } +}; + +struct SelectAction final: EditorAction +{ + bool m_IsActive; + + bool m_SelectGroups; + bool m_SelectLinkMode; + bool m_CommitSelection; + ImVec2 m_StartPoint; + ImVec2 m_EndPoint; + vector m_CandidateObjects; + vector m_SelectedObjectsAtStart; + + Animation m_Animation; + + SelectAction(EditorContext* editor); + + virtual const char* GetName() const override final { return "Select"; } + + virtual AcceptResult Accept(const Control& control) override final; + virtual bool Process(const Control& control) override final; + + virtual void ShowMetrics() 
override final; + + virtual bool IsDragging() override final { return m_IsActive; } + + virtual SelectAction* AsSelect() override final { return this; } + + void Draw(ImDrawList* drawList); +}; + +struct ContextMenuAction final: EditorAction +{ + enum Menu { None, Node, Pin, Link, Background }; + + Menu m_CandidateMenu; + Menu m_CurrentMenu; + ObjectId m_ContextId; + + ContextMenuAction(EditorContext* editor); + + virtual const char* GetName() const override final { return "Context Menu"; } + + virtual AcceptResult Accept(const Control& control) override final; + virtual bool Process(const Control& control) override final; + virtual void Reject() override final; + + virtual void ShowMetrics() override final; + + virtual ContextMenuAction* AsContextMenu() override final { return this; } + + bool ShowNodeContextMenu(NodeId* nodeId); + bool ShowPinContextMenu(PinId* pinId); + bool ShowLinkContextMenu(LinkId* linkId); + bool ShowBackgroundContextMenu(); +}; + +struct ShortcutAction final: EditorAction +{ + enum Action { None, Cut, Copy, Paste, Duplicate, CreateNode }; + + bool m_IsActive; + bool m_InAction; + Action m_CurrentAction; + vector m_Context; + + ShortcutAction(EditorContext* editor); + + virtual const char* GetName() const override final { return "Shortcut"; } + + virtual AcceptResult Accept(const Control& control) override final; + virtual bool Process(const Control& control) override final; + virtual void Reject() override final; + + virtual void ShowMetrics() override final; + + virtual ShortcutAction* AsCutCopyPaste() override final { return this; } + + bool Begin(); + void End(); + + bool AcceptCut(); + bool AcceptCopy(); + bool AcceptPaste(); + bool AcceptDuplicate(); + bool AcceptCreateNode(); +}; + +struct CreateItemAction final : EditorAction +{ + enum Stage + { + None, + Possible, + Create + }; + + enum Action + { + Unknown, + UserReject, + UserAccept + }; + + enum Type + { + NoItem, + Node, + Link + }; + + enum Result + { + True, + False, + 
Indeterminate + }; + + bool m_InActive; + Stage m_NextStage; + + Stage m_CurrentStage; + Type m_ItemType; + Action m_UserAction; + ImU32 m_LinkColor; + float m_LinkThickness; + Pin* m_LinkStart; + Pin* m_LinkEnd; + + bool m_IsActive; + Pin* m_DraggedPin; + + int m_LastChannel = -1; + + + CreateItemAction(EditorContext* editor); + + virtual const char* GetName() const override final { return "Create Item"; } + + virtual AcceptResult Accept(const Control& control) override final; + virtual bool Process(const Control& control) override final; + + virtual ImGuiMouseCursor GetCursor() override final { return ImGuiMouseCursor_Arrow; } + + virtual void ShowMetrics() override final; + + virtual bool IsDragging() override final { return m_IsActive; } + + virtual CreateItemAction* AsCreateItem() override final { return this; } + + void SetStyle(ImU32 color, float thickness); + + bool Begin(); + void End(); + + Result RejectItem(); + Result AcceptItem(); + + Result QueryLink(PinId* startId, PinId* endId); + Result QueryNode(PinId* pinId); + +private: + bool m_IsInGlobalSpace; + + void DragStart(Pin* startPin); + void DragEnd(); + void DropPin(Pin* endPin); + void DropNode(); + void DropNothing(); +}; + +struct DeleteItemsAction final: EditorAction +{ + bool m_IsActive; + bool m_InInteraction; + + DeleteItemsAction(EditorContext* editor); + + virtual const char* GetName() const override final { return "Delete Items"; } + + virtual AcceptResult Accept(const Control& control) override final; + virtual bool Process(const Control& control) override final; + + virtual void ShowMetrics() override final; + + virtual DeleteItemsAction* AsDeleteItems() override final { return this; } + + bool Add(Object* object); + + bool Begin(); + void End(); + + bool QueryLink(LinkId* linkId, PinId* startId = nullptr, PinId* endId = nullptr); + bool QueryNode(NodeId* nodeId); + + bool AcceptItem(); + void RejectItem(); + +private: + enum IteratorType { Unknown, Link, Node }; + enum UserAction { 
Undetermined, Accepted, Rejected }; + + bool QueryItem(ObjectId* itemId, IteratorType itemType); + void RemoveItem(); + + vector m_ManuallyDeletedObjects; + + IteratorType m_CurrentItemType; + UserAction m_UserAction; + vector m_CandidateObjects; + int m_CandidateItemIndex; +}; + +struct NodeBuilder +{ + EditorContext* const Editor; + + Node* m_CurrentNode; + Pin* m_CurrentPin; + + ImRect m_NodeRect; + + ImRect m_PivotRect; + ImVec2 m_PivotAlignment; + ImVec2 m_PivotSize; + ImVec2 m_PivotScale; + bool m_ResolvePinRect; + bool m_ResolvePivot; + + ImRect m_GroupBounds; + bool m_IsGroup; + + ImDrawListSplitter m_Splitter; + ImDrawListSplitter m_PinSplitter; + + NodeBuilder(EditorContext* editor); + ~NodeBuilder(); + + void Begin(NodeId nodeId); + void End(); + + void BeginPin(PinId pinId, PinKind kind); + void EndPin(); + + void PinRect(const ImVec2& a, const ImVec2& b); + void PinPivotRect(const ImVec2& a, const ImVec2& b); + void PinPivotSize(const ImVec2& size); + void PinPivotScale(const ImVec2& scale); + void PinPivotAlignment(const ImVec2& alignment); + + void Group(const ImVec2& size); + + ImDrawList* GetUserBackgroundDrawList() const; + ImDrawList* GetUserBackgroundDrawList(Node* node) const; +}; + +struct HintBuilder +{ + EditorContext* const Editor; + bool m_IsActive; + Node* m_CurrentNode; + float m_LastFringe = 1.0f; + int m_LastChannel = 0; + + HintBuilder(EditorContext* editor); + + bool Begin(NodeId nodeId); + void End(); + + ImVec2 GetGroupMin(); + ImVec2 GetGroupMax(); + + ImDrawList* GetForegroundDrawList(); + ImDrawList* GetBackgroundDrawList(); +}; + +struct Style: ax::NodeEditor::Style +{ + void PushColor(StyleColor colorIndex, const ImVec4& color); + void PopColor(int count = 1); + + void PushVar(StyleVar varIndex, float value); + void PushVar(StyleVar varIndex, const ImVec2& value); + void PushVar(StyleVar varIndex, const ImVec4& value); + void PopVar(int count = 1); + + const char* GetColorName(StyleColor colorIndex) const; + +private: + struct 
ColorModifier + { + StyleColor Index; + ImVec4 Value; + }; + + struct VarModifier + { + StyleVar Index; + ImVec4 Value; + }; + + float* GetVarFloatAddr(StyleVar idx); + ImVec2* GetVarVec2Addr(StyleVar idx); + ImVec4* GetVarVec4Addr(StyleVar idx); + + vector m_ColorStack; + vector m_VarStack; +}; + +struct Config: ax::NodeEditor::Config +{ + Config(const ax::NodeEditor::Config* config); + + std::string Load(); + std::string LoadNode(NodeId nodeId); + + void BeginSave(); + bool Save(const std::string& data, SaveReasonFlags flags); + bool SaveNode(NodeId nodeId, const std::string& data, SaveReasonFlags flags); + void EndSave(); +}; + +enum class SuspendFlags : uint8_t +{ + None = 0, + KeepSplitter = 1 +}; + +inline SuspendFlags operator |(SuspendFlags lhs, SuspendFlags rhs) { return static_cast(static_cast(lhs) | static_cast(rhs)); } +inline SuspendFlags operator &(SuspendFlags lhs, SuspendFlags rhs) { return static_cast(static_cast(lhs) & static_cast(rhs)); } + + +struct EditorContext +{ + EditorContext(const ax::NodeEditor::Config* config = nullptr); + ~EditorContext(); + + Style& GetStyle() { return m_Style; } + + void Begin(const char* id, const ImVec2& size = ImVec2(0, 0)); + void End(); + + bool DoLink(LinkId id, PinId startPinId, PinId endPinId, ImU32 color, float thickness); + + + NodeBuilder& GetNodeBuilder() { return m_NodeBuilder; } + HintBuilder& GetHintBuilder() { return m_HintBuilder; } + + EditorAction* GetCurrentAction() { return m_CurrentAction; } + + CreateItemAction& GetItemCreator() { return m_CreateItemAction; } + DeleteItemsAction& GetItemDeleter() { return m_DeleteItemsAction; } + ContextMenuAction& GetContextMenu() { return m_ContextMenuAction; } + ShortcutAction& GetShortcut() { return m_ShortcutAction; } + + const ImGuiEx::CanvasView& GetView() const { return m_Canvas.View(); } + const ImRect& GetViewRect() const { return m_Canvas.ViewRect(); } + const ImRect& GetRect() const { return m_Canvas.Rect(); } + + void SetNodePosition(NodeId nodeId, 
const ImVec2& screenPosition); + ImVec2 GetNodePosition(NodeId nodeId); + ImVec2 GetNodeSize(NodeId nodeId); + + void MarkNodeToRestoreState(Node* node); + void RestoreNodeState(Node* node); + + void ClearSelection(); + void SelectObject(Object* object); + void DeselectObject(Object* object); + void SetSelectedObject(Object* object); + void ToggleObjectSelection(Object* object); + bool IsSelected(Object* object); + const vector& GetSelectedObjects(); + bool IsAnyNodeSelected(); + bool IsAnyLinkSelected(); + bool HasSelectionChanged(); + uint64_t GetSelectionId() const { return m_SelectionId; } + + Node* FindNodeAt(const ImVec2& p); + void FindNodesInRect(const ImRect& r, vector& result, bool append = false, bool includeIntersecting = true); + void FindLinksInRect(const ImRect& r, vector& result, bool append = false); + + void FindLinksForNode(NodeId nodeId, vector& result, bool add = false); + + bool PinHadAnyLinks(PinId pinId); + + ImVec2 ToCanvas(const ImVec2& point) const { return m_Canvas.ToLocal(point); } + ImVec2 ToScreen(const ImVec2& point) const { return m_Canvas.FromLocal(point); } + + void NotifyLinkDeleted(Link* link); + + void Suspend(SuspendFlags flags = SuspendFlags::None); + void Resume(SuspendFlags flags = SuspendFlags::None); + bool IsSuspended(); + + bool IsActive(); + + void MakeDirty(SaveReasonFlags reason); + void MakeDirty(SaveReasonFlags reason, Node* node); + + Pin* CreatePin(PinId id, PinKind kind); + Node* CreateNode(NodeId id); + Link* CreateLink(LinkId id); + + Node* FindNode(NodeId id); + Pin* FindPin(PinId id); + Link* FindLink(LinkId id); + Object* FindObject(ObjectId id); + + Node* GetNode(NodeId id); + Pin* GetPin(PinId id, PinKind kind); + Link* GetLink(LinkId id); + + Link* FindLinkAt(const ImVec2& p); + + template + ImRect GetBounds(const std::vector& objects) + { + ImRect bounds(FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX); + + for (auto object : objects) + if (object->m_IsLive) + bounds.Add(object->GetBounds()); + + if 
(ImRect_IsEmpty(bounds)) + bounds = ImRect(); + + return bounds; + } + + template + ImRect GetBounds(const std::vector>& objects) + { + ImRect bounds(FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX); + + for (auto object : objects) + if (object.m_Object->m_IsLive) + bounds.Add(object.m_Object->GetBounds()); + + if (ImRect_IsEmpty(bounds)) + bounds = ImRect(); + + return bounds; + } + + ImRect GetSelectionBounds() { return GetBounds(m_SelectedObjects); } + ImRect GetContentBounds() { return GetBounds(m_Nodes); } + + ImU32 GetColor(StyleColor colorIndex) const; + ImU32 GetColor(StyleColor colorIndex, float alpha) const; + + void NavigateTo(const ImRect& bounds, bool zoomIn = false, float duration = -1) { m_NavigateAction.NavigateTo(bounds, zoomIn, duration); } + + void RegisterAnimation(Animation* animation); + void UnregisterAnimation(Animation* animation); + + void Flow(Link* link); + + void SetUserContext(bool globalSpace = false); + + void EnableShortcuts(bool enable); + bool AreShortcutsEnabled(); + + NodeId GetDoubleClickedNode() const { return m_DoubleClickedNode; } + PinId GetDoubleClickedPin() const { return m_DoubleClickedPin; } + LinkId GetDoubleClickedLink() const { return m_DoubleClickedLink; } + bool IsBackgroundClicked() const { return m_BackgroundClicked; } + bool IsBackgroundDoubleClicked() const { return m_BackgroundDoubleClicked; } + + float AlignPointToGrid(float p) const + { + if (!ImGui::GetIO().KeyAlt) + return p - ImFmod(p, 16.0f); + else + return p; + } + + ImVec2 AlignPointToGrid(const ImVec2& p) const + { + return ImVec2(AlignPointToGrid(p.x), AlignPointToGrid(p.y)); + } + +private: + void LoadSettings(); + void SaveSettings(); + + Control BuildControl(bool allowOffscreen); + + void ShowMetrics(const Control& control); + + void UpdateAnimations(); + + bool m_IsFirstFrame; + bool m_IsWindowActive; + + bool m_ShortcutsEnabled; + + Style m_Style; + + vector> m_Nodes; + vector> m_Pins; + vector> m_Links; + + vector m_SelectedObjects; + + vector 
m_LastSelectedObjects; + uint64_t m_SelectionId; + + Link* m_LastActiveLink; + + vector m_LiveAnimations; + vector m_LastLiveAnimations; + + ImGuiEx::Canvas m_Canvas; + bool m_IsCanvasVisible; + + NodeBuilder m_NodeBuilder; + HintBuilder m_HintBuilder; + + EditorAction* m_CurrentAction; + NavigateAction m_NavigateAction; + SizeAction m_SizeAction; + DragAction m_DragAction; + SelectAction m_SelectAction; + ContextMenuAction m_ContextMenuAction; + ShortcutAction m_ShortcutAction; + CreateItemAction m_CreateItemAction; + DeleteItemsAction m_DeleteItemsAction; + + vector m_AnimationControllers; + FlowAnimationController m_FlowAnimationController; + + NodeId m_DoubleClickedNode; + PinId m_DoubleClickedPin; + LinkId m_DoubleClickedLink; + bool m_BackgroundClicked; + bool m_BackgroundDoubleClicked; + + bool m_IsInitialized; + Settings m_Settings; + + Config m_Config; + + int m_ExternalChannel; + ImDrawListSplitter m_Splitter; +}; + + +//------------------------------------------------------------------------------ +} // namespace Detail +} // namespace Editor +} // namespace ax + + +//------------------------------------------------------------------------------ +# include "imgui_node_editor_internal.inl" + + +//------------------------------------------------------------------------------ +# endif // __IMGUI_NODE_EDITOR_INTERNAL_H__ diff --git a/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_node_editor_internal.inl b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_node_editor_internal.inl new file mode 100644 index 0000000..0d16231 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/imgui-node-editor/imgui_node_editor_internal.inl @@ -0,0 +1,55 @@ +//------------------------------------------------------------------------------ +// LICENSE +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// publish, and distribute this file as you see 
fit. +// +// CREDITS +// Written by Michal Cichon +//------------------------------------------------------------------------------ +# ifndef __IMGUI_NODE_EDITOR_INTERNAL_INL__ +# define __IMGUI_NODE_EDITOR_INTERNAL_INL__ +# pragma once + + +//------------------------------------------------------------------------------ +# include "imgui_node_editor_internal.h" + + +//------------------------------------------------------------------------------ +namespace ax { +namespace NodeEditor { +namespace Detail { + + +//------------------------------------------------------------------------------ +//inline ImRect ToRect(const ax::rectf& rect) +//{ +// return ImRect( +// to_imvec(rect.top_left()), +// to_imvec(rect.bottom_right()) +// ); +//} +// +//inline ImRect ToRect(const ax::rect& rect) +//{ +// return ImRect( +// to_imvec(rect.top_left()), +// to_imvec(rect.bottom_right()) +// ); +//} + +inline ImRect ImGui_GetItemRect() +{ + return ImRect(ImGui::GetItemRectMin(), ImGui::GetItemRectMax()); +} + + +//------------------------------------------------------------------------------ +} // namespace Detail +} // namespace NodeEditor +} // namespace ax + + +//------------------------------------------------------------------------------ +# endif // __IMGUI_NODE_EDITOR_INTERNAL_INL__ diff --git a/cpp-projects/3d-engine/imgui/extra/imgui_markdown.h b/cpp-projects/3d-engine/imgui/extra/imgui_markdown.h new file mode 100644 index 0000000..da4a267 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/imgui_markdown.h @@ -0,0 +1,799 @@ +#pragma once + +// License: zlib +// Copyright (c) 2019 Juliette Foucaut & Doug Binks +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software.
+// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +/* +API BREAKING CHANGES +==================== +- 2020/04/22 - Added tooltipCallback parameter to ImGui::MarkdownConfig +- 2019/02/01 - Changed LinkCallback parameters, see https://github.com/juliettef/imgui_markdown/issues/2 +- 2019/02/05 - Added imageCallback parameter to ImGui::MarkdownConfig +- 2019/02/06 - Added useLinkCallback member variable to MarkdownImageData to configure using images as links +*/ + +/* +imgui_markdown https://github.com/juliettef/imgui_markdown +Markdown for Dear ImGui + +A permissively licensed markdown single-header library for https://github.com/ocornut/imgui + +Currently requires C++11 or above + +imgui_markdown currently supports the following markdown functionality: + - Wrapped text + - Headers H1, H2, H3 + - Indented text, multi levels + - Unordered lists and sub-lists + - Links + - Images + +Syntax + +Wrapping: +Text wraps automatically. To add a new line, use 'Return'. + +Headers: +# H1 +## H2 +### H3 + +Indents: +On a new line, at the start of the line, add two spaces per indent. +··Indent level 1 +····Indent level 2 + +Unordered lists: +On a new line, at the start of the line, add two spaces, an asterisk and a space. +For nested lists, add two additional spaces in front of the asterisk per list level increment.
+··*·Unordered List level 1 +····*·Unordered List level 2 + +Links: +[link description](https://...) + +Images: +![image alt text](image identifier e.g. filename) + +=============================================================================== + +// Example use on Windows with links opening in a browser + +#include "ImGui.h" // https://github.com/ocornut/imgui +#include "imgui_markdown.h" // https://github.com/juliettef/imgui_markdown +#include "IconsFontAwesome5.h" // https://github.com/juliettef/IconFontCppHeaders + +// Following includes for Windows LinkCallback +#define WIN32_LEAN_AND_MEAN +#include <Windows.h> +#include "Shellapi.h" +#include <string> + +void LinkCallback( ImGui::MarkdownLinkCallbackData data_ ); +inline ImGui::MarkdownImageData ImageCallback( ImGui::MarkdownLinkCallbackData data_ ); + +static ImFont* H1 = NULL; +static ImFont* H2 = NULL; +static ImFont* H3 = NULL; + +static ImGui::MarkdownConfig mdConfig; + + +void LinkCallback( ImGui::MarkdownLinkCallbackData data_ ) +{ + std::string url( data_.link, data_.linkLength ); + if( !data_.isImage ) + { + ShellExecuteA( NULL, "open", url.c_str(), NULL, NULL, SW_SHOWNORMAL ); + } +} + +inline ImGui::MarkdownImageData ImageCallback( ImGui::MarkdownLinkCallbackData data_ ) +{ + // In your application you would load an image based on data_ input. Here we just use the imgui font texture.
+ ImTextureID image = ImGui::GetIO().Fonts->TexID; + // > C++14 can use ImGui::MarkdownImageData imageData{ true, false, image, ImVec2( 40.0f, 20.0f ) }; + ImGui::MarkdownImageData imageData; + imageData.isValid = true; + imageData.useLinkCallback = false; + imageData.user_texture_id = image; + imageData.size = ImVec2( 40.0f, 20.0f ); + + // For image resize when available size.x > image width, add + ImVec2 const contentSize = ImGui::GetContentRegionAvail(); + if( imageData.size.x > contentSize.x ) + { + float const ratio = imageData.size.y/imageData.size.x; + imageData.size.x = contentSize.x; + imageData.size.y = contentSize.x*ratio; + } + + return imageData; +} + +void LoadFonts( float fontSize_ = 12.0f ) +{ + ImGuiIO& io = ImGui::GetIO(); + io.Fonts->Clear(); + // Base font + io.Fonts->AddFontFromFileTTF( "myfont.ttf", fontSize_ ); + // Bold headings H2 and H3 + H2 = io.Fonts->AddFontFromFileTTF( "myfont-bold.ttf", fontSize_ ); + H3 = mdConfig.headingFormats[ 1 ].font; + // bold heading H1 + float fontSizeH1 = fontSize_ * 1.1f; + H1 = io.Fonts->AddFontFromFileTTF( "myfont-bold.ttf", fontSizeH1 ); +} + +void ExampleMarkdownFormatCallback( const ImGui::MarkdownFormatInfo& markdownFormatInfo_, bool start_ ) +{ + // Call the default first so any settings can be overwritten by our implementation. + // Alternatively could be called or not called in a switch statement on a case by case basis. + // See defaultMarkdownFormatCallback definition for furhter examples of how to use it. 
+ ImGui::defaultMarkdownFormatCallback( markdownFormatInfo_, start_ ); + + switch( markdownFormatInfo_.type ) + { + // example: change the colour of heading level 2 + case ImGui::MarkdownFormatType::HEADING: + { + if( markdownFormatInfo_.level == 2 ) + { + if( start_ ) + { + ImGui::PushStyleColor( ImGuiCol_Text, ImGui::GetStyle().Colors[ImGuiCol_TextDisabled] ); + } + else + { + ImGui::PopStyleColor(); + } + } + break; + } + default: + { + break; + } + } +} + +void Markdown( const std::string& markdown_ ) +{ + // You can make your own Markdown function with your prefered string container and markdown config. + // > C++14 can use ImGui::MarkdownConfig mdConfig{ LinkCallback, NULL, ImageCallback, ICON_FA_LINK, { { H1, true }, { H2, true }, { H3, false } }, NULL }; + mdConfig.linkCallback = LinkCallback; + mdConfig.tooltipCallback = NULL; + mdConfig.imageCallback = ImageCallback; + mdConfig.linkIcon = ICON_FA_LINK; + mdConfig.headingFormats[0] = { H1, true }; + mdConfig.headingFormats[1] = { H2, true }; + mdConfig.headingFormats[2] = { H3, false }; + mdConfig.userData = NULL; + mdConfig.formatCallback = ExampleMarkdownFormatCallback; + ImGui::Markdown( markdown_.c_str(), markdown_.length(), mdConfig ); +} + +void MarkdownExample() +{ + const std::string markdownText = u8R"( +# H1 Header: Text and Links +You can add [links like this one to enkisoftware](https://www.enkisoftware.com/) and lines will wrap well. +## H2 Header: indented text. + This text has an indent (two leading spaces). + This one has two. +### H3 Header: Lists + * Unordered lists + * Lists can be indented with two extra spaces. 
+  * Lists can have [links like this one to Avoyd](https://www.avoyd.com/)
+)";
+    Markdown( markdownText );
+}
+
+===============================================================================
+*/
+
+
+#include <stdint.h>     // int32_t (MarkdownFormatInfo::level) and other fixed-width integer types
+namespace ImGui
+{
+    //-----------------------------------------------------------------------------
+    // Basic types
+    //-----------------------------------------------------------------------------
+
+    struct Link;
+    struct MarkdownConfig;
+
+    // Data handed to the link, image and tooltip callbacks.
+    // text/link are given as pointer + length pairs: they are NOT guaranteed to be null terminated.
+    struct MarkdownLinkCallbackData                     // for both links and images
+    {
+        const char*     text;                           // text between square brackets []
+        int             textLength;                     // number of chars in text
+        const char*     link;                           // text between brackets ()
+        int             linkLength;                     // number of chars in link
+        void*           userData;                       // copied from MarkdownConfig::userData
+        bool            isImage;                        // true if '!' is detected in front of the link syntax
+    };
+
+    // Data handed to the tooltip callback: the link/image description plus the configured link icon.
+    struct MarkdownTooltipCallbackData                  // for tooltips
+    {
+        MarkdownLinkCallbackData    linkData;
+        const char*                 linkIcon;           // copied from MarkdownConfig::linkIcon
+    };
+
+    // Value returned by the image callback; the members mirror the arguments of ImGui::Image.
+    struct MarkdownImageData
+    {
+        bool            isValid = false;                        // if true, will draw the image
+        bool            useLinkCallback = false;                // if true, linkCallback will be called when image is clicked
+        ImTextureID     user_texture_id = 0;                    // see ImGui::Image
+        ImVec2          size = ImVec2( 100.0f, 100.0f );        // see ImGui::Image
+        ImVec2          uv0 = ImVec2( 0, 0 );                   // see ImGui::Image
+        ImVec2          uv1 = ImVec2( 1, 1 );                   // see ImGui::Image
+        ImVec4          tint_col = ImVec4( 1, 1, 1, 1 );        // see ImGui::Image
+        ImVec4          border_col = ImVec4( 0, 0, 0, 0 );      // see ImGui::Image
+    };
+
+    // Kind of element a format callback is being invoked for.
+    enum class MarkdownFormatType
+    {
+        NORMAL_TEXT,
+        HEADING,
+        UNORDERED_LIST,
+        LINK,
+    };
+
+    // Argument of the format callback, describing the element about to be (or just) rendered.
+    struct MarkdownFormatInfo
+    {
+        MarkdownFormatType      type = MarkdownFormatType::NORMAL_TEXT;
+        int32_t                 level = 0;              // Set for headings: 1 for H1, 2 for H2 etc.
+        bool                    itemHovered = false;    // Currently only set for links when mouse hovered, only valid when start_ == false
+        const MarkdownConfig*   config = NULL;
+    };
+
+    // Callback signatures. Note these are function types; MarkdownConfig stores pointers to them.
+    typedef void MarkdownLinkCallback( MarkdownLinkCallbackData data );
+    typedef void MarkdownTooltipCallback( MarkdownTooltipCallbackData data );
+
+    // Ready-made tooltip callback: shows the link target, prefixed by the link icon and
+    // "Open in browser" for regular links. "%.*s" is used because the link is not null terminated.
+    inline void defaultMarkdownTooltipCallback( MarkdownTooltipCallbackData data_ )
+    {
+        if( data_.linkData.isImage )
+        {
+            ImGui::SetTooltip( "%.*s", data_.linkData.linkLength, data_.linkData.link );
+        }
+        else
+        {
+            ImGui::SetTooltip( "%s Open in browser\n%.*s", data_.linkIcon, data_.linkData.linkLength, data_.linkData.link );
+        }
+    }
+
+    typedef MarkdownImageData MarkdownImageCallback( MarkdownLinkCallbackData data );
+    // Called with start_ == true before an element is rendered and with start_ == false after it.
+    typedef void MarkdownFormalCallback( const MarkdownFormatInfo& markdownFormatInfo_, bool start_ );
+
+    inline void defaultMarkdownFormatCallback( const MarkdownFormatInfo& markdownFormatInfo_, bool start_ );
+
+    // Per-heading-level formatting used by the default format callback.
+    struct MarkdownHeadingFormat
+    {
+        ImFont*     font;           // ImGui font
+        bool        separator;      // if true, an underlined separator is drawn after the header
+    };
+
+    // Configuration struct for Markdown
+    // - linkCallback is called when a link is clicked on
+    // - linkIcon is a string which encodes a "Link" icon, if available in the current font (e.g.
linkIcon = ICON_FA_LINK with FontAwesome + IconFontCppHeaders https://github.com/juliettef/IconFontCppHeaders)
+    // - headingFormats controls the format of heading H1 to H3, those above H3 use H3 format
+    struct MarkdownConfig
+    {
+        static const int NUMHEADINGS = 3;
+
+        MarkdownLinkCallback*       linkCallback = NULL;
+        MarkdownTooltipCallback*    tooltipCallback = NULL;
+        MarkdownImageCallback*      imageCallback = NULL;
+        const char*                 linkIcon = "";      // icon displayed in link tooltip
+        MarkdownHeadingFormat       headingFormats[ NUMHEADINGS ] = { { NULL, true }, { NULL, true }, { NULL, true } };
+        void*                       userData = NULL;
+        MarkdownFormalCallback*     formatCallback = defaultMarkdownFormatCallback;
+    };
+
+    //-----------------------------------------------------------------------------
+    // External interface
+    //-----------------------------------------------------------------------------
+
+    inline void Markdown( const char* markdown_, size_t markdownLength_, const MarkdownConfig& mdConfig_ );
+
+    //-----------------------------------------------------------------------------
+    // Internals
+    //-----------------------------------------------------------------------------
+
+    struct TextRegion;
+    struct Line;
+    inline void UnderLine( ImColor col_ );
+    inline void RenderLine( const char* markdown_, Line& line_, TextRegion& textRegion_, const MarkdownConfig& mdConfig_ );
+
+    // Renders wrapped text and keeps track of the extra indent it pushed, so that the indent
+    // can be undone by ResetIndent() (also called from the destructor).
+    struct TextRegion
+    {
+        TextRegion() : indentX( 0.0f )
+        {
+        }
+        ~TextRegion()
+        {
+            ResetIndent();
+        }
+
+        // ImGui::TextWrapped will wrap at the starting position
+        // so to work around this we render using our own wrapping for the first line
+        // [text_, text_end_) is the range to draw; if bIndentToHere_ is set, the lines after the
+        // first one are indented by the change in available width measured after the first line.
+        void RenderTextWrapped( const char* text_, const char* text_end_, bool bIndentToHere_ = false )
+        {
+            const float scale = 1.0f;
+            float       widthLeft = GetContentRegionAvail().x;
+            const char* endLine = ImGui::GetFont()->CalcWordWrapPositionA( scale, text_, text_end_, widthLeft );
+            ImGui::TextUnformatted( text_, endLine );
+            if( bIndentToHere_ )
+            {
+                float indentNeeded = GetContentRegionAvail().x - widthLeft;
+                if( indentNeeded )
+                {
+                    ImGui::Indent( indentNeeded );
+                    indentX += indentNeeded;
+                }
+            }
+            widthLeft = GetContentRegionAvail().x;
+            while( endLine < text_end_ )
+            {
+                text_ = endLine;
+                if( *text_ == ' ' ) { ++text_; }    // skip a space at start of line
+                if( text_ >= text_end_ ) { break; } // only a skipped space was left: never draw past text_end_
+                endLine = ImGui::GetFont()->CalcWordWrapPositionA( scale, text_, text_end_, widthLeft );
+                if( text_ == endLine )
+                {
+                    endLine++;                      // always make progress, even if nothing fits
+                }
+                ImGui::TextUnformatted( text_, endLine );
+            }
+        }
+
+        // Draws a bullet followed, on the same line, by the wrapped text indented to the bullet.
+        void RenderListTextWrapped( const char* text_, const char* text_end_ )
+        {
+            ImGui::Bullet();
+            ImGui::SameLine();
+            RenderTextWrapped( text_, text_end_, true );
+        }
+
+        bool RenderLinkText( const char* text_, const char* text_end_, const Link& link_, const ImGuiStyle& style_,
+            const char* markdown_, const MarkdownConfig& mdConfig_, const char** linkHoverStart_ );
+
+        void RenderLinkTextWrapped( const char* text_, const char* text_end_, const Link& link_, const ImGuiStyle& style_,
+            const char* markdown_, const MarkdownConfig& mdConfig_, const char** linkHoverStart_, bool bIndentToHere_ = false );
+
+        // Undo the indent accumulated by the wrapped-render functions.
+        void ResetIndent()
+        {
+            if( indentX > 0.0f )
+            {
+                ImGui::Unindent( indentX );
+            }
+            indentX = 0.0f;
+        }
+
+    private:
+        float indentX;      // total indent pushed through ImGui::Indent by this region
+    };
+
+    // Text that starts after a new line (or at beginning) and ends with a newline (or at end)
+    // All positions are indices into the markdown buffer.
+    struct Line {
+        bool isHeading = false;
+        bool isUnorderedListStart = false;
+        bool isLeadingSpace = true;     // spaces at start of line
+        int  leadSpaceCount = 0;
+        int  headingCount = 0;
+        int  lineStart = 0;
+        int  lineEnd = 0;
+        int  lastRenderPosition = 0;    // lines may get rendered in multiple pieces
+    };
+
+    struct TextBlock {                  // subset of line
+        int start = 0;
+        int stop = 0;
+        int size() const
+        {
+            return stop - start;
+        }
+    };
+
+    // State of the "[text](url)" / "![text](url)" recognizer used by Markdown().
+    struct Link {
+        enum LinkState {
+            NO_LINK,
+            HAS_SQUARE_BRACKET_OPEN,
+            HAS_SQUARE_BRACKETS,
+            HAS_SQUARE_BRACKETS_ROUND_BRACKET_OPEN,
+        };
+        LinkState state = NO_LINK;
+        TextBlock text;
+        TextBlock url;
+        bool isImage = false;
+    };
+
+    // Draws a 1 pixel line in colour col_ along the bottom edge of the last item's rectangle.
+    inline void UnderLine( ImColor col_ )
+    {
+        ImVec2 min = ImGui::GetItemRectMin();
+        ImVec2 max = ImGui::GetItemRectMax();
+        min.y = max.y;
+        ImGui::GetWindowDrawList()->AddLine( min, max, col_, 1.0f );
+    }
+
+    // Renders the not yet rendered part of line_, i.e. the chars after line_.lastRenderPosition
+    // up to line_.lineEnd, as a list item, a heading or normal text. The format callback is
+    // invoked before (start_ == true) and after (start_ == false) the text.
+    inline void RenderLine( const char* markdown_, Line& line_, TextRegion& textRegion_, const MarkdownConfig& mdConfig_ )
+    {
+        // indent
+        int indentStart = 0;
+        if( line_.isUnorderedListStart )    // ImGui unordered list render always adds one indent
+        {
+            indentStart = 1;
+        }
+        for( int j = indentStart; j < line_.leadSpaceCount / 2; ++j )    // add indents
+        {
+            ImGui::Indent();
+        }
+
+        // render
+        MarkdownFormatInfo formatInfo;
+        formatInfo.config = &mdConfig_;
+        int textStart = line_.lastRenderPosition + 1;
+        int textSize = line_.lineEnd - textStart;
+        if( line_.isUnorderedListStart )    // render unordered list
+        {
+            formatInfo.type = MarkdownFormatType::UNORDERED_LIST;
+            mdConfig_.formatCallback( formatInfo, true );
+            const char* text = markdown_ + textStart + 1;
+            textRegion_.RenderListTextWrapped( text, text + textSize - 1 );
+        }
+        else if( line_.isHeading )          // render heading
+        {
+            formatInfo.level = line_.headingCount;
+            formatInfo.type = MarkdownFormatType::HEADING;
+            mdConfig_.formatCallback( formatInfo, true );
+            const char* text = markdown_ + textStart + 1;
+            textRegion_.RenderTextWrapped( text, text + textSize - 1 );
+        }
+        else                                // render a normal paragraph chunk
+        {
+            formatInfo.type = MarkdownFormatType::NORMAL_TEXT;
+            mdConfig_.formatCallback( formatInfo, true );
+            const char* text = markdown_ + textStart;
+            textRegion_.RenderTextWrapped( text, text + textSize );
+        }
+        mdConfig_.formatCallback( formatInfo, false );
+
+        // unindent
+        for( int j = indentStart; j < line_.leadSpaceCount / 2; ++j )
+        {
+            ImGui::Unindent();
+        }
+    }
+
+    // render markdown
+    // markdown_       : text to render; does not need to be null terminated
+    // markdownLength_ : number of chars to read from markdown_ (parsing also stops at a 0 char)
+    // mdConfig_       : callbacks and heading formats, see MarkdownConfig
+    inline void Markdown( const char* markdown_, size_t markdownLength_, const MarkdownConfig& mdConfig_ )
+    {
+        static const char* linkHoverStart = NULL; // we need to preserve status of link hovering between frames
+        ImGuiStyle& style = ImGui::GetStyle();
+        Line        line;
+        Link        link;
+        TextRegion  textRegion;
+
+        char c = 0;
+        for( int i=0; i < (int)markdownLength_; ++i )
+        {
+            c = markdown_[i];               // get the character at index
+            if( c == 0 ) { break; }         // shouldn't happen but don't go beyond 0.
+
+            // If we're at the beginning of the line, count any spaces
+            if( line.isLeadingSpace )
+            {
+                if( c == ' ' )
+                {
+                    ++line.leadSpaceCount;
+                    continue;
+                }
+                else
+                {
+                    line.isLeadingSpace = false;
+                    line.lastRenderPosition = i - 1;
+                    if(( c == '*' ) && ( line.leadSpaceCount >= 2 ))
+                    {
+                        if(( (int)markdownLength_ > i + 1 ) && ( markdown_[ i + 1 ] == ' ' ))    // space after '*'
+                        {
+                            line.isUnorderedListStart = true;
+                            ++i;
+                            ++line.lastRenderPosition;
+                        }
+                        continue;
+                    }
+                    else if( c == '#' )
+                    {
+                        line.headingCount++;
+                        bool bContinueChecking = true;
+                        int  j = i;         // same signed type as i and the casted length
+                        while( ++j < (int)markdownLength_ && bContinueChecking )
+                        {
+                            // Look ahead with a separate variable: c must keep describing the char at
+                            // index i, because when this turns out not to be a heading the link and
+                            // end-of-line handling below still run for index i.
+                            const char next = markdown_[j];
+                            switch( next )
+                            {
+                            case '#':
+                                line.headingCount++;
+                                break;
+                            case ' ':
+                                line.lastRenderPosition = j - 1;
+                                i = j;
+                                line.isHeading = true;
+                                bContinueChecking = false;
+                                break;
+                            default:
+                                line.isHeading = false;
+                                bContinueChecking = false;
+                                break;
+                            }
+                        }
+                        if( line.isHeading ) { continue; }
+                    }
+                }
+            }
+
+            // Test to see if we have a link
+            switch( link.state )
+            {
+            case Link::NO_LINK:
+                if( c == '[' )
+                {
+                    link.state = Link::HAS_SQUARE_BRACKET_OPEN;
+                    link.text.start = i + 1;
+                    if( i > 0 && markdown_[i - 1] == '!' )
+                    {
+                        link.isImage = true;
+                    }
+                }
+                break;
+            case Link::HAS_SQUARE_BRACKET_OPEN:
+                if( c == ']' )
+                {
+                    link.state = Link::HAS_SQUARE_BRACKETS;
+                    link.text.stop = i;
+                }
+                break;
+            case Link::HAS_SQUARE_BRACKETS:
+                if( c == '(' )
+                {
+                    link.state = Link::HAS_SQUARE_BRACKETS_ROUND_BRACKET_OPEN;
+                    link.url.start = i + 1;
+                }
+                break;
+            case Link::HAS_SQUARE_BRACKETS_ROUND_BRACKET_OPEN:
+                if( c == ')' )
+                {
+                    // render previous line content
+                    line.lineEnd = link.text.start - ( link.isImage ? 2 : 1 );
+                    RenderLine( markdown_, line, textRegion, mdConfig_ );
+                    line.leadSpaceCount = 0;
+                    link.url.stop = i;
+                    line.isUnorderedListStart = false;    // the following text shouldn't have bullets
+                    ImGui::SameLine( 0.0f, 0.0f );
+                    if( link.isImage )   // it's an image, render it.
+                    {
+                        bool drawnImage = false;
+                        bool useLinkCallback = false;
+                        if( mdConfig_.imageCallback )
+                        {
+                            MarkdownImageData imageData = mdConfig_.imageCallback({ markdown_ + link.text.start, link.text.size(), markdown_ + link.url.start, link.url.size(), mdConfig_.userData, true });
+                            useLinkCallback = imageData.useLinkCallback;
+                            if( imageData.isValid )
+                            {
+                                ImGui::Image( imageData.user_texture_id, imageData.size, imageData.uv0, imageData.uv1, imageData.tint_col, imageData.border_col );
+                                drawnImage = true;
+                            }
+                        }
+                        if( !drawnImage )
+                        {
+                            ImGui::Text( "( Image %.*s not loaded )", link.url.size(), markdown_ + link.url.start );
+                        }
+                        if( ImGui::IsItemHovered() )
+                        {
+                            if( ImGui::IsMouseReleased( 0 ) && mdConfig_.linkCallback && useLinkCallback )
+                            {
+                                mdConfig_.linkCallback( { markdown_ + link.text.start, link.text.size(), markdown_ + link.url.start, link.url.size(), mdConfig_.userData, true } );
+                            }
+                            if( link.text.size() > 0 && mdConfig_.tooltipCallback )
+                            {
+                                mdConfig_.tooltipCallback( {{ markdown_ + link.text.start, link.text.size(), markdown_ + link.url.start, link.url.size(), mdConfig_.userData, true }, mdConfig_.linkIcon } );
+                            }
+                        }
+                    }
+                    else                 // it's a link, render it.
+                    {
+                        textRegion.RenderLinkTextWrapped( markdown_ + link.text.start, markdown_ + link.text.start + link.text.size(), link, style, markdown_, mdConfig_, &linkHoverStart, false );
+                    }
+                    ImGui::SameLine( 0.0f, 0.0f );
+                    // reset the link by reinitializing it
+                    link = Link();
+                    line.lastRenderPosition = i;
+                    break;
+                }
+            }
+
+            // handle end of line (render)
+            if( c == '\n' )
+            {
+                // render the line
+                line.lineEnd = i;
+                RenderLine( markdown_, line, textRegion, mdConfig_ );
+
+                // reset the line
+                line = Line();
+                line.lineStart = i + 1;
+                line.lastRenderPosition = i;
+
+                textRegion.ResetIndent();
+
+                // reset the link
+                link = Link();
+            }
+        }
+
+        // render any remaining text if last char wasn't 0
+        if( markdownLength_ && line.lineStart < (int)markdownLength_ && markdown_[ line.lineStart ] != 0 )
+        {
+            // handle both null terminated and non null terminated strings
+            line.lineEnd = (int)markdownLength_;
+            if( 0 == markdown_[ line.lineEnd - 1 ] )
+            {
+                --line.lineEnd;
+            }
+            RenderLine( markdown_, line, textRegion, mdConfig_ );
+        }
+    }
+
+
+    // Renders one (already wrapped) piece [text_, text_end_) of a link, wrapped in LINK format
+    // callbacks, and fires the link / tooltip callbacks when the link counts as hovered.
+    // *linkHoverStart_ remembers which link (by the address of its text in markdown_) is hovered,
+    // so that every piece of a wrapped link is treated as hovered together.
+    // Returns true only if this very piece is hovered.
+    inline bool TextRegion::RenderLinkText( const char* text_, const char* text_end_, const Link& link_, const ImGuiStyle& style_,
+        const char* markdown_, const MarkdownConfig& mdConfig_, const char** linkHoverStart_ )
+    {
+
+        MarkdownFormatInfo formatInfo;
+        formatInfo.config = &mdConfig_;
+        formatInfo.type = MarkdownFormatType::LINK;
+        mdConfig_.formatCallback( formatInfo, true );
+        ImGui::PushTextWrapPos( -1.0f );
+        ImGui::TextUnformatted( text_, text_end_ );
+        ImGui::PopTextWrapPos();
+
+        bool bThisItemHovered = ImGui::IsItemHovered();
+        if(bThisItemHovered)
+        {
+            *linkHoverStart_ = markdown_ + link_.text.start;
+        }
+        bool bHovered = bThisItemHovered || ( *linkHoverStart_ == ( markdown_ + link_.text.start ) );
+
+        formatInfo.itemHovered = bHovered;
+        mdConfig_.formatCallback( formatInfo, false );
+
+        if(bHovered)
+        {
+            if(ImGui::IsMouseReleased( 0 ) && mdConfig_.linkCallback)
+            {
+                mdConfig_.linkCallback( { markdown_ + link_.text.start, link_.text.size(), markdown_ + link_.url.start, link_.url.size(), mdConfig_.userData, false } );
+            }
+            if( mdConfig_.tooltipCallback )
+            {
+                mdConfig_.tooltipCallback( {{ markdown_ + link_.text.start, link_.text.size(), markdown_ + link_.url.start, link_.url.size(), mdConfig_.userData, false }, mdConfig_.linkIcon } );
+            }
+        }
+        return bThisItemHovered;
+    }
+
+    // Link counterpart of RenderTextWrapped: wraps [text_, text_end_) by hand and renders each
+    // piece through RenderLinkText. Clears *linkHoverStart_ when it refers to this link but none
+    // of its pieces is hovered any more.
+    inline void TextRegion::RenderLinkTextWrapped( const char* text_, const char* text_end_, const Link& link_, const ImGuiStyle& style_,
+        const char* markdown_, const MarkdownConfig& mdConfig_, const char** linkHoverStart_, bool bIndentToHere_ )
+    {
+        const float scale = 1.0f;
+        float       widthLeft = GetContentRegionAvail().x;
+        const char* endLine = ImGui::GetFont()->CalcWordWrapPositionA( scale, text_, text_end_, widthLeft );
+        bool        bHovered = RenderLinkText( text_, endLine, link_, style_, markdown_, mdConfig_, linkHoverStart_ );
+        if( bIndentToHere_ )
+        {
+            float indentNeeded = GetContentRegionAvail().x - widthLeft;
+            if( indentNeeded )
+            {
+                ImGui::Indent( indentNeeded );
+                indentX += indentNeeded;
+            }
+        }
+        widthLeft = GetContentRegionAvail().x;
+        while( endLine < text_end_ )
+        {
+            text_ = endLine;
+            if( *text_ == ' ' ) { ++text_; }    // skip a space at start of line
+            if( text_ >= text_end_ ) { break; } // only a skipped space was left: never draw past text_end_
+            endLine = ImGui::GetFont()->CalcWordWrapPositionA( scale, text_, text_end_, widthLeft );
+            if( text_ == endLine )
+            {
+                endLine++;                      // always make progress, even if nothing fits
+            }
+            bool bThisLineHovered = RenderLinkText( text_, endLine, link_, style_, markdown_, mdConfig_, linkHoverStart_ );
+            bHovered = bHovered || bThisLineHovered;
+        }
+        if( !bHovered && *linkHoverStart_ == markdown_ + link_.text.start )
+        {
+            *linkHoverStart_ = NULL;
+        }
+    }
+
+
+    // Default value of MarkdownConfig::formatCallback.
+    // - HEADING: pushes/pops the font of the heading format, adds new lines around the heading
+    //   and a separator after it when the format asks for one.
+    // - LINK: colours the text with ImGuiCol_ButtonHovered and underlines it afterwards.
+    // - NORMAL_TEXT / UNORDERED_LIST: nothing to do.
+    inline void defaultMarkdownFormatCallback( const MarkdownFormatInfo& markdownFormatInfo_, bool start_ )
+    {
+        switch( markdownFormatInfo_.type )
+        {
+        case MarkdownFormatType::NORMAL_TEXT:
+            break;
+        case MarkdownFormatType::HEADING:
+        {
+            // Same values as the MarkdownConfig defaults, used when no config is attached
+            // (MarkdownFormatInfo::config defaults to NULL).
+            MarkdownHeadingFormat fmt = { NULL, true };
+            if( markdownFormatInfo_.config )
+            {
+                // Levels above NUMHEADINGS use the last format; levels below 1 (level defaults
+                // to 0) use the first one instead of indexing headingFormats[ -1 ].
+                int headingIndex = markdownFormatInfo_.level - 1;
+                if( headingIndex > MarkdownConfig::NUMHEADINGS - 1 )
+                {
+                    headingIndex = MarkdownConfig::NUMHEADINGS - 1;
+                }
+                if( headingIndex < 0 )
+                {
+                    headingIndex = 0;
+                }
+                fmt = markdownFormatInfo_.config->headingFormats[ headingIndex ];
+            }
+            if( start_ )
+            {
+                if( fmt.font )
+                {
+                    ImGui::PushFont( fmt.font );
+                }
+                ImGui::NewLine();
+            }
+            else
+            {
+                if( fmt.separator )
+                {
+                    ImGui::Separator();
+                }
+                ImGui::NewLine();
+                if( fmt.font )
+                {
+                    ImGui::PopFont();
+                }
+            }
+            break;
+        }
+        case MarkdownFormatType::UNORDERED_LIST:
+            break;
+        case MarkdownFormatType::LINK:
+            if( start_ )
+            {
+                ImGui::PushStyleColor( ImGuiCol_Text, ImGui::GetStyle().Colors[ImGuiCol_ButtonHovered] );
+            }
+            else
+            {
+                ImGui::PopStyleColor();
+                if( markdownFormatInfo_.itemHovered )
+                {
+                    ImGui::UnderLine( ImGui::GetStyle().Colors[ImGuiCol_ButtonHovered] );
+                }
+                else
+                {
+                    ImGui::UnderLine( ImGui::GetStyle().Colors[ImGuiCol_Button] );
+                }
+            }
+            break;
+        }
+    }
+
+}
+
diff --git a/cpp-projects/3d-engine/imgui/extra/implot/implot.cpp b/cpp-projects/3d-engine/imgui/extra/implot/implot.cpp
new file mode 100644
index 0000000..5d5ef50
--- /dev/null
+++ b/cpp-projects/3d-engine/imgui/extra/implot/implot.cpp
@@ -0,0 +1,4309 @@
+// MIT License
+
+// Copyright (c) 2020 Evan Pezent
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+ +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// ImPlot v0.9 WIP + +/* + +API BREAKING CHANGES +==================== +Occasionally introducing changes that are breaking the API. We try to make the breakage minor and easy to fix. +Below is a change-log of API breaking changes only. If you are using one of the functions listed, expect to have to fix some code. +When you are not sure about a old symbol or function name, try using the Search/Find function of your IDE to look for comments or references in all implot files. +You can read releases logs https://github.com/epezent/implot/releases for more details. + +- 2021/01/18 (0.9) - The default behavior for opening context menus was change from double right-click to single right-click. ImPlotInputMap and related functions were moved + to implot_internal.h due to its immaturity. +- 2020/10/16 (0.8) - ImPlotStyleVar_InfoPadding was changed to ImPlotStyleVar_MousePosPadding +- 2020/09/10 (0.8) - The single array versions of PlotLine, PlotScatter, PlotStems, and PlotShaded were given additional arguments for x-scale and x0. +- 2020/09/07 (0.8) - Plotting functions which accept a custom getter function pointer have been post-fixed with a G (e.g. PlotLineG) +- 2020/09/06 (0.7) - Several flags under ImPlotFlags and ImPlotAxisFlags were inverted (e.g. ImPlotFlags_Legend -> ImPlotFlags_NoLegend) so that the default flagset + is simply 0. This more closely matches ImGui's style and makes it easier to enable non-default but commonly used flags (e.g. ImPlotAxisFlags_Time). 
+- 2020/08/28 (0.5) - ImPlotMarker_ can no longer be combined with bitwise OR, |. This features caused unecessary slow-down, and almost no one used it. +- 2020/08/25 (0.5) - ImPlotAxisFlags_Scientific was removed. Logarithmic axes automatically uses scientific notation. +- 2020/08/17 (0.5) - PlotText was changed so that text is centered horizontally and vertically about the desired point. +- 2020/08/16 (0.5) - An ImPlotContext must be explicitly created and destroyed now with `CreateContext` and `DestroyContext`. Previously, the context was statically initialized in this source file. +- 2020/06/13 (0.4) - The flags `ImPlotAxisFlag_Adaptive` and `ImPlotFlags_Cull` were removed. Both are now done internally by default. +- 2020/06/03 (0.3) - The signature and behavior of PlotPieChart was changed so that data with sum less than 1 can optionally be normalized. The label format can now be specified as well. +- 2020/06/01 (0.3) - SetPalette was changed to `SetColormap` for consistency with other plotting libraries. `RestorePalette` was removed. Use `SetColormap(ImPlotColormap_Default)`. +- 2020/05/31 (0.3) - Plot functions taking custom ImVec2* getters were removed. Use the ImPlotPoint* getter versions instead. +- 2020/05/29 (0.3) - The signature of ImPlotLimits::Contains was changed to take two doubles instead of ImVec2 +- 2020/05/16 (0.2) - All plotting functions were reverted to being prefixed with "Plot" to maintain a consistent VerbNoun style. `Plot` was split into `PlotLine` + and `PlotScatter` (however, `PlotLine` can still be used to plot scatter points as `Plot` did before.). `Bar` is not `PlotBars`, to indicate + that multiple bars will be plotted. +- 2020/05/13 (0.2) - `ImMarker` was change to `ImPlotMarker` and `ImAxisFlags` was changed to `ImPlotAxisFlags`. +- 2020/05/11 (0.2) - `ImPlotFlags_Selection` was changed to `ImPlotFlags_BoxSelect` +- 2020/05/11 (0.2) - The namespace ImGui:: was replaced with ImPlot::. 
As a result, the following additional changes were made:
+                     - Functions that were prefixed or decorated with the word "Plot" have been truncated. E.g., `ImGui::PlotBars` is now just `ImPlot::Bar`.
+                       It should be fairly obvious what was what.
+                     - Some functions have been given names that would have otherwise collided with the ImGui namespace. This has been done to maintain a consistent
+                       style with ImGui. E.g., `ImGui::PushPlotStyleVar` is now `ImPlot::PushStyleVar`.
+- 2020/05/10 (0.2) - The following function/struct names were changed:
+                     - ImPlotRange       -> ImPlotLimits
+                     - GetPlotRange()    -> GetPlotLimits()
+                     - SetNextPlotRange  -> SetNextPlotLimits
+                     - SetNextPlotRangeX -> SetNextPlotLimitsX
+                     - SetNextPlotRangeY -> SetNextPlotLimitsY
+- 2020/05/10 (0.2) - Plot queries are pixel based by default. Query rects that maintain relative plot position have been removed. This was done to support multi-y-axis.
+
+*/
+
+#include "implot.h"
+#include "implot_internal.h"
+
+#ifdef _MSC_VER
+#define sprintf sprintf_s
+#endif
+
+// Global plot context
+ImPlotContext* GImPlot = NULL;
+
+//-----------------------------------------------------------------------------
+// Struct Implementations
+//-----------------------------------------------------------------------------
+
+// Default mouse button / key modifier bindings.
+ImPlotInputMap::ImPlotInputMap() {
+    PanButton             = ImGuiMouseButton_Left;
+    PanMod                = ImGuiKeyModFlags_None;
+    FitButton             = ImGuiMouseButton_Left;
+    ContextMenuButton     = ImGuiMouseButton_Right;
+    BoxSelectButton       = ImGuiMouseButton_Right;
+    BoxSelectMod          = ImGuiKeyModFlags_None;
+    BoxSelectCancelButton = ImGuiMouseButton_Left;
+    QueryButton           = ImGuiMouseButton_Middle;
+    QueryMod              = ImGuiKeyModFlags_None;
+    QueryToggleMod        = ImGuiKeyModFlags_Ctrl;
+    HorizontalMod         = ImGuiKeyModFlags_Alt;
+    VerticalMod           = ImGuiKeyModFlags_Shift;
+}
+
+// Default style values; colors are filled in by ImPlot::StyleColorsAuto.
+ImPlotStyle::ImPlotStyle() {
+
+    LineWeight         = 1;
+    Marker             = ImPlotMarker_None;
+    MarkerSize         = 4;
+    MarkerWeight       = 1;
+    FillAlpha          = 1;
+    ErrorBarSize       = 5;
+    ErrorBarWeight     = 1.5f;
+    DigitalBitHeight   = 8;
+    DigitalBitGap      = 4;
+
+    PlotBorderSize     = 1;
+    MinorAlpha         = 0.25f;
+    MajorTickLen       = ImVec2(10,10);
+    MinorTickLen       = ImVec2(5,5);
+    MajorTickSize      = ImVec2(1,1);
+    MinorTickSize      = ImVec2(1,1);
+    MajorGridSize      = ImVec2(1,1);
+    MinorGridSize      = ImVec2(1,1);
+    PlotPadding        = ImVec2(10,10);
+    LabelPadding       = ImVec2(5,5);
+    LegendPadding      = ImVec2(10,10);
+    LegendInnerPadding = ImVec2(5,5);
+    LegendSpacing      = ImVec2(0,0);
+    MousePosPadding    = ImVec2(10,10);
+    AnnotationPadding  = ImVec2(2,2);
+    FitPadding         = ImVec2(0,0);
+    PlotDefaultSize    = ImVec2(400,300);
+    PlotMinSize        = ImVec2(300,225);
+
+    ImPlot::StyleColorsAuto(this);
+
+    AntiAliasedLines = false;
+    UseLocalTime     = false;
+    Use24HourClock   = false;
+    UseISO8601       = false;
+}
+
+// Returns the plot item referenced by the i-th legend entry (LegendData.Indices[i]).
+ImPlotItem* ImPlotPlot::GetLegendItem(int i) {
+    IM_ASSERT(Items.GetSize() > 0);
+    return Items.GetByIndex(LegendData.Indices[i]);
+}
+
+// Returns the label of the i-th legend entry, read from the legend label buffer at the item's NameOffset.
+const char* ImPlotPlot::GetLegendLabel(int i) {
+    ImPlotItem* item = GetLegendItem(i);
+    IM_ASSERT(item != NULL);
+    IM_ASSERT(item->NameOffset != -1 && item->NameOffset < LegendData.Labels.Buf.Size);
+    return LegendData.Labels.Buf.Data + item->NameOffset;
+}
+
+//-----------------------------------------------------------------------------
+// Style
+//-----------------------------------------------------------------------------
+
+namespace ImPlot {
+
+// Returns the display name of a style color. The table is indexed directly by col, so an index
+// outside of it asserts (like GetPlotStyleVarInfo does) and yields "Unknown" instead of reading
+// outside the array.
+const char* GetStyleColorName(ImPlotCol col) {
+    static const char* col_names[] = {
+        "Line",
+        "Fill",
+        "MarkerOutline",
+        "MarkerFill",
+        "ErrorBar",
+        "FrameBg",
+        "PlotBg",
+        "PlotBorder",
+        "LegendBg",
+        "LegendBorder",
+        "LegendText",
+        "TitleText",
+        "InlayText",
+        "XAxis",
+        "XAxisGrid",
+        "YAxis",
+        "YAxisGrid",
+        "YAxis2",
+        "YAxisGrid2",
+        "YAxis3",
+        "YAxisGrid3",
+        "Selection",
+        "Query",
+        "Crosshairs"
+    };
+    const bool in_range = col >= 0 && col < IM_ARRAYSIZE(col_names);
+    IM_ASSERT(in_range);
+    return in_range ? col_names[col] : "Unknown";
+}
+
+// Returns the display name of a marker, or "" for a value that is not a single known marker.
+const char* GetMarkerName(ImPlotMarker marker) {
+    switch (marker) {
+        case ImPlotMarker_None:     return "None";
+        case ImPlotMarker_Circle:   return "Circle";
+        case ImPlotMarker_Square:   return "Square";
+        case ImPlotMarker_Diamond:  return "Diamond";
+        case ImPlotMarker_Up:       return "Up";
+        case ImPlotMarker_Down:     return "Down";
+        case ImPlotMarker_Left:     return "Left";
+        case ImPlotMarker_Right:    return "Right";
+        case ImPlotMarker_Cross:    return "Cross";
+        case ImPlotMarker_Plus:     return "Plus";
+        case ImPlotMarker_Asterisk: return "Asterisk";
+        default:                    return "";
+    }
+}
+
+// Returns the color used for idx when it is left on "auto": most entries are derived from the
+// current ImGui style or from another plot color; opaque black is returned for the plot
+// dependent entries and for unknown indices.
+ImVec4 GetAutoColor(ImPlotCol idx) {
+    ImVec4 col(0,0,0,1);
+    switch(idx) {
+        case ImPlotCol_Line:          return col; // these are plot dependent!
+        case ImPlotCol_Fill:          return col; // these are plot dependent!
+        case ImPlotCol_MarkerOutline: return col; // these are plot dependent!
+        case ImPlotCol_MarkerFill:    return col; // these are plot dependent!
+        case ImPlotCol_ErrorBar:      return ImGui::GetStyleColorVec4(ImGuiCol_Text);
+        case ImPlotCol_FrameBg:       return ImGui::GetStyleColorVec4(ImGuiCol_FrameBg);
+        case ImPlotCol_PlotBg:        return ImGui::GetStyleColorVec4(ImGuiCol_WindowBg);
+        case ImPlotCol_PlotBorder:    return ImGui::GetStyleColorVec4(ImGuiCol_Border);
+        case ImPlotCol_LegendBg:      return ImGui::GetStyleColorVec4(ImGuiCol_PopupBg);
+        case ImPlotCol_LegendBorder:  return GetStyleColorVec4(ImPlotCol_PlotBorder);
+        case ImPlotCol_LegendText:    return GetStyleColorVec4(ImPlotCol_InlayText);
+        case ImPlotCol_TitleText:     return ImGui::GetStyleColorVec4(ImGuiCol_Text);
+        case ImPlotCol_InlayText:     return ImGui::GetStyleColorVec4(ImGuiCol_Text);
+        case ImPlotCol_XAxis:         return ImGui::GetStyleColorVec4(ImGuiCol_Text);
+        case ImPlotCol_XAxisGrid:     return GetStyleColorVec4(ImPlotCol_XAxis) * ImVec4(1,1,1,0.25f);
+        case ImPlotCol_YAxis:         return ImGui::GetStyleColorVec4(ImGuiCol_Text);
+        case ImPlotCol_YAxisGrid:     return GetStyleColorVec4(ImPlotCol_YAxis) * ImVec4(1,1,1,0.25f);
+        case ImPlotCol_YAxis2:        return ImGui::GetStyleColorVec4(ImGuiCol_Text);
+        case ImPlotCol_YAxisGrid2:    return GetStyleColorVec4(ImPlotCol_YAxis2) * ImVec4(1,1,1,0.25f);
+        case ImPlotCol_YAxis3:        return ImGui::GetStyleColorVec4(ImGuiCol_Text);
+        case ImPlotCol_YAxisGrid3:    return GetStyleColorVec4(ImPlotCol_YAxis3) * ImVec4(1,1,1,0.25f);
+        case ImPlotCol_Selection:     return ImVec4(1,1,0,1);
+        case ImPlotCol_Query:         return ImVec4(0,1,0,1);
+        case ImPlotCol_Crosshairs:    return GetStyleColorVec4(ImPlotCol_PlotBorder);
+        default: return col;
+    }
+}
+
+// Describes where a style variable lives inside ImPlotStyle: element type, element count and
+// byte offset of the member.
+struct ImPlotStyleVarInfo {
+    ImGuiDataType   Type;
+    ImU32           Count;
+    ImU32           Offset;
+    void*           GetVarPtr(ImPlotStyle* style) const { return (void*)((unsigned char*)style + Offset); }
+};
+
+// One entry per ImPlotStyleVar, in enum order (GetPlotStyleVarInfo indexes this table by the enum value).
+static const ImPlotStyleVarInfo GPlotStyleVarInfo[] =
+{
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImPlotStyle, LineWeight)         }, // ImPlotStyleVar_LineWeight
+    { ImGuiDataType_S32,   1, (ImU32)IM_OFFSETOF(ImPlotStyle, Marker)             }, // ImPlotStyleVar_Marker
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImPlotStyle, MarkerSize)         }, // ImPlotStyleVar_MarkerSize
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImPlotStyle, MarkerWeight)       }, // ImPlotStyleVar_MarkerWeight
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImPlotStyle, FillAlpha)          }, // ImPlotStyleVar_FillAlpha
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImPlotStyle, ErrorBarSize)       }, // ImPlotStyleVar_ErrorBarSize
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImPlotStyle, ErrorBarWeight)     }, // ImPlotStyleVar_ErrorBarWeight
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImPlotStyle, DigitalBitHeight)   }, // ImPlotStyleVar_DigitalBitHeight
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImPlotStyle, DigitalBitGap)      }, // ImPlotStyleVar_DigitalBitGap
+
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImPlotStyle, PlotBorderSize)     }, // ImPlotStyleVar_PlotBorderSize
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImPlotStyle, MinorAlpha)         }, // ImPlotStyleVar_MinorAlpha
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, MajorTickLen)       }, // ImPlotStyleVar_MajorTickLen
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, MinorTickLen)       }, // ImPlotStyleVar_MinorTickLen
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, MajorTickSize)      }, // ImPlotStyleVar_MajorTickSize
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, MinorTickSize)      }, // ImPlotStyleVar_MinorTickSize
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, MajorGridSize)      }, // ImPlotStyleVar_MajorGridSize
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, MinorGridSize)      }, // ImPlotStyleVar_MinorGridSize
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, PlotPadding)        }, // ImPlotStyleVar_PlotPadding
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, LabelPadding)       }, // ImPlotStyleVar_LabelPadding
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, LegendPadding)      }, // ImPlotStyleVar_LegendPadding
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, LegendInnerPadding) }, // ImPlotStyleVar_LegendInnerPadding
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, LegendSpacing)      }, // ImPlotStyleVar_LegendSpacing
+
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, MousePosPadding)    }, // ImPlotStyleVar_MousePosPadding
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, AnnotationPadding)  }, // ImPlotStyleVar_AnnotationPadding
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, FitPadding)         }, // ImPlotStyleVar_FitPadding
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, PlotDefaultSize)    }, // ImPlotStyleVar_PlotDefaultSize
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImPlotStyle, PlotMinSize)        }  // ImPlotStyleVar_PlotMinSize
+};
+
+// Looks up the table entry for idx; asserts that idx is valid and that the table has one entry per style variable.
+static const ImPlotStyleVarInfo* GetPlotStyleVarInfo(ImPlotStyleVar idx) {
+    IM_ASSERT(idx >= 0 && idx < ImPlotStyleVar_COUNT);
+    IM_ASSERT(IM_ARRAYSIZE(GPlotStyleVarInfo) == ImPlotStyleVar_COUNT);
+    return &GPlotStyleVarInfo[idx];
+}
+
+//-----------------------------------------------------------------------------
+// Generic Helpers
+//-----------------------------------------------------------------------------
+
+// Draws [text_begin, text_end) rotated so that it runs upwards from pos (pos.y decreases by the
+// scaled advance of every glyph), using the current ImGui font. text_end may be NULL for a
+// null terminated string.
+void AddTextVertical(ImDrawList *DrawList, ImVec2 pos, ImU32 col, const char *text_begin, const char* text_end) {
+    // the code below is based loosely on ImFont::RenderText
+    if (!text_end)
+        text_end = text_begin + strlen(text_begin);
+    ImGuiContext& g = *GImGui;
+    ImFont* font = g.Font;
+    // Align to be pixel perfect
+    pos.x = IM_FLOOR(pos.x);
+    pos.y = IM_FLOOR(pos.y);
+    const float scale = g.FontSize / font->FontSize;
+    const char* s = text_begin;
+    int chars_exp = (int)(text_end - s);    // upper bound: one glyph per byte
+    int chars_rnd = 0;                      // glyphs actually emitted
+    const int vtx_count_max = chars_exp * 4;
+    const int idx_count_max = chars_exp * 6;
+    DrawList->PrimReserve(idx_count_max, vtx_count_max);
+    while (s < text_end) {
+        unsigned int c = (unsigned int)*s;
+        if (c < 0x80) {
+            s += 1;
+        }
+        else {
+            s += ImTextCharFromUtf8(&c, s, text_end);
+            if (c == 0) // Malformed UTF-8?
+                break;
+        }
+        const ImFontGlyph * glyph = font->FindGlyph((ImWchar)c);
+        if (glyph == NULL) {
+            continue;
+        }
+        DrawList->PrimQuadUV(pos + ImVec2(glyph->Y0, -glyph->X0) * scale, pos + ImVec2(glyph->Y0, -glyph->X1) * scale,
+                             pos + ImVec2(glyph->Y1, -glyph->X1) * scale, pos + ImVec2(glyph->Y1, -glyph->X0) * scale,
+                             ImVec2(glyph->U0, glyph->V0), ImVec2(glyph->U1, glyph->V0),
+                             ImVec2(glyph->U1, glyph->V1), ImVec2(glyph->U0, glyph->V1),
+                             col);
+        pos.y -= glyph->AdvanceX * scale;
+        chars_rnd++;
+    }
+    // Give back unused vertices
+    int chars_skp = chars_exp-chars_rnd;
+    DrawList->PrimUnreserve(chars_skp*6, chars_skp*4);
+}
+
+// Returns 1, 2, 5 or 10 times the power of ten just below x, picking the multiplier from the
+// leading fraction f = x / 10^floor(log10(x)): with round == true by thresholds 1.5 / 3 / 7,
+// otherwise the smallest of 1 / 2 / 5 / 10 that is >= f.
+// NOTE(review): the logarithm presumes x > 0 - confirm at the call sites.
+double NiceNum(double x, bool round) {
+    const int    expv = (int)floor(ImLog10(x));
+    const double f    = x / ImPow(10.0, (double)expv); /* fractional part of x, between 1 and 10 */
+    double nf;                                         /* nice, rounded fraction */
+    if (round) {
+        if (f < 1.5)
+            nf = 1;
+        else if (f < 3)
+            nf = 2;
+        else if (f < 7)
+            nf = 5;
+        else
+            nf = 10;
+    }
+    else {
+        if (f <= 1)
+            nf = 1;
+        else if (f <= 2)
+            nf = 2;
+        else if (f <= 5)
+            nf = 5;
+        else
+            nf = 10;
+    }
+    return nf * ImPow(10.0, expv);
+}
+
+//-----------------------------------------------------------------------------
+// Context Utils
+//-----------------------------------------------------------------------------
+
+// Forwards ctx to ImGui::SetCurrentContext.
+void SetImGuiContext(ImGuiContext* ctx) {
+    ImGui::SetCurrentContext(ctx);
+}
+
+// Allocates and initializes a new ImPlotContext. The new context becomes the
+// current one only if no context is current yet.
+ImPlotContext* CreateContext() {
+    ImPlotContext* ctx = IM_NEW(ImPlotContext)();
+    Initialize(ctx);
+    if (GImPlot == NULL)
+        SetCurrentContext(ctx);
+    return ctx;
+}
+
+// Destroys ctx (or the current context when ctx is NULL). If the destroyed
+// context is the current one, the current context is cleared first.
+void DestroyContext(ImPlotContext* ctx) {
+    if (ctx == NULL)
+        ctx = GImPlot;
+    if (GImPlot == ctx)
+        SetCurrentContext(NULL);
+    IM_DELETE(ctx);
+}
+
+// Returns the current ImPlot context (may be NULL).
+ImPlotContext* GetCurrentContext() {
+    return GImPlot;
+}
+
+// Makes ctx the current ImPlot context.
+void SetCurrentContext(ImPlotContext* ctx) {
+    GImPlot = ctx;
+}
+
+// Resets ctx and selects the default colormap.
+void Initialize(ImPlotContext* ctx) {
+    Reset(ctx);
+    ctx->Colormap = GetColormap(ImPlotColormap_Default, &ctx->ColormapSize);
+}
+
+// Clears the per-plot transient state held in ctx: pending next-plot/next-item
+// data, ticks, annotations, fit extents, digital plot counters and the
+// current plot/item pointers. Also closes a child window the plot opened.
+void Reset(ImPlotContext* ctx) {
+    // end child window if it was made
+    if (ctx->ChildWindowMade)
+        ImGui::EndChild();
+    ctx->ChildWindowMade = false;
+    // reset the next plot/item data
+    ctx->NextPlotData.Reset();
+    ctx->NextItemData.Reset();
+    // reset items count
+    ctx->VisibleItemCount = 0;
+    // reset ticks/labels
+    ctx->XTicks.Reset();
+    // use IMPLOT_Y_AXES (not a literal) so this stays in step with the
+    // per-axis extents loop below
+    for (int i = 0; i < IMPLOT_Y_AXES; ++i)
+        ctx->YTicks[i].Reset();
+    // reset labels
+    ctx->Annotations.Reset();
+    // reset extents/fit
+    // (Min = +HUGE_VAL, Max = -HUGE_VAL so the first fitted point sets both)
+    ctx->FitThisFrame = false;
+    ctx->FitX = false;
+    ctx->ExtentsX.Min = HUGE_VAL;
+    ctx->ExtentsX.Max = -HUGE_VAL;
+    for (int i = 0; i < IMPLOT_Y_AXES; i++) {
+        ctx->ExtentsY[i].Min = HUGE_VAL;
+        ctx->ExtentsY[i].Max = -HUGE_VAL;
+        ctx->FitY[i] = false;
+    }
+    // reset digital plot items count
+    ctx->DigitalPlotItemCnt = 0;
+    ctx->DigitalPlotOffset = 0;
+    // nullify plot
+    ctx->CurrentPlot  = NULL;
+    ctx->CurrentItem  = NULL;
+    ctx->PreviousItem = NULL;
+}
+
+//-----------------------------------------------------------------------------
+// Plot Utils
+//-----------------------------------------------------------------------------
+
+// Looks up a plot by title; the key is the ImGui ID of title in the current window.
+ImPlotPlot* GetPlot(const char* title) {
+    ImGuiWindow*   Window = GImGui->CurrentWindow;
+    const ImGuiID  ID     = Window->GetID(title);
+    return GImPlot->Plots.GetByKey(ID);
+}
+
+// Returns the plot currently being built (NULL when none).
+ImPlotPlot* GetCurrentPlot() {
+    return GImPlot->CurrentPlot;
+}
+
+// Clears all stored plots.
+void BustPlotCache() {
+    GImPlot->Plots.Clear();
+}
+
+// Extends the fit extents of the current plot to include point p.
+void FitPoint(const ImPlotPoint& p) {
+    FitPointX(p.x);
+    FitPointY(p.y);
+}
+
+// Extends the x fit extents to include x. NaN/Inf values are ignored, as are
+// non-positive values when the x-axis is log scaled.
+void FitPointX(double x) {
+    ImPlotContext& gp = *GImPlot;
+    ImPlotRange& ex_x = gp.ExtentsX;
+    const bool log_x  = ImHasFlag(gp.CurrentPlot->XAxis.Flags, ImPlotAxisFlags_LogScale);
+    if (!ImNanOrInf(x) && !(log_x && x <= 0)) {
+        ex_x.Min = x < ex_x.Min ? x : ex_x.Min;
+        ex_x.Max = x > ex_x.Max ? x : ex_x.Max;
+    }
+}
+
+// Extends the fit extents of the current y-axis to include y. NaN/Inf values
+// are ignored, as are non-positive values when that axis is log scaled.
+void FitPointY(double y) {
+    ImPlotContext& gp = *GImPlot;
+    const ImPlotYAxis y_axis = gp.CurrentPlot->CurrentYAxis;
+    ImPlotRange& ex_y = gp.ExtentsY[y_axis];
+    const bool log_y  = ImHasFlag(gp.CurrentPlot->YAxis[y_axis].Flags, ImPlotAxisFlags_LogScale);
+    if (!ImNanOrInf(y) && !(log_y && y <= 0)) {
+        ex_y.Min = y < ex_y.Min ? y : ex_y.Min;
+        ex_y.Max = y > ex_y.Max ? y : ex_y.Max;
+    }
+}
+
+// Writes the axis range out to its linked min/max variables, when set.
+void PushLinkedAxis(ImPlotAxis& axis) {
+    if (axis.LinkedMin) { *axis.LinkedMin = axis.Range.Min; }
+    if (axis.LinkedMax) { *axis.LinkedMax = axis.Range.Max; }
+}
+
+// Reads the axis range in from its linked min/max variables, when set.
+void PullLinkedAxis(ImPlotAxis& axis) {
+    if (axis.LinkedMin) { axis.SetMin(*axis.LinkedMin); }
+    if (axis.LinkedMax) { axis.SetMax(*axis.LinkedMax); }
+}
+
+//-----------------------------------------------------------------------------
+// Coordinate Utils
+//-----------------------------------------------------------------------------
+
+// Recomputes the cached plot<->pixel transform of the current plot: the pixel
+// rectangle per y-axis (corners swapped for inverted axes), the linear scale
+// factors Mx / My[i], and the log10(Max/Min) denominators used by log axes.
+void UpdateTransformCache() {
+    ImPlotContext& gp = *GImPlot;
+    // get pixels for transforms
+    for (int i = 0; i < IMPLOT_Y_AXES; i++) {
+        gp.PixelRange[i] = ImRect(ImHasFlag(gp.CurrentPlot->XAxis.Flags, ImPlotAxisFlags_Invert) ? gp.CurrentPlot->PlotRect.Max.x : gp.CurrentPlot->PlotRect.Min.x,
+                                  ImHasFlag(gp.CurrentPlot->YAxis[i].Flags, ImPlotAxisFlags_Invert) ? gp.CurrentPlot->PlotRect.Min.y : gp.CurrentPlot->PlotRect.Max.y,
+                                  ImHasFlag(gp.CurrentPlot->XAxis.Flags, ImPlotAxisFlags_Invert) ? gp.CurrentPlot->PlotRect.Min.x : gp.CurrentPlot->PlotRect.Max.x,
+                                  ImHasFlag(gp.CurrentPlot->YAxis[i].Flags, ImPlotAxisFlags_Invert) ? gp.CurrentPlot->PlotRect.Max.y : gp.CurrentPlot->PlotRect.Min.y);
+        gp.My[i] = (gp.PixelRange[i].Max.y - gp.PixelRange[i].Min.y) / gp.CurrentPlot->YAxis[i].Range.Size();
+    }
+    gp.LogDenX = ImLog10(gp.CurrentPlot->XAxis.Range.Max / gp.CurrentPlot->XAxis.Range.Min);
+    for (int i = 0; i < IMPLOT_Y_AXES; i++)
+        gp.LogDenY[i] = ImLog10(gp.CurrentPlot->YAxis[i].Range.Max / gp.CurrentPlot->YAxis[i].Range.Min);
+    gp.Mx = (gp.PixelRange[0].Max.x - gp.PixelRange[0].Min.x) / gp.CurrentPlot->XAxis.Range.Size();
+}
+
+// Converts pixel position (x, y) to plot coordinates using the cached
+// transform. A negative y_axis_in selects the plot's current y-axis. Must be
+// called while a plot is current (asserted).
+ImPlotPoint PixelsToPlot(float x, float y, ImPlotYAxis y_axis_in) {
+    ImPlotContext& gp = *GImPlot;
+    IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "PixelsToPlot() needs to be called between BeginPlot() and EndPlot()!");
+    const ImPlotYAxis y_axis = y_axis_in >= 0 ? y_axis_in : gp.CurrentPlot->CurrentYAxis;
+    ImPlotPoint plt;
+    plt.x = (x - gp.PixelRange[y_axis].Min.x) / gp.Mx + gp.CurrentPlot->XAxis.Range.Min;
+    plt.y = (y - gp.PixelRange[y_axis].Min.y) / gp.My[y_axis] + gp.CurrentPlot->YAxis[y_axis].Range.Min;
+    // log axes: turn the linear position into a 0..1 fraction, then exponentiate
+    if (ImHasFlag(gp.CurrentPlot->XAxis.Flags, ImPlotAxisFlags_LogScale)) {
+        double t = (plt.x - gp.CurrentPlot->XAxis.Range.Min) / gp.CurrentPlot->XAxis.Range.Size();
+        plt.x = ImPow(10, t * gp.LogDenX) * gp.CurrentPlot->XAxis.Range.Min;
+    }
+    if (ImHasFlag(gp.CurrentPlot->YAxis[y_axis].Flags, ImPlotAxisFlags_LogScale)) {
+        double t = (plt.y - gp.CurrentPlot->YAxis[y_axis].Range.Min) / gp.CurrentPlot->YAxis[y_axis].Range.Size();
+        plt.y = ImPow(10, t * gp.LogDenY[y_axis]) * gp.CurrentPlot->YAxis[y_axis].Range.Min;
+    }
+    return plt;
+}
+
+// Overload taking the pixel position as an ImVec2.
+ImPlotPoint PixelsToPlot(const ImVec2& pix, ImPlotYAxis y_axis) {
+    return PixelsToPlot(pix.x, pix.y, y_axis);
+}
+
+// This function is convenient but should not be used to process a high volume of points. Use the Transformer structs below instead.
+ImVec2 PlotToPixels(double x, double y, ImPlotYAxis y_axis_in) {
+    ImPlotContext& gp = *GImPlot;
+    IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "PlotToPixels() needs to be called between BeginPlot() and EndPlot()!");
+    ImPlotPlot& plot = *gp.CurrentPlot;
+    // a negative axis index means "use the plot's current y-axis"
+    ImPlotYAxis y_axis = y_axis_in;
+    if (y_axis < 0)
+        y_axis = plot.CurrentYAxis;
+    // on log axes, map the value to its fraction along the axis in log space
+    // and re-express it as a linearly interpolated value inside the range
+    if (ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_LogScale)) {
+        const double frac_x = ImLog10(x / plot.XAxis.Range.Min) / gp.LogDenX;
+        x = ImLerp(plot.XAxis.Range.Min, plot.XAxis.Range.Max, (float)frac_x);
+    }
+    if (ImHasFlag(plot.YAxis[y_axis].Flags, ImPlotAxisFlags_LogScale)) {
+        const double frac_y = ImLog10(y / plot.YAxis[y_axis].Range.Min) / gp.LogDenY[y_axis];
+        y = ImLerp(plot.YAxis[y_axis].Range.Min, plot.YAxis[y_axis].Range.Max, (float)frac_y);
+    }
+    // linear plot -> pixel mapping using the cached scale factors
+    const float px = (float)(gp.PixelRange[y_axis].Min.x + gp.Mx * (x - plot.XAxis.Range.Min));
+    const float py = (float)(gp.PixelRange[y_axis].Min.y + gp.My[y_axis] * (y - plot.YAxis[y_axis].Range.Min));
+    ImVec2 pix;
+    pix.x = px;
+    pix.y = py;
+    return pix;
+}
+
+// This function is convenient but should not be used to process a high volume of points. Use the Transformer structs below instead.
+ImVec2 PlotToPixels(const ImPlotPoint& plt, ImPlotYAxis y_axis) {
+    return PlotToPixels(plt.x, plt.y, y_axis);
+}
+
+//-----------------------------------------------------------------------------
+// Legend Utils
+//-----------------------------------------------------------------------------
+
+// Returns the top-left position of a box of inner_size placed inside
+// outer_rect according to loc. West/East and North/South flags select an edge
+// (offset inward by pad); when neither or both flags of a pair are set the box
+// is centered on that axis. The result is rounded to whole pixels.
+ImVec2 GetLocationPos(const ImRect& outer_rect, const ImVec2& inner_size, ImPlotLocation loc, const ImVec2& pad) {
+    ImVec2 pos;
+    if (ImHasFlag(loc, ImPlotLocation_West) && !ImHasFlag(loc, ImPlotLocation_East))
+        pos.x = outer_rect.Min.x + pad.x;
+    else if (!ImHasFlag(loc, ImPlotLocation_West) && ImHasFlag(loc, ImPlotLocation_East))
+        pos.x = outer_rect.Max.x - pad.x - inner_size.x;
+    else
+        pos.x = outer_rect.GetCenter().x - inner_size.x * 0.5f;
+    // legend reference point y
+    if (ImHasFlag(loc, ImPlotLocation_North) && !ImHasFlag(loc, ImPlotLocation_South))
+        pos.y = outer_rect.Min.y + pad.y;
+    else if (!ImHasFlag(loc, ImPlotLocation_North) && ImHasFlag(loc, ImPlotLocation_South))
+        pos.y = outer_rect.Max.y - pad.y - inner_size.y;
+    else
+        pos.y = outer_rect.GetCenter().y - inner_size.y * 0.5f;
+    pos.x = IM_ROUND(pos.x);
+    pos.y = IM_ROUND(pos.y);
+    return pos;
+}
+
+// Computes the pixel size of the legend for plot. Each entry is a square icon
+// (side = text line height) followed by its label. Vertical layout stacks the
+// entries; otherwise they are laid out in one row.
+ImVec2 CalcLegendSize(ImPlotPlot& plot, const ImVec2& pad, const ImVec2& spacing, ImPlotOrientation orn) {
+    // vars
+    const int   nItems      = plot.GetLegendCount();
+    const float txt_ht      = ImGui::GetTextLineHeight();
+    const float icon_size   = txt_ht;
+    // get label max width
+    float max_label_width = 0;
+    float sum_label_width = 0;
+    for (int i = 0; i < nItems; ++i) {
+        const char* label       = plot.GetLegendLabel(i);
+        const float label_width = ImGui::CalcTextSize(label, NULL, true).x;
+        max_label_width         = label_width > max_label_width ? label_width : max_label_width;
+        sum_label_width        += label_width;
+    }
+    // calc legend size
+    const ImVec2 legend_size = orn == ImPlotOrientation_Vertical ?
+                               ImVec2(pad.x * 2 + icon_size + max_label_width, pad.y * 2 + nItems * txt_ht + (nItems - 1) * spacing.y) :
+                               ImVec2(pad.x * 2 + icon_size * nItems + sum_label_width + (nItems - 1) * spacing.x, pad.y * 2 + txt_ht);
+    return legend_size;
+}
+
+// Draws every legend entry of plot inside legend_bb and, when interactable,
+// handles hovering (sets item->LegendHovered) and clicking the icon (toggles
+// item->Show). Hidden items are drawn with the dimmed legend text color.
+void ShowLegendEntries(ImPlotPlot& plot, const ImRect& legend_bb, bool interactable, const ImVec2& pad, const ImVec2& spacing, ImPlotOrientation orn, ImDrawList& DrawList) {
+    ImGuiIO& IO = ImGui::GetIO();
+    // vars
+    const float txt_ht      = ImGui::GetTextLineHeight();
+    const float icon_size   = txt_ht;
+    const float icon_shrink = 2;
+    ImVec4 col_txt          = GetStyleColorVec4(ImPlotCol_LegendText);
+    ImU32  col_txt_dis      = ImGui::GetColorU32(col_txt * ImVec4(1,1,1,0.25f));
+    // render each legend item
+    float sum_label_width = 0;
+    for (int i = 0; i < plot.GetLegendCount(); ++i) {
+        ImPlotItem* item        = plot.GetLegendItem(i);
+        const char* label       = plot.GetLegendLabel(i);
+        const float label_width = ImGui::CalcTextSize(label, NULL, true).x;
+        const ImVec2 top_left   = orn == ImPlotOrientation_Vertical ?
+                                  legend_bb.Min + pad + ImVec2(0, i * (txt_ht + spacing.y)) :
+                                  legend_bb.Min + pad + ImVec2(i * (icon_size + spacing.x) + sum_label_width, 0);
+        sum_label_width        += label_width;
+        // icon_bb: the colored square (inset by icon_shrink); label_bb: icon plus label text
+        ImRect icon_bb;
+        icon_bb.Min = top_left + ImVec2(icon_shrink,icon_shrink);
+        icon_bb.Max = top_left + ImVec2(icon_size - icon_shrink, icon_size - icon_shrink);
+        ImRect label_bb;
+        label_bb.Min = top_left;
+        label_bb.Max = top_left + ImVec2(label_width + icon_size, icon_size);
+        ImU32 col_hl_txt;
+        if (interactable && (icon_bb.Contains(IO.MousePos) || label_bb.Contains(IO.MousePos))) {
+            item->LegendHovered = true;
+            col_hl_txt = ImGui::GetColorU32(ImLerp(col_txt, item->Color, 0.25f));
+        }
+        else {
+            // item->LegendHovered = false;
+            col_hl_txt = ImGui::GetColorU32(col_txt);
+        }
+        ImU32 iconColor;
+        ImVec4 item_color = item->Color;
+        item_color.w = 1;
+        if (interactable && icon_bb.Contains(IO.MousePos)) {
+            ImVec4 colAlpha = item_color;
+            colAlpha.w    = 0.5f;
+            iconColor     = item->Show ? ImGui::GetColorU32(colAlpha) : ImGui::GetColorU32(ImGuiCol_TextDisabled, 0.5f);
+            if (IO.MouseClicked[0])
+                item->Show = !item->Show;
+        }
+        else {
+            iconColor = item->Show ? ImGui::GetColorU32(item_color) : col_txt_dis;
+        }
+        DrawList.AddRectFilled(icon_bb.Min, icon_bb.Max, iconColor, 1);
+        // only draw the label when it has visible text
+        const char* text_display_end = ImGui::FindRenderedTextEnd(label, NULL);
+        if (label != text_display_end)
+            DrawList.AddText(top_left + ImVec2(icon_size, 0), item->Show ? col_hl_txt : col_txt_dis, label, text_display_end);
+    }
+}
+
+//-----------------------------------------------------------------------------
+// Tick Utils
+//-----------------------------------------------------------------------------
+
+// If the tick shows a label, formats tick.PlotPos with "%.10g", appends it
+// (including the terminating NUL) to buffer, and records its offset and size.
+void LabelTickDefault(ImPlotTick& tick, ImGuiTextBuffer& buffer) {
+    char temp[32];
+    if (tick.ShowLabel) {
+        tick.TextOffset = buffer.size();
+        snprintf(temp, 32, "%.10g", tick.PlotPos);
+        buffer.append(temp, temp + strlen(temp) + 1);
+        tick.LabelSize = ImGui::CalcTextSize(buffer.Buf.Data + tick.TextOffset);
+    }
+}
+
+// Same as LabelTickDefault but formats the value with "%.0E".
+void LabelTickScientific(ImPlotTick& tick, ImGuiTextBuffer& buffer) {
+    char temp[32];
+    if (tick.ShowLabel) {
+        tick.TextOffset = buffer.size();
+        snprintf(temp, 32, "%.0E", tick.PlotPos);
+        buffer.append(temp, temp + strlen(temp) + 1);
+        tick.LabelSize = ImGui::CalcTextSize(buffer.Buf.Data + tick.TextOffset);
+    }
+}
+
+// Appends linearly spaced ticks for range: major ticks at a NiceNum interval
+// and nMinor - 1 minor ticks between consecutive majors. Only ticks inside
+// range are added.
+// NOTE(review): divides by (nMajor - 1); presumably callers pass nMajor >= 2 - confirm.
+void AddTicksDefault(const ImPlotRange& range, int nMajor, int nMinor, ImPlotTickCollection& ticks) {
+    const double nice_range = NiceNum(range.Size() * 0.99, false);
+    const double interval   = NiceNum(nice_range / (nMajor - 1), true);
+    const double graphmin   = floor(range.Min / interval) * interval;
+    const double graphmax   = ceil(range.Max / interval) * interval;
+    for (double major = graphmin; major < graphmax + 0.5 * interval; major += interval) {
+        if (range.Contains(major))
+            ticks.Append(major, true, true, LabelTickDefault);
+        for (int i = 1; i < nMinor; ++i) {
+            double minor = major + i * interval / nMinor;
+            if (range.Contains(minor))
+                ticks.Append(minor, false, true, LabelTickDefault);
+        }
+    }
+}
+
+// Appends ticks for a log-scaled range: major ticks at powers of ten stepped
+// by exp_step decades, with unlabeled minor ticks in between. Does nothing
+// when either bound of range is <= 0.
+// NOTE(review): divides by nMajor; presumably callers pass nMajor >= 1 - confirm.
+void AddTicksLogarithmic(const ImPlotRange& range, int nMajor, ImPlotTickCollection& ticks) {
+    if (range.Min <= 0 || range.Max <= 0)
+        return;
+    double log_min = ImLog10(range.Min);
+    double log_max = ImLog10(range.Max);
+    int exp_step   = ImMax(1,(int)(log_max - log_min) / nMajor);
+    int exp_min    = (int)log_min;
+    int exp_max    = (int)log_max;
+    if (exp_step != 1) {
+        while(exp_step % 3 != 0)       exp_step++; // make step size multiple of three
+        while(exp_min % exp_step != 0) exp_min--;  // decrease exp_min until exp_min + N * exp_step will be 0
+    }
+    for (int e = exp_min - exp_step; e < (exp_max + exp_step); e += exp_step) {
+        double major1 = ImPow(10, (double)(e));
+        double major2 = ImPow(10, (double)(e + 1));
+        double interval = (major2 - major1) / 9;
+        if (major1 >= (range.Min - DBL_EPSILON) && major1 <= (range.Max + DBL_EPSILON))
+            ticks.Append(major1, true, true, LabelTickScientific);
+        // minor ticks for each decade covered by this step
+        for (int j = 0; j < exp_step; ++j) {
+            major1 = ImPow(10, (double)(e+j));
+            major2 = ImPow(10, (double)(e+j+1));
+            interval = (major2 - major1) / 9;
+            for (int i = 1; i < (9 + (int)(j < (exp_step - 1))); ++i) {
+                double minor = major1 + i * interval;
+                if (minor >= (range.Min - DBL_EPSILON) && minor <= (range.Max + DBL_EPSILON))
+                    ticks.Append(minor, false, false, LabelTickScientific);
+
+            }
+        }
+    }
+}
+
+// Appends n user-supplied ticks at values[i]. When labels is non-NULL,
+// labels[i] is copied into the tick text buffer; otherwise the default
+// numeric label is generated.
+void AddTicksCustom(const double* values, const char* const labels[], int n, ImPlotTickCollection& ticks) {
+    for (int i = 0; i < n; ++i) {
+        ImPlotTick tick(values[i], false, true);
+        if (labels != NULL) {
+            tick.TextOffset = ticks.TextBuffer.size();
+            ticks.TextBuffer.append(labels[i], labels[i] + strlen(labels[i]) + 1);
+            tick.LabelSize = ImGui::CalcTextSize(labels[i]);
+        }
+        else {
+            LabelTickDefault(tick, ticks.TextBuffer);
+        }
+        ticks.Append(tick);
+    }
+}
+
+//-----------------------------------------------------------------------------
+// Time Ticks and Utils
+//-----------------------------------------------------------------------------
+
+// this may not be thread safe?
+static const double TimeUnitSpans[ImPlotTimeUnit_COUNT] = { + 0.000001, + 0.001, + 1, + 60, + 3600, + 86400, + 2629800, + 31557600 +}; + +inline ImPlotTimeUnit GetUnitForRange(double range) { + static double cutoffs[ImPlotTimeUnit_COUNT] = {0.001, 1, 60, 3600, 86400, 2629800, 31557600, IMPLOT_MAX_TIME}; + for (int i = 0; i < ImPlotTimeUnit_COUNT; ++i) { + if (range <= cutoffs[i]) + return (ImPlotTimeUnit)i; + } + return ImPlotTimeUnit_Yr; +} + +inline int LowerBoundStep(int max_divs, const int* divs, const int* step, int size) { + if (max_divs < divs[0]) + return 0; + for (int i = 1; i < size; ++i) { + if (max_divs < divs[i]) + return step[i-1]; + } + return step[size-1]; +} + +inline int GetTimeStep(int max_divs, ImPlotTimeUnit unit) { + if (unit == ImPlotTimeUnit_Ms || unit == ImPlotTimeUnit_Us) { + static const int step[] = {500,250,200,100,50,25,20,10,5,2,1}; + static const int divs[] = {2,4,5,10,20,40,50,100,200,500,1000}; + return LowerBoundStep(max_divs, divs, step, 11); + } + if (unit == ImPlotTimeUnit_S || unit == ImPlotTimeUnit_Min) { + static const int step[] = {30,15,10,5,1}; + static const int divs[] = {2,4,6,12,60}; + return LowerBoundStep(max_divs, divs, step, 5); + } + else if (unit == ImPlotTimeUnit_Hr) { + static const int step[] = {12,6,3,2,1}; + static const int divs[] = {2,4,8,12,24}; + return LowerBoundStep(max_divs, divs, step, 5); + } + else if (unit == ImPlotTimeUnit_Day) { + static const int step[] = {14,7,2,1}; + static const int divs[] = {2,4,14,28}; + return LowerBoundStep(max_divs, divs, step, 4); + } + else if (unit == ImPlotTimeUnit_Mo) { + static const int step[] = {6,3,2,1}; + static const int divs[] = {2,4,6,12}; + return LowerBoundStep(max_divs, divs, step, 4); + } + return 0; +} + +ImPlotTime MkGmtTime(struct tm *ptm) { + ImPlotTime t; +#ifdef _WIN32 + t.S = _mkgmtime(ptm); +#else + t.S = timegm(ptm); +#endif + if (t.S < 0) + t.S = 0; + return t; +} + +tm* GetGmtTime(const ImPlotTime& t, tm* ptm) +{ +#ifdef _WIN32 + if 
(gmtime_s(ptm, &t.S) == 0) + return ptm; + else + return NULL; +#else + return gmtime_r(&t.S, ptm); +#endif +} + +ImPlotTime MkLocTime(struct tm *ptm) { + ImPlotTime t; + t.S = mktime(ptm); + if (t.S < 0) + t.S = 0; + return t; +} + +tm* GetLocTime(const ImPlotTime& t, tm* ptm) { +#ifdef _WIN32 + if (localtime_s(ptm, &t.S) == 0) + return ptm; + else + return NULL; +#else + return localtime_r(&t.S, ptm); +#endif +} + +inline ImPlotTime MkTime(struct tm *ptm) { + if (GetStyle().UseLocalTime) + return MkLocTime(ptm); + else + return MkGmtTime(ptm); +} + +inline tm* GetTime(const ImPlotTime& t, tm* ptm) { + if (GetStyle().UseLocalTime) + return GetLocTime(t,ptm); + else + return GetGmtTime(t,ptm); +} + +ImPlotTime MakeTime(int year, int month, int day, int hour, int min, int sec, int us) { + tm& Tm = GImPlot->Tm; + + int yr = year - 1900; + if (yr < 0) + yr = 0; + + sec = sec + us / 1000000; + us = us % 1000000; + + Tm.tm_sec = sec; + Tm.tm_min = min; + Tm.tm_hour = hour; + Tm.tm_mday = day; + Tm.tm_mon = month; + Tm.tm_year = yr; + + ImPlotTime t = MkTime(&Tm); + + t.Us = us; + return t; +} + +int GetYear(const ImPlotTime& t) { + tm& Tm = GImPlot->Tm; + GetTime(t, &Tm); + return Tm.tm_year + 1900; +} + +ImPlotTime AddTime(const ImPlotTime& t, ImPlotTimeUnit unit, int count) { + tm& Tm = GImPlot->Tm; + ImPlotTime t_out = t; + switch(unit) { + case ImPlotTimeUnit_Us: t_out.Us += count; break; + case ImPlotTimeUnit_Ms: t_out.Us += count * 1000; break; + case ImPlotTimeUnit_S: t_out.S += count; break; + case ImPlotTimeUnit_Min: t_out.S += count * 60; break; + case ImPlotTimeUnit_Hr: t_out.S += count * 3600; break; + case ImPlotTimeUnit_Day: t_out.S += count * 86400; break; + case ImPlotTimeUnit_Mo: for (int i = 0; i < abs(count); ++i) { + GetTime(t_out, &Tm); + if (count > 0) + t_out.S += 86400 * GetDaysInMonth(Tm.tm_year + 1900, Tm.tm_mon); + else if (count < 0) + t_out.S -= 86400 * GetDaysInMonth(Tm.tm_year + 1900 - (Tm.tm_mon == 0 ? 1 : 0), Tm.tm_mon == 0 ? 
11 : Tm.tm_mon - 1); // NOT WORKING + } + break; + case ImPlotTimeUnit_Yr: for (int i = 0; i < abs(count); ++i) { + if (count > 0) + t_out.S += 86400 * (365 + (int)IsLeapYear(GetYear(t_out))); + else if (count < 0) + t_out.S -= 86400 * (365 + (int)IsLeapYear(GetYear(t_out) - 1)); + // this is incorrect if leap year and we are past Feb 28 + } + break; + default: break; + } + t_out.RollOver(); + return t_out; +} + +ImPlotTime FloorTime(const ImPlotTime& t, ImPlotTimeUnit unit) { + GetTime(t, &GImPlot->Tm); + switch (unit) { + case ImPlotTimeUnit_S: return ImPlotTime(t.S, 0); + case ImPlotTimeUnit_Ms: return ImPlotTime(t.S, (t.Us / 1000) * 1000); + case ImPlotTimeUnit_Us: return t; + case ImPlotTimeUnit_Yr: GImPlot->Tm.tm_mon = 0; // fall-through + case ImPlotTimeUnit_Mo: GImPlot->Tm.tm_mday = 1; // fall-through + case ImPlotTimeUnit_Day: GImPlot->Tm.tm_hour = 0; // fall-through + case ImPlotTimeUnit_Hr: GImPlot->Tm.tm_min = 0; // fall-through + case ImPlotTimeUnit_Min: GImPlot->Tm.tm_sec = 0; break; + default: return t; + } + return MkTime(&GImPlot->Tm); +} + +ImPlotTime CeilTime(const ImPlotTime& t, ImPlotTimeUnit unit) { + return AddTime(FloorTime(t, unit), unit, 1); +} + +ImPlotTime RoundTime(const ImPlotTime& t, ImPlotTimeUnit unit) { + ImPlotTime t1 = FloorTime(t, unit); + ImPlotTime t2 = AddTime(t1,unit,1); + if (t1.S == t2.S) + return t.Us - t1.Us < t2.Us - t.Us ? t1 : t2; + return t.S - t1.S < t2.S - t.S ? 
t1 : t2; +} + +ImPlotTime CombineDateTime(const ImPlotTime& date_part, const ImPlotTime& tod_part) { + tm& Tm = GImPlot->Tm; + GetTime(date_part, &GImPlot->Tm); + int y = Tm.tm_year; + int m = Tm.tm_mon; + int d = Tm.tm_mday; + GetTime(tod_part, &GImPlot->Tm); + Tm.tm_year = y; + Tm.tm_mon = m; + Tm.tm_mday = d; + ImPlotTime t = MkTime(&Tm); + t.Us = tod_part.Us; + return t; +} + +static const char* MONTH_NAMES[] = {"January","February","March","April","May","June","July","August","September","October","November","December"}; +static const char* WD_ABRVS[] = {"Su","Mo","Tu","We","Th","Fr","Sa"}; +static const char* MONTH_ABRVS[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"}; + +int FormatTime(const ImPlotTime& t, char* buffer, int size, ImPlotTimeFmt fmt, bool use_24_hr_clk) { + tm& Tm = GImPlot->Tm; + GetTime(t, &Tm); + const int us = t.Us % 1000; + const int ms = t.Us / 1000; + const int sec = Tm.tm_sec; + const int min = Tm.tm_min; + if (use_24_hr_clk) { + const int hr = Tm.tm_hour; + switch(fmt) { + case ImPlotTimeFmt_Us: return snprintf(buffer, size, ".%03d %03d", ms, us); + case ImPlotTimeFmt_SUs: return snprintf(buffer, size, ":%02d.%03d %03d", sec, ms, us); + case ImPlotTimeFmt_SMs: return snprintf(buffer, size, ":%02d.%03d", sec, ms); + case ImPlotTimeFmt_S: return snprintf(buffer, size, ":%02d", sec); + case ImPlotTimeFmt_HrMinSMs: return snprintf(buffer, size, "%02d:%02d:%02d.%03d", hr, min, sec, ms); + case ImPlotTimeFmt_HrMinS: return snprintf(buffer, size, "%02d:%02d:%02d", hr, min, sec); + case ImPlotTimeFmt_HrMin: return snprintf(buffer, size, "%02d:%02d", hr, min); + case ImPlotTimeFmt_Hr: return snprintf(buffer, size, "%02d:00", hr); + default: return 0; + } + } + else { + const char* ap = Tm.tm_hour < 12 ? "am" : "pm"; + const int hr = (Tm.tm_hour == 0 || Tm.tm_hour == 12) ? 
12 : Tm.tm_hour % 12; + switch(fmt) { + case ImPlotTimeFmt_Us: return snprintf(buffer, size, ".%03d %03d", ms, us); + case ImPlotTimeFmt_SUs: return snprintf(buffer, size, ":%02d.%03d %03d", sec, ms, us); + case ImPlotTimeFmt_SMs: return snprintf(buffer, size, ":%02d.%03d", sec, ms); + case ImPlotTimeFmt_S: return snprintf(buffer, size, ":%02d", sec); + case ImPlotTimeFmt_HrMinSMs: return snprintf(buffer, size, "%d:%02d:%02d.%03d%s", hr, min, sec, ms, ap); + case ImPlotTimeFmt_HrMinS: return snprintf(buffer, size, "%d:%02d:%02d%s", hr, min, sec, ap); + case ImPlotTimeFmt_HrMin: return snprintf(buffer, size, "%d:%02d%s", hr, min, ap); + case ImPlotTimeFmt_Hr: return snprintf(buffer, size, "%d%s", hr, ap); + default: return 0; + } + } +} + +int FormatDate(const ImPlotTime& t, char* buffer, int size, ImPlotDateFmt fmt, bool use_iso_8601) { + tm& Tm = GImPlot->Tm; + GetTime(t, &Tm); + const int day = Tm.tm_mday; + const int mon = Tm.tm_mon + 1; + const int year = Tm.tm_year + 1900; + const int yr = year % 100; + if (use_iso_8601) { + switch (fmt) { + case ImPlotDateFmt_DayMo: return snprintf(buffer, size, "--%02d-%02d", mon, day); + case ImPlotDateFmt_DayMoYr: return snprintf(buffer, size, "%d-%02d-%02d", year, mon, day); + case ImPlotDateFmt_MoYr: return snprintf(buffer, size, "%d-%02d", year, mon); + case ImPlotDateFmt_Mo: return snprintf(buffer, size, "--%02d", mon); + case ImPlotDateFmt_Yr: return snprintf(buffer, size, "%d", year); + default: return 0; + } + } + else { + switch (fmt) { + case ImPlotDateFmt_DayMo: return snprintf(buffer, size, "%d/%d", mon, day); + case ImPlotDateFmt_DayMoYr: return snprintf(buffer, size, "%d/%d/%02d", mon, day, yr); + case ImPlotDateFmt_MoYr: return snprintf(buffer, size, "%s %d", MONTH_ABRVS[Tm.tm_mon], year); + case ImPlotDateFmt_Mo: return snprintf(buffer, size, "%s", MONTH_ABRVS[Tm.tm_mon]); + case ImPlotDateFmt_Yr: return snprintf(buffer, size, "%d", year); + default: return 0; + } + } + } + +int FormatDateTime(const 
ImPlotTime& t, char* buffer, int size, ImPlotDateTimeFmt fmt) { + int written = 0; + if (fmt.Date != ImPlotDateFmt_None) + written += FormatDate(t, buffer, size, fmt.Date, fmt.UseISO8601); + if (fmt.Time != ImPlotTimeFmt_None) { + if (fmt.Date != ImPlotDateFmt_None) + buffer[written++] = ' '; + written += FormatTime(t, &buffer[written], size - written, fmt.Time, fmt.Use24HourClock); + } + return written; +} + +inline float GetDateTimeWidth(ImPlotDateTimeFmt fmt) { + static ImPlotTime t_max_width = MakeTime(2888, 12, 22, 12, 58, 58, 888888); // best guess at time that maximizes pixel width + char buffer[32]; + FormatDateTime(t_max_width, buffer, 32, fmt); + return ImGui::CalcTextSize(buffer).x; +} + +void LabelTickTime(ImPlotTick& tick, ImGuiTextBuffer& buffer, const ImPlotTime& t, ImPlotDateTimeFmt fmt) { + char temp[32]; + if (tick.ShowLabel) { + tick.TextOffset = buffer.size(); + FormatDateTime(t, temp, 32, fmt); + buffer.append(temp, temp + strlen(temp) + 1); + tick.LabelSize = ImGui::CalcTextSize(buffer.Buf.Data + tick.TextOffset); + } +} + +inline bool TimeLabelSame(const char* l1, const char* l2) { + size_t len1 = strlen(l1); + size_t len2 = strlen(l2); + size_t n = len1 < len2 ? 
len1 : len2; + return strcmp(l1 + len1 - n, l2 + len2 - n) == 0; +} + +static const ImPlotDateTimeFmt TimeFormatLevel0[ImPlotTimeUnit_COUNT] = { + ImPlotDateTimeFmt(ImPlotDateFmt_None, ImPlotTimeFmt_Us), + ImPlotDateTimeFmt(ImPlotDateFmt_None, ImPlotTimeFmt_SMs), + ImPlotDateTimeFmt(ImPlotDateFmt_None, ImPlotTimeFmt_S), + ImPlotDateTimeFmt(ImPlotDateFmt_None, ImPlotTimeFmt_HrMin), + ImPlotDateTimeFmt(ImPlotDateFmt_None, ImPlotTimeFmt_Hr), + ImPlotDateTimeFmt(ImPlotDateFmt_DayMo, ImPlotTimeFmt_None), + ImPlotDateTimeFmt(ImPlotDateFmt_Mo, ImPlotTimeFmt_None), + ImPlotDateTimeFmt(ImPlotDateFmt_Yr, ImPlotTimeFmt_None) +}; + +static const ImPlotDateTimeFmt TimeFormatLevel1[ImPlotTimeUnit_COUNT] = { + ImPlotDateTimeFmt(ImPlotDateFmt_None, ImPlotTimeFmt_HrMin), + ImPlotDateTimeFmt(ImPlotDateFmt_None, ImPlotTimeFmt_HrMinS), + ImPlotDateTimeFmt(ImPlotDateFmt_None, ImPlotTimeFmt_HrMin), + ImPlotDateTimeFmt(ImPlotDateFmt_None, ImPlotTimeFmt_HrMin), + ImPlotDateTimeFmt(ImPlotDateFmt_DayMoYr, ImPlotTimeFmt_None), + ImPlotDateTimeFmt(ImPlotDateFmt_DayMoYr, ImPlotTimeFmt_None), + ImPlotDateTimeFmt(ImPlotDateFmt_Yr, ImPlotTimeFmt_None), + ImPlotDateTimeFmt(ImPlotDateFmt_Yr, ImPlotTimeFmt_None) +}; + +static const ImPlotDateTimeFmt TimeFormatLevel1First[ImPlotTimeUnit_COUNT] = { + ImPlotDateTimeFmt(ImPlotDateFmt_DayMoYr, ImPlotTimeFmt_HrMinS), + ImPlotDateTimeFmt(ImPlotDateFmt_DayMoYr, ImPlotTimeFmt_HrMinS), + ImPlotDateTimeFmt(ImPlotDateFmt_DayMoYr, ImPlotTimeFmt_HrMin), + ImPlotDateTimeFmt(ImPlotDateFmt_DayMoYr, ImPlotTimeFmt_HrMin), + ImPlotDateTimeFmt(ImPlotDateFmt_DayMoYr, ImPlotTimeFmt_None), + ImPlotDateTimeFmt(ImPlotDateFmt_DayMoYr, ImPlotTimeFmt_None), + ImPlotDateTimeFmt(ImPlotDateFmt_Yr, ImPlotTimeFmt_None), + ImPlotDateTimeFmt(ImPlotDateFmt_Yr, ImPlotTimeFmt_None) +}; + +static const ImPlotDateTimeFmt TimeFormatMouseCursor[ImPlotTimeUnit_COUNT] = { + ImPlotDateTimeFmt(ImPlotDateFmt_None, ImPlotTimeFmt_Us), + ImPlotDateTimeFmt(ImPlotDateFmt_None, ImPlotTimeFmt_SUs), + 
ImPlotDateTimeFmt(ImPlotDateFmt_None, ImPlotTimeFmt_SMs), + ImPlotDateTimeFmt(ImPlotDateFmt_None, ImPlotTimeFmt_HrMinS), + ImPlotDateTimeFmt(ImPlotDateFmt_None, ImPlotTimeFmt_HrMin), + ImPlotDateTimeFmt(ImPlotDateFmt_DayMo, ImPlotTimeFmt_Hr), + ImPlotDateTimeFmt(ImPlotDateFmt_DayMoYr, ImPlotTimeFmt_None), + ImPlotDateTimeFmt(ImPlotDateFmt_MoYr, ImPlotTimeFmt_None) +}; + +inline ImPlotDateTimeFmt GetDateTimeFmt(const ImPlotDateTimeFmt* ctx, ImPlotTimeUnit idx) { /* returns a copy of ctx[idx] with UseISO8601 and Use24HourClock taken from the current style; idx is not bounds-checked here, so callers must pass a valid table index */ + ImPlotStyle& style = GetStyle(); + ImPlotDateTimeFmt fmt = ctx[idx]; + fmt.UseISO8601 = style.UseISO8601; + fmt.Use24HourClock = style.Use24HourClock; + return fmt; +} + +void AddTicksTime(const ImPlotRange& range, float plot_width, ImPlotTickCollection& ticks) { /* Appends time-axis ticks for range to ticks. The level-0 unit is chosen from the range covered per 100 px of plot_width. Unless that unit is ImPlotTimeUnit_Yr, level-1 (major) ticks are placed on unit1 boundaries with level-0 ticks in between; otherwise only level-0 year ticks are emitted. */ + // get units for level 0 and level 1 labels + const ImPlotTimeUnit unit0 = GetUnitForRange(range.Size() / (plot_width / 100)); // level = 0 (top) + const ImPlotTimeUnit unit1 = ImClamp(unit0 + 1, 0, ImPlotTimeUnit_COUNT - 1); // level = 1 (bottom); clamped because unit0 can be ImPlotTimeUnit_Yr, and unit0 + 1 would then index one past the end of the TimeFormatLevel1* tables + // get time format specs + const ImPlotDateTimeFmt fmt0 = GetDateTimeFmt(TimeFormatLevel0, unit0); + const ImPlotDateTimeFmt fmt1 = GetDateTimeFmt(TimeFormatLevel1, unit1); + const ImPlotDateTimeFmt fmtf = GetDateTimeFmt(TimeFormatLevel1First, unit1); + // min max times + const ImPlotTime t_min = ImPlotTime::FromDouble(range.Min); + const ImPlotTime t_max = ImPlotTime::FromDouble(range.Max); + // maximum allowable density of labels + const float max_density = 0.5f; + // book keeping: offset of the last major label inside ticks.TextBuffer (-1 = none yet). Kept as an offset, not a char*, since LabelTickTime writes into ticks.TextBuffer and a saved Buf.Data pointer would presumably dangle if that buffer reallocates + int last_major_offset = -1; + if (unit0 != ImPlotTimeUnit_Yr) { + // pixels per major (level 1) division + const float pix_per_major_div = plot_width / (float)(range.Size() / TimeUnitSpans[unit1]); + // nominal pixels taken up by labels + const float fmt0_width = GetDateTimeWidth(fmt0); + const float fmt1_width = GetDateTimeWidth(fmt1); + const float fmtf_width = GetDateTimeWidth(fmtf); + // the maximum number of minor (level 0) labels that can fit between major (level 1) divisions + const int minor_per_major = (int)(max_density * pix_per_major_div / 
fmt0_width); + // the minor step size (level 0) + const int step = GetTimeStep(minor_per_major, unit0); + // generate ticks + ImPlotTime t1 = FloorTime(ImPlotTime::FromDouble(range.Min), unit1); + while (t1 < t_max) { + // get next major + const ImPlotTime t2 = AddTime(t1, unit1, 1); + // add major tick + if (t1 >= t_min && t1 <= t_max) { + // minor level 0 tick + ImPlotTick tick_min(t1.ToDouble(),true,true); + tick_min.Level = 0; + LabelTickTime(tick_min,ticks.TextBuffer,t1,fmt0); + ticks.Append(tick_min); + // major level 1 tick + ImPlotTick tick_maj(t1.ToDouble(),true,true); + tick_maj.Level = 1; + LabelTickTime(tick_maj,ticks.TextBuffer,t1, last_major_offset < 0 ? fmtf : fmt1); + const char* this_major = ticks.TextBuffer.Buf.Data + tick_maj.TextOffset; + if (last_major_offset >= 0 && TimeLabelSame(ticks.TextBuffer.Buf.Data + last_major_offset,this_major)) + tick_maj.ShowLabel = false; + last_major_offset = tick_maj.TextOffset; + ticks.Append(tick_maj); + } + // add minor ticks up until next major + if (minor_per_major > 1 && (t_min <= t2 && t1 <= t_max)) { + ImPlotTime t12 = AddTime(t1, unit0, step); + while (t12 < t2) { + float px_to_t2 = (float)((t2 - t12).ToDouble()/range.Size()) * plot_width; + if (t12 >= t_min && t12 <= t_max) { + ImPlotTick tick(t12.ToDouble(),false,px_to_t2 >= fmt0_width); + tick.Level = 0; + LabelTickTime(tick,ticks.TextBuffer,t12,fmt0); + ticks.Append(tick); + if (last_major_offset < 0 && px_to_t2 >= fmt0_width && px_to_t2 >= (fmt1_width + fmtf_width) / 2) { + ImPlotTick tick_maj(t12.ToDouble(),true,true); + tick_maj.Level = 1; + LabelTickTime(tick_maj,ticks.TextBuffer,t12,fmtf); + last_major_offset = tick_maj.TextOffset; + ticks.Append(tick_maj); + } + } + t12 = AddTime(t12, unit0, step); + } + } + t1 = t2; + } + } + else { + const ImPlotDateTimeFmt fmty = GetDateTimeFmt(TimeFormatLevel0, ImPlotTimeUnit_Yr); + const float label_width = GetDateTimeWidth(fmty); + const int max_labels = (int)(max_density * plot_width / label_width); + const int year_min = GetYear(t_min); + const int 
year_max = GetYear(CeilTime(t_max, ImPlotTimeUnit_Yr)); + const double nice_range = NiceNum((year_max - year_min)*0.99,false); + const double interval = NiceNum(nice_range / (max_labels - 1), true); + const int graphmin = (int)(floor(year_min / interval) * interval); + const int graphmax = (int)(ceil(year_max / interval) * interval); + const int step = (int)interval <= 0 ? 1 : (int)interval; + + for (int y = graphmin; y < graphmax; y += step) { + ImPlotTime t = MakeTime(y); + if (t >= t_min && t <= t_max) { + ImPlotTick tick(t.ToDouble(), true, true); + tick.Level = 0; + LabelTickTime(tick, ticks.TextBuffer, t, fmty); + ticks.Append(tick); + } + } + } +} + +//----------------------------------------------------------------------------- +// Axis Utils +//----------------------------------------------------------------------------- + +int LabelAxisValue(const ImPlotAxis& axis, const ImPlotTickCollection& ticks, double value, char* buff, int size) { + ImPlotContext& gp = *GImPlot; + if (ImHasFlag(axis.Flags, ImPlotAxisFlags_LogScale)) { + return snprintf(buff, size, "%.3E", value); + } + else if (ImHasFlag(axis.Flags, ImPlotAxisFlags_Time)) { + ImPlotTimeUnit unit = (axis.Orientation == ImPlotOrientation_Horizontal) + ? GetUnitForRange(axis.Range.Size() / (gp.CurrentPlot->PlotRect.GetWidth() / 100)) + : GetUnitForRange(axis.Range.Size() / (gp.CurrentPlot->PlotRect.GetHeight() / 100)); + return FormatDateTime(ImPlotTime::FromDouble(value), buff, size, GetDateTimeFmt(TimeFormatMouseCursor, unit)); + } + else { + double range = ticks.Size > 1 ? 
(ticks.Ticks[1].PlotPos - ticks.Ticks[0].PlotPos) : axis.Range.Size(); + return snprintf(buff, size, "%.*f", Precision(range), value); + } +} + +void UpdateAxisColors(int axis_flag, ImPlotAxis* axis) { /* fills axis->ColorMaj, ColorMin and ColorTxt from the style: axis_flag is passed to GetStyleColorVec4 for the label (text) color and axis_flag + 1 for the grid color; ColorMin is the grid color with its alpha multiplied by Style.MinorAlpha */ + const ImVec4 col_label = GetStyleColorVec4(axis_flag); + const ImVec4 col_grid = GetStyleColorVec4(axis_flag + 1); + axis->ColorMaj = ImGui::GetColorU32(col_grid); + axis->ColorMin = ImGui::GetColorU32(col_grid*ImVec4(1,1,1,GImPlot->Style.MinorAlpha)); + axis->ColorTxt = ImGui::GetColorU32(col_label); +} + +//----------------------------------------------------------------------------- +// BeginPlot() +//----------------------------------------------------------------------------- + +bool BeginPlot(const char* title, const char* x_label, const char* y1_label, const ImVec2& size, + ImPlotFlags flags, ImPlotAxisFlags x_flags, ImPlotAxisFlags y1_flags, ImPlotAxisFlags y2_flags, ImPlotAxisFlags y3_flags, + const char* y2_label, const char* y3_label) +{ + IM_ASSERT_USER_ERROR(GImPlot != NULL, "No current context. 
Did you call ImPlot::CreateContext() or ImPlot::SetCurrentContext()?"); + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot == NULL, "Mismatched BeginPlot()/EndPlot()!"); + IM_ASSERT_USER_ERROR(!(ImHasFlag(x_flags, ImPlotAxisFlags_Time) && ImHasFlag(x_flags, ImPlotAxisFlags_LogScale)), "ImPlotAxisFlags_Time and ImPlotAxisFlags_LogScale cannot be enabled at the same time!"); + IM_ASSERT_USER_ERROR(!ImHasFlag(y1_flags, ImPlotAxisFlags_Time), "Y axes cannot display time formatted labels!"); + + // FRONT MATTER ----------------------------------------------------------- + + ImGuiContext &G = *GImGui; + ImGuiWindow * Window = G.CurrentWindow; + if (Window->SkipItems) { + Reset(GImPlot); + return false; + } + + const ImGuiID ID = Window->GetID(title); + const ImGuiStyle &Style = G.Style; + const ImGuiIO & IO = ImGui::GetIO(); + + bool just_created = gp.Plots.GetByKey(ID) == NULL; + gp.CurrentPlot = gp.Plots.GetOrAddByKey(ID); + gp.CurrentPlot->ID = ID; + ImPlotPlot &plot = *gp.CurrentPlot; + + plot.CurrentYAxis = 0; + + if (just_created) { + plot.Flags = flags; + plot.XAxis.Flags = x_flags; + plot.YAxis[0].Flags = y1_flags; + plot.YAxis[1].Flags = y2_flags; + plot.YAxis[2].Flags = y3_flags; + } + else { + // TODO: Check which individual flags changed, and only reset those! + // There's probably an easy bit mask trick I'm not aware of. 
+ if (flags != plot.PreviousFlags) + plot.Flags = flags; + if (x_flags != plot.XAxis.PreviousFlags) + plot.XAxis.Flags = x_flags; + if (y1_flags != plot.YAxis[0].PreviousFlags) + plot.YAxis[0].Flags = y1_flags; + if (y2_flags != plot.YAxis[1].PreviousFlags) + plot.YAxis[1].Flags = y2_flags; + if (y3_flags != plot.YAxis[2].PreviousFlags) + plot.YAxis[2].Flags = y3_flags; + } + + plot.PreviousFlags = flags; + plot.XAxis.PreviousFlags = x_flags; + plot.YAxis[0].PreviousFlags = y1_flags; + plot.YAxis[1].PreviousFlags = y2_flags; + plot.YAxis[2].PreviousFlags = y3_flags; + + // capture scroll with a child region + if (!ImHasFlag(plot.Flags, ImPlotFlags_NoChild)) { + ImGui::BeginChild(title, ImVec2(size.x == 0 ? gp.Style.PlotDefaultSize.x : size.x, size.y == 0 ? gp.Style.PlotDefaultSize.y : size.y), false, ImGuiWindowFlags_NoScrollbar); + Window = ImGui::GetCurrentWindow(); + Window->ScrollMax.y = 1.0f; + gp.ChildWindowMade = true; + } + else { + gp.ChildWindowMade = false; + } + + ImDrawList &DrawList = *Window->DrawList; + + // NextPlotData ----------------------------------------------------------- + + // linked axes + plot.XAxis.LinkedMin = gp.NextPlotData.LinkedXmin; + plot.XAxis.LinkedMax = gp.NextPlotData.LinkedXmax; + PullLinkedAxis(plot.XAxis); + for (int i = 0; i < IMPLOT_Y_AXES; ++i) { + plot.YAxis[i].LinkedMin = gp.NextPlotData.LinkedYmin[i]; + plot.YAxis[i].LinkedMax = gp.NextPlotData.LinkedYmax[i]; + PullLinkedAxis(plot.YAxis[i]); + } + + if (gp.NextPlotData.HasXRange) { + if (just_created || gp.NextPlotData.XRangeCond == ImGuiCond_Always) + plot.XAxis.SetRange(gp.NextPlotData.X); + } + + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + if (gp.NextPlotData.HasYRange[i]) { + if (just_created || gp.NextPlotData.YRangeCond[i] == ImGuiCond_Always) + plot.YAxis[i].SetRange(gp.NextPlotData.Y[i]); + } + } + + // AXIS STATES ------------------------------------------------------------ + plot.XAxis.HasRange = gp.NextPlotData.HasXRange; plot.XAxis.RangeCond = 
gp.NextPlotData.XRangeCond; plot.XAxis.Present = true; + plot.YAxis[0].HasRange = gp.NextPlotData.HasYRange[0]; plot.YAxis[0].RangeCond = gp.NextPlotData.YRangeCond[0]; plot.YAxis[0].Present = true; + plot.YAxis[1].HasRange = gp.NextPlotData.HasYRange[1]; plot.YAxis[1].RangeCond = gp.NextPlotData.YRangeCond[1]; plot.YAxis[1].Present = ImHasFlag(plot.Flags, ImPlotFlags_YAxis2); + plot.YAxis[2].HasRange = gp.NextPlotData.HasYRange[2]; plot.YAxis[2].RangeCond = gp.NextPlotData.YRangeCond[2]; plot.YAxis[2].Present = ImHasFlag(plot.Flags, ImPlotFlags_YAxis3); + + for (int i = 0; i < IMPLOT_Y_AXES; ++i) { + if (!ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_LogScale) && !ImHasFlag(plot.YAxis[i].Flags, ImPlotAxisFlags_LogScale)) + gp.Scales[i] = ImPlotScale_LinLin; + else if (ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_LogScale) && !ImHasFlag(plot.YAxis[i].Flags, ImPlotAxisFlags_LogScale)) + gp.Scales[i] = ImPlotScale_LogLin; + else if (!ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_LogScale) && ImHasFlag(plot.YAxis[i].Flags, ImPlotAxisFlags_LogScale)) + gp.Scales[i] = ImPlotScale_LinLog; + else if (ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_LogScale) && ImHasFlag(plot.YAxis[i].Flags, ImPlotAxisFlags_LogScale)) + gp.Scales[i] = ImPlotScale_LogLog; + } + + // constraints + plot.XAxis.Constrain(); + for (int i = 0; i < IMPLOT_Y_AXES; ++i) + plot.YAxis[i].Constrain(); + + // constain equal axes for primary x and y if not approximately equal + // constains x to y since x pixel size depends on y labels width, and causes feedback loops in opposite case + if (ImHasFlag(plot.Flags, ImPlotFlags_Equal)) { + double xar = plot.XAxis.GetAspect(); + double yar = plot.YAxis[0].GetAspect(); + if (!ImAlmostEqual(xar,yar) && !plot.YAxis[0].IsLocked()) + plot.XAxis.SetAspect(yar); + } + + // AXIS COLORS ----------------------------------------------------------------- + + UpdateAxisColors(ImPlotCol_XAxis, &plot.XAxis); + UpdateAxisColors(ImPlotCol_YAxis, &plot.YAxis[0]); + 
UpdateAxisColors(ImPlotCol_YAxis2, &plot.YAxis[1]); + UpdateAxisColors(ImPlotCol_YAxis3, &plot.YAxis[2]); + + // BB, PADDING, HOVER ----------------------------------------------------------- + + // frame + ImVec2 frame_size = ImGui::CalcItemSize(size, gp.Style.PlotDefaultSize.x, gp.Style.PlotDefaultSize.y); + if (frame_size.x < gp.Style.PlotMinSize.x && size.x < 0.0f) + frame_size.x = gp.Style.PlotMinSize.x; + if (frame_size.y < gp.Style.PlotMinSize.y && size.y < 0.0f) + frame_size.y = gp.Style.PlotMinSize.y; + plot.FrameRect = ImRect(Window->DC.CursorPos, Window->DC.CursorPos + frame_size); + ImGui::ItemSize(plot.FrameRect); + if (!ImGui::ItemAdd(plot.FrameRect, ID, &plot.FrameRect)) { + Reset(GImPlot); + return false; + } + plot.FrameHovered = ImGui::ItemHoverable(plot.FrameRect, ID); + if (G.HoveredIdPreviousFrame != 0 && G.HoveredIdPreviousFrame != ID) + plot.FrameHovered = false; + ImGui::SetItemAllowOverlap(); + ImGui::RenderFrame(plot.FrameRect.Min, plot.FrameRect.Max, GetStyleColorU32(ImPlotCol_FrameBg), true, Style.FrameRounding); + + // canvas/axes bb + plot.CanvasRect = ImRect(plot.FrameRect.Min + gp.Style.PlotPadding, plot.FrameRect.Max - gp.Style.PlotPadding); + plot.AxesRect = plot.FrameRect; + + // outside legend adjustments + if (!ImHasFlag(plot.Flags, ImPlotFlags_NoLegend) && plot.GetLegendCount() > 0 && plot.LegendOutside) { + const ImVec2 legend_size = CalcLegendSize(plot, gp.Style.LegendInnerPadding, gp.Style.LegendSpacing, plot.LegendOrientation); + const bool west = ImHasFlag(plot.LegendLocation, ImPlotLocation_West) && !ImHasFlag(plot.LegendLocation, ImPlotLocation_East); + const bool east = ImHasFlag(plot.LegendLocation, ImPlotLocation_East) && !ImHasFlag(plot.LegendLocation, ImPlotLocation_West); + const bool north = ImHasFlag(plot.LegendLocation, ImPlotLocation_North) && !ImHasFlag(plot.LegendLocation, ImPlotLocation_South); + const bool south = ImHasFlag(plot.LegendLocation, ImPlotLocation_South) && !ImHasFlag(plot.LegendLocation, 
ImPlotLocation_North); + const bool horz = plot.LegendOrientation == ImPlotOrientation_Horizontal; + if ((west && !horz) || (west && horz && !north && !south)) { + plot.CanvasRect.Min.x += (legend_size.x + gp.Style.LegendPadding.x); + plot.AxesRect.Min.x += (legend_size.x + gp.Style.PlotPadding.x); + } + if ((east && !horz) || (east && horz && !north && !south)) { + plot.CanvasRect.Max.x -= (legend_size.x + gp.Style.LegendPadding.x); + plot.AxesRect.Max.x -= (legend_size.x + gp.Style.PlotPadding.x); + } + if ((north && horz) || (north && !horz && !west && !east)) { + plot.CanvasRect.Min.y += (legend_size.y + gp.Style.LegendPadding.y); + plot.AxesRect.Min.y += (legend_size.y + gp.Style.PlotPadding.y); + } + if ((south && horz) || (south && !horz && !west && !east)) { + plot.CanvasRect.Max.y -= (legend_size.y + gp.Style.LegendPadding.y); + plot.AxesRect.Max.y -= (legend_size.y + gp.Style.PlotPadding.y); + } + } + + gp.RenderX = (!ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_NoGridLines) || + !ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_NoTickMarks) || + !ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_NoTickLabels)); + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + gp.RenderY[i] = plot.YAxis[i].Present && + (!ImHasFlag(plot.YAxis[i].Flags, ImPlotAxisFlags_NoGridLines) || + !ImHasFlag(plot.YAxis[i].Flags, ImPlotAxisFlags_NoTickMarks) || + !ImHasFlag(plot.YAxis[i].Flags, ImPlotAxisFlags_NoTickLabels)); + } + + // plot bb + + // (1) calc top/bot padding and plot height + ImVec2 title_size = ImVec2(0.0f, 0.0f); + const float txt_height = ImGui::GetTextLineHeight(); + if (!ImHasFlag(plot.Flags, ImPlotFlags_NoTitle)){ + title_size = ImGui::CalcTextSize(title, NULL, true); + } + + const bool show_x_label = x_label && !ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_NoLabel); + + const float pad_top = title_size.x > 0.0f ? txt_height + gp.Style.LabelPadding.y : 0; + const float pad_bot = (plot.XAxis.IsLabeled() ? txt_height + gp.Style.LabelPadding.y + (plot.XAxis.IsTime() ? 
txt_height + gp.Style.LabelPadding.y : 0) : 0) + + (show_x_label ? txt_height + gp.Style.LabelPadding.y : 0); + + const float plot_height = plot.CanvasRect.GetHeight() - pad_top - pad_bot; + + // (2) get y tick labels (needed for left/right pad) + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + if (gp.RenderY[i] && gp.NextPlotData.ShowDefaultTicksY[i]) { + if (ImHasFlag(plot.YAxis[i].Flags, ImPlotAxisFlags_LogScale)) + AddTicksLogarithmic(plot.YAxis[i].Range, ImMax(2, (int)IM_ROUND(plot_height * 0.02f)) ,gp.YTicks[i]); + else + AddTicksDefault(plot.YAxis[i].Range, ImMax(2, (int)IM_ROUND(0.0025 * plot_height)), IMPLOT_SUB_DIV, gp.YTicks[i]); + } + } + + // (3) calc left/right pad + const bool show_y1_label = y1_label && !ImHasFlag(plot.YAxis[0].Flags, ImPlotAxisFlags_NoLabel); + const bool show_y2_label = y2_label && !ImHasFlag(plot.YAxis[1].Flags, ImPlotAxisFlags_NoLabel); + const bool show_y3_label = y3_label && !ImHasFlag(plot.YAxis[2].Flags, ImPlotAxisFlags_NoLabel); + + const float pad_left = (show_y1_label ? txt_height + gp.Style.LabelPadding.x : 0) + + (plot.YAxis[0].IsLabeled() ? gp.YTicks[0].MaxWidth + gp.Style.LabelPadding.x : 0); + const float pad_right = ((plot.YAxis[1].Present && plot.YAxis[1].IsLabeled()) ? gp.YTicks[1].MaxWidth + gp.Style.LabelPadding.x : 0) + + ((plot.YAxis[1].Present && show_y2_label) ? txt_height + gp.Style.LabelPadding.x : 0) + + ((plot.YAxis[1].Present && plot.YAxis[2].Present) ? gp.Style.LabelPadding.x + gp.Style.MinorTickLen.y : 0) + + ((plot.YAxis[2].Present && plot.YAxis[2].IsLabeled()) ? gp.YTicks[2].MaxWidth + gp.Style.LabelPadding.x : 0) + + ((plot.YAxis[2].Present && show_y3_label) ? 
txt_height + gp.Style.LabelPadding.x : 0); + + const float plot_width = plot.CanvasRect.GetWidth() - pad_left - pad_right; + + // (4) get x ticks + if (gp.RenderX && gp.NextPlotData.ShowDefaultTicksX) { + if (plot.XAxis.IsTime()) + AddTicksTime(plot.XAxis.Range, plot_width, gp.XTicks); + else if (ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_LogScale)) + AddTicksLogarithmic(plot.XAxis.Range, (int)IM_ROUND(plot_width * 0.01f), gp.XTicks); + else + AddTicksDefault(plot.XAxis.Range, ImMax(2, (int)IM_ROUND(0.0025 * plot_width)), IMPLOT_SUB_DIV, gp.XTicks); + } + + // (5) calc plot bb + plot.PlotRect = ImRect(plot.CanvasRect.Min + ImVec2(pad_left, pad_top), plot.CanvasRect.Max - ImVec2(pad_right, pad_bot)); + plot.PlotHovered = plot.PlotRect.Contains(IO.MousePos) && plot.FrameHovered; + + // x axis region bb and hover + plot.XAxis.HoverRect = ImRect(plot.PlotRect.GetBL(), ImVec2(plot.PlotRect.Max.x, plot.AxesRect.Max.y)); + plot.XAxis.ExtHovered = plot.XAxis.HoverRect.Contains(IO.MousePos); + plot.XAxis.AllHovered = plot.XAxis.ExtHovered || plot.PlotHovered; + + // axis label reference + gp.YAxisReference[0] = plot.PlotRect.Min.x; + gp.YAxisReference[1] = plot.PlotRect.Max.x; + gp.YAxisReference[2] = !plot.YAxis[1].Present + ? plot.PlotRect.Max.x + : gp.YAxisReference[1] + + (plot.YAxis[1].IsLabeled() ? gp.Style.LabelPadding.x + gp.YTicks[1].MaxWidth : 0) + + (show_y2_label ? txt_height + gp.Style.LabelPadding.x : 0) + + gp.Style.LabelPadding.x + gp.Style.MinorTickLen.y; + + // y axis regions bb and hover + plot.YAxis[0].HoverRect = ImRect(ImVec2(plot.AxesRect.Min.x, plot.PlotRect.Min.y), ImVec2(plot.PlotRect.Min.x, plot.PlotRect.Max.y)); + plot.YAxis[1].HoverRect = plot.YAxis[2].Present + ? 
ImRect(plot.PlotRect.GetTR(), ImVec2(gp.YAxisReference[2], plot.PlotRect.Max.y)) + : ImRect(plot.PlotRect.GetTR(), ImVec2(plot.AxesRect.Max.x, plot.PlotRect.Max.y)); + + plot.YAxis[2].HoverRect = ImRect(ImVec2(gp.YAxisReference[2], plot.PlotRect.Min.y), ImVec2(plot.AxesRect.Max.x, plot.PlotRect.Max.y)); + + for (int i = 0; i < IMPLOT_Y_AXES; ++i) { + plot.YAxis[i].ExtHovered = plot.YAxis[i].Present && plot.YAxis[i].HoverRect.Contains(IO.MousePos); + plot.YAxis[i].AllHovered = plot.YAxis[i].ExtHovered || plot.PlotHovered; + } + + const bool any_hov_y_axis_region = plot.YAxis[0].AllHovered || plot.YAxis[1].AllHovered || plot.YAxis[2].AllHovered; + + bool hov_query = false; + if (plot.FrameHovered && plot.PlotHovered && plot.Queried && !plot.Querying) { + ImRect bb_query = plot.QueryRect; + bb_query.Min += plot.PlotRect.Min; + bb_query.Max += plot.PlotRect.Min; + hov_query = bb_query.Contains(IO.MousePos); + } + + // AXIS ASPECT RATIOS + plot.XAxis.Pixels = plot.PlotRect.GetWidth(); + for (int i = 0; i < IMPLOT_Y_AXES; ++i) + plot.YAxis[i].Pixels = plot.PlotRect.GetHeight(); + + // QUERY DRAG ------------------------------------------------------------- + if (plot.DraggingQuery && (IO.MouseReleased[gp.InputMap.PanButton] || !IO.MouseDown[gp.InputMap.PanButton])) { + plot.DraggingQuery = false; + } + if (plot.DraggingQuery) { + ImGui::SetMouseCursor(ImGuiMouseCursor_ResizeAll); + plot.QueryRect.Min += IO.MouseDelta; + plot.QueryRect.Max += IO.MouseDelta; + } + if (plot.FrameHovered && plot.PlotHovered && hov_query && !plot.DraggingQuery && !plot.Selecting && !plot.LegendHovered) { + ImGui::SetMouseCursor(ImGuiMouseCursor_ResizeAll); + const bool any_y_dragging = plot.YAxis[0].Dragging || plot.YAxis[1].Dragging || plot.YAxis[2].Dragging; + if (IO.MouseDown[gp.InputMap.PanButton] && !plot.XAxis.Dragging && !any_y_dragging) { + plot.DraggingQuery = true; + } + } + + // DRAG INPUT ------------------------------------------------------------- + + const bool axis_equal = 
ImHasFlag(plot.Flags, ImPlotFlags_Equal); + + // end drags + if (plot.XAxis.Dragging && (IO.MouseReleased[gp.InputMap.PanButton] || !IO.MouseDown[gp.InputMap.PanButton])) { + plot.XAxis.Dragging = false; + G.IO.MouseDragMaxDistanceSqr[0] = 0; + } + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + if (plot.YAxis[i].Dragging && (IO.MouseReleased[gp.InputMap.PanButton] || !IO.MouseDown[gp.InputMap.PanButton])) { + plot.YAxis[i].Dragging = false; + G.IO.MouseDragMaxDistanceSqr[0] = 0; + } + } + const bool any_y_dragging = plot.YAxis[0].Dragging || plot.YAxis[1].Dragging || plot.YAxis[2].Dragging; + bool drag_in_progress = plot.XAxis.Dragging || any_y_dragging; + // do drag + if (drag_in_progress) { + UpdateTransformCache(); + bool equal_dragged = false; + // special case for axis equal and both x and y0 hovered + if (axis_equal && !plot.XAxis.IsLocked() && plot.XAxis.Dragging && !plot.YAxis[0].IsLocked() && plot.YAxis[0].Dragging) { + ImPlotPoint plot_tl = PixelsToPlot(plot.PlotRect.Min - IO.MouseDelta, 0); + ImPlotPoint plot_br = PixelsToPlot(plot.PlotRect.Max - IO.MouseDelta, 0); + if (!plot.XAxis.IsLockedMin()) + plot.XAxis.SetMin(plot.XAxis.IsInverted() ? plot_br.x : plot_tl.x); + if (!plot.XAxis.IsLockedMax()) + plot.XAxis.SetMax(plot.XAxis.IsInverted() ? plot_tl.x : plot_br.x); + if (!plot.YAxis[0].IsLockedMin()) + plot.YAxis[0].SetMin(plot.YAxis[0].IsInverted() ? plot_tl.y : plot_br.y); + if (!plot.YAxis[0].IsLockedMax()) + plot.YAxis[0].SetMax(plot.YAxis[0].IsInverted() ? 
plot_br.y : plot_tl.y); + double xar = plot.XAxis.GetAspect(); + double yar = plot.YAxis[0].GetAspect(); + if (!ImAlmostEqual(xar,yar) && !plot.YAxis[0].IsLocked()) + plot.XAxis.SetAspect(yar); + equal_dragged = true; + } + if (!plot.XAxis.IsLocked() && plot.XAxis.Dragging && !equal_dragged) { + ImPlotPoint plot_tl = PixelsToPlot(plot.PlotRect.Min - IO.MouseDelta, 0); + ImPlotPoint plot_br = PixelsToPlot(plot.PlotRect.Max - IO.MouseDelta, 0); + if (!plot.XAxis.IsLockedMin()) + plot.XAxis.SetMin(plot.XAxis.IsInverted() ? plot_br.x : plot_tl.x); + if (!plot.XAxis.IsLockedMax()) + plot.XAxis.SetMax(plot.XAxis.IsInverted() ? plot_tl.x : plot_br.x); + if (axis_equal) + plot.YAxis[0].SetAspect(plot.XAxis.GetAspect()); + } + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + if (!plot.YAxis[i].IsLocked() && plot.YAxis[i].Dragging && !(i == 0 && equal_dragged)) { + ImPlotPoint plot_tl = PixelsToPlot(plot.PlotRect.Min - IO.MouseDelta, i); + ImPlotPoint plot_br = PixelsToPlot(plot.PlotRect.Max - IO.MouseDelta, i); + if (!plot.YAxis[i].IsLockedMin()) + plot.YAxis[i].SetMin(plot.YAxis[i].IsInverted() ? plot_tl.y : plot_br.y); + if (!plot.YAxis[i].IsLockedMax()) + plot.YAxis[i].SetMax(plot.YAxis[i].IsInverted() ? plot_br.y : plot_tl.y); + if (i == 0 && axis_equal) + plot.XAxis.SetAspect(plot.YAxis[0].GetAspect()); + } + } + // Set the mouse cursor based on which axes are moving. 
+ int direction = 0; + if (!plot.XAxis.IsLocked() && plot.XAxis.Dragging) { + direction |= (1 << 1); + } + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + if (!plot.YAxis[i].Present) { continue; } + if (!plot.YAxis[i].IsLocked() && plot.YAxis[i].Dragging) { + direction |= (1 << 2); + break; + } + } + if (IO.MouseDragMaxDistanceSqr[0] > 5) { + if (direction == 0) + ImGui::SetMouseCursor(ImGuiMouseCursor_NotAllowed); + else if (direction == (1 << 1)) + ImGui::SetMouseCursor(ImGuiMouseCursor_ResizeEW); + else if (direction == (1 << 2)) + ImGui::SetMouseCursor(ImGuiMouseCursor_ResizeNS); + else + ImGui::SetMouseCursor(ImGuiMouseCursor_ResizeAll); + } + } + // start drag + if (!drag_in_progress && plot.FrameHovered && IO.MouseClicked[gp.InputMap.PanButton] && ImHasFlag(IO.KeyMods, gp.InputMap.PanMod) && !plot.Selecting && !plot.LegendHovered && !hov_query && !plot.DraggingQuery) { + if (plot.XAxis.AllHovered) { + plot.XAxis.Dragging = true; + } + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + if (plot.YAxis[i].AllHovered) { + plot.YAxis[i].Dragging = true; + } + } + } + + // SCROLL INPUT ----------------------------------------------------------- + + if (plot.FrameHovered && (plot.XAxis.AllHovered || any_hov_y_axis_region) && IO.MouseWheel != 0) { + UpdateTransformCache(); + float zoom_rate = IMPLOT_ZOOM_RATE; + if (IO.MouseWheel > 0) + zoom_rate = (-zoom_rate) / (1.0f + (2.0f * zoom_rate)); + float tx = ImRemap(IO.MousePos.x, plot.PlotRect.Min.x, plot.PlotRect.Max.x, 0.0f, 1.0f); + float ty = ImRemap(IO.MousePos.y, plot.PlotRect.Min.y, plot.PlotRect.Max.y, 0.0f, 1.0f); + bool equal_zoomed = false; + // special case for axis equal and both x and y0 hovered + if (axis_equal && plot.XAxis.AllHovered && !plot.XAxis.IsLocked() && plot.YAxis[0].AllHovered && !plot.YAxis[0].IsLocked()) { + const ImPlotPoint& plot_tl = PixelsToPlot(plot.PlotRect.Min - plot.PlotRect.GetSize() * ImVec2(tx * zoom_rate, ty * zoom_rate), 0); + const ImPlotPoint& plot_br = PixelsToPlot(plot.PlotRect.Max + 
plot.PlotRect.GetSize() * ImVec2((1 - tx) * zoom_rate, (1 - ty) * zoom_rate), 0); + if (!plot.XAxis.IsLockedMin()) + plot.XAxis.SetMin(plot.XAxis.IsInverted() ? plot_br.x : plot_tl.x); + if (!plot.XAxis.IsLockedMax()) + plot.XAxis.SetMax(plot.XAxis.IsInverted() ? plot_tl.x : plot_br.x); + if (!plot.YAxis[0].IsLockedMin()) + plot.YAxis[0].SetMin(plot.YAxis[0].IsInverted() ? plot_tl.y : plot_br.y); + if (!plot.YAxis[0].IsLockedMax()) + plot.YAxis[0].SetMax(plot.YAxis[0].IsInverted() ? plot_br.y : plot_tl.y); + double xar = plot.XAxis.GetAspect(); + double yar = plot.YAxis[0].GetAspect(); + if (!ImAlmostEqual(xar,yar) && !plot.YAxis[0].IsLocked()) + plot.XAxis.SetAspect(yar); + equal_zoomed = true; + } + if (plot.XAxis.AllHovered && !plot.XAxis.IsLocked() && !equal_zoomed) { + const ImPlotPoint& plot_tl = PixelsToPlot(plot.PlotRect.Min - plot.PlotRect.GetSize() * ImVec2(tx * zoom_rate, ty * zoom_rate), 0); + const ImPlotPoint& plot_br = PixelsToPlot(plot.PlotRect.Max + plot.PlotRect.GetSize() * ImVec2((1 - tx) * zoom_rate, (1 - ty) * zoom_rate), 0); + if (!plot.XAxis.IsLockedMin()) + plot.XAxis.SetMin(plot.XAxis.IsInverted() ? plot_br.x : plot_tl.x); + if (!plot.XAxis.IsLockedMax()) + plot.XAxis.SetMax(plot.XAxis.IsInverted() ? plot_tl.x : plot_br.x); + if (axis_equal) + plot.YAxis[0].SetAspect(plot.XAxis.GetAspect()); + } + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + if (plot.YAxis[i].AllHovered && !plot.YAxis[i].IsLocked() && !(i == 0 && equal_zoomed)) { + const ImPlotPoint& plot_tl = PixelsToPlot(plot.PlotRect.Min - plot.PlotRect.GetSize() * ImVec2(tx * zoom_rate, ty * zoom_rate), i); + const ImPlotPoint& plot_br = PixelsToPlot(plot.PlotRect.Max + plot.PlotRect.GetSize() * ImVec2((1 - tx) * zoom_rate, (1 - ty) * zoom_rate), i); + if (!plot.YAxis[i].IsLockedMin()) + plot.YAxis[i].SetMin(plot.YAxis[i].IsInverted() ? plot_tl.y : plot_br.y); + if (!plot.YAxis[i].IsLockedMax()) + plot.YAxis[i].SetMax(plot.YAxis[i].IsInverted() ? 
plot_br.y : plot_tl.y); + if (i == 0 && axis_equal) + plot.XAxis.SetAspect(plot.YAxis[0].GetAspect()); + } + } + } + + // BOX-SELECTION AND QUERY ------------------------------------------------ + + // confirm selection + if (plot.Selecting && (IO.MouseReleased[gp.InputMap.BoxSelectButton] || !IO.MouseDown[gp.InputMap.BoxSelectButton])) { + UpdateTransformCache(); + ImVec2 select_size = plot.SelectStart - IO.MousePos; + if (!ImHasFlag(plot.Flags, ImPlotFlags_NoBoxSelect)) { + ImPlotPoint p1 = PixelsToPlot(plot.SelectStart); + ImPlotPoint p2 = PixelsToPlot(IO.MousePos); + const bool x_can_change = !ImHasFlag(IO.KeyMods,gp.InputMap.HorizontalMod) && ImFabs(select_size.x) > 2; + const bool y_can_change = !ImHasFlag(IO.KeyMods,gp.InputMap.VerticalMod) && ImFabs(select_size.y) > 2; + if (!plot.XAxis.IsLockedMin() && x_can_change) + plot.XAxis.SetMin(ImMin(p1.x, p2.x)); + if (!plot.XAxis.IsLockedMax() && x_can_change) + plot.XAxis.SetMax(ImMax(p1.x, p2.x)); + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + p1 = PixelsToPlot(plot.SelectStart, i); + p2 = PixelsToPlot(IO.MousePos, i); + if (!plot.YAxis[i].IsLockedMin() && y_can_change) + plot.YAxis[i].SetMin(ImMin(p1.y, p2.y)); + if (!plot.YAxis[i].IsLockedMax() && y_can_change) + plot.YAxis[i].SetMax(ImMax(p1.y, p2.y)); + } + if (x_can_change || y_can_change || (ImHasFlag(IO.KeyMods,gp.InputMap.HorizontalMod) && ImHasFlag(IO.KeyMods,gp.InputMap.VerticalMod))) + plot.ContextLocked = gp.InputMap.BoxSelectButton == gp.InputMap.ContextMenuButton; + } + plot.Selecting = false; + } + // bad selection + if (plot.Selecting && (ImHasFlag(plot.Flags, ImPlotFlags_NoBoxSelect) || plot.IsLocked()) && ImLengthSqr(plot.SelectStart - IO.MousePos) > 4) { + ImGui::SetMouseCursor(ImGuiMouseCursor_NotAllowed); + plot.ContextLocked = gp.InputMap.BoxSelectButton == gp.InputMap.ContextMenuButton; + } + // cancel selection + if (plot.Selecting && (IO.MouseClicked[gp.InputMap.BoxSelectCancelButton] || IO.MouseDown[gp.InputMap.BoxSelectCancelButton])) 
{ + plot.Selecting = false; + plot.ContextLocked = gp.InputMap.BoxSelectButton == gp.InputMap.ContextMenuButton; + } + // begin selection or query + if (plot.FrameHovered && plot.PlotHovered && IO.MouseClicked[gp.InputMap.BoxSelectButton] && ImHasFlag(IO.KeyMods, gp.InputMap.BoxSelectMod)) { + plot.SelectStart = IO.MousePos; + plot.Selecting = true; + } + // update query + if (plot.Querying) { + UpdateTransformCache(); + plot.QueryRect.Min.x = ImHasFlag(IO.KeyMods, gp.InputMap.HorizontalMod) ? plot.PlotRect.Min.x : ImMin(plot.QueryStart.x, IO.MousePos.x); + plot.QueryRect.Max.x = ImHasFlag(IO.KeyMods, gp.InputMap.HorizontalMod) ? plot.PlotRect.Max.x : ImMax(plot.QueryStart.x, IO.MousePos.x); + plot.QueryRect.Min.y = ImHasFlag(IO.KeyMods, gp.InputMap.VerticalMod) ? plot.PlotRect.Min.y : ImMin(plot.QueryStart.y, IO.MousePos.y); + plot.QueryRect.Max.y = ImHasFlag(IO.KeyMods, gp.InputMap.VerticalMod) ? plot.PlotRect.Max.y : ImMax(plot.QueryStart.y, IO.MousePos.y); + + plot.QueryRect.Min -= plot.PlotRect.Min; + plot.QueryRect.Max -= plot.PlotRect.Min; + } + // end query + if (plot.Querying && (IO.MouseReleased[gp.InputMap.QueryButton] || IO.MouseReleased[gp.InputMap.BoxSelectButton])) { + plot.Querying = false; + if (plot.QueryRect.GetWidth() > 2 && plot.QueryRect.GetHeight() > 2) { + plot.Queried = true; + plot.ContextLocked = gp.InputMap.BoxSelectButton == gp.InputMap.ContextMenuButton; + } + else + plot.Queried = false; + } + + // begin query + if (ImHasFlag(plot.Flags, ImPlotFlags_Query) && plot.FrameHovered && plot.PlotHovered && IO.MouseClicked[gp.InputMap.QueryButton] && ImHasFlag(IO.KeyMods, gp.InputMap.QueryMod)) { + plot.QueryRect = ImRect(0,0,0,0); + plot.Querying = true; + plot.Queried = true; + plot.QueryStart = IO.MousePos; + } + // toggle between select/query + if (ImHasFlag(plot.Flags, ImPlotFlags_Query) && plot.Selecting && ImHasFlag(IO.KeyMods,gp.InputMap.QueryToggleMod)) { + plot.Selecting = false; + plot.QueryRect = ImRect(0,0,0,0); + plot.Querying = 
true; + plot.Queried = true; + plot.QueryStart = plot.SelectStart; + } + if (!ImHasFlag(plot.Flags, ImPlotFlags_NoBoxSelect) && plot.Querying && !ImHasFlag(IO.KeyMods, gp.InputMap.QueryToggleMod) && !IO.MouseDown[gp.InputMap.QueryButton]) { + plot.Selecting = true; + plot.Querying = false; + plot.Queried = false; + plot.QueryRect = ImRect(0,0,0,0); + } + if (!ImHasFlag(plot.Flags, ImPlotFlags_Query)) { + plot.Queried = false; + plot.Querying = false; + plot.QueryRect = ImRect(0,0,0,0); + } + + // FIT ----------------------------------------------------------- + + // fit from double click + if ( IO.MouseDoubleClicked[gp.InputMap.FitButton] && plot.FrameHovered && (plot.XAxis.AllHovered || any_hov_y_axis_region) && !plot.LegendHovered && !hov_query ) { + gp.FitThisFrame = true; + gp.FitX = plot.XAxis.AllHovered; + for (int i = 0; i < IMPLOT_Y_AXES; i++) + gp.FitY[i] = plot.YAxis[i].AllHovered; + } + // fit from FitNextPlotAxes + if (gp.NextPlotData.FitX) { + gp.FitThisFrame = true; + gp.FitX = true; + } + for (int i = 0; i < IMPLOT_Y_AXES; ++i) { + if (gp.NextPlotData.FitY[i]) { + gp.FitThisFrame = true; + gp.FitY[i] = true; + } + } + + // FOCUS ------------------------------------------------------------------ + + // focus window + if ((IO.MouseClicked[0] || IO.MouseClicked[1] || IO.MouseClicked[2]) && plot.FrameHovered) + ImGui::FocusWindow(ImGui::GetCurrentWindow()); + + UpdateTransformCache(); + + // set mouse position + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + gp.MousePos[i] = PixelsToPlot(IO.MousePos, i); + } + + // RENDER ----------------------------------------------------------------- + + // grid bg + DrawList.AddRectFilled(plot.PlotRect.Min, plot.PlotRect.Max, GetStyleColorU32(ImPlotCol_PlotBg)); + + // render axes + PushPlotClipRect(); + + // transform ticks (TODO: Move this into ImPlotTickCollection) + if (gp.RenderX) { + for (int t = 0; t < gp.XTicks.Size; t++) { + ImPlotTick *xt = &gp.XTicks.Ticks[t]; + xt->PixelPos = PlotToPixels(xt->PlotPos, 0, 
0).x; + } + } + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + if (gp.RenderY[i]) { + for (int t = 0; t < gp.YTicks[i].Size; t++) { + ImPlotTick *yt = &gp.YTicks[i].Ticks[t]; + yt->PixelPos = PlotToPixels(0, yt->PlotPos, i).y; + } + } + } + + // render grid + if (!ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_NoGridLines)) { + float density = gp.XTicks.Size / plot.PlotRect.GetWidth(); + ImVec4 col_min = ImGui::ColorConvertU32ToFloat4(plot.XAxis.ColorMin); + col_min.w *= ImClamp(ImRemap(density, 0.1f, 0.2f, 1.0f, 0.0f), 0.0f, 1.0f); + ImU32 col_min32 = ImGui::ColorConvertFloat4ToU32(col_min); + for (int t = 0; t < gp.XTicks.Size; t++) { + ImPlotTick& xt = gp.XTicks.Ticks[t]; + if (xt.Level == 0) { + if (xt.Major) + DrawList.AddLine(ImVec2(xt.PixelPos, plot.PlotRect.Min.y), ImVec2(xt.PixelPos, plot.PlotRect.Max.y), plot.XAxis.ColorMaj, gp.Style.MajorGridSize.x); + else if (density < 0.2f) + DrawList.AddLine(ImVec2(xt.PixelPos, plot.PlotRect.Min.y), ImVec2(xt.PixelPos, plot.PlotRect.Max.y), col_min32, gp.Style.MinorGridSize.x); + } + } + } + + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + if (plot.YAxis[i].Present && !ImHasFlag(plot.YAxis[i].Flags, ImPlotAxisFlags_NoGridLines)) { + float density = gp.YTicks[i].Size / plot.PlotRect.GetHeight(); + ImVec4 col_min = ImGui::ColorConvertU32ToFloat4(plot.YAxis[i].ColorMin); + col_min.w *= ImClamp(ImRemap(density, 0.1f, 0.2f, 1.0f, 0.0f), 0.0f, 1.0f); + ImU32 col_min32 = ImGui::ColorConvertFloat4ToU32(col_min); + for (int t = 0; t < gp.YTicks[i].Size; t++) { + ImPlotTick& yt = gp.YTicks[i].Ticks[t]; + if (yt.Major) + DrawList.AddLine(ImVec2(plot.PlotRect.Min.x, yt.PixelPos), ImVec2(plot.PlotRect.Max.x, yt.PixelPos), plot.YAxis[i].ColorMaj, gp.Style.MajorGridSize.y); + else if (density < 0.2f) + DrawList.AddLine(ImVec2(plot.PlotRect.Min.x, yt.PixelPos), ImVec2(plot.PlotRect.Max.x, yt.PixelPos), col_min32, gp.Style.MinorGridSize.y); + } + } + } + + PopPlotClipRect(); + + // render title + if (title_size.x > 0.0f && 
!ImHasFlag(plot.Flags, ImPlotFlags_NoTitle)) { + ImU32 col = GetStyleColorU32(ImPlotCol_TitleText); + const char* title_end = ImGui::FindRenderedTextEnd(title, NULL); + DrawList.AddText(ImVec2(plot.CanvasRect.GetCenter().x - title_size.x * 0.5f, plot.CanvasRect.Min.y),col,title,title_end); + } + + // render axis labels + if (show_x_label) { + const ImVec2 xLabel_size = ImGui::CalcTextSize(x_label); + const ImVec2 xLabel_pos(plot.PlotRect.GetCenter().x - xLabel_size.x * 0.5f, plot.CanvasRect.Max.y - txt_height); + DrawList.AddText(xLabel_pos, plot.XAxis.ColorTxt, x_label); + } + + if (show_y1_label) { + const ImVec2 yLabel_size = CalcTextSizeVertical(y1_label); + const ImVec2 yLabel_pos(plot.CanvasRect.Min.x, plot.PlotRect.GetCenter().y + yLabel_size.y * 0.5f); + AddTextVertical(&DrawList, yLabel_pos, plot.YAxis[0].ColorTxt, y1_label); + } + + const char* y_labels[] = {y2_label, y3_label}; + for (int i = 1; i < IMPLOT_Y_AXES; i++) { + const char* current_label = y_labels[i-1]; + if (plot.YAxis[i].Present && current_label && !ImHasFlag(plot.YAxis[i].Flags, ImPlotAxisFlags_NoLabel)) { + const ImVec2 yLabel_size = CalcTextSizeVertical(current_label); + float label_offset = (plot.YAxis[i].IsLabeled() ? 
gp.YTicks[i].MaxWidth + gp.Style.LabelPadding.x : 0.0f) + gp.Style.LabelPadding.x; + const ImVec2 yLabel_pos(gp.YAxisReference[i] + label_offset, plot.PlotRect.GetCenter().y + yLabel_size.y * 0.5f); + AddTextVertical(&DrawList, yLabel_pos, plot.YAxis[i].ColorTxt, current_label); + } + } + + // render tick labels + ImGui::PushClipRect(plot.FrameRect.Min, plot.FrameRect.Max, true); + if (!ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_NoTickLabels)) { + for (int t = 0; t < gp.XTicks.Size; t++) { + ImPlotTick *xt = &gp.XTicks.Ticks[t]; + if (xt->ShowLabel && xt->PixelPos >= plot.PlotRect.Min.x - 1 && xt->PixelPos <= plot.PlotRect.Max.x + 1) + DrawList.AddText(ImVec2(xt->PixelPos - xt->LabelSize.x * 0.5f, plot.PlotRect.Max.y + gp.Style.LabelPadding.y + xt->Level * (txt_height + gp.Style.LabelPadding.y)), + xt->Major ? plot.XAxis.ColorTxt : plot.XAxis.ColorTxt, gp.XTicks.GetText(t)); + } + } + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + if (plot.YAxis[i].Present && !ImHasFlag(plot.YAxis[i].Flags, ImPlotAxisFlags_NoTickLabels)) { + for (int t = 0; t < gp.YTicks[i].Size; t++) { + const float x_start = gp.YAxisReference[i] + (i == 0 ? (-gp.Style.LabelPadding.x - gp.YTicks[i].Ticks[t].LabelSize.x) : gp.Style.LabelPadding.x); + ImPlotTick *yt = &gp.YTicks[i].Ticks[t]; + if (yt->ShowLabel && yt->PixelPos >= plot.PlotRect.Min.y - 1 && yt->PixelPos <= plot.PlotRect.Max.y + 1) { + ImVec2 start(x_start, yt->PixelPos - 0.5f * yt->LabelSize.y); + DrawList.AddText(start, yt->Major ? 
plot.YAxis[i].ColorTxt : plot.YAxis[i].ColorTxt, gp.YTicks[i].GetText(t)); + } + } + } + } + ImGui::PopClipRect(); + + // clear legend + plot.LegendData.Reset(); + // push plot ID into stack + ImGui::PushID(ID); + return true; +} + +//----------------------------------------------------------------------------- +// Context Menu +//----------------------------------------------------------------------------- + +template +bool DragFloat(const char*, F*, float, F, F) { + return false; +} + +template <> +bool DragFloat(const char* label, double* v, float v_speed, double v_min, double v_max) { + return ImGui::DragScalar(label, ImGuiDataType_Double, v, v_speed, &v_min, &v_max, "%.3f", 1); +} + +template <> +bool DragFloat(const char* label, float* v, float v_speed, float v_min, float v_max) { + return ImGui::DragScalar(label, ImGuiDataType_Float, v, v_speed, &v_min, &v_max, "%.3f", 1); +} + +inline void BeginDisabledControls(bool cond) { + if (cond) { + ImGui::PushItemFlag(ImGuiItemFlags_Disabled, true); + ImGui::PushStyleVar(ImGuiStyleVar_Alpha, ImGui::GetStyle().Alpha * 0.25f); + } +} + +inline void EndDisabledControls(bool cond) { + if (cond) { + ImGui::PopItemFlag(); + ImGui::PopStyleVar(); + } +} + +void ShowAxisContextMenu(ImPlotAxis& axis, ImPlotAxis* equal_axis, bool time_allowed) { + + ImGui::PushItemWidth(75); + bool always_locked = axis.IsAlwaysLocked(); + bool label = !ImHasFlag(axis.Flags, ImPlotAxisFlags_NoLabel); + bool grid = !ImHasFlag(axis.Flags, ImPlotAxisFlags_NoGridLines); + bool ticks = !ImHasFlag(axis.Flags, ImPlotAxisFlags_NoTickMarks); + bool labels = !ImHasFlag(axis.Flags, ImPlotAxisFlags_NoTickLabels); + double drag_speed = (axis.Range.Size() <= DBL_EPSILON) ? DBL_EPSILON * 1.0e+13 : 0.01 * axis.Range.Size(); // recover from almost equal axis limits. 
+ + if (axis.IsTime()) { + ImPlotTime tmin = ImPlotTime::FromDouble(axis.Range.Min); + ImPlotTime tmax = ImPlotTime::FromDouble(axis.Range.Max); + + BeginDisabledControls(always_locked); + ImGui::CheckboxFlags("##LockMin", (unsigned int*)&axis.Flags, ImPlotAxisFlags_LockMin); + EndDisabledControls(always_locked); + ImGui::SameLine(); + BeginDisabledControls(axis.IsLockedMin()); + if (ImGui::BeginMenu("Min Time")) { + if (ShowTimePicker("mintime", &tmin)) { + if (tmin >= tmax) + tmax = AddTime(tmin, ImPlotTimeUnit_S, 1); + axis.SetRange(tmin.ToDouble(),tmax.ToDouble()); + } + ImGui::Separator(); + if (ShowDatePicker("mindate",&axis.PickerLevel,&axis.PickerTimeMin,&tmin,&tmax)) { + tmin = CombineDateTime(axis.PickerTimeMin, tmin); + if (tmin >= tmax) + tmax = AddTime(tmin, ImPlotTimeUnit_S, 1); + axis.SetRange(tmin.ToDouble(), tmax.ToDouble()); + } + ImGui::EndMenu(); + } + EndDisabledControls(axis.IsLockedMin()); + + BeginDisabledControls(always_locked); + ImGui::CheckboxFlags("##LockMax", (unsigned int*)&axis.Flags, ImPlotAxisFlags_LockMax); + EndDisabledControls(always_locked); + ImGui::SameLine(); + BeginDisabledControls(axis.IsLockedMax()); + if (ImGui::BeginMenu("Max Time")) { + if (ShowTimePicker("maxtime", &tmax)) { + if (tmax <= tmin) + tmin = AddTime(tmax, ImPlotTimeUnit_S, -1); + axis.SetRange(tmin.ToDouble(),tmax.ToDouble()); + } + ImGui::Separator(); + if (ShowDatePicker("maxdate",&axis.PickerLevel,&axis.PickerTimeMax,&tmin,&tmax)) { + tmax = CombineDateTime(axis.PickerTimeMax, tmax); + if (tmax <= tmin) + tmin = AddTime(tmax, ImPlotTimeUnit_S, -1); + axis.SetRange(tmin.ToDouble(), tmax.ToDouble()); + } + ImGui::EndMenu(); + } + EndDisabledControls(axis.IsLockedMax()); + } + else { + BeginDisabledControls(always_locked); + ImGui::CheckboxFlags("##LockMin", (unsigned int*)&axis.Flags, ImPlotAxisFlags_LockMin); + EndDisabledControls(always_locked); + ImGui::SameLine(); + BeginDisabledControls(axis.IsLockedMin()); + double temp_min = axis.Range.Min; + if 
(DragFloat("Min", &temp_min, (float)drag_speed, -HUGE_VAL, axis.Range.Max - DBL_EPSILON)) { + axis.SetMin(temp_min); + if (equal_axis != NULL) + equal_axis->SetAspect(axis.GetAspect()); + } + EndDisabledControls(axis.IsLockedMin()); + + BeginDisabledControls(always_locked); + ImGui::CheckboxFlags("##LockMax", (unsigned int*)&axis.Flags, ImPlotAxisFlags_LockMax); + EndDisabledControls(always_locked); + ImGui::SameLine(); + BeginDisabledControls(axis.IsLockedMax()); + double temp_max = axis.Range.Max; + if (DragFloat("Max", &temp_max, (float)drag_speed, axis.Range.Min + DBL_EPSILON, HUGE_VAL)) { + axis.SetMax(temp_max); + if (equal_axis != NULL) + equal_axis->SetAspect(axis.GetAspect()); + } + EndDisabledControls(axis.IsLockedMax()); + } + + ImGui::Separator(); + + + ImGui::CheckboxFlags("Invert",(unsigned int*)&axis.Flags, ImPlotAxisFlags_Invert); + BeginDisabledControls(axis.IsTime() && time_allowed); + ImGui::CheckboxFlags("Log Scale",(unsigned int*)&axis.Flags, ImPlotAxisFlags_LogScale); + EndDisabledControls(axis.IsTime() && time_allowed); + + if (time_allowed) { + BeginDisabledControls(axis.IsLog()); + ImGui::CheckboxFlags("Time",(unsigned int*)&axis.Flags, ImPlotAxisFlags_Time); + EndDisabledControls(axis.IsLog()); + } + + ImGui::Separator(); + if (ImGui::Checkbox("Label", &label)) + ImFlipFlag(axis.Flags, ImPlotAxisFlags_NoLabel); + if (ImGui::Checkbox("Grid Lines", &grid)) + ImFlipFlag(axis.Flags, ImPlotAxisFlags_NoGridLines); + if (ImGui::Checkbox("Tick Marks", &ticks)) + ImFlipFlag(axis.Flags, ImPlotAxisFlags_NoTickMarks); + if (ImGui::Checkbox("Tick Labels", &labels)) + ImFlipFlag(axis.Flags, ImPlotAxisFlags_NoTickLabels); +} + +void ShowPlotContextMenu(ImPlotPlot& plot) { + const bool equal = ImHasFlag(plot.Flags, ImPlotFlags_Equal); + if (ImGui::BeginMenu("X-Axis")) { + ImGui::PushID("X"); + ShowAxisContextMenu(plot.XAxis, equal ? 
&plot.YAxis[0] : NULL, true); + ImGui::PopID(); + ImGui::EndMenu(); + } + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + if (i == 1 && !ImHasFlag(plot.Flags, ImPlotFlags_YAxis2)) { + continue; + } + if (i == 2 && !ImHasFlag(plot.Flags, ImPlotFlags_YAxis3)) { + continue; + } + char buf[10] = {}; + if (i == 0) { + snprintf(buf, sizeof(buf) - 1, "Y-Axis"); + } else { + snprintf(buf, sizeof(buf) - 1, "Y-Axis %d", i + 1); + } + if (ImGui::BeginMenu(buf)) { + ImGui::PushID(i); + ShowAxisContextMenu(plot.YAxis[i], (equal && i == 0) ? &plot.XAxis : NULL, false); + ImGui::PopID(); + ImGui::EndMenu(); + } + } + + ImGui::Separator(); + if ((ImGui::BeginMenu("Settings"))) { + if (ImGui::MenuItem("Anti-Aliased Lines",NULL,ImHasFlag(plot.Flags, ImPlotFlags_AntiAliased))) + ImFlipFlag(plot.Flags, ImPlotFlags_AntiAliased); + if (ImGui::MenuItem("Equal", NULL, ImHasFlag(plot.Flags, ImPlotFlags_Equal))) + ImFlipFlag(plot.Flags, ImPlotFlags_Equal); + if (ImGui::MenuItem("Box Select",NULL,!ImHasFlag(plot.Flags, ImPlotFlags_NoBoxSelect))) + ImFlipFlag(plot.Flags, ImPlotFlags_NoBoxSelect); + if (ImGui::MenuItem("Query",NULL,ImHasFlag(plot.Flags, ImPlotFlags_Query))) + ImFlipFlag(plot.Flags, ImPlotFlags_Query); + if (ImGui::MenuItem("Title",NULL,!ImHasFlag(plot.Flags, ImPlotFlags_NoTitle))) + ImFlipFlag(plot.Flags, ImPlotFlags_NoTitle); + if (ImGui::MenuItem("Mouse Position",NULL,!ImHasFlag(plot.Flags, ImPlotFlags_NoMousePos))) + ImFlipFlag(plot.Flags, ImPlotFlags_NoMousePos); + if (ImGui::MenuItem("Crosshairs",NULL,ImHasFlag(plot.Flags, ImPlotFlags_Crosshairs))) + ImFlipFlag(plot.Flags, ImPlotFlags_Crosshairs); + if ((ImGui::BeginMenu("Legend"))) { + const float s = ImGui::GetFrameHeight(); + if (ImGui::RadioButton("H", plot.LegendOrientation == ImPlotOrientation_Horizontal)) + plot.LegendOrientation = ImPlotOrientation_Horizontal; + ImGui::SameLine(); + if (ImGui::RadioButton("V", plot.LegendOrientation == ImPlotOrientation_Vertical)) + plot.LegendOrientation = ImPlotOrientation_Vertical; 
+ ImGui::Checkbox("Outside", &plot.LegendOutside); + ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(1,1)); + if (ImGui::Button("##NW",ImVec2(1.5f*s,s))) { plot.LegendLocation = ImPlotLocation_NorthWest; } ImGui::SameLine(); + if (ImGui::Button("##N", ImVec2(1.5f*s,s))) { plot.LegendLocation = ImPlotLocation_North; } ImGui::SameLine(); + if (ImGui::Button("##NE",ImVec2(1.5f*s,s))) { plot.LegendLocation = ImPlotLocation_NorthEast; } + if (ImGui::Button("##W", ImVec2(1.5f*s,s))) { plot.LegendLocation = ImPlotLocation_West; } ImGui::SameLine(); + if (ImGui::Button("##C", ImVec2(1.5f*s,s))) { plot.LegendLocation = ImPlotLocation_Center; } ImGui::SameLine(); + if (ImGui::Button("##E", ImVec2(1.5f*s,s))) { plot.LegendLocation = ImPlotLocation_East; } + if (ImGui::Button("##SW",ImVec2(1.5f*s,s))) { plot.LegendLocation = ImPlotLocation_SouthWest; } ImGui::SameLine(); + if (ImGui::Button("##S", ImVec2(1.5f*s,s))) { plot.LegendLocation = ImPlotLocation_South; } ImGui::SameLine(); + if (ImGui::Button("##SE",ImVec2(1.5f*s,s))) { plot.LegendLocation = ImPlotLocation_SouthEast; } + ImGui::PopStyleVar(); + ImGui::EndMenu(); + } + ImGui::EndMenu(); + } + if (ImGui::MenuItem("Legend",NULL,!ImHasFlag(plot.Flags, ImPlotFlags_NoLegend))) { + ImFlipFlag(plot.Flags, ImPlotFlags_NoLegend); + } +} + +//----------------------------------------------------------------------------- +// EndPlot() +//----------------------------------------------------------------------------- + +void EndPlot() { + IM_ASSERT_USER_ERROR(GImPlot != NULL, "No current context. 
Did you call ImPlot::CreateContext() or ImPlot::SetCurrentContext()?"); + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "Mismatched BeginPlot()/EndPlot()!"); + ImGuiContext &G = *GImGui; + ImPlotPlot &plot = *gp.CurrentPlot; + ImGuiWindow * Window = G.CurrentWindow; + ImDrawList & DrawList = *Window->DrawList; + const ImGuiIO & IO = ImGui::GetIO(); + + // AXIS STATES ------------------------------------------------------------ + + const bool any_y_locked = plot.YAxis[0].IsLocked() || plot.YAxis[1].Present ? plot.YAxis[1].IsLocked() : false || plot.YAxis[2].Present ? plot.YAxis[2].IsLocked() : false; + const bool any_y_dragging = plot.YAxis[0].Dragging || plot.YAxis[1].Dragging || plot.YAxis[2].Dragging; + + + // FINAL RENDER ----------------------------------------------------------- + + // render ticks + PushPlotClipRect(); + if (!ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_NoTickMarks)) { + for (int t = 0; t < gp.XTicks.Size; t++) { + ImPlotTick *xt = &gp.XTicks.Ticks[t]; + if (xt->Level == 0) + DrawList.AddLine(ImVec2(xt->PixelPos, plot.PlotRect.Max.y), + ImVec2(xt->PixelPos, plot.PlotRect.Max.y - (xt->Major ? gp.Style.MajorTickLen.x : gp.Style.MinorTickLen.x)), + plot.XAxis.ColorMaj, + xt->Major ? gp.Style.MajorTickSize.x : gp.Style.MinorTickSize.x); + } + } + PopPlotClipRect(); + + ImGui::PushClipRect(plot.PlotRect.Min, ImVec2(plot.FrameRect.Max.x, plot.PlotRect.Max.y), true); + int axis_count = 0; + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + if (!plot.YAxis[i].Present) { continue; } + axis_count++; + + float x_start = gp.YAxisReference[i]; + if (!ImHasFlag(plot.YAxis[i].Flags, ImPlotAxisFlags_NoTickMarks)) { + float direction = (i == 0) ? 1.0f : -1.0f; + bool no_major = axis_count >= 3; + for (int t = 0; t < gp.YTicks[i].Size; t++) { + ImPlotTick *yt = &gp.YTicks[i].Ticks[t]; + ImVec2 start = ImVec2(x_start, yt->PixelPos); + DrawList.AddLine(start, + start + ImVec2(direction * ((!no_major && yt->Major) ? 
gp.Style.MajorTickLen.y : gp.Style.MinorTickLen.y), 0), + plot.YAxis[i].ColorMaj, + (!no_major && yt->Major) ? gp.Style.MajorTickSize.y : gp.Style.MinorTickSize.y); + } + } + + if (axis_count >= 3) { + // Draw a bar next to the ticks to act as a visual separator. + DrawList.AddLine( + ImVec2(x_start, plot.PlotRect.Min.y), + ImVec2(x_start, plot.PlotRect.Max.y), + GetStyleColorU32(ImPlotCol_YAxisGrid3), 1); + } + } + ImGui::PopClipRect(); + + // render annotations + PushPlotClipRect(); + for (int i = 0; i < gp.Annotations.Size; ++i) { + const char* txt = gp.Annotations.GetText(i); + ImPlotAnnotation& an = gp.Annotations.Annotations[i]; + const ImVec2 txt_size = ImGui::CalcTextSize(txt); + const ImVec2 size = txt_size + gp.Style.AnnotationPadding * 2; + ImVec2 pos = an.Pos; + if (an.Offset.x == 0) + pos.x -= size.x / 2; + else if (an.Offset.x > 0) + pos.x += an.Offset.x; + else + pos.x -= size.x - an.Offset.x; + if (an.Offset.y == 0) + pos.y -= size.y / 2; + else if (an.Offset.y > 0) + pos.y += an.Offset.y; + else + pos.y -= size.y - an.Offset.y; + if (an.Clamp) + pos = ClampLabelPos(pos, size, plot.PlotRect.Min, plot.PlotRect.Max); + ImRect rect(pos,pos+size); + if (an.Offset.x != 0 || an.Offset.y != 0) { + ImVec2 corners[4] = {rect.GetTL(), rect.GetTR(), rect.GetBR(), rect.GetBL()}; + int min_corner = 0; + float min_len = FLT_MAX; + for (int c = 0; c < 4; ++c) { + float len = ImLengthSqr(an.Pos - corners[c]); + if (len < min_len) { + min_corner = c; + min_len = len; + } + } + DrawList.AddLine(an.Pos, corners[min_corner], an.ColorBg); + } + DrawList.AddRectFilled(rect.Min, rect.Max, an.ColorBg); + DrawList.AddText(pos + gp.Style.AnnotationPadding, an.ColorFg, txt); + } + PopPlotClipRect(); + + // render y-axis drag/drop hover + if ((plot.YAxis[1].Present || plot.YAxis[2].Present) && ImGui::IsDragDropPayloadBeingAccepted()) { + for (int i = 0; i < IMPLOT_Y_AXES; ++i) { + if (plot.YAxis[i].ExtHovered) { + float x_loc = gp.YAxisReference[i]; + ImVec2 p1(x_loc - 5, 
plot.PlotRect.Min.y - 5); + ImVec2 p2(x_loc + 5, plot.PlotRect.Max.y + 5); + DrawList.AddRect(p1, p2, ImGui::GetColorU32(ImGuiCol_DragDropTarget), 0.0f, ImDrawCornerFlags_All, 2.0f); + } + } + } + + // render x-axis drag/drop hover + if (plot.XAxis.Present && ImGui::IsDragDropPayloadBeingAccepted()) { + if (plot.XAxis.ExtHovered) { + float y_loc = plot.XAxis.HoverRect.Min.y; + ImVec2 p1(plot.XAxis.HoverRect.Min.x - 5, y_loc - 5); + ImVec2 p2(plot.XAxis.HoverRect.Max.x + 5, y_loc + 5); + DrawList.AddRect(p1, p2, ImGui::GetColorU32(ImGuiCol_DragDropTarget), 0.0f, ImDrawCornerFlags_All, 2.0f); + } + } + + PushPlotClipRect(); + // render selection/query + if (plot.Selecting) { + const ImRect select_bb(ImMin(IO.MousePos, plot.SelectStart), ImMax(IO.MousePos, plot.SelectStart)); + const bool wide_enough = ImFabs(select_bb.GetWidth()) > 2; + const bool tall_enough = ImFabs(select_bb.GetHeight()) > 2; + const bool big_enough = wide_enough && tall_enough; + if (plot.Selecting && !plot.IsLocked() && !ImHasFlag(plot.Flags, ImPlotFlags_NoBoxSelect)) { + const ImVec4 col = GetStyleColorVec4(ImPlotCol_Selection); + const ImU32 col_bg = ImGui::GetColorU32(col * ImVec4(1,1,1,0.25f)); + const ImU32 col_bd = ImGui::GetColorU32(col); + if (IO.KeyMods == (gp.InputMap.HorizontalMod | gp.InputMap.VerticalMod) && big_enough) { + DrawList.AddRectFilled(plot.PlotRect.Min, plot.PlotRect.Max, col_bg); + DrawList.AddRect( plot.PlotRect.Min, plot.PlotRect.Max, col_bd); + } + else if ((plot.XAxis.IsLocked() || IO.KeyMods == gp.InputMap.HorizontalMod) && tall_enough) { + DrawList.AddRectFilled(ImVec2(plot.PlotRect.Min.x, select_bb.Min.y), ImVec2(plot.PlotRect.Max.x, select_bb.Max.y), col_bg); + DrawList.AddRect( ImVec2(plot.PlotRect.Min.x, select_bb.Min.y), ImVec2(plot.PlotRect.Max.x, select_bb.Max.y), col_bd); + } + else if ((any_y_locked || IO.KeyMods == gp.InputMap.VerticalMod) && wide_enough) { + DrawList.AddRectFilled(ImVec2(select_bb.Min.x, plot.PlotRect.Min.y), ImVec2(select_bb.Max.x, 
plot.PlotRect.Max.y), col_bg); + DrawList.AddRect( ImVec2(select_bb.Min.x, plot.PlotRect.Min.y), ImVec2(select_bb.Max.x, plot.PlotRect.Max.y), col_bd); + } + else if (big_enough) { + DrawList.AddRectFilled(select_bb.Min, select_bb.Max, col_bg); + DrawList.AddRect( select_bb.Min, select_bb.Max, col_bd); + } + } + } + + if (ImHasFlag(plot.Flags, ImPlotFlags_Query)) // draw query rect only when query enabled. + { + const ImVec4 col = GetStyleColorVec4(ImPlotCol_Query); + const ImU32 col_bd = ImGui::GetColorU32(col * ImVec4(1,1,1,0.25f)); + const ImU32 col_bg = ImGui::GetColorU32(col); + if (plot.Querying || plot.Queried) { + if (plot.QueryRect.GetWidth() > 2 && plot.QueryRect.GetHeight() > 2) { + DrawList.AddRectFilled(plot.QueryRect.Min + plot.PlotRect.Min, plot.QueryRect.Max + plot.PlotRect.Min, col_bd); + DrawList.AddRect( plot.QueryRect.Min + plot.PlotRect.Min, plot.QueryRect.Max + plot.PlotRect.Min, col_bg); + } + } + else if (plot.Queried) { + ImRect bb_query = plot.QueryRect; + bb_query.Min += plot.PlotRect.Min; + bb_query.Max += plot.PlotRect.Min; + DrawList.AddRectFilled(bb_query.Min, bb_query.Max, col_bd); + DrawList.AddRect( bb_query.Min, bb_query.Max, col_bg); + } + } + + // render crosshairs + if (ImHasFlag(plot.Flags, ImPlotFlags_Crosshairs) && plot.PlotHovered && plot.FrameHovered && + !(plot.XAxis.Dragging || any_y_dragging) && !plot.Selecting && !plot.Querying && !plot.LegendHovered) { + ImGui::SetMouseCursor(ImGuiMouseCursor_None); + ImVec2 xy = IO.MousePos; + ImVec2 h1(plot.PlotRect.Min.x, xy.y); + ImVec2 h2(xy.x - 5, xy.y); + ImVec2 h3(xy.x + 5, xy.y); + ImVec2 h4(plot.PlotRect.Max.x, xy.y); + ImVec2 v1(xy.x, plot.PlotRect.Min.y); + ImVec2 v2(xy.x, xy.y - 5); + ImVec2 v3(xy.x, xy.y + 5); + ImVec2 v4(xy.x, plot.PlotRect.Max.y); + ImU32 col = GetStyleColorU32(ImPlotCol_Crosshairs); + DrawList.AddLine(h1, h2, col); + DrawList.AddLine(h3, h4, col); + DrawList.AddLine(v1, v2, col); + DrawList.AddLine(v3, v4, col); + } + + // render mouse pos (TODO: use 
LabelAxisValue) + if (!ImHasFlag(plot.Flags, ImPlotFlags_NoMousePos) && plot.PlotHovered) { + char buffer[128] = {}; + ImBufferWriter writer(buffer, sizeof(buffer)); + + // x + if (ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_LogScale)) { + writer.Write("%.3E", gp.MousePos[0].x); + } + else if (ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_Time)) { + ImPlotTimeUnit unit = GetUnitForRange(plot.XAxis.Range.Size() / (plot.PlotRect.GetWidth() / 100)); + const int written = FormatDateTime(ImPlotTime::FromDouble(gp.MousePos[0].x), &writer.Buffer[writer.Pos], writer.Size - writer.Pos - 1, GetDateTimeFmt(TimeFormatMouseCursor, unit)); + if (written > 0) + writer.Pos += ImMin(written, writer.Size - writer.Pos - 1); + } + else { + double range_x = gp.XTicks.Size > 1 ? (gp.XTicks.Ticks[1].PlotPos - gp.XTicks.Ticks[0].PlotPos) : plot.XAxis.Range.Size(); + writer.Write("%.*f", Precision(range_x), gp.MousePos[0].x); + } + // y1 + if (ImHasFlag(plot.YAxis[0].Flags, ImPlotAxisFlags_LogScale)) { + writer.Write(",%.3E", gp.MousePos[0].y); + } + else { + double range_y = gp.YTicks[0].Size > 1 ? (gp.YTicks[0].Ticks[1].PlotPos - gp.YTicks[0].Ticks[0].PlotPos) : plot.YAxis[0].Range.Size(); + writer.Write(",%.*f", Precision(range_y), gp.MousePos[0].y); + } + // y2 + if (ImHasFlag(plot.Flags, ImPlotFlags_YAxis2)) { + if (ImHasFlag(plot.YAxis[1].Flags, ImPlotAxisFlags_LogScale)) { + writer.Write(",(%.3E)", gp.MousePos[1].y); + } + else { + double range_y = gp.YTicks[1].Size > 1 ? (gp.YTicks[1].Ticks[1].PlotPos - gp.YTicks[1].Ticks[0].PlotPos) : plot.YAxis[1].Range.Size(); + writer.Write(",(%.*f)", Precision(range_y), gp.MousePos[1].y); + } + } + // y3 + if (ImHasFlag(plot.Flags, ImPlotFlags_YAxis3)) { + if (ImHasFlag(plot.YAxis[2].Flags, ImPlotAxisFlags_LogScale)) { + writer.Write(",(%.3E)", gp.MousePos[2].y); + } + else { + double range_y = gp.YTicks[2].Size > 1 ? 
(gp.YTicks[2].Ticks[1].PlotPos - gp.YTicks[2].Ticks[0].PlotPos) : plot.YAxis[2].Range.Size(); + writer.Write(",(%.*f)", Precision(range_y), gp.MousePos[2].y); + } + } + const ImVec2 size = ImGui::CalcTextSize(buffer); + const ImVec2 pos = GetLocationPos(plot.PlotRect, size, plot.MousePosLocation, gp.Style.MousePosPadding); + DrawList.AddText(pos, GetStyleColorU32(ImPlotCol_InlayText), buffer); + } + PopPlotClipRect(); + + // reset legend hovers + plot.LegendHovered = false; + for (int i = 0; i < plot.Items.GetSize(); ++i) + plot.Items.GetByIndex(i)->LegendHovered = false; + // render legend + if (!ImHasFlag(plot.Flags, ImPlotFlags_NoLegend) && plot.GetLegendCount() > 0) { + const ImVec2 legend_size = CalcLegendSize(plot, gp.Style.LegendInnerPadding, gp.Style.LegendSpacing, plot.LegendOrientation); + const ImVec2 legend_pos = GetLocationPos(plot.LegendOutside ? plot.FrameRect : plot.PlotRect, + legend_size, + plot.LegendLocation, + plot.LegendOutside ? gp.Style.PlotPadding : gp.Style.LegendPadding); + const ImRect legend_bb(legend_pos, legend_pos + legend_size); + // test hover + plot.LegendHovered = plot.FrameHovered && legend_bb.Contains(IO.MousePos); + + if (plot.LegendOutside) + ImGui::PushClipRect(plot.FrameRect.Min, plot.FrameRect.Max, true); + else + PushPlotClipRect(); + ImU32 col_bg = GetStyleColorU32(ImPlotCol_LegendBg); + ImU32 col_bd = GetStyleColorU32(ImPlotCol_LegendBorder); + DrawList.AddRectFilled(legend_bb.Min, legend_bb.Max, col_bg); + DrawList.AddRect(legend_bb.Min, legend_bb.Max, col_bd); + ShowLegendEntries(plot, legend_bb, plot.LegendHovered, gp.Style.LegendInnerPadding, gp.Style.LegendSpacing, plot.LegendOrientation, DrawList); + ImGui::PopClipRect(); + } + if (plot.LegendFlipSideNextFrame) { + plot.LegendOutside = !plot.LegendOutside; + plot.LegendFlipSideNextFrame = false; + } + + // render border + if (gp.Style.PlotBorderSize > 0) + DrawList.AddRect(plot.PlotRect.Min, plot.PlotRect.Max, GetStyleColorU32(ImPlotCol_PlotBorder), 0, 
ImDrawCornerFlags_All, gp.Style.PlotBorderSize); + + // FIT DATA -------------------------------------------------------------- + const bool axis_equal = ImHasFlag(plot.Flags, ImPlotFlags_Equal); + if (gp.FitThisFrame && (gp.VisibleItemCount > 0 || plot.Queried)) { + if (gp.FitX) { + const double ext_size = gp.ExtentsX.Size() * 0.5; + gp.ExtentsX.Min -= ext_size * gp.Style.FitPadding.x; + gp.ExtentsX.Max += ext_size * gp.Style.FitPadding.x; + if (!ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_LockMin) && !ImNanOrInf(gp.ExtentsX.Min)) + plot.XAxis.Range.Min = (gp.ExtentsX.Min); + if (!ImHasFlag(plot.XAxis.Flags, ImPlotAxisFlags_LockMax) && !ImNanOrInf(gp.ExtentsX.Max)) + plot.XAxis.Range.Max = (gp.ExtentsX.Max); + if (ImAlmostEqual(plot.XAxis.Range.Max, plot.XAxis.Range.Min)) { + plot.XAxis.Range.Max += 0.5; + plot.XAxis.Range.Min -= 0.5; + } + plot.XAxis.Constrain(); + if (axis_equal && !gp.FitY[0]) + plot.YAxis[0].SetAspect(plot.XAxis.GetAspect()); + } + for (int i = 0; i < IMPLOT_Y_AXES; i++) { + if (gp.FitY[i]) { + const double ext_size = gp.ExtentsY[i].Size() * 0.5; + gp.ExtentsY[i].Min -= ext_size * gp.Style.FitPadding.y; + gp.ExtentsY[i].Max += ext_size * gp.Style.FitPadding.y; + if (!ImHasFlag(plot.YAxis[i].Flags, ImPlotAxisFlags_LockMin) && !ImNanOrInf(gp.ExtentsY[i].Min)) + plot.YAxis[i].Range.Min = (gp.ExtentsY[i].Min); + if (!ImHasFlag(plot.YAxis[i].Flags, ImPlotAxisFlags_LockMax) && !ImNanOrInf(gp.ExtentsY[i].Max)) + plot.YAxis[i].Range.Max = (gp.ExtentsY[i].Max); + if (ImAlmostEqual(plot.YAxis[i].Range.Max, plot.YAxis[i].Range.Min)) { + plot.YAxis[i].Range.Max += 0.5; + plot.YAxis[i].Range.Min -= 0.5; + } + plot.YAxis[i].Constrain(); + if (i == 0 && axis_equal && !gp.FitX) + plot.XAxis.SetAspect(plot.YAxis[0].GetAspect()); + } + } + if (axis_equal && gp.FitX && gp.FitY[0]) { + double aspect = ImMax(plot.XAxis.GetAspect(), plot.YAxis[0].GetAspect()); + plot.XAxis.SetAspect(aspect); + plot.YAxis[0].SetAspect(aspect); + } + } + + // CONTEXT MENUS 
----------------------------------------------------------- + + // main ctx menu + if (!ImHasFlag(plot.Flags, ImPlotFlags_NoMenus) && plot.FrameHovered && plot.PlotHovered && IO.MouseReleased[gp.InputMap.ContextMenuButton] && !plot.LegendHovered && !plot.ContextLocked) + ImGui::OpenPopup("##PlotContext"); + if (ImGui::BeginPopup("##PlotContext")) { + ShowPlotContextMenu(plot); + ImGui::EndPopup(); + } + + // x-axis ctx menu + if (!ImHasFlag(plot.Flags, ImPlotFlags_NoMenus) && plot.FrameHovered && plot.XAxis.ExtHovered && IO.MouseReleased[gp.InputMap.ContextMenuButton] && !plot.LegendHovered && !plot.ContextLocked) + ImGui::OpenPopup("##XContext"); + if (ImGui::BeginPopup("##XContext")) { + ImGui::Text("X-Axis"); ImGui::Separator(); + ShowAxisContextMenu(plot.XAxis, ImHasFlag(plot.Flags, ImPlotFlags_Equal) ? &plot.YAxis[0] : NULL, true); + ImGui::EndPopup(); + } + + // y-axes ctx menus + for (int i = 0; i < IMPLOT_Y_AXES; ++i) { + ImGui::PushID(i); + if (!ImHasFlag(plot.Flags, ImPlotFlags_NoMenus) && plot.FrameHovered && plot.YAxis[i].ExtHovered && IO.MouseReleased[gp.InputMap.ContextMenuButton] && !plot.LegendHovered && !plot.ContextLocked) + ImGui::OpenPopup("##YContext"); + if (ImGui::BeginPopup("##YContext")) { + if (i == 0) { + ImGui::Text("Y-Axis"); ImGui::Separator(); + } + else { + ImGui::Text("Y-Axis %d", i + 1); ImGui::Separator(); + } + ShowAxisContextMenu(plot.YAxis[i], (i == 0 && ImHasFlag(plot.Flags, ImPlotFlags_Equal)) ? 
&plot.XAxis : NULL, false); + ImGui::EndPopup(); + } + ImGui::PopID(); + } + + + // LINKED AXES ------------------------------------------------------------ + + PushLinkedAxis(plot.XAxis); + for (int i = 0; i < IMPLOT_Y_AXES; ++i) + PushLinkedAxis(plot.YAxis[i]); + + // CLEANUP ---------------------------------------------------------------- + + // resset context locked flag + if (plot.ContextLocked && IO.MouseReleased[gp.InputMap.BoxSelectButton]) + plot.ContextLocked = false; + + + // reset the plot items for the next frame + for (int i = 0; i < gp.CurrentPlot->Items.GetSize(); ++i) { + gp.CurrentPlot->Items.GetByIndex(i)->SeenThisFrame = false; + } + + // Pop ImGui::PushID at the end of BeginPlot + ImGui::PopID(); + // Reset context for next plot + Reset(GImPlot); +} + +//----------------------------------------------------------------------------- +// MISC API +//----------------------------------------------------------------------------- + +ImPlotInputMap& GetInputMap() { + return GImPlot->InputMap; +} + +void SetNextPlotLimits(double x_min, double x_max, double y_min, double y_max, ImGuiCond cond) { + IM_ASSERT_USER_ERROR(GImPlot->CurrentPlot == NULL, "SetNextPlotLimits() needs to be called before BeginPlot()!"); + SetNextPlotLimitsX(x_min, x_max, cond); + SetNextPlotLimitsY(y_min, y_max, cond); +} + +void SetNextPlotLimitsX(double x_min, double x_max, ImGuiCond cond) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot == NULL, "SetNextPlotLSetNextPlotLimitsXimitsY() needs to be called before BeginPlot()!"); + IM_ASSERT(cond == 0 || ImIsPowerOfTwo(cond)); // Make sure the user doesn't attempt to combine multiple condition flags. 
+ gp.NextPlotData.HasXRange = true; + gp.NextPlotData.XRangeCond = cond; + gp.NextPlotData.X.Min = x_min; + gp.NextPlotData.X.Max = x_max; +} + +void SetNextPlotLimitsY(double y_min, double y_max, ImGuiCond cond, ImPlotYAxis y_axis) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot == NULL, "SetNextPlotLimitsY() needs to be called before BeginPlot()!"); + IM_ASSERT_USER_ERROR(y_axis >= 0 && y_axis < IMPLOT_Y_AXES, "y_axis needs to be between 0 and IMPLOT_Y_AXES"); + IM_ASSERT(cond == 0 || ImIsPowerOfTwo(cond)); // Make sure the user doesn't attempt to combine multiple condition flags. + gp.NextPlotData.HasYRange[y_axis] = true; + gp.NextPlotData.YRangeCond[y_axis] = cond; + gp.NextPlotData.Y[y_axis].Min = y_min; + gp.NextPlotData.Y[y_axis].Max = y_max; +} + +void LinkNextPlotLimits(double* xmin, double* xmax, double* ymin, double* ymax, double* ymin2, double* ymax2, double* ymin3, double* ymax3) { + ImPlotContext& gp = *GImPlot; + gp.NextPlotData.LinkedXmin = xmin; + gp.NextPlotData.LinkedXmax = xmax; + gp.NextPlotData.LinkedYmin[0] = ymin; + gp.NextPlotData.LinkedYmax[0] = ymax; + gp.NextPlotData.LinkedYmin[1] = ymin2; + gp.NextPlotData.LinkedYmax[1] = ymax2; + gp.NextPlotData.LinkedYmin[2] = ymin3; + gp.NextPlotData.LinkedYmax[2] = ymax3; +} + +void FitNextPlotAxes(bool x, bool y, bool y2, bool y3) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot == NULL, "FitNextPlotAxes() needs to be called before BeginPlot()!"); + gp.NextPlotData.FitX = x; + gp.NextPlotData.FitY[0] = y; + gp.NextPlotData.FitY[1] = y2; + gp.NextPlotData.FitY[2] = y3; +} + +void SetNextPlotTicksX(const double* values, int n_ticks, const char* const labels[], bool show_default) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot == NULL, "SetNextPlotTicksX() needs to be called before BeginPlot()!"); + gp.NextPlotData.ShowDefaultTicksX = show_default; + AddTicksCustom(values, labels, n_ticks, gp.XTicks); +} + +void 
SetNextPlotTicksX(double x_min, double x_max, int n_ticks, const char* const labels[], bool show_default) { + IM_ASSERT_USER_ERROR(n_ticks > 1, "The number of ticks must be greater than 1"); + static ImVector buffer; + FillRange(buffer, n_ticks, x_min, x_max); + SetNextPlotTicksX(&buffer[0], n_ticks, labels, show_default); +} + +void SetNextPlotTicksY(const double* values, int n_ticks, const char* const labels[], bool show_default, ImPlotYAxis y_axis) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot == NULL, "SetNextPlotTicksY() needs to be called before BeginPlot()!"); + IM_ASSERT_USER_ERROR(y_axis >= 0 && y_axis < IMPLOT_Y_AXES, "y_axis needs to be between 0 and IMPLOT_Y_AXES"); + gp.NextPlotData.ShowDefaultTicksY[y_axis] = show_default; + AddTicksCustom(values, labels, n_ticks, gp.YTicks[y_axis]); +} + +void SetNextPlotTicksY(double y_min, double y_max, int n_ticks, const char* const labels[], bool show_default, ImPlotYAxis y_axis) { + IM_ASSERT_USER_ERROR(n_ticks > 1, "The number of ticks must be greater than 1"); + static ImVector buffer; + FillRange(buffer, n_ticks, y_min, y_max); + SetNextPlotTicksY(&buffer[0], n_ticks, labels, show_default,y_axis); +} + +void SetPlotYAxis(ImPlotYAxis y_axis) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "SetPlotYAxis() needs to be called between BeginPlot() and EndPlot()!"); + IM_ASSERT_USER_ERROR(y_axis >= 0 && y_axis < IMPLOT_Y_AXES, "y_axis needs to be between 0 and IMPLOT_Y_AXES"); + gp.CurrentPlot->CurrentYAxis = y_axis; +} + +ImVec2 GetPlotPos() { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "GetPlotPos() needs to be called between BeginPlot() and EndPlot()!"); + return gp.CurrentPlot->PlotRect.Min; +} + +ImVec2 GetPlotSize() { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "GetPlotSize() needs to be called between BeginPlot() and EndPlot()!"); + return gp.CurrentPlot->PlotRect.GetSize(); +} + 
+ImDrawList* GetPlotDrawList() { + return ImGui::GetWindowDrawList(); +} + +void PushPlotClipRect() { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "PushPlotClipRect() needs to be called between BeginPlot() and EndPlot()!"); + ImGui::PushClipRect(gp.CurrentPlot->PlotRect.Min, gp.CurrentPlot->PlotRect.Max, true); +} + +void PopPlotClipRect() { + ImGui::PopClipRect(); +} + +bool IsPlotHovered() { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "IsPlotHovered() needs to be called between BeginPlot() and EndPlot()!"); + return gp.CurrentPlot->FrameHovered && gp.CurrentPlot->PlotHovered; +} + +bool IsPlotXAxisHovered() { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "IsPlotXAxisHovered() needs to be called between BeginPlot() and EndPlot()!"); + return gp.CurrentPlot->XAxis.ExtHovered; +} + +bool IsPlotYAxisHovered(ImPlotYAxis y_axis_in) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(y_axis_in >= -1 && y_axis_in < IMPLOT_Y_AXES, "y_axis needs to between -1 and IMPLOT_Y_AXES"); + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "IsPlotYAxisHovered() needs to be called between BeginPlot() and EndPlot()!"); + const ImPlotYAxis y_axis = y_axis_in >= 0 ? y_axis_in : gp.CurrentPlot->CurrentYAxis; + return gp.CurrentPlot->YAxis[y_axis].ExtHovered; +} + +ImPlotPoint GetPlotMousePos(ImPlotYAxis y_axis_in) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(y_axis_in >= -1 && y_axis_in < IMPLOT_Y_AXES, "y_axis needs to between -1 and IMPLOT_Y_AXES"); + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "GetPlotMousePos() needs to be called between BeginPlot() and EndPlot()!"); + const ImPlotYAxis y_axis = y_axis_in >= 0 ? 
y_axis_in : gp.CurrentPlot->CurrentYAxis; + return gp.MousePos[y_axis]; +} + + +ImPlotLimits GetPlotLimits(ImPlotYAxis y_axis_in) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(y_axis_in >= -1 && y_axis_in < IMPLOT_Y_AXES, "y_axis needs to between -1 and IMPLOT_Y_AXES"); + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "GetPlotLimits() needs to be called between BeginPlot() and EndPlot()!"); + const ImPlotYAxis y_axis = y_axis_in >= 0 ? y_axis_in : gp.CurrentPlot->CurrentYAxis; + + ImPlotPlot& plot = *gp.CurrentPlot; + ImPlotLimits limits; + limits.X = plot.XAxis.Range; + limits.Y = plot.YAxis[y_axis].Range; + return limits; +} + +bool IsPlotQueried() { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "IsPlotQueried() needs to be called between BeginPlot() and EndPlot()!"); + return gp.CurrentPlot->Queried; +} + +ImPlotLimits GetPlotQuery(ImPlotYAxis y_axis_in) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(y_axis_in >= -1 && y_axis_in < IMPLOT_Y_AXES, "y_axis needs to between -1 and IMPLOT_Y_AXES"); + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "GetPlotQuery() needs to be called between BeginPlot() and EndPlot()!"); + ImPlotPlot& plot = *gp.CurrentPlot; + const ImPlotYAxis y_axis = y_axis_in >= 0 ? 
y_axis_in : gp.CurrentPlot->CurrentYAxis; + + UpdateTransformCache(); + ImPlotPoint p1 = PixelsToPlot(plot.QueryRect.Min + plot.PlotRect.Min, y_axis); + ImPlotPoint p2 = PixelsToPlot(plot.QueryRect.Max + plot.PlotRect.Min, y_axis); + + ImPlotLimits result; + result.X.Min = ImMin(p1.x, p2.x); + result.X.Max = ImMax(p1.x, p2.x); + result.Y.Min = ImMin(p1.y, p2.y); + result.Y.Max = ImMax(p1.y, p2.y); + return result; +} + +void AnnotateEx(double x, double y, bool clamp, const ImVec4& col, const ImVec2& off, const char* fmt, va_list args) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "Annotate() needs to be called between BeginPlot() and EndPlot()!"); + ImVec2 pos = PlotToPixels(x,y); + ImU32 bg = ImGui::GetColorU32(col); + ImU32 fg = col.w == 0 ? GetStyleColorU32(ImPlotCol_InlayText) : CalcTextColor(col); + gp.Annotations.AppendV(pos, off, bg, fg, clamp, fmt, args); +} + +void AnnotateV(double x, double y, const ImVec2& offset, const char* fmt, va_list args) { + AnnotateEx(x,y,false,ImVec4(0,0,0,0),offset,fmt,args); +} + +void Annotate(double x, double y, const ImVec2& offset, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + AnnotateV(x,y,offset,fmt,args); + va_end(args); +} + +void AnnotateV(double x, double y, const ImVec2& offset, const ImVec4& col, const char* fmt, va_list args) { + AnnotateEx(x,y,false,col,offset,fmt,args); +} + +void Annotate(double x, double y, const ImVec2& offset, const ImVec4& col, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + AnnotateV(x,y,offset,col,fmt,args); + va_end(args); +} + +void AnnotateClampedV(double x, double y, const ImVec2& offset, const char* fmt, va_list args) { + AnnotateEx(x,y,true,ImVec4(0,0,0,0),offset,fmt,args); +} + +void AnnotateClamped(double x, double y, const ImVec2& offset, const char* fmt, ...) 
{ + va_list args; + va_start(args, fmt); + AnnotateClampedV(x,y,offset,fmt,args); + va_end(args); +} + +void AnnotateClampedV(double x, double y, const ImVec2& offset, const ImVec4& col, const char* fmt, va_list args) { + AnnotateEx(x,y,true,col,offset,fmt,args); +} + +void AnnotateClamped(double x, double y, const ImVec2& offset, const ImVec4& col, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + AnnotateClampedV(x,y,offset,col,fmt,args); + va_end(args); +} + +bool DragLineX(const char* id, double* value, bool show_label, const ImVec4& col, float thickness) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "DragLineX() needs to be called between BeginPlot() and EndPlot()!"); + const float grab_size = ImMax(5.0f, thickness); + float yt = gp.CurrentPlot->PlotRect.Min.y; + float yb = gp.CurrentPlot->PlotRect.Max.y; + float x = IM_ROUND(PlotToPixels(*value,0).x); + const bool outside = x < (gp.CurrentPlot->PlotRect.Min.x - grab_size / 2) || x > (gp.CurrentPlot->PlotRect.Max.x + grab_size / 2); + if (outside) + return false; + float len = gp.Style.MajorTickLen.x; + ImVec4 color = IsColorAuto(col) ? 
ImGui::GetStyleColorVec4(ImGuiCol_Text) : col; + ImU32 col32 = ImGui::ColorConvertFloat4ToU32(color); + ImDrawList& DrawList = *GetPlotDrawList(); + PushPlotClipRect(); + DrawList.AddLine(ImVec2(x,yt), ImVec2(x,yb), col32, thickness); + DrawList.AddLine(ImVec2(x,yt), ImVec2(x,yt+len), col32, 3*thickness); + DrawList.AddLine(ImVec2(x,yb), ImVec2(x,yb-len), col32, 3*thickness); + PopPlotClipRect(); + if (gp.CurrentPlot->Selecting || gp.CurrentPlot->Querying) + return false; + ImVec2 old_cursor_pos = ImGui::GetCursorScreenPos(); + ImVec2 new_cursor_pos = ImVec2(x - grab_size / 2.0f, yt); + ImGui::GetCurrentWindow()->DC.CursorPos = new_cursor_pos; + ImGui::InvisibleButton(id, ImVec2(grab_size, yb-yt)); + ImGui::GetCurrentWindow()->DC.CursorPos = old_cursor_pos; + if (ImGui::IsItemHovered() || ImGui::IsItemActive()) { + gp.CurrentPlot->PlotHovered = false; + ImGui::SetMouseCursor(ImGuiMouseCursor_ResizeEW); + if (show_label) { + char buff[32]; + LabelAxisValue(gp.CurrentPlot->XAxis, gp.XTicks, *value, buff, 32); + gp.Annotations.Append(ImVec2(x,yb),ImVec2(0,0),col32,CalcTextColor(color),true,"%s = %s", id, buff); + } + } + bool dragging = false; + if (ImGui::IsItemActive() && ImGui::IsMouseDragging(0)) { + *value = ImPlot::GetPlotMousePos().x; + *value = ImClamp(*value, gp.CurrentPlot->XAxis.Range.Min, gp.CurrentPlot->XAxis.Range.Max); + dragging = true; + } + return dragging; +} + +bool DragLineY(const char* id, double* value, bool show_label, const ImVec4& col, float thickness) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "DragLineY() needs to be called between BeginPlot() and EndPlot()!"); + const float grab_size = ImMax(5.0f, thickness); + float xl = gp.CurrentPlot->PlotRect.Min.x; + float xr = gp.CurrentPlot->PlotRect.Max.x; + float y = IM_ROUND(PlotToPixels(0, *value).y); + const bool outside = y < (gp.CurrentPlot->PlotRect.Min.y - grab_size / 2) || y > (gp.CurrentPlot->PlotRect.Max.y + grab_size / 2); + if (outside) + return 
false; + float len = gp.Style.MajorTickLen.y; + ImVec4 color = IsColorAuto(col) ? ImGui::GetStyleColorVec4(ImGuiCol_Text) : col; + ImU32 col32 = ImGui::ColorConvertFloat4ToU32(color); + ImDrawList& DrawList = *GetPlotDrawList(); + PushPlotClipRect(); + DrawList.AddLine(ImVec2(xl,y), ImVec2(xr,y), col32, thickness); + DrawList.AddLine(ImVec2(xl,y), ImVec2(xl+len,y), col32, 3*thickness); + DrawList.AddLine(ImVec2(xr,y), ImVec2(xr-len,y), col32, 3*thickness); + PopPlotClipRect(); + if (gp.CurrentPlot->Selecting || gp.CurrentPlot->Querying) + return false; + ImVec2 old_cursor_pos = ImGui::GetCursorScreenPos(); + ImVec2 new_cursor_pos = ImVec2(xl, y - grab_size / 2.0f); + ImGui::SetItemAllowOverlap(); + ImGui::GetCurrentWindow()->DC.CursorPos = new_cursor_pos; + ImGui::InvisibleButton(id, ImVec2(xr - xl, grab_size)); + ImGui::GetCurrentWindow()->DC.CursorPos = old_cursor_pos; + int yax = GetCurrentYAxis(); + if (ImGui::IsItemHovered() || ImGui::IsItemActive()) { + gp.CurrentPlot->PlotHovered = false; + ImGui::SetMouseCursor(ImGuiMouseCursor_ResizeNS); + if (show_label) { + char buff[32]; + LabelAxisValue(gp.CurrentPlot->YAxis[yax], gp.YTicks[yax], *value, buff, 32); + gp.Annotations.Append(ImVec2(yax == 0 ? 
xl : xr,y), ImVec2(0,0), col32, CalcTextColor(color), true, "%s = %s", id, buff); + } + } + bool dragging = false; + if (ImGui::IsItemActive() && ImGui::IsMouseDragging(0)) { + *value = ImPlot::GetPlotMousePos().y; + *value = ImClamp(*value, gp.CurrentPlot->YAxis[yax].Range.Min, gp.CurrentPlot->YAxis[yax].Range.Max); + dragging = true; + } + return dragging; +} + +bool DragPoint(const char* id, double* x, double* y, bool show_label, const ImVec4& col, float radius) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "DragPoint() needs to be called between BeginPlot() and EndPlot()!"); + const float grab_size = ImMax(5.0f, 2*radius); + const bool outside = !GetPlotLimits().Contains(*x,*y); + if (outside) + return false; + ImVec4 color = IsColorAuto(col) ? ImGui::GetStyleColorVec4(ImGuiCol_Text) : col; + ImU32 col32 = ImGui::ColorConvertFloat4ToU32(color); + ImDrawList& DrawList = *GetPlotDrawList(); + ImVec2 pos = PlotToPixels(*x,*y); + PushPlotClipRect(); + DrawList.AddCircleFilled(pos, radius, col32); + PopPlotClipRect(); + int yax = GetCurrentYAxis(); + ImVec2 old_cursor_pos = ImGui::GetCursorScreenPos(); + ImVec2 new_cursor_pos = ImVec2(pos - ImVec2(grab_size,grab_size)*0.5f); + ImGui::GetCurrentWindow()->DC.CursorPos = new_cursor_pos; + ImGui::InvisibleButton(id, ImVec2(grab_size, grab_size)); + ImGui::GetCurrentWindow()->DC.CursorPos = old_cursor_pos; + if (ImGui::IsItemHovered() || ImGui::IsItemActive()) { + gp.CurrentPlot->PlotHovered = false; + ImGui::SetMouseCursor(ImGuiMouseCursor_ResizeAll); + if (show_label) { + ImVec2 label_pos = pos + ImVec2(16 * GImGui->Style.MouseCursorScale, 8 * GImGui->Style.MouseCursorScale); + char buff1[32]; + char buff2[32]; + LabelAxisValue(gp.CurrentPlot->XAxis, gp.XTicks, *x, buff1, 32); + LabelAxisValue(gp.CurrentPlot->YAxis[yax], gp.YTicks[yax], *y, buff2, 32); + gp.Annotations.Append(label_pos, ImVec2(0.0001f,0.00001f), col32, CalcTextColor(color), true, "%s = %s,%s", id, buff1, buff2); + } + 
} + bool dragging = false; + if (ImGui::IsItemActive() && ImGui::IsMouseDragging(0)) { + *x = ImPlot::GetPlotMousePos().x; + *y = ImPlot::GetPlotMousePos().y; + *x = ImClamp(*x, gp.CurrentPlot->XAxis.Range.Min, gp.CurrentPlot->XAxis.Range.Max); + *y = ImClamp(*y, gp.CurrentPlot->YAxis[yax].Range.Min, gp.CurrentPlot->YAxis[yax].Range.Max); + dragging = true; + } + return dragging; +} + +void SetLegendLocation(ImPlotLocation location, ImPlotOrientation orientation, bool outside) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "SetLegendLocation() needs to be called between BeginPlot() and EndPlot()!"); + gp.CurrentPlot->LegendLocation = location; + gp.CurrentPlot->LegendOrientation = orientation; + if (gp.CurrentPlot->LegendOutside != outside) + gp.CurrentPlot->LegendFlipSideNextFrame = true; +} + +void SetMousePosLocation(ImPlotLocation location) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "SetMousePosLocation() needs to be called between BeginPlot() and EndPlot()!"); + gp.CurrentPlot->MousePosLocation = location; +} + +bool IsLegendEntryHovered(const char* label_id) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "IsPlotItemHighlight() needs to be called between BeginPlot() and EndPlot()!"); + ImGuiID id = ImGui::GetID(label_id); + ImPlotItem* item = gp.CurrentPlot->Items.GetByKey(id); + return item && item->LegendHovered; +} + +bool BeginLegendDragDropSource(const char* label_id, ImGuiDragDropFlags flags) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "BeginLegendDragDropSource() needs to be called between BeginPlot() and EndPlot()!"); + ImGuiID source_id = ImGui::GetID(label_id); + ImPlotItem* item = gp.CurrentPlot->Items.GetByKey(source_id); + bool is_hovered = item && item->LegendHovered; + + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + + ImGuiMouseButton mouse_button = ImGuiMouseButton_Left; + + if 
(g.IO.MouseDown[mouse_button] == false) { + if (g.ActiveId == source_id) + ImGui::ClearActiveID(); + return false; + } + + if (is_hovered && g.IO.MouseClicked[mouse_button]) { + ImGui::SetActiveID(source_id, window); + ImGui::FocusWindow(window); + } + + if (g.ActiveId != source_id) + return false; + + // Allow the underlying widget to display/return hovered during the mouse + // release frame, else we would get a flicker. + g.ActiveIdAllowOverlap = is_hovered; + + // Disable navigation and key inputs while dragging + g.ActiveIdUsingNavDirMask = ~(ImU32)0; + g.ActiveIdUsingNavInputMask = ~(ImU32)0; + g.ActiveIdUsingKeyInputMask = ~(ImU64)0; + + if (ImGui::IsMouseDragging(mouse_button)) { + if (!g.DragDropActive) { + ImGui::ClearDragDrop(); + ImGuiPayload& payload = g.DragDropPayload; + payload.SourceId = source_id; + payload.SourceParentId = 0; + g.DragDropActive = true; + g.DragDropSourceFlags = 0; + g.DragDropMouseButton = mouse_button; + } + g.DragDropSourceFrameCount = g.FrameCount; + g.DragDropWithinSource = true; + + if (!(flags & ImGuiDragDropFlags_SourceNoPreviewTooltip)) { + // Target can request the Source to not display its tooltip (we use a + // dedicated flag to make this request explicit) We unfortunately can't + // just modify the source flags and skip the call to BeginTooltip, as + // caller may be emitting contents. 
+ ImGui::BeginTooltip(); + if (g.DragDropAcceptIdPrev && (g.DragDropAcceptFlags & ImGuiDragDropFlags_AcceptNoPreviewTooltip)) { + ImGuiWindow* tooltip_window = g.CurrentWindow; + tooltip_window->SkipItems = true; + tooltip_window->HiddenFramesCanSkipItems = 1; + } + } + return true; + } + return false; +} + +void EndLegendDragDropSource() { + ImGui::EndDragDropSource(); +} + +bool BeginLegendPopup(const char* label_id, ImGuiMouseButton mouse_button) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "BeginLegendPopup() needs to be called between BeginPlot() and EndPlot()!"); + ImGuiWindow* window = GImGui->CurrentWindow; + if (window->SkipItems) + return false; + ImGuiID id = ImGui::GetID(label_id); + if (ImGui::IsMouseReleased(mouse_button)) { + ImPlotItem* item = gp.CurrentPlot->Items.GetByKey(id); + if (item && item->LegendHovered) + ImGui::OpenPopupEx(id); + } + return ImGui::BeginPopupEx(id, ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoSavedSettings); +} + +void EndLegendPopup() { + ImGui::EndPopup(); +} + +void ShowAltLegend(const char* title_id, ImPlotOrientation orientation, const ImVec2 size, bool interactable) { + ImPlotContext& gp = *GImPlot; + ImGuiContext &G = *GImGui; + ImGuiWindow * Window = G.CurrentWindow; + if (Window->SkipItems) + return; + ImDrawList &DrawList = *Window->DrawList; + ImPlotPlot* plot = GetPlot(title_id); + ImVec2 legend_size; + ImVec2 default_size = gp.Style.LegendPadding * 2; + if (plot != NULL) { + legend_size = CalcLegendSize(*plot, gp.Style.LegendInnerPadding, gp.Style.LegendSpacing, orientation); + default_size = legend_size + gp.Style.LegendPadding * 2; + } + ImVec2 frame_size = ImGui::CalcItemSize(size, default_size.x, default_size.y); + ImRect bb_frame = ImRect(Window->DC.CursorPos, Window->DC.CursorPos + frame_size); + ImGui::ItemSize(bb_frame); + if (!ImGui::ItemAdd(bb_frame, 0, &bb_frame)) + return; + ImGui::RenderFrame(bb_frame.Min, bb_frame.Max, 
GetStyleColorU32(ImPlotCol_FrameBg), true, G.Style.FrameRounding); + DrawList.PushClipRect(bb_frame.Min, bb_frame.Max, true); + if (plot != NULL) { + const ImVec2 legend_pos = GetLocationPos(bb_frame, legend_size, 0, gp.Style.LegendPadding); + const ImRect legend_bb(legend_pos, legend_pos + legend_size); + interactable = interactable && bb_frame.Contains(ImGui::GetIO().MousePos); + // render legend box + ImU32 col_bg = GetStyleColorU32(ImPlotCol_LegendBg); + ImU32 col_bd = GetStyleColorU32(ImPlotCol_LegendBorder); + DrawList.AddRectFilled(legend_bb.Min, legend_bb.Max, col_bg); + DrawList.AddRect(legend_bb.Min, legend_bb.Max, col_bd); + // render entries + ShowLegendEntries(*plot, legend_bb, interactable, gp.Style.LegendInnerPadding, gp.Style.LegendSpacing, orientation, DrawList); + } + DrawList.PopClipRect(); +} + +//----------------------------------------------------------------------------- +// STYLING +//----------------------------------------------------------------------------- + +ImPlotStyle& GetStyle() { + ImPlotContext& gp = *GImPlot; + return gp.Style; +} + +void PushStyleColor(ImPlotCol idx, ImU32 col) { + ImPlotContext& gp = *GImPlot; + ImGuiColorMod backup; + backup.Col = idx; + backup.BackupValue = gp.Style.Colors[idx]; + gp.ColorModifiers.push_back(backup); + gp.Style.Colors[idx] = ImGui::ColorConvertU32ToFloat4(col); +} + +void PushStyleColor(ImPlotCol idx, const ImVec4& col) { + ImPlotContext& gp = *GImPlot; + ImGuiColorMod backup; + backup.Col = idx; + backup.BackupValue = gp.Style.Colors[idx]; + gp.ColorModifiers.push_back(backup); + gp.Style.Colors[idx] = col; +} + +void PopStyleColor(int count) { + ImPlotContext& gp = *GImPlot; + while (count > 0) + { + ImGuiColorMod& backup = gp.ColorModifiers.back(); + gp.Style.Colors[backup.Col] = backup.BackupValue; + gp.ColorModifiers.pop_back(); + count--; + } +} + +void PushStyleVar(ImPlotStyleVar idx, float val) { + ImPlotContext& gp = *GImPlot; + const ImPlotStyleVarInfo* var_info = 
GetPlotStyleVarInfo(idx); + if (var_info->Type == ImGuiDataType_Float && var_info->Count == 1) { + float* pvar = (float*)var_info->GetVarPtr(&gp.Style); + gp.StyleModifiers.push_back(ImGuiStyleMod(idx, *pvar)); + *pvar = val; + return; + } + IM_ASSERT(0 && "Called PushStyleVar() float variant but variable is not a float!"); +} + +void PushStyleVar(ImPlotStyleVar idx, int val) { + ImPlotContext& gp = *GImPlot; + const ImPlotStyleVarInfo* var_info = GetPlotStyleVarInfo(idx); + if (var_info->Type == ImGuiDataType_S32 && var_info->Count == 1) { + int* pvar = (int*)var_info->GetVarPtr(&gp.Style); + gp.StyleModifiers.push_back(ImGuiStyleMod(idx, *pvar)); + *pvar = val; + return; + } + else if (var_info->Type == ImGuiDataType_Float && var_info->Count == 1) { + float* pvar = (float*)var_info->GetVarPtr(&gp.Style); + gp.StyleModifiers.push_back(ImGuiStyleMod(idx, *pvar)); + *pvar = (float)val; + return; + } + IM_ASSERT(0 && "Called PushStyleVar() int variant but variable is not a int!"); +} + +void PushStyleVar(ImGuiStyleVar idx, const ImVec2& val) +{ + ImPlotContext& gp = *GImPlot; + const ImPlotStyleVarInfo* var_info = GetPlotStyleVarInfo(idx); + if (var_info->Type == ImGuiDataType_Float && var_info->Count == 2) + { + ImVec2* pvar = (ImVec2*)var_info->GetVarPtr(&gp.Style); + gp.StyleModifiers.push_back(ImGuiStyleMod(idx, *pvar)); + *pvar = val; + return; + } + IM_ASSERT(0 && "Called PushStyleVar() ImVec2 variant but variable is not a ImVec2!"); +} + +void PopStyleVar(int count) { + ImPlotContext& gp = *GImPlot; + while (count > 0) { + ImGuiStyleMod& backup = gp.StyleModifiers.back(); + const ImPlotStyleVarInfo* info = GetPlotStyleVarInfo(backup.VarIdx); + void* data = info->GetVarPtr(&gp.Style); + if (info->Type == ImGuiDataType_Float && info->Count == 1) { + ((float*)data)[0] = backup.BackupFloat[0]; + } + else if (info->Type == ImGuiDataType_Float && info->Count == 2) { + ((float*)data)[0] = backup.BackupFloat[0]; + ((float*)data)[1] = backup.BackupFloat[1]; + } + else 
if (info->Type == ImGuiDataType_S32 && info->Count == 1) { + ((int*)data)[0] = backup.BackupInt[0]; + } + gp.StyleModifiers.pop_back(); + count--; + } +} + +//------------------------------------------------------------------------------ +// COLORMAPS +//------------------------------------------------------------------------------ + + +void PushColormap(ImPlotColormap colormap) { + ImPlotContext& gp = *GImPlot; + gp.ColormapModifiers.push_back(ImPlotColormapMod(gp.Colormap, gp.ColormapSize)); + gp.Colormap = GetColormap(colormap, &gp.ColormapSize); +} + +void PushColormap(const ImVec4* colormap, int size) { + ImPlotContext& gp = *GImPlot; + gp.ColormapModifiers.push_back(ImPlotColormapMod(gp.Colormap, gp.ColormapSize)); + gp.Colormap = colormap; + gp.ColormapSize = size; +} + +void PopColormap(int count) { + ImPlotContext& gp = *GImPlot; + while (count > 0) { + const ImPlotColormapMod& backup = gp.ColormapModifiers.back(); + gp.Colormap = backup.Colormap; + gp.ColormapSize = backup.ColormapSize; + gp.ColormapModifiers.pop_back(); + count--; + } +} + +void SetColormap(ImPlotColormap colormap, int samples) { + ImPlotContext& gp = *GImPlot; + gp.Colormap = GetColormap(colormap, &gp.ColormapSize); + if (samples > 1) { + static ImVector resampled; + resampled.resize(samples); + ResampleColormap(gp.Colormap, gp.ColormapSize, &resampled[0], samples); + SetColormap(&resampled[0], samples); + } +} + +void SetColormap(const ImVec4* colors, int size) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(colors != NULL, "You can't set the colors to NULL!"); + IM_ASSERT_USER_ERROR(size > 0, "The number of colors must be greater than 0!"); + static ImVector user_colormap; + user_colormap.shrink(0); + user_colormap.reserve(size); + for (int i = 0; i < size; ++i) + user_colormap.push_back(colors[i]); + gp.Colormap = &user_colormap[0]; + gp.ColormapSize = size; +} + +const ImVec4* GetColormap(ImPlotColormap colormap, int* size_out) { + static const int 
csizes[ImPlotColormap_COUNT] = {10,10,9,9,12,11,11,11,11,11,11}; + static const ImOffsetCalculator coffs(csizes); + static ImVec4 cdata[] = { + // ImPlotColormap_Default // X11 Named Colors + ImVec4(0.0f, 0.7490196228f, 1.0f, 1.0f), // Blues::DeepSkyBlue, + ImVec4(1.0f, 0.0f, 0.0f, 1.0f), // Reds::Red, + ImVec4(0.4980392158f, 1.0f, 0.0f, 1.0f), // Greens::Chartreuse, + ImVec4(1.0f, 1.0f, 0.0f, 1.0f), // Yellows::Yellow, + ImVec4(0.0f, 1.0f, 1.0f, 1.0f), // Cyans::Cyan, + ImVec4(1.0f, 0.6470588446f, 0.0f, 1.0f), // Oranges::Orange, + ImVec4(1.0f, 0.0f, 1.0f, 1.0f), // Purples::Magenta, + ImVec4(0.5411764979f, 0.1686274558f, 0.8862745166f, 1.0f), // Purples::BlueViolet, + ImVec4(0.5f, 0.5f, 0.5f, 1.0f), // Grays::Gray50, + ImVec4(0.8235294223f, 0.7058823705f, 0.5490196347f, 1.0f), // Browns::Tan + // ImPlotColormap_Deep + ImVec4(0.298f, 0.447f, 0.690f, 1.000f), + ImVec4(0.867f, 0.518f, 0.322f, 1.000f), + ImVec4(0.333f, 0.659f, 0.408f, 1.000f), + ImVec4(0.769f, 0.306f, 0.322f, 1.000f), + ImVec4(0.506f, 0.446f, 0.702f, 1.000f), + ImVec4(0.576f, 0.471f, 0.376f, 1.000f), + ImVec4(0.855f, 0.545f, 0.765f, 1.000f), + ImVec4(0.549f, 0.549f, 0.549f, 1.000f), + ImVec4(0.800f, 0.725f, 0.455f, 1.000f), + ImVec4(0.392f, 0.710f, 0.804f, 1.000f), + // ImPlotColormap_Dark + ImVec4(0.894118f, 0.101961f, 0.109804f, 1.0f), + ImVec4(0.215686f, 0.494118f, 0.721569f, 1.0f), + ImVec4(0.301961f, 0.686275f, 0.290196f, 1.0f), + ImVec4(0.596078f, 0.305882f, 0.639216f, 1.0f), + ImVec4(1.000000f, 0.498039f, 0.000000f, 1.0f), + ImVec4(1.000000f, 1.000000f, 0.200000f, 1.0f), + ImVec4(0.650980f, 0.337255f, 0.156863f, 1.0f), + ImVec4(0.968627f, 0.505882f, 0.749020f, 1.0f), + ImVec4(0.600000f, 0.600000f, 0.600000f, 1.0f), + // ImPlotColormap_Pastel + ImVec4(0.984314f, 0.705882f, 0.682353f, 1.0f), + ImVec4(0.701961f, 0.803922f, 0.890196f, 1.0f), + ImVec4(0.800000f, 0.921569f, 0.772549f, 1.0f), + ImVec4(0.870588f, 0.796078f, 0.894118f, 1.0f), + ImVec4(0.996078f, 0.850980f, 0.650980f, 1.0f), + 
ImVec4(1.000000f, 1.000000f, 0.800000f, 1.0f), + ImVec4(0.898039f, 0.847059f, 0.741176f, 1.0f), + ImVec4(0.992157f, 0.854902f, 0.925490f, 1.0f), + ImVec4(0.949020f, 0.949020f, 0.949020f, 1.0f), + // ImPlotColormap_Paired + ImVec4(0.258824f, 0.807843f, 0.890196f, 1.0f), + ImVec4(0.121569f, 0.470588f, 0.705882f, 1.0f), + ImVec4(0.698039f, 0.874510f, 0.541176f, 1.0f), + ImVec4(0.200000f, 0.627451f, 0.172549f, 1.0f), + ImVec4(0.984314f, 0.603922f, 0.600000f, 1.0f), + ImVec4(0.890196f, 0.101961f, 0.109804f, 1.0f), + ImVec4(0.992157f, 0.749020f, 0.435294f, 1.0f), + ImVec4(1.000000f, 0.498039f, 0.000000f, 1.0f), + ImVec4(0.792157f, 0.698039f, 0.839216f, 1.0f), + ImVec4(0.415686f, 0.239216f, 0.603922f, 1.0f), + ImVec4(1.000000f, 1.000000f, 0.600000f, 1.0f), + ImVec4(0.694118f, 0.349020f, 0.156863f, 1.0f), + // ImPlotColormap_Viridis + ImVec4(0.267004f, 0.004874f, 0.329415f, 1.0f), + ImVec4(0.282623f, 0.140926f, 0.457517f, 1.0f), + ImVec4(0.253935f, 0.265254f, 0.529983f, 1.0f), + ImVec4(0.206756f, 0.371758f, 0.553117f, 1.0f), + ImVec4(0.163625f, 0.471133f, 0.558148f, 1.0f), + ImVec4(0.127568f, 0.566949f, 0.550556f, 1.0f), + ImVec4(0.134692f, 0.658636f, 0.517649f, 1.0f), + ImVec4(0.266941f, 0.748751f, 0.440573f, 1.0f), + ImVec4(0.477504f, 0.821444f, 0.318195f, 1.0f), + ImVec4(0.741388f, 0.873449f, 0.149561f, 1.0f), + ImVec4(0.993248f, 0.906157f, 0.143936f, 1.0f), + // ImPlotColormap_Plasma + ImVec4(5.03830e-02f, 2.98030e-02f, 5.27975e-01f, 1.00000e+00f), + ImVec4(2.54627e-01f, 1.38820e-02f, 6.15419e-01f, 1.00000e+00f), + ImVec4(4.17642e-01f, 5.64000e-04f, 6.58390e-01f, 1.00000e+00f), + ImVec4(5.62738e-01f, 5.15450e-02f, 6.41509e-01f, 1.00000e+00f), + ImVec4(6.92840e-01f, 1.65141e-01f, 5.64522e-01f, 1.00000e+00f), + ImVec4(7.98216e-01f, 2.80197e-01f, 4.69538e-01f, 1.00000e+00f), + ImVec4(8.81443e-01f, 3.92529e-01f, 3.83229e-01f, 1.00000e+00f), + ImVec4(9.49217e-01f, 5.17763e-01f, 2.95662e-01f, 1.00000e+00f), + ImVec4(9.88260e-01f, 6.52325e-01f, 2.11364e-01f, 1.00000e+00f), + 
ImVec4(9.88648e-01f, 8.09579e-01f, 1.45357e-01f, 1.00000e+00f), + ImVec4(9.40015e-01f, 9.75158e-01f, 1.31326e-01f, 1.00000e+00f), + // ImPlotColormap_Hot + ImVec4(0.2500f, 0.f, 0.f, 1.0f), + ImVec4(0.5000f, 0.f, 0.f, 1.0f), + ImVec4(0.7500f, 0.f, 0.f, 1.0f), + ImVec4(1.0000f, 0.f, 0.f, 1.0f), + ImVec4(1.0000f, 0.2500f, 0.f, 1.0f), + ImVec4(1.0000f, 0.5000f, 0.f, 1.0f), + ImVec4(1.0000f, 0.7500f, 0.f, 1.0f), + ImVec4(1.0000f, 1.0000f, 0.f, 1.0f), + ImVec4(1.0000f, 1.0000f, 0.3333f, 1.0f), + ImVec4(1.0000f, 1.0000f, 0.6667f, 1.0f), + ImVec4(1.0000f, 1.0000f, 1.0000f, 1.0f), + // ImPlotColormap_Cool + ImVec4( 0.f, 1.0000f, 1.0000f, 1.0f), + ImVec4(0.1000f, 0.9000f, 1.0000f, 1.0f), + ImVec4(0.2000f, 0.8000f, 1.0000f, 1.0f), + ImVec4(0.3000f, 0.7000f, 1.0000f, 1.0f), + ImVec4(0.4000f, 0.6000f, 1.0000f, 1.0f), + ImVec4(0.5000f, 0.5000f, 1.0000f, 1.0f), + ImVec4(0.6000f, 0.4000f, 1.0000f, 1.0f), + ImVec4(0.7000f, 0.3000f, 1.0000f, 1.0f), + ImVec4(0.8000f, 0.2000f, 1.0000f, 1.0f), + ImVec4(0.9000f, 0.1000f, 1.0000f, 1.0f), + ImVec4(1.0000f, 0.f, 1.0000f, 1.0f), + // ImPlotColormap_Pink + ImVec4(0.2887f, 0.f, 0.f, 1.0f), + ImVec4(0.4830f, 0.2582f, 0.2582f, 1.0f), + ImVec4(0.6191f, 0.3651f, 0.3651f, 1.0f), + ImVec4(0.7303f, 0.4472f, 0.4472f, 1.0f), + ImVec4(0.7746f, 0.5916f, 0.5164f, 1.0f), + ImVec4(0.8165f, 0.7071f, 0.5774f, 1.0f), + ImVec4(0.8563f, 0.8062f, 0.6325f, 1.0f), + ImVec4(0.8944f, 0.8944f, 0.6831f, 1.0f), + ImVec4(0.9309f, 0.9309f, 0.8028f, 1.0f), + ImVec4(0.9661f, 0.9661f, 0.9068f, 1.0f), + ImVec4(1.0000f, 1.0000f, 1.0000f, 1.0f), + // ImPlotColormap_Jet + ImVec4( 0.f, 0.f, 0.6667f, 1.0f), + ImVec4( 0.f, 0.f, 1.0000f, 1.0f), + ImVec4( 0.f, 0.3333f, 1.0000f, 1.0f), + ImVec4( 0.f, 0.6667f, 1.0000f, 1.0f), + ImVec4( 0.f, 1.0000f, 1.0000f, 1.0f), + ImVec4(0.3333f, 1.0000f, 0.6667f, 1.0f), + ImVec4(0.6667f, 1.0000f, 0.3333f, 1.0f), + ImVec4(1.0000f, 1.0000f, 0.f, 1.0f), + ImVec4(1.0000f, 0.6667f, 0.f, 1.0f), + ImVec4(1.0000f, 0.3333f, 0.f, 1.0f), + ImVec4(1.0000f, 
0.f, 0.f, 1.0f) + }; + *size_out = csizes[colormap]; + return &cdata[coffs.Offsets[colormap]]; +} + +const char* GetColormapName(ImPlotColormap colormap) { + static const char* cmap_names[] = {"Default","Deep","Dark","Pastel","Paired","Viridis","Plasma","Hot","Cool","Pink","Jet"}; + return cmap_names[colormap]; +} + +void ResampleColormap(const ImVec4* colormap_in, int size_in, ImVec4* colormap_out, int size_out) { + for (int i = 0; i < size_out; ++i) { + float t = i * 1.0f / (size_out - 1); + colormap_out[i] = LerpColormap(colormap_in, size_in, t); + } +} + +int GetColormapSize() { + ImPlotContext& gp = *GImPlot; + return gp.ColormapSize; +} + +ImVec4 GetColormapColor(int index) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(index >= 0, "The Colormap index must be greater than zero!"); + return gp.Colormap[index % gp.ColormapSize]; +} + +ImVec4 LerpColormap(const ImVec4* colormap, int size, float t) { + float tc = ImClamp(t,0.0f,1.0f); + int i1 = (int)((size -1 ) * tc); + int i2 = i1 + 1; + if (i2 == size || size == 1) + return colormap[i1]; + float t1 = (float)i1 / (float)(size - 1); + float t2 = (float)i2 / (float)(size - 1); + float tr = ImRemap(t, t1, t2, 0.0f, 1.0f); + return ImLerp(colormap[i1], colormap[i2], tr); +} + +ImVec4 LerpColormap(float t) { + ImPlotContext& gp = *GImPlot; + return LerpColormap(gp.Colormap, gp.ColormapSize, t); +} + +ImVec4 NextColormapColor() { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "NextColormapColor() needs to be called between BeginPlot() and EndPlot()!"); + ImVec4 col = gp.Colormap[gp.CurrentPlot->ColormapIdx % gp.ColormapSize]; + gp.CurrentPlot->ColormapIdx++; + return col; +} + +void ShowColormapScale(double scale_min, double scale_max, float height) { + ImPlotContext& gp = *GImPlot; + static ImPlotTickCollection ticks; + ticks.Reset(); + ImPlotRange range; + range.Min = scale_min; + range.Max = scale_max; + + AddTicksDefault(range, 10, 0, ticks); + + + ImGuiContext &G = 
*GImGui; + ImGuiWindow * Window = G.CurrentWindow; + if (Window->SkipItems) + return; + const float txt_off = 5; + const float bar_w = 20; + + ImDrawList &DrawList = *Window->DrawList; + ImVec2 size(bar_w + txt_off + ticks.MaxWidth + 2 * gp.Style.PlotPadding.x, height); + ImRect bb_frame = ImRect(Window->DC.CursorPos, Window->DC.CursorPos + size); + ImGui::ItemSize(bb_frame); + if (!ImGui::ItemAdd(bb_frame, 0, &bb_frame)) + return; + ImGui::RenderFrame(bb_frame.Min, bb_frame.Max, GetStyleColorU32(ImPlotCol_FrameBg), true, G.Style.FrameRounding); + ImRect bb_grad(bb_frame.Min + gp.Style.PlotPadding, bb_frame.Min + ImVec2(bar_w + gp.Style.PlotPadding.x, height - gp.Style.PlotPadding.y)); + + int num_cols = GetColormapSize(); + float h_step = (height - 2 * gp.Style.PlotPadding.y) / (num_cols - 1); + for (int i = 0; i < num_cols-1; ++i) { + ImRect rect(bb_grad.Min.x, bb_grad.Min.y + h_step * i, bb_grad.Max.x, bb_grad.Min.y + h_step * (i + 1)); + ImU32 col1 = ImGui::GetColorU32(GetColormapColor(num_cols - 1 - i)); + ImU32 col2 = ImGui::GetColorU32(GetColormapColor(num_cols - 1 - (i+1))); + DrawList.AddRectFilledMultiColor(rect.Min, rect.Max, col1, col1, col2, col2); + } + ImVec4 col_tik4 = ImGui::GetStyleColorVec4(ImGuiCol_Text); + col_tik4.w *= 0.25f; + const ImU32 col_tick = ImGui::GetColorU32(col_tik4); + + ImGui::PushClipRect(bb_frame.Min, bb_frame.Max, true); + for (int i = 0; i < ticks.Size; ++i) { + float ypos = ImRemap((float)ticks.Ticks[i].PlotPos, (float)range.Max, (float)range.Min, bb_grad.Min.y, bb_grad.Max.y); + if (ypos < bb_grad.Max.y - 2 && ypos > bb_grad.Min.y + 2) + DrawList.AddLine(ImVec2(bb_grad.Max.x-1, ypos), ImVec2(bb_grad.Max.x - (ticks.Ticks[i].Major ? 
10.0f : 5.0f), ypos), col_tick, 1.0f); + DrawList.AddText(ImVec2(bb_grad.Max.x-1, ypos) + ImVec2(txt_off, -ticks.Ticks[i].LabelSize.y * 0.5f), GetStyleColorU32(ImPlotCol_TitleText), ticks.GetText(i)); + } + ImGui::PopClipRect(); + + DrawList.AddRect(bb_grad.Min, bb_grad.Max, GetStyleColorU32(ImPlotCol_PlotBorder)); +} + + +//----------------------------------------------------------------------------- +// Style Editor etc. +//----------------------------------------------------------------------------- + +static void HelpMarker(const char* desc) { + ImGui::TextDisabled("(?)"); + if (ImGui::IsItemHovered()) { + ImGui::BeginTooltip(); + ImGui::PushTextWrapPos(ImGui::GetFontSize() * 35.0f); + ImGui::TextUnformatted(desc); + ImGui::PopTextWrapPos(); + ImGui::EndTooltip(); + } +} + +bool ShowStyleSelector(const char* label) +{ + static int style_idx = -1; + if (ImGui::Combo(label, &style_idx, "Auto\0Classic\0Dark\0Light\0")) + { + switch (style_idx) + { + case 0: StyleColorsAuto(); break; + case 1: StyleColorsClassic(); break; + case 2: StyleColorsDark(); break; + case 3: StyleColorsLight(); break; + } + return true; + } + return false; +} + +bool ShowColormapSelector(const char* label) { + bool set = false; + static const char* map = ImPlot::GetColormapName(ImPlotColormap_Default); + if (ImGui::BeginCombo(label, map)) { + for (int i = 0; i < ImPlotColormap_COUNT; ++i) { + const char* name = GetColormapName(i); + if (ImGui::Selectable(name, map == name)) { + map = name; + ImPlot::SetColormap(i); + ImPlot::BustItemCache(); + set = true; + } + } + ImGui::EndCombo(); + } + return set; +} + +void ShowStyleEditor(ImPlotStyle* ref) { + ImPlotContext& gp = *GImPlot; + ImPlotStyle& style = GetStyle(); + static ImPlotStyle ref_saved_style; + // Default to using internal storage as reference + static bool init = true; + if (init && ref == NULL) + ref_saved_style = style; + init = false; + if (ref == NULL) + ref = &ref_saved_style; + + if 
(ImPlot::ShowStyleSelector("Colors##Selector")) + ref_saved_style = style; + + // Save/Revert button + if (ImGui::Button("Save Ref")) + *ref = ref_saved_style = style; + ImGui::SameLine(); + if (ImGui::Button("Revert Ref")) + style = *ref; + ImGui::SameLine(); + HelpMarker("Save/Revert in local non-persistent storage. Default Colors definition are not affected. " + "Use \"Export\" below to save them somewhere."); + if (ImGui::BeginTabBar("##StyleEditor")) { + if (ImGui::BeginTabItem("Variables")) { + ImGui::Text("Item Styling"); + ImGui::SliderFloat("LineWeight", &style.LineWeight, 0.0f, 5.0f, "%.1f"); + ImGui::SliderFloat("MarkerSize", &style.MarkerSize, 2.0f, 10.0f, "%.1f"); + ImGui::SliderFloat("MarkerWeight", &style.MarkerWeight, 0.0f, 5.0f, "%.1f"); + ImGui::SliderFloat("FillAlpha", &style.FillAlpha, 0.0f, 1.0f, "%.2f"); + ImGui::SliderFloat("ErrorBarSize", &style.ErrorBarSize, 0.0f, 10.0f, "%.1f"); + ImGui::SliderFloat("ErrorBarWeight", &style.ErrorBarWeight, 0.0f, 5.0f, "%.1f"); + ImGui::SliderFloat("DigitalBitHeight", &style.DigitalBitHeight, 0.0f, 20.0f, "%.1f"); + ImGui::SliderFloat("DigitalBitGap", &style.DigitalBitGap, 0.0f, 20.0f, "%.1f"); + float indent = ImGui::CalcItemWidth() - ImGui::GetFrameHeight(); + ImGui::Indent(ImGui::CalcItemWidth() - ImGui::GetFrameHeight()); + ImGui::Checkbox("AntiAliasedLines", &style.AntiAliasedLines); + ImGui::Unindent(indent); + ImGui::Text("Plot Styling"); + ImGui::SliderFloat("PlotBorderSize", &style.PlotBorderSize, 0.0f, 2.0f, "%.0f"); + ImGui::SliderFloat("MinorAlpha", &style.MinorAlpha, 0.0f, 1.0f, "%.2f"); + ImGui::SliderFloat2("MajorTickLen", (float*)&style.MajorTickLen, 0.0f, 20.0f, "%.0f"); + ImGui::SliderFloat2("MinorTickLen", (float*)&style.MinorTickLen, 0.0f, 20.0f, "%.0f"); + ImGui::SliderFloat2("MajorTickSize", (float*)&style.MajorTickSize, 0.0f, 2.0f, "%.1f"); + ImGui::SliderFloat2("MinorTickSize", (float*)&style.MinorTickSize, 0.0f, 2.0f, "%.1f"); + ImGui::SliderFloat2("MajorGridSize", 
(float*)&style.MajorGridSize, 0.0f, 2.0f, "%.1f"); + ImGui::SliderFloat2("MinorGridSize", (float*)&style.MinorGridSize, 0.0f, 2.0f, "%.1f"); + ImGui::SliderFloat2("PlotDefaultSize", (float*)&style.PlotDefaultSize, 0.0f, 1000, "%.0f"); + ImGui::SliderFloat2("PlotMinSize", (float*)&style.PlotMinSize, 0.0f, 300, "%.0f"); + ImGui::Text("Plot Padding"); + ImGui::SliderFloat2("PlotPadding", (float*)&style.PlotPadding, 0.0f, 20.0f, "%.0f"); + ImGui::SliderFloat2("LabelPadding", (float*)&style.LabelPadding, 0.0f, 20.0f, "%.0f"); + ImGui::SliderFloat2("LegendPadding", (float*)&style.LegendPadding, 0.0f, 20.0f, "%.0f"); + ImGui::SliderFloat2("LegendInnerPadding", (float*)&style.LegendInnerPadding, 0.0f, 10.0f, "%.0f"); + ImGui::SliderFloat2("LegendSpacing", (float*)&style.LegendSpacing, 0.0f, 5.0f, "%.0f"); + ImGui::SliderFloat2("MousePosPadding", (float*)&style.MousePosPadding, 0.0f, 20.0f, "%.0f"); + ImGui::SliderFloat2("AnnotationPadding", (float*)&style.AnnotationPadding, 0.0f, 5.0f, "%.0f"); + ImGui::SliderFloat2("FitPadding", (float*)&style.FitPadding, 0, 0.2f, "%.2f"); + + ImGui::EndTabItem(); + } + if (ImGui::BeginTabItem("Colors")) { + static int output_dest = 0; + static bool output_only_modified = false; + + if (ImGui::Button("Export", ImVec2(75,0))) { + if (output_dest == 0) + ImGui::LogToClipboard(); + else + ImGui::LogToTTY(); + ImGui::LogText("ImVec4* colors = ImPlot::GetStyle().Colors;\n"); + for (int i = 0; i < ImPlotCol_COUNT; i++) { + const ImVec4& col = style.Colors[i]; + const char* name = ImPlot::GetStyleColorName(i); + if (!output_only_modified || memcmp(&col, &ref->Colors[i], sizeof(ImVec4)) != 0) { + if (IsColorAuto(i)) + ImGui::LogText("colors[ImPlotCol_%s]%*s= IMPLOT_AUTO_COL;\n",name,14 - (int)strlen(name), ""); + else + ImGui::LogText("colors[ImPlotCol_%s]%*s= ImVec4(%.2ff, %.2ff, %.2ff, %.2ff);\n", + name, 14 - (int)strlen(name), "", col.x, col.y, col.z, col.w); + } + } + ImGui::LogFinish(); + } + ImGui::SameLine(); ImGui::SetNextItemWidth(120); 
ImGui::Combo("##output_type", &output_dest, "To Clipboard\0To TTY\0"); + ImGui::SameLine(); ImGui::Checkbox("Only Modified Colors", &output_only_modified); + + static ImGuiTextFilter filter; + filter.Draw("Filter colors", ImGui::GetFontSize() * 16); + + static ImGuiColorEditFlags alpha_flags = ImGuiColorEditFlags_AlphaPreviewHalf; + if (ImGui::RadioButton("Opaque", alpha_flags == ImGuiColorEditFlags_None)) { alpha_flags = ImGuiColorEditFlags_None; } ImGui::SameLine(); + if (ImGui::RadioButton("Alpha", alpha_flags == ImGuiColorEditFlags_AlphaPreview)) { alpha_flags = ImGuiColorEditFlags_AlphaPreview; } ImGui::SameLine(); + if (ImGui::RadioButton("Both", alpha_flags == ImGuiColorEditFlags_AlphaPreviewHalf)) { alpha_flags = ImGuiColorEditFlags_AlphaPreviewHalf; } ImGui::SameLine(); + HelpMarker( + "In the color list:\n" + "Left-click on colored square to open color picker,\n" + "Right-click to open edit options menu."); + ImGui::Separator(); + ImGui::PushItemWidth(-160); + for (int i = 0; i < ImPlotCol_COUNT; i++) { + const char* name = ImPlot::GetStyleColorName(i); + if (!filter.PassFilter(name)) + continue; + ImGui::PushID(i); + ImVec4 temp = GetStyleColorVec4(i); + const bool is_auto = IsColorAuto(i); + if (!is_auto) + ImGui::PushStyleVar(ImGuiStyleVar_Alpha, 0.25f); + if (ImGui::Button("Auto")) { + if (is_auto) + style.Colors[i] = temp; + else + style.Colors[i] = IMPLOT_AUTO_COL; + BustItemCache(); + } + if (!is_auto) + ImGui::PopStyleVar(); + ImGui::SameLine(); + if (ImGui::ColorEdit4(name, &temp.x, ImGuiColorEditFlags_NoInputs | alpha_flags)) { + style.Colors[i] = temp; + BustItemCache(); + } + if (memcmp(&style.Colors[i], &ref->Colors[i], sizeof(ImVec4)) != 0) { + ImGui::SameLine(175); if (ImGui::Button("Save")) { ref->Colors[i] = style.Colors[i]; } + ImGui::SameLine(); if (ImGui::Button("Revert")) { + style.Colors[i] = ref->Colors[i]; + BustItemCache(); + } + } + ImGui::PopID(); + } + ImGui::PopItemWidth(); + ImGui::Separator(); + ImGui::Text("Colors that are 
set to Auto (i.e. IMPLOT_AUTO_COL) will\n" + "be automatically deduced from your ImGui style or the\n" + "current ImPlot Colormap. If you want to style individual\n" + "plot items, use Push/PopStyleColor around its function."); + ImGui::EndTabItem(); + } + if (ImGui::BeginTabItem("Colormaps")) { + static int output_dest = 0; + if (ImGui::Button("Export", ImVec2(75,0))) { + if (output_dest == 0) + ImGui::LogToClipboard(); + else + ImGui::LogToTTY(); + ImGui::LogText("static const ImVec4 colormap[%d] = {\n", gp.ColormapSize); + for (int i = 0; i < gp.ColormapSize; ++i) { + const ImVec4& col = gp.Colormap[i]; + ImGui::LogText(" ImVec4(%.2ff, %.2ff, %.2ff, %.2ff)%s\n", col.x, col.y, col.z, col.w, i == gp.ColormapSize - 1 ? "" : ","); + } + ImGui::LogText("};"); + ImGui::LogFinish(); + } + ImGui::SameLine(); ImGui::SetNextItemWidth(120); ImGui::Combo("##output_type", &output_dest, "To Clipboard\0To TTY\0"); + ImGui::SameLine(); HelpMarker("Export code for selected Colormap\n(built in or custom)."); + ImGui::Separator(); + static ImVector custom; + static bool custom_set = false; + for (int i = 0; i < ImPlotColormap_COUNT; ++i) { + ImGui::PushID(i); + int size; + const ImVec4* cmap = GetColormap(i, &size); + bool selected = cmap == gp.Colormap; + if (selected) { + custom_set = false; + } + + if (!selected) + ImGui::PushStyleVar(ImGuiStyleVar_Alpha, 0.25f); + if (ImGui::Button(GetColormapName(i), ImVec2(75,0))) { + SetColormap(i); + BustItemCache(); + custom_set = false; + } + if (!selected) + ImGui::PopStyleVar(); + ImGui::SameLine(); + for (int c = 0; c < size; ++c) { + ImGui::PushID(c); + ImGui::ColorButton("",cmap[c]); + if (c != size -1) + ImGui::SameLine(); + ImGui::PopID(); + } + ImGui::PopID(); + } + if (custom.Size == 0) { + custom.push_back(ImVec4(1,1,1,1)); + custom.push_back(ImVec4(0.5f,0.5f,0.5f,1)); + } + ImGui::Separator(); + ImGui::BeginGroup(); + bool custom_set_now = custom_set; + if (!custom_set_now) + ImGui::PushStyleVar(ImGuiStyleVar_Alpha, 0.25f); + 
if (ImGui::Button("Custom", ImVec2(75, 0))) { + SetColormap(&custom[0], custom.Size); + BustItemCache(); + custom_set = true; + } + if (!custom_set_now) + ImGui::PopStyleVar(); + if (ImGui::Button("+", ImVec2((75 - ImGui::GetStyle().ItemSpacing.x)/2,0))) { + custom.push_back(ImVec4(0,0,0,1)); + if (custom_set) { + SetColormap(&custom[0], custom.Size); + BustItemCache(); + } + } + ImGui::SameLine(); + if (ImGui::Button("-", ImVec2((75 - ImGui::GetStyle().ItemSpacing.x)/2,0)) && custom.Size > 1) { + custom.pop_back(); + if (custom_set) { + SetColormap(&custom[0], custom.Size); + BustItemCache(); + } + } + ImGui::EndGroup(); + ImGui::SameLine(); + ImGui::BeginGroup(); + for (int c = 0; c < custom.Size; ++c) { + ImGui::PushID(c); + if (ImGui::ColorEdit4("##Col1", &custom[c].x, ImGuiColorEditFlags_NoInputs) && custom_set) { + SetColormap(&custom[0], custom.Size); + BustItemCache(); + } + if ((c + 1) % 12 != 0) + ImGui::SameLine(); + ImGui::PopID(); + } + ImGui::EndGroup(); + ImGui::EndTabItem(); + } + ImGui::EndTabBar(); + } +} + +void ShowUserGuide() { + ImGui::BulletText("Left-click drag within the plot area to pan X and Y axes."); + ImGui::Indent(); + ImGui::BulletText("Left-click drag on axis labels to pan an individual axis."); + ImGui::Unindent(); + ImGui::BulletText("Scroll in the plot area to zoom both X any Y axes."); + ImGui::Indent(); + ImGui::BulletText("Scroll on axis labels to zoom an individual axis."); + ImGui::Unindent(); + ImGui::BulletText("Right-click drag to box select data."); + ImGui::Indent(); + ImGui::BulletText("Hold Alt to expand box selection horizontally."); + ImGui::BulletText("Hold Shift to expand box selection vertically."); + ImGui::BulletText("Left-click while box selecting to cancel the selection."); + ImGui::Unindent(); + ImGui::BulletText("Double left-click to fit all visible data."); + ImGui::Indent(); + ImGui::BulletText("Double left-click axis labels to fit the individual axis."); + ImGui::Unindent(); + 
ImGui::BulletText("Right-click open the full plot context menu."); + ImGui::Indent(); + ImGui::BulletText("Right-click axis labels to open an individual axis context menu."); + ImGui::Unindent(); + ImGui::BulletText("Click legend label icons to show/hide plot items."); +} + +void ShowAxisMetrics(ImPlotAxis* axis, bool show_axis_rects) { + ImGui::Bullet(); ImGui::Text("Flags: %d", axis->Flags); + ImGui::Bullet(); ImGui::Text("Range: [%f,%f]",axis->Range.Min, axis->Range.Max); + ImGui::Bullet(); ImGui::Text("Pixels: %f", axis->Pixels); + ImGui::Bullet(); ImGui::Text("Aspect: %f", axis->GetAspect()); + ImGui::Bullet(); ImGui::Text("Dragging: %s", axis->Dragging ? "true" : "false"); + ImGui::Bullet(); ImGui::Text("ExtHovered: %s", axis->ExtHovered ? "true" : "false"); + ImGui::Bullet(); ImGui::Text("AllHovered: %s", axis->AllHovered ? "true" : "false"); + ImGui::Bullet(); ImGui::Text("Present: %s", axis->Present ? "true" : "false"); + ImGui::Bullet(); ImGui::Text("HasRange: %s", axis->HasRange ? 
"true" : "false"); + ImGui::Bullet(); ImGui::Text("LinkedMin: %p", axis->LinkedMin); + ImGui::Bullet(); ImGui::Text("LinkedMax: %p", axis->LinkedMax); + if (show_axis_rects) { + ImDrawList& fg = *ImGui::GetForegroundDrawList(); + fg.AddRect(axis->HoverRect.Min, axis->HoverRect.Max, IM_COL32(0,255,0,255)); + } +} + +void ShowMetricsWindow(bool* p_popen) { + + static bool show_plot_rects = false; + static bool show_axes_rects = false; + + ImDrawList& fg = *ImGui::GetForegroundDrawList(); + + ImPlotContext& gp = *GImPlot; + // ImGuiContext& g = *GImGui; + ImGuiIO& io = ImGui::GetIO(); + ImGui::Begin("ImPlot Metrics", p_popen); + ImGui::Text("ImPlot " IMPLOT_VERSION); + ImGui::Text("Application average %.3f ms/frame (%.1f FPS)", 1000.0f / io.Framerate, io.Framerate); + ImGui::Separator(); + int n_plots = gp.Plots.GetSize(); + if (ImGui::TreeNode("Tools")) { + ImGui::Checkbox("Show Plot Rects", &show_plot_rects); + ImGui::Checkbox("Show Axes Rects", &show_axes_rects); + ImGui::TreePop(); + } + if (ImGui::TreeNode("Plots","Plots (%d)", n_plots)) { + for (int p = 0; p < n_plots; ++p) { + // plot + ImPlotPlot* plot = gp.Plots.GetByIndex(p); + ImGui::PushID(p); + if (ImGui::TreeNode("Plot", "Plot [ID=%u]", plot->ID)) { + int n_items = plot->Items.GetSize(); + if (ImGui::TreeNode("Items", "Items (%d)", n_items)) { + for (int i = 0; i < n_items; ++i) { + ImPlotItem* item = plot->Items.GetByIndex(i); + ImGui::PushID(i); + if (ImGui::TreeNode("Item", "Item [ID=%u]", item->ID)) { + ImGui::Bullet(); ImGui::Checkbox("Show", &item->Show); + ImGui::Bullet(); ImGui::ColorEdit4("Color",&item->Color.x, ImGuiColorEditFlags_NoInputs); + ImGui::Bullet(); ImGui::Text("NameOffset: %d",item->NameOffset); + ImGui::Bullet(); ImGui::Text("Name: %s", item->NameOffset != -1 ? plot->LegendData.Labels.Buf.Data + item->NameOffset : "N/A"); + ImGui::Bullet(); ImGui::Value("Hovered: %s",item->LegendHovered ? 
"true" : "false"); + ImGui::TreePop(); + } + ImGui::PopID(); + } + ImGui::TreePop(); + } + if (ImGui::TreeNode("X-Axis")) { + ShowAxisMetrics(&plot->XAxis, show_axes_rects); + ImGui::TreePop(); + } + if (ImGui::TreeNode("Y-Axis")) { + ShowAxisMetrics(&plot->YAxis[0], show_axes_rects); + ImGui::TreePop(); + } + if (ImHasFlag(plot->Flags, ImPlotFlags_YAxis2) && ImGui::TreeNode("Y-Axis 2")) { + ShowAxisMetrics(&plot->YAxis[1], show_axes_rects); + ImGui::TreePop(); + } + if (ImHasFlag(plot->Flags, ImPlotFlags_YAxis3) && ImGui::TreeNode("Y-Axis 3")) { + ShowAxisMetrics(&plot->YAxis[2], show_axes_rects); + ImGui::TreePop(); + } + ImGui::Bullet(); ImGui::Text("Flags: %d", plot->Flags); + ImGui::Bullet(); ImGui::Text("Selecting: %s", plot->Selecting ? "true" : "false"); + ImGui::Bullet(); ImGui::Text("Querying: %s", plot->Querying ? "true" : "false"); + ImGui::Bullet(); ImGui::Text("Queried: %s", plot->Queried ? "true" : "false"); + ImGui::Bullet(); ImGui::Text("FrameHovered: %s", plot->FrameHovered ? "true" : "false"); + ImGui::Bullet(); ImGui::Text("PlotHovered: %s", plot->PlotHovered ? "true" : "false"); + ImGui::Bullet(); ImGui::Text("LegendHovered: %s", plot->LegendHovered ? 
"true" : "false"); + ImGui::TreePop(); + if (show_plot_rects) + fg.AddRect(plot->PlotRect.Min, plot->PlotRect.Max, IM_COL32(255,255,0,255)); + } + ImGui::PopID(); + } + ImGui::TreePop(); + } + ImGui::End(); +} + +bool ShowDatePicker(const char* id, int* level, ImPlotTime* t, const ImPlotTime* t1, const ImPlotTime* t2) { + + ImGui::PushID(id); + ImGui::BeginGroup(); + ImGui::PushStyleColor(ImGuiCol_Button, ImVec4(0,0,0,0)); + ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(0,0)); + + ImGuiStyle& style = ImGui::GetStyle(); + ImVec4 col_txt = style.Colors[ImGuiCol_Text]; + ImVec4 col_dis = style.Colors[ImGuiCol_TextDisabled]; + const float ht = ImGui::GetFrameHeight(); + ImVec2 cell_size(ht*1.25f,ht); + char buff[32]; + bool clk = false; + tm& Tm = GImPlot->Tm; + + const int min_yr = 1970; + const int max_yr = 2999; + + // t1 parts + int t1_mo = 0; int t1_md = 0; int t1_yr = 0; + if (t1 != NULL) { + GetTime(*t1,&Tm); + t1_mo = Tm.tm_mon; + t1_md = Tm.tm_mday; + t1_yr = Tm.tm_year + 1900; + } + + // t2 parts + int t2_mo = 0; int t2_md = 0; int t2_yr = 0; + if (t2 != NULL) { + GetTime(*t2,&Tm); + t2_mo = Tm.tm_mon; + t2_md = Tm.tm_mday; + t2_yr = Tm.tm_year + 1900; + } + + // day widget + if (*level == 0) { + *t = FloorTime(*t, ImPlotTimeUnit_Day); + GetTime(*t, &Tm); + const int this_year = Tm.tm_year + 1900; + const int last_year = this_year - 1; + const int next_year = this_year + 1; + const int this_mon = Tm.tm_mon; + const int last_mon = this_mon == 0 ? 11 : this_mon - 1; + const int next_mon = this_mon == 11 ? 0 : this_mon + 1; + const int days_this_mo = GetDaysInMonth(this_year, this_mon); + const int days_last_mo = GetDaysInMonth(this_mon == 0 ? 
last_year : this_year, last_mon); + ImPlotTime t_first_mo = FloorTime(*t,ImPlotTimeUnit_Mo); + GetTime(t_first_mo,&Tm); + const int first_wd = Tm.tm_wday; + // month year + snprintf(buff, 32, "%s %d", MONTH_NAMES[this_mon], this_year); + if (ImGui::Button(buff)) + *level = 1; + ImGui::SameLine(5*cell_size.x); + BeginDisabledControls(this_year <= min_yr && this_mon == 0); + if (ImGui::ArrowButtonEx("##Up",ImGuiDir_Up,cell_size)) + *t = AddTime(*t, ImPlotTimeUnit_Mo, -1); + EndDisabledControls(this_year <= min_yr && this_mon == 0); + ImGui::SameLine(); + BeginDisabledControls(this_year >= max_yr && this_mon == 11); + if (ImGui::ArrowButtonEx("##Down",ImGuiDir_Down,cell_size)) + *t = AddTime(*t, ImPlotTimeUnit_Mo, 1); + EndDisabledControls(this_year >= max_yr && this_mon == 11); + // render weekday abbreviations + ImGui::PushItemFlag(ImGuiItemFlags_Disabled, true); + for (int i = 0; i < 7; ++i) { + ImGui::Button(WD_ABRVS[i],cell_size); + if (i != 6) { ImGui::SameLine(); } + } + ImGui::PopItemFlag(); + // 0 = last mo, 1 = this mo, 2 = next mo + int mo = first_wd > 0 ? 0 : 1; + int day = mo == 1 ? 1 : days_last_mo - first_wd + 1; + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 7; ++j) { + if (mo == 0 && day > days_last_mo) { + mo = 1; day = 1; + } + else if (mo == 1 && day > days_this_mo) { + mo = 2; day = 1; + } + const int now_yr = (mo == 0 && this_mon == 0) ? last_year : ((mo == 2 && this_mon == 11) ? next_year : this_year); + const int now_mo = mo == 0 ? last_mon : (mo == 1 ? 
this_mon : next_mon); + const int now_md = day; + + const bool off_mo = mo == 0 || mo == 2; + const bool t1_or_t2 = (t1 != NULL && t1_mo == now_mo && t1_yr == now_yr && t1_md == now_md) || + (t2 != NULL && t2_mo == now_mo && t2_yr == now_yr && t2_md == now_md); + + if (off_mo) + ImGui::PushStyleColor(ImGuiCol_Text, col_dis); + if (t1_or_t2) { + ImGui::PushStyleColor(ImGuiCol_Button, col_dis); + ImGui::PushStyleColor(ImGuiCol_Text, col_txt); + } + ImGui::PushID(i*7+j); + snprintf(buff,32,"%d",day); + if (now_yr == min_yr-1 || now_yr == max_yr+1) { + ImGui::Dummy(cell_size); + } + else if (ImGui::Button(buff,cell_size) && !clk) { + *t = MakeTime(now_yr, now_mo, now_md); + clk = true; + } + ImGui::PopID(); + if (t1_or_t2) + ImGui::PopStyleColor(2); + if (off_mo) + ImGui::PopStyleColor(); + if (j != 6) + ImGui::SameLine(); + day++; + } + } + } + // month widget + else if (*level == 1) { + *t = FloorTime(*t, ImPlotTimeUnit_Mo); + GetTime(*t, &Tm); + int this_yr = Tm.tm_year + 1900; + snprintf(buff, 32, "%d", this_yr); + if (ImGui::Button(buff)) + *level = 2; + BeginDisabledControls(this_yr <= min_yr); + ImGui::SameLine(5*cell_size.x); + if (ImGui::ArrowButtonEx("##Up",ImGuiDir_Up,cell_size)) + *t = AddTime(*t, ImPlotTimeUnit_Yr, -1); + EndDisabledControls(this_yr <= min_yr); + ImGui::SameLine(); + BeginDisabledControls(this_yr >= max_yr); + if (ImGui::ArrowButtonEx("##Down",ImGuiDir_Down,cell_size)) + *t = AddTime(*t, ImPlotTimeUnit_Yr, 1); + EndDisabledControls(this_yr >= max_yr); + // ImGui::Dummy(cell_size); + cell_size.x *= 7.0f/4.0f; + cell_size.y *= 7.0f/3.0f; + int mo = 0; + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 4; ++j) { + const bool t1_or_t2 = (t1 != NULL && t1_yr == this_yr && t1_mo == mo) || + (t2 != NULL && t2_yr == this_yr && t2_mo == mo); + if (t1_or_t2) + ImGui::PushStyleColor(ImGuiCol_Button, col_dis); + if (ImGui::Button(MONTH_ABRVS[mo],cell_size) && !clk) { + *t = MakeTime(this_yr, mo); + *level = 0; + } + if (t1_or_t2) + 
ImGui::PopStyleColor(); + if (j != 3) + ImGui::SameLine(); + mo++; + } + } + } + else if (*level == 2) { + *t = FloorTime(*t, ImPlotTimeUnit_Yr); + int this_yr = GetYear(*t); + int yr = this_yr - this_yr % 20; + ImGui::PushItemFlag(ImGuiItemFlags_Disabled, true); + snprintf(buff,32,"%d-%d",yr,yr+19); + ImGui::Button(buff); + ImGui::PopItemFlag(); + ImGui::SameLine(5*cell_size.x); + BeginDisabledControls(yr <= min_yr); + if (ImGui::ArrowButtonEx("##Up",ImGuiDir_Up,cell_size)) + *t = MakeTime(yr-20); + EndDisabledControls(yr <= min_yr); + ImGui::SameLine(); + BeginDisabledControls(yr + 20 >= max_yr); + if (ImGui::ArrowButtonEx("##Down",ImGuiDir_Down,cell_size)) + *t = MakeTime(yr+20); + EndDisabledControls(yr+ 20 >= max_yr); + // ImGui::Dummy(cell_size); + cell_size.x *= 7.0f/4.0f; + cell_size.y *= 7.0f/5.0f; + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < 4; ++j) { + const bool t1_or_t2 = (t1 != NULL && t1_yr == yr) || (t2 != NULL && t2_yr == yr); + if (t1_or_t2) + ImGui::PushStyleColor(ImGuiCol_Button, col_dis); + snprintf(buff,32,"%d",yr); + if (yr<1970||yr>3000) { + ImGui::Dummy(cell_size); + } + else if (ImGui::Button(buff,cell_size)) { + *t = MakeTime(yr); + *level = 1; + } + if (t1_or_t2) + ImGui::PopStyleColor(); + if (j != 3) + ImGui::SameLine(); + yr++; + } + } + } + ImGui::PopStyleVar(); + ImGui::PopStyleColor(); + ImGui::EndGroup(); + ImGui::PopID(); + return clk; +} + +bool ShowTimePicker(const char* id, ImPlotTime* t) { + ImGui::PushID(id); + tm& Tm = GImPlot->Tm; + GetTime(*t,&Tm); + + static const char* nums[] = { "00","01","02","03","04","05","06","07","08","09", + "10","11","12","13","14","15","16","17","18","19", + "20","21","22","23","24","25","26","27","28","29", + "30","31","32","33","34","35","36","37","38","39", + "40","41","42","43","44","45","46","47","48","49", + "50","51","52","53","54","55","56","57","58","59"}; + + static const char* am_pm[] = {"am","pm"}; + + bool hour24 = GImPlot->Style.Use24HourClock; + + int hr = hour24 ? 
Tm.tm_hour : ((Tm.tm_hour == 0 || Tm.tm_hour == 12) ? 12 : Tm.tm_hour % 12); + int min = Tm.tm_min; + int sec = Tm.tm_sec; + int ap = Tm.tm_hour < 12 ? 0 : 1; + + bool changed = false; + + ImVec2 spacing = ImGui::GetStyle().ItemSpacing; + spacing.x = 0; + float width = ImGui::CalcTextSize("888").x; + float height = ImGui::GetFrameHeight(); + + ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, spacing); + ImGui::PushStyleVar(ImGuiStyleVar_ScrollbarSize,2.0f); + ImGui::PushStyleColor(ImGuiCol_FrameBg, ImVec4(0,0,0,0)); + ImGui::PushStyleColor(ImGuiCol_Button, ImVec4(0,0,0,0)); + ImGui::PushStyleColor(ImGuiCol_FrameBgHovered, ImGui::GetStyleColorVec4(ImGuiCol_ButtonHovered)); + + ImGui::SetNextItemWidth(width); + if (ImGui::BeginCombo("##hr",nums[hr],ImGuiComboFlags_NoArrowButton)) { + const int ia = hour24 ? 0 : 1; + const int ib = hour24 ? 24 : 13; + for (int i = ia; i < ib; ++i) { + if (ImGui::Selectable(nums[i],i==hr)) { + hr = i; + changed = true; + } + } + ImGui::EndCombo(); + } + ImGui::SameLine(); + ImGui::Text(":"); + ImGui::SameLine(); + ImGui::SetNextItemWidth(width); + if (ImGui::BeginCombo("##min",nums[min],ImGuiComboFlags_NoArrowButton)) { + for (int i = 0; i < 60; ++i) { + if (ImGui::Selectable(nums[i],i==min)) { + min = i; + changed = true; + } + } + ImGui::EndCombo(); + } + ImGui::SameLine(); + ImGui::Text(":"); + ImGui::SameLine(); + ImGui::SetNextItemWidth(width); + if (ImGui::BeginCombo("##sec",nums[sec],ImGuiComboFlags_NoArrowButton)) { + for (int i = 0; i < 60; ++i) { + if (ImGui::Selectable(nums[i],i==sec)) { + sec = i; + changed = true; + } + } + ImGui::EndCombo(); + } + if (!hour24) { + ImGui::SameLine(); + if (ImGui::Button(am_pm[ap],ImVec2(height,height))) { + ap = 1 - ap; + changed = true; + } + } + + ImGui::PopStyleColor(3); + ImGui::PopStyleVar(2); + ImGui::PopID(); + + if (changed) { + if (!hour24) + hr = hr % 12 + ap * 12; + Tm.tm_hour = hr; + Tm.tm_min = min; + Tm.tm_sec = sec; + *t = MkTime(&Tm); + } + + return changed; +} + +void 
StyleColorsAuto(ImPlotStyle* dst) { + ImPlotStyle* style = dst ? dst : &ImPlot::GetStyle(); + ImVec4* colors = style->Colors; + + style->MinorAlpha = 0.25f; + + colors[ImPlotCol_Line] = IMPLOT_AUTO_COL; + colors[ImPlotCol_Fill] = IMPLOT_AUTO_COL; + colors[ImPlotCol_MarkerOutline] = IMPLOT_AUTO_COL; + colors[ImPlotCol_MarkerFill] = IMPLOT_AUTO_COL; + colors[ImPlotCol_ErrorBar] = IMPLOT_AUTO_COL; + colors[ImPlotCol_FrameBg] = IMPLOT_AUTO_COL; + colors[ImPlotCol_PlotBg] = IMPLOT_AUTO_COL; + colors[ImPlotCol_PlotBorder] = IMPLOT_AUTO_COL; + colors[ImPlotCol_LegendBg] = IMPLOT_AUTO_COL; + colors[ImPlotCol_LegendBorder] = IMPLOT_AUTO_COL; + colors[ImPlotCol_LegendText] = IMPLOT_AUTO_COL; + colors[ImPlotCol_TitleText] = IMPLOT_AUTO_COL; + colors[ImPlotCol_InlayText] = IMPLOT_AUTO_COL; + colors[ImPlotCol_PlotBorder] = IMPLOT_AUTO_COL; + colors[ImPlotCol_XAxis] = IMPLOT_AUTO_COL; + colors[ImPlotCol_XAxisGrid] = IMPLOT_AUTO_COL; + colors[ImPlotCol_YAxis] = IMPLOT_AUTO_COL; + colors[ImPlotCol_YAxisGrid] = IMPLOT_AUTO_COL; + colors[ImPlotCol_YAxis2] = IMPLOT_AUTO_COL; + colors[ImPlotCol_YAxisGrid2] = IMPLOT_AUTO_COL; + colors[ImPlotCol_YAxis3] = IMPLOT_AUTO_COL; + colors[ImPlotCol_YAxisGrid3] = IMPLOT_AUTO_COL; + colors[ImPlotCol_Selection] = IMPLOT_AUTO_COL; + colors[ImPlotCol_Query] = IMPLOT_AUTO_COL; + colors[ImPlotCol_Crosshairs] = IMPLOT_AUTO_COL; +} + +void StyleColorsClassic(ImPlotStyle* dst) { + ImPlotStyle* style = dst ? 
dst : &ImPlot::GetStyle(); + ImVec4* colors = style->Colors; + + style->MinorAlpha = 0.5f; + + colors[ImPlotCol_Line] = IMPLOT_AUTO_COL; + colors[ImPlotCol_Fill] = IMPLOT_AUTO_COL; + colors[ImPlotCol_MarkerOutline] = IMPLOT_AUTO_COL; + colors[ImPlotCol_MarkerFill] = IMPLOT_AUTO_COL; + colors[ImPlotCol_ErrorBar] = ImVec4(0.90f, 0.90f, 0.90f, 1.00f); + colors[ImPlotCol_FrameBg] = ImVec4(0.43f, 0.43f, 0.43f, 0.39f); + colors[ImPlotCol_PlotBg] = ImVec4(0.00f, 0.00f, 0.00f, 0.35f); + colors[ImPlotCol_PlotBorder] = ImVec4(0.50f, 0.50f, 0.50f, 0.50f); + colors[ImPlotCol_LegendBg] = ImVec4(0.11f, 0.11f, 0.14f, 0.92f); + colors[ImPlotCol_LegendBorder] = ImVec4(0.50f, 0.50f, 0.50f, 0.50f); + colors[ImPlotCol_LegendText] = ImVec4(0.90f, 0.90f, 0.90f, 1.00f); + colors[ImPlotCol_TitleText] = ImVec4(0.90f, 0.90f, 0.90f, 1.00f); + colors[ImPlotCol_InlayText] = ImVec4(0.90f, 0.90f, 0.90f, 1.00f); + colors[ImPlotCol_XAxis] = ImVec4(0.90f, 0.90f, 0.90f, 1.00f); + colors[ImPlotCol_XAxisGrid] = ImVec4(0.90f, 0.90f, 0.90f, 0.25f); + colors[ImPlotCol_YAxis] = ImVec4(0.90f, 0.90f, 0.90f, 1.00f); + colors[ImPlotCol_YAxisGrid] = ImVec4(0.90f, 0.90f, 0.90f, 0.25f); + colors[ImPlotCol_YAxis2] = ImVec4(0.90f, 0.90f, 0.90f, 1.00f); + colors[ImPlotCol_YAxisGrid2] = ImVec4(0.90f, 0.90f, 0.90f, 0.25f); + colors[ImPlotCol_YAxis3] = ImVec4(0.90f, 0.90f, 0.90f, 1.00f); + colors[ImPlotCol_YAxisGrid3] = ImVec4(0.90f, 0.90f, 0.90f, 0.25f); + colors[ImPlotCol_Selection] = ImVec4(0.97f, 0.97f, 0.39f, 1.00f); + colors[ImPlotCol_Query] = ImVec4(0.00f, 1.00f, 0.59f, 1.00f); + colors[ImPlotCol_Crosshairs] = ImVec4(0.50f, 0.50f, 0.50f, 0.75f); +} + +void StyleColorsDark(ImPlotStyle* dst) { + ImPlotStyle* style = dst ? 
dst : &ImPlot::GetStyle(); + ImVec4* colors = style->Colors; + + style->MinorAlpha = 0.25f; + + colors[ImPlotCol_Line] = IMPLOT_AUTO_COL; + colors[ImPlotCol_Fill] = IMPLOT_AUTO_COL; + colors[ImPlotCol_MarkerOutline] = IMPLOT_AUTO_COL; + colors[ImPlotCol_MarkerFill] = IMPLOT_AUTO_COL; + colors[ImPlotCol_ErrorBar] = IMPLOT_AUTO_COL; + colors[ImPlotCol_FrameBg] = ImVec4(1.00f, 1.00f, 1.00f, 0.07f); + colors[ImPlotCol_PlotBg] = ImVec4(0.00f, 0.00f, 0.00f, 0.50f); + colors[ImPlotCol_PlotBorder] = ImVec4(0.43f, 0.43f, 0.50f, 0.50f); + colors[ImPlotCol_LegendBg] = ImVec4(0.08f, 0.08f, 0.08f, 0.94f); + colors[ImPlotCol_LegendBorder] = ImVec4(0.43f, 0.43f, 0.50f, 0.50f); + colors[ImPlotCol_LegendText] = ImVec4(1.00f, 1.00f, 1.00f, 1.00f); + colors[ImPlotCol_TitleText] = ImVec4(1.00f, 1.00f, 1.00f, 1.00f); + colors[ImPlotCol_InlayText] = ImVec4(1.00f, 1.00f, 1.00f, 1.00f); + colors[ImPlotCol_XAxis] = ImVec4(1.00f, 1.00f, 1.00f, 1.00f); + colors[ImPlotCol_XAxisGrid] = ImVec4(1.00f, 1.00f, 1.00f, 0.25f); + colors[ImPlotCol_YAxis] = ImVec4(1.00f, 1.00f, 1.00f, 1.00f); + colors[ImPlotCol_YAxisGrid] = ImVec4(1.00f, 1.00f, 1.00f, 0.25f); + colors[ImPlotCol_YAxis2] = ImVec4(1.00f, 1.00f, 1.00f, 1.00f); + colors[ImPlotCol_YAxisGrid2] = ImVec4(1.00f, 1.00f, 1.00f, 0.25f); + colors[ImPlotCol_YAxis3] = ImVec4(1.00f, 1.00f, 1.00f, 1.00f); + colors[ImPlotCol_YAxisGrid3] = ImVec4(1.00f, 1.00f, 1.00f, 0.25f); + colors[ImPlotCol_Selection] = ImVec4(1.00f, 0.60f, 0.00f, 1.00f); + colors[ImPlotCol_Query] = ImVec4(0.00f, 1.00f, 0.44f, 1.00f); + colors[ImPlotCol_Crosshairs] = ImVec4(1.00f, 1.00f, 1.00f, 0.50f); +} + +void StyleColorsLight(ImPlotStyle* dst) { + ImPlotStyle* style = dst ? 
dst : &ImPlot::GetStyle(); + ImVec4* colors = style->Colors; + + style->MinorAlpha = 1.0f; + + colors[ImPlotCol_Line] = IMPLOT_AUTO_COL; + colors[ImPlotCol_Fill] = IMPLOT_AUTO_COL; + colors[ImPlotCol_MarkerOutline] = IMPLOT_AUTO_COL; + colors[ImPlotCol_MarkerFill] = IMPLOT_AUTO_COL; + colors[ImPlotCol_ErrorBar] = IMPLOT_AUTO_COL; + colors[ImPlotCol_FrameBg] = ImVec4(1.00f, 1.00f, 1.00f, 1.00f); + colors[ImPlotCol_PlotBg] = ImVec4(0.42f, 0.57f, 1.00f, 0.13f); + colors[ImPlotCol_PlotBorder] = ImVec4(0.00f, 0.00f, 0.00f, 0.00f); + colors[ImPlotCol_LegendBg] = ImVec4(1.00f, 1.00f, 1.00f, 0.98f); + colors[ImPlotCol_LegendBorder] = ImVec4(0.82f, 0.82f, 0.82f, 0.80f); + colors[ImPlotCol_LegendText] = ImVec4(0.00f, 0.00f, 0.00f, 1.00f); + colors[ImPlotCol_TitleText] = ImVec4(0.00f, 0.00f, 0.00f, 1.00f); + colors[ImPlotCol_InlayText] = ImVec4(0.00f, 0.00f, 0.00f, 1.00f); + colors[ImPlotCol_XAxis] = ImVec4(0.00f, 0.00f, 0.00f, 1.00f); + colors[ImPlotCol_XAxisGrid] = ImVec4(1.00f, 1.00f, 1.00f, 1.00f); + colors[ImPlotCol_YAxis] = ImVec4(0.00f, 0.00f, 0.00f, 1.00f); + colors[ImPlotCol_YAxisGrid] = ImVec4(1.00f, 1.00f, 1.00f, 1.00f); + colors[ImPlotCol_YAxis2] = ImVec4(0.00f, 0.00f, 0.00f, 1.00f); + colors[ImPlotCol_YAxisGrid2] = ImVec4(0.00f, 0.00f, 0.00f, 0.50f); + colors[ImPlotCol_YAxis3] = ImVec4(0.00f, 0.00f, 0.00f, 1.00f); + colors[ImPlotCol_YAxisGrid3] = ImVec4(0.00f, 0.00f, 0.00f, 0.50f); + colors[ImPlotCol_Selection] = ImVec4(0.82f, 0.64f, 0.03f, 1.00f); + colors[ImPlotCol_Query] = ImVec4(0.00f, 0.84f, 0.37f, 1.00f); + colors[ImPlotCol_Crosshairs] = ImVec4(0.00f, 0.00f, 0.00f, 0.50f); +} + +} // namespace ImPlot diff --git a/cpp-projects/3d-engine/imgui/extra/implot/implot.h b/cpp-projects/3d-engine/imgui/extra/implot/implot.h new file mode 100644 index 0000000..63e67c8 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/implot/implot.h @@ -0,0 +1,679 @@ +// MIT License + +// Copyright (c) 2020 Evan Pezent + +// Permission is hereby granted, free of charge, to any 
person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// ImPlot v0.9 WIP + +#pragma once +#include "imgui/imgui.h" + +//----------------------------------------------------------------------------- +// Macros and Defines +//----------------------------------------------------------------------------- + +// Define attributes of all API symbols declarations (e.g. for DLL under Windows) +// Using ImPlot via a shared library is not recommended, because we don't guarantee +// backward nor forward ABI compatibility and also function call overhead. If you +// do use ImPlot as a DLL, be sure to call SetImGuiContext (details below). +#ifndef IMPLOT_API +#define IMPLOT_API +#endif + +// ImPlot version string +#define IMPLOT_VERSION "0.9 WIP" +// Indicates variable should deduced automatically. +#define IMPLOT_AUTO -1 +// Special color used to indicate that a color should be deduced automatically. 
#define IMPLOT_AUTO_COL ImVec4(0,0,0,-1)

//-----------------------------------------------------------------------------
// Forward Declarations and Basic Types
//-----------------------------------------------------------------------------

// Forward declarations
struct ImPlotContext;          // ImPlot context (opaque struct, see implot_internal.h)

// Enums/Flags
// Plain int aliases so that bit-flag combinations and enumerators can be
// passed through the API without casts.
typedef int ImPlotFlags;       // -> enum ImPlotFlags_
typedef int ImPlotAxisFlags;   // -> enum ImPlotAxisFlags_
typedef int ImPlotCol;         // -> enum ImPlotCol_
typedef int ImPlotStyleVar;    // -> enum ImPlotStyleVar_
typedef int ImPlotMarker;      // -> enum ImPlotMarker_
typedef int ImPlotColormap;    // -> enum ImPlotColormap_
typedef int ImPlotLocation;    // -> enum ImPlotLocation_
typedef int ImPlotOrientation; // -> enum ImPlotOrientation_
typedef int ImPlotYAxis;       // -> enum ImPlotYAxis_;

// Options for plots. Each option is a distinct bit so values can be OR-ed together.
enum ImPlotFlags_ {
    ImPlotFlags_None        = 0,       // default
    ImPlotFlags_NoTitle     = 1 << 0,  // the plot title will not be displayed (titles are also hidden if preceded by double hashes, e.g. "##MyPlot")
    ImPlotFlags_NoLegend    = 1 << 1,  // the legend will not be displayed
    ImPlotFlags_NoMenus     = 1 << 2,  // the user will not be able to open context menus with right-click
    ImPlotFlags_NoBoxSelect = 1 << 3,  // the user will not be able to box-select with right-click drag
    ImPlotFlags_NoMousePos  = 1 << 4,  // the mouse position, in plot coordinates, will not be displayed inside of the plot
    ImPlotFlags_NoHighlight = 1 << 5,  // plot items will not be highlighted when their legend entry is hovered
    ImPlotFlags_NoChild     = 1 << 6,  // a child window region will not be used to capture mouse scroll (can boost performance for single ImGui window applications)
    ImPlotFlags_Equal       = 1 << 7,  // primary x and y axes will be constrained to have the same units/pixel (does not apply to auxiliary y-axes)
    ImPlotFlags_YAxis2      = 1 << 8,  // enable a 2nd y-axis on the right side
    ImPlotFlags_YAxis3      = 1 << 9,  // enable a 3rd y-axis on the right side
    ImPlotFlags_Query       = 1 << 10, // the user will be able to draw query rects with middle-mouse or CTRL + right-click drag
    ImPlotFlags_Crosshairs  = 1 << 11, // the default mouse cursor will be replaced with a crosshair when hovered
    ImPlotFlags_AntiAliased = 1 << 12, // plot lines will be software anti-aliased (not recommended for high density plots, prefer MSAA)
    // convenience combination of the five "No*" flags listed below
    ImPlotFlags_CanvasOnly  = ImPlotFlags_NoTitle | ImPlotFlags_NoLegend | ImPlotFlags_NoMenus | ImPlotFlags_NoBoxSelect | ImPlotFlags_NoMousePos
};

// Options for plot axes (X and Y).
// Each option is a distinct bit so values can be OR-ed together; the last two
// enumerators are convenience combinations.
enum ImPlotAxisFlags_ {
    ImPlotAxisFlags_None          = 0,      // default
    ImPlotAxisFlags_NoLabel       = 1 << 0, // the axis label will not be displayed (axis labels also hidden if the supplied string name is NULL)
    ImPlotAxisFlags_NoGridLines   = 1 << 1, // the axis grid lines will not be displayed
    ImPlotAxisFlags_NoTickMarks   = 1 << 2, // the axis tick marks will not be displayed
    ImPlotAxisFlags_NoTickLabels  = 1 << 3, // the axis tick labels will not be displayed
    ImPlotAxisFlags_LogScale      = 1 << 4, // a logarithmic (base 10) axis scale will be used (mutually exclusive with ImPlotAxisFlags_Time)
    ImPlotAxisFlags_Time          = 1 << 5, // axis will display date/time formatted labels (mutually exclusive with ImPlotAxisFlags_LogScale)
    ImPlotAxisFlags_Invert        = 1 << 6, // the axis will be inverted
    ImPlotAxisFlags_LockMin       = 1 << 7, // the axis minimum value will be locked when panning/zooming
    ImPlotAxisFlags_LockMax       = 1 << 8, // the axis maximum value will be locked when panning/zooming
    ImPlotAxisFlags_Lock          = ImPlotAxisFlags_LockMin | ImPlotAxisFlags_LockMax,
    ImPlotAxisFlags_NoDecorations = ImPlotAxisFlags_NoLabel | ImPlotAxisFlags_NoGridLines | ImPlotAxisFlags_NoTickMarks | ImPlotAxisFlags_NoTickLabels
};

// Plot styling colors.
// Enumerators are sequential from 0; ImPlotCol_COUNT is the number of entries.
enum ImPlotCol_ {
    // item styling colors
    ImPlotCol_Line,          // plot line/outline color (defaults to next unused color in current colormap)
    ImPlotCol_Fill,          // plot fill color for bars (defaults to the current line color)
    ImPlotCol_MarkerOutline, // marker outline color (defaults to the current line color)
    ImPlotCol_MarkerFill,    // marker fill color (defaults to the current line color)
    ImPlotCol_ErrorBar,      // error bar color (defaults to ImGuiCol_Text)
    // plot styling colors
    ImPlotCol_FrameBg,       // plot frame background color (defaults to ImGuiCol_FrameBg)
    ImPlotCol_PlotBg,        // plot area background color (defaults to ImGuiCol_WindowBg)
    ImPlotCol_PlotBorder,    // plot area border color (defaults to ImGuiCol_Border)
    ImPlotCol_LegendBg,      // legend background color (defaults to ImGuiCol_PopupBg)
    ImPlotCol_LegendBorder,  // legend border color (defaults to ImPlotCol_PlotBorder)
    ImPlotCol_LegendText,    // legend text color (defaults to ImPlotCol_InlayText)
    ImPlotCol_TitleText,     // plot title text color (defaults to ImGuiCol_Text)
    ImPlotCol_InlayText,     // color of text appearing inside of plots (defaults to ImGuiCol_Text)
    ImPlotCol_XAxis,         // x-axis label and tick labels color (defaults to ImGuiCol_Text)
    ImPlotCol_XAxisGrid,     // x-axis grid color (defaults to 25% ImPlotCol_XAxis)
    ImPlotCol_YAxis,         // y-axis label and tick labels color (defaults to ImGuiCol_Text)
    ImPlotCol_YAxisGrid,     // y-axis grid color (defaults to 25% ImPlotCol_YAxis)
    ImPlotCol_YAxis2,        // 2nd y-axis label and tick labels color (defaults to ImGuiCol_Text)
    ImPlotCol_YAxisGrid2,    // 2nd y-axis grid/label color (defaults to 25% ImPlotCol_YAxis2)
    ImPlotCol_YAxis3,        // 3rd y-axis label and tick labels color (defaults to ImGuiCol_Text)
    ImPlotCol_YAxisGrid3,    // 3rd y-axis grid/label color (defaults to 25% ImPlotCol_YAxis3)
    ImPlotCol_Selection,     // box-selection color (defaults to yellow)
    ImPlotCol_Query,         // box-query color (defaults to green)
    ImPlotCol_Crosshairs,    // crosshairs color (defaults to ImPlotCol_PlotBorder)
    ImPlotCol_COUNT
};

// Plot styling variables.
// The leading word of each comment is the value type expected for that variable.
enum ImPlotStyleVar_ {
    // item styling variables
    ImPlotStyleVar_LineWeight,         // float,  plot item line weight in pixels
    ImPlotStyleVar_Marker,             // int,    marker specification
    ImPlotStyleVar_MarkerSize,         // float,  marker size in pixels (roughly the marker's "radius")
    ImPlotStyleVar_MarkerWeight,       // float,  plot outline weight of markers in pixels
    ImPlotStyleVar_FillAlpha,          // float,  alpha modifier applied to all plot item fills
    ImPlotStyleVar_ErrorBarSize,       // float,  error bar whisker width in pixels
    ImPlotStyleVar_ErrorBarWeight,     // float,  error bar whisker weight in pixels
    ImPlotStyleVar_DigitalBitHeight,   // float,  digital channels bit height (at 1) in pixels
    ImPlotStyleVar_DigitalBitGap,      // float,  digital channels bit padding gap in pixels
    // plot styling variables
    ImPlotStyleVar_PlotBorderSize,     // float,  thickness of border around plot area
    ImPlotStyleVar_MinorAlpha,         // float,  alpha multiplier applied to minor axis grid lines
    ImPlotStyleVar_MajorTickLen,       // ImVec2, major tick lengths for X and Y axes
    ImPlotStyleVar_MinorTickLen,       // ImVec2, minor tick lengths for X and Y axes
    ImPlotStyleVar_MajorTickSize,      // ImVec2, line thickness of major ticks
    ImPlotStyleVar_MinorTickSize,      // ImVec2, line thickness of minor ticks
    ImPlotStyleVar_MajorGridSize,      // ImVec2, line thickness of major grid lines
    ImPlotStyleVar_MinorGridSize,      // ImVec2, line thickness of minor grid lines
    ImPlotStyleVar_PlotPadding,        // ImVec2, padding between widget frame and plot area, labels, or outside legends (i.e. main padding)
    ImPlotStyleVar_LabelPadding,       // ImVec2, padding between axes labels, tick labels, and plot edge
    ImPlotStyleVar_LegendPadding,      // ImVec2, legend padding from plot edges
    ImPlotStyleVar_LegendInnerPadding, // ImVec2, legend inner padding from legend edges
    ImPlotStyleVar_LegendSpacing,      // ImVec2, spacing between legend entries
    ImPlotStyleVar_MousePosPadding,    // ImVec2, padding between plot edge and interior info text
    ImPlotStyleVar_AnnotationPadding,  // ImVec2, text padding around annotation labels
    ImPlotStyleVar_FitPadding,         // ImVec2, additional fit padding as a percentage of the fit extents (e.g. ImVec2(0.1f,0.1f) adds 10% to the fit extents of X and Y)
    ImPlotStyleVar_PlotDefaultSize,    // ImVec2, default size used when ImVec2(0,0) is passed to BeginPlot
    ImPlotStyleVar_PlotMinSize,        // ImVec2, minimum size plot frame can be when shrunk
    ImPlotStyleVar_COUNT
};

// Marker specifications.
// ImPlotMarker_None is -1, so the real markers are numbered from 0 and
// ImPlotMarker_COUNT is the number of real markers.
enum ImPlotMarker_ {
    ImPlotMarker_None = -1, // no marker
    ImPlotMarker_Circle,    // a circle marker
    ImPlotMarker_Square,    // a square marker
    ImPlotMarker_Diamond,   // a diamond marker
    ImPlotMarker_Up,        // an upward-pointing triangle marker
    ImPlotMarker_Down,      // a downward-pointing triangle marker
    ImPlotMarker_Left,      // a leftward-pointing triangle marker
    ImPlotMarker_Right,     // a rightward-pointing triangle marker
    ImPlotMarker_Cross,     // a cross marker (not fillable)
    ImPlotMarker_Plus,      // a plus marker (not fillable)
    ImPlotMarker_Asterisk,  // an asterisk marker (not fillable)
    ImPlotMarker_COUNT
};

// Built-in colormaps
enum ImPlotColormap_ {
    ImPlotColormap_Default  = 0,  // ImPlot default colormap         (n=10)
    ImPlotColormap_Deep     = 1,  // a.k.a. seaborn deep             (n=10)
    ImPlotColormap_Dark     = 2,  // a.k.a. matplotlib "Set1"        (n=9)
    ImPlotColormap_Pastel   = 3,  // a.k.a. matplotlib "Pastel1"     (n=9)
    ImPlotColormap_Paired   = 4,  // a.k.a. matplotlib "Paired"      (n=12)
    ImPlotColormap_Viridis  = 5,  // a.k.a. matplotlib "viridis"     (n=11)
    ImPlotColormap_Plasma   = 6,  // a.k.a. matplotlib "plasma"      (n=11)
    ImPlotColormap_Hot      = 7,  // a.k.a. matplotlib/MATLAB "hot"  (n=11)
    ImPlotColormap_Cool     = 8,  // a.k.a. matplotlib/MATLAB "cool" (n=11)
    ImPlotColormap_Pink     = 9,  // a.k.a. matplotlib/MATLAB "pink" (n=11)
    ImPlotColormap_Jet      = 10, // a.k.a. MATLAB "jet"             (n=11)
    ImPlotColormap_COUNT
};

// Used to position items on a plot (e.g. legends, labels, etc.)
// The four cardinal directions are single bits; corners are the OR of two of them.
enum ImPlotLocation_ {
    ImPlotLocation_Center    = 0,                                          // center-center
    ImPlotLocation_North     = 1 << 0,                                     // top-center
    ImPlotLocation_South     = 1 << 1,                                     // bottom-center
    ImPlotLocation_West      = 1 << 2,                                     // center-left
    ImPlotLocation_East      = 1 << 3,                                     // center-right
    ImPlotLocation_NorthWest = ImPlotLocation_North | ImPlotLocation_West, // top-left
    ImPlotLocation_NorthEast = ImPlotLocation_North | ImPlotLocation_East, // top-right
    ImPlotLocation_SouthWest = ImPlotLocation_South | ImPlotLocation_West, // bottom-left
    ImPlotLocation_SouthEast = ImPlotLocation_South | ImPlotLocation_East  // bottom-right
};

// Used to orient items on a plot (e.g. legends, labels, etc.)
enum ImPlotOrientation_ {
    ImPlotOrientation_Horizontal, // left/right
    ImPlotOrientation_Vertical    // up/down
};

// Enums for different y-axes.
enum ImPlotYAxis_ {
    ImPlotYAxis_1 = 0, // left (default)
    ImPlotYAxis_2 = 1, // first on right side
    ImPlotYAxis_3 = 2  // second on right side
};

// Double precision version of ImVec2 used by ImPlot. Extensible by end users.
+struct ImPlotPoint { + double x, y; + ImPlotPoint() { x = y = 0.0; } + ImPlotPoint(double _x, double _y) { x = _x; y = _y; } + ImPlotPoint(const ImVec2& p) { x = p.x; y = p.y; } + double operator[] (size_t idx) const { return (&x)[idx]; } + double& operator[] (size_t idx) { return (&x)[idx]; } +#ifdef IMPLOT_POINT_CLASS_EXTRA + IMPLOT_POINT_CLASS_EXTRA // Define additional constructors and implicit cast operators in imconfig.h + // to convert back and forth between your math types and ImPlotPoint. +#endif +}; + +// A range defined by a min/max value. Used for plot axes ranges. +struct ImPlotRange { + double Min, Max; + ImPlotRange() { Min = 0; Max = 0; } + ImPlotRange(double _min, double _max) { Min = _min; Max = _max; } + bool Contains(double value) const { return value >= Min && value <= Max; }; + double Size() const { return Max - Min; }; +}; + +// Combination of two ranges for X and Y axes. +struct ImPlotLimits { + ImPlotRange X, Y; + bool Contains(const ImPlotPoint& p) const { return Contains(p.x, p.y); } + bool Contains(double x, double y) const { return X.Contains(x) && Y.Contains(y); } +}; + +// Plot style structure +struct ImPlotStyle { + // item styling variables + float LineWeight; // = 1, item line weight in pixels + int Marker; // = ImPlotMarker_None, marker specification + float MarkerSize; // = 4, marker size in pixels (roughly the marker's "radius") + float MarkerWeight; // = 1, outline weight of markers in pixels + float FillAlpha; // = 1, alpha modifier applied to plot fills + float ErrorBarSize; // = 5, error bar whisker width in pixels + float ErrorBarWeight; // = 1.5, error bar whisker weight in pixels + float DigitalBitHeight; // = 8, digital channels bit height (at y = 1.0f) in pixels + float DigitalBitGap; // = 4, digital channels bit padding gap in pixels + // plot styling variables + float PlotBorderSize; // = 1, line thickness of border around plot area + float MinorAlpha; // = 0.25 alpha multiplier applied to minor axis grid lines + 
ImVec2 MajorTickLen; // = 10,10 major tick lengths for X and Y axes + ImVec2 MinorTickLen; // = 5,5 minor tick lengths for X and Y axes + ImVec2 MajorTickSize; // = 1,1 line thickness of major ticks + ImVec2 MinorTickSize; // = 1,1 line thickness of minor ticks + ImVec2 MajorGridSize; // = 1,1 line thickness of major grid lines + ImVec2 MinorGridSize; // = 1,1 line thickness of minor grid lines + ImVec2 PlotPadding; // = 10,10 padding between widget frame and plot area, labels, or outside legends (i.e. main padding) + ImVec2 LabelPadding; // = 5,5 padding between axes labels, tick labels, and plot edge + ImVec2 LegendPadding; // = 10,10 legend padding from plot edges + ImVec2 LegendInnerPadding; // = 5,5 legend inner padding from legend edges + ImVec2 LegendSpacing; // = 0,0 spacing between legend entries + ImVec2 MousePosPadding; // = 10,10 padding between plot edge and interior mouse location text + ImVec2 AnnotationPadding; // = 2,2 text padding around annotation labels + ImVec2 FitPadding; // = 0,0 additional fit padding as a percentage of the fit extents (e.g. ImVec2(0.1f,0.1f) adds 10% to the fit extents of X and Y) + ImVec2 PlotDefaultSize; // = 400,300 default size used when ImVec2(0,0) is passed to BeginPlot + ImVec2 PlotMinSize; // = 300,225 minimum size plot frame can be when shrunk + // colors + ImVec4 Colors[ImPlotCol_COUNT]; // array of plot specific colors + // settings/flags + bool AntiAliasedLines; // = false, enable global anti-aliasing on plot lines (overrides ImPlotFlags_AntiAliased) + bool UseLocalTime; // = false, axis labels will be formatted for your timezone when ImPlotAxisFlag_Time is enabled + bool UseISO8601; // = false, dates will be formatted according to ISO 8601 where applicable (e.g. YYYY-MM-DD, YYYY-MM, --MM-DD, etc.) 
+ bool Use24HourClock; // = false, times will be formatted using a 24 hour clock + IMPLOT_API ImPlotStyle(); +}; + +//----------------------------------------------------------------------------- +// ImPlot End-User API +//----------------------------------------------------------------------------- + +namespace ImPlot { + +//----------------------------------------------------------------------------- +// ImPlot Context +//----------------------------------------------------------------------------- + +// Creates a new ImPlot context. Call this after ImGui::CreateContext. +IMPLOT_API ImPlotContext* CreateContext(); +// Destroys an ImPlot context. Call this before ImGui::DestroyContext. NULL = destroy current context +IMPLOT_API void DestroyContext(ImPlotContext* ctx = NULL); +// Returns the current ImPlot context. NULL if no context has ben set. +IMPLOT_API ImPlotContext* GetCurrentContext(); +// Sets the current ImPlot context. +IMPLOT_API void SetCurrentContext(ImPlotContext* ctx); + +//----------------------------------------------------------------------------- +// Begin/End Plot +//----------------------------------------------------------------------------- + +// Starts a 2D plotting context. If this function returns true, EndPlot() must +// be called, e.g. "if (BeginPlot(...)) { ... EndPlot(); }". #title_id must +// be unique. If you need to avoid ID collisions or don't want to display a +// title in the plot, use double hashes (e.g. "MyPlot##Hidden" or "##NoTitle"). +// If #x_label and/or #y_label are provided, axes labels will be displayed. 
+IMPLOT_API bool BeginPlot(const char* title_id, + const char* x_label = NULL, + const char* y_label = NULL, + const ImVec2& size = ImVec2(-1,0), + ImPlotFlags flags = ImPlotFlags_None, + ImPlotAxisFlags x_flags = ImPlotAxisFlags_None, + ImPlotAxisFlags y_flags = ImPlotAxisFlags_None, + ImPlotAxisFlags y2_flags = ImPlotAxisFlags_NoGridLines, + ImPlotAxisFlags y3_flags = ImPlotAxisFlags_NoGridLines, + const char* y2_label = NULL, + const char* y3_label = NULL); + +// Only call EndPlot() if BeginPlot() returns true! Typically called at the end +// of an if statement conditioned on BeginPlot(). +IMPLOT_API void EndPlot(); + +//----------------------------------------------------------------------------- +// Plot Items +//----------------------------------------------------------------------------- + +// The template functions below are explicitly instantiated in implot_items.cpp. +// They are not intended to be used generically with custom types. You will get +// a linker error if you try! All functions support the following scalar types: +// +// float, double, ImS8, ImU8, ImS16, ImU16, ImS32, ImU32, ImS64, ImU64 +// +// +// If you need to plot custom or non-homogenous data you have a few options: +// +// 1. If your data is a simple struct/class (e.g. Vector2f), you can use striding. +// This is the most performant option if applicable. +// +// struct Vector2f { float X, Y; }; +// ... +// Vector2f data[42]; +// ImPlot::PlotLine("line", &data[0].x, &data[0].y, 42, 0, sizeof(Vector2f)); // or sizeof(float)*2 +// +// 2. Write a custom getter function or C++ lambda and pass it and your data to +// an ImPlot function post-fixed with a G (e.g. PlotScatterG). This has a +// slight performance cost, but probably not enough to worry about. +// +// ImPlotPoint MyDataGetter(void* data, int idx) { +// MyData* my_data = (MyData*)data; +// ImPlotPoint p; +// p.x = my_data->GetTime(idx); +// p.y = my_data->GetValue(idx); +// return p +// } +// ... 
+// MyData my_data; +// ImPlot::PlotScatterG("scatter", MyDataGetter, &my_data, my_data.Size()); +// +// NB: All types are converted to double before plotting. You may lose information +// if you try plotting extremely large 64-bit integral types. Proceed with caution! + +// Plots a standard 2D line plot. +template IMPLOT_API void PlotLine(const char* label_id, const T* values, int count, double xscale=1, double x0=0, int offset=0, int stride=sizeof(T)); +template IMPLOT_API void PlotLine(const char* label_id, const T* xs, const T* ys, int count, int offset=0, int stride=sizeof(T)); + IMPLOT_API void PlotLineG(const char* label_id, ImPlotPoint (*getter)(void* data, int idx), void* data, int count, int offset=0); + +// Plots a standard 2D scatter plot. Default marker is ImPlotMarker_Circle. +template IMPLOT_API void PlotScatter(const char* label_id, const T* values, int count, double xscale=1, double x0=0, int offset=0, int stride=sizeof(T)); +template IMPLOT_API void PlotScatter(const char* label_id, const T* xs, const T* ys, int count, int offset=0, int stride=sizeof(T)); + IMPLOT_API void PlotScatterG(const char* label_id, ImPlotPoint (*getter)(void* data, int idx), void* data, int count, int offset=0); + +// Plots a a stairstep graph. The y value is continued constantly from every x position, i.e. the interval [x[i], x[i+1]) has the value y[i]. +template IMPLOT_API void PlotStairs(const char* label_id, const T* values, int count, double xscale=1, double x0=0, int offset=0, int stride=sizeof(T)); +template IMPLOT_API void PlotStairs(const char* label_id, const T* xs, const T* ys, int count, int offset=0, int stride=sizeof(T)); + IMPLOT_API void PlotStairsG(const char* label_id, ImPlotPoint (*getter)(void* data, int idx), void* data, int count, int offset=0); + +// Plots a shaded (filled) region between two lines, or a line and a horizontal reference. Set y_ref to +/-INFINITY for infinite fill extents. 
+template IMPLOT_API void PlotShaded(const char* label_id, const T* values, int count, double y_ref=0, double xscale=1, double x0=0, int offset=0, int stride=sizeof(T)); +template IMPLOT_API void PlotShaded(const char* label_id, const T* xs, const T* ys, int count, double y_ref=0, int offset=0, int stride=sizeof(T)); +template IMPLOT_API void PlotShaded(const char* label_id, const T* xs, const T* ys1, const T* ys2, int count, int offset=0, int stride=sizeof(T)); + IMPLOT_API void PlotShadedG(const char* label_id, ImPlotPoint (*getter1)(void* data, int idx), void* data1, ImPlotPoint (*getter2)(void* data, int idx), void* data2, int count, int offset=0); + +// Plots a vertical bar graph. #width and #shift are in X units. +template IMPLOT_API void PlotBars(const char* label_id, const T* values, int count, double width=0.67, double shift=0, int offset=0, int stride=sizeof(T)); +template IMPLOT_API void PlotBars(const char* label_id, const T* xs, const T* ys, int count, double width, int offset=0, int stride=sizeof(T)); + IMPLOT_API void PlotBarsG(const char* label_id, ImPlotPoint (*getter)(void* data, int idx), void* data, int count, double width, int offset=0); + +// Plots a horizontal bar graph. #height and #shift are in Y units. +template IMPLOT_API void PlotBarsH(const char* label_id, const T* values, int count, double height=0.67, double shift=0, int offset=0, int stride=sizeof(T)); +template IMPLOT_API void PlotBarsH(const char* label_id, const T* xs, const T* ys, int count, double height, int offset=0, int stride=sizeof(T)); + IMPLOT_API void PlotBarsHG(const char* label_id, ImPlotPoint (*getter)(void* data, int idx), void* data, int count, double height, int offset=0); + +// Plots vertical error bar. The label_id should be the same as the label_id of the associated line or bar plot. 
+template IMPLOT_API void PlotErrorBars(const char* label_id, const T* xs, const T* ys, const T* err, int count, int offset=0, int stride=sizeof(T)); +template IMPLOT_API void PlotErrorBars(const char* label_id, const T* xs, const T* ys, const T* neg, const T* pos, int count, int offset=0, int stride=sizeof(T)); + +// Plots horizontal error bars. The label_id should be the same as the label_id of the associated line or bar plot. +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const T* xs, const T* ys, const T* err, int count, int offset=0, int stride=sizeof(T)); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const T* xs, const T* ys, const T* neg, const T* pos, int count, int offset=0, int stride=sizeof(T)); + +/// Plots vertical stems. +template IMPLOT_API void PlotStems(const char* label_id, const T* values, int count, double y_ref=0, double xscale=1, double x0=0, int offset=0, int stride=sizeof(T)); +template IMPLOT_API void PlotStems(const char* label_id, const T* xs, const T* ys, int count, double y_ref=0, int offset=0, int stride=sizeof(T)); + +/// Plots infinite vertical or horizontal lines (e.g. for references or asymptotes). +template IMPLOT_API void PlotVLines(const char* label_id, const T* xs, int count, int offset=0, int stride=sizeof(T)); +template IMPLOT_API void PlotHLines(const char* label_id, const T* ys, int count, int offset=0, int stride=sizeof(T)); + +// Plots a pie chart. If the sum of values > 1 or normalize is true, each value will be normalized. Center and radius are in plot units. #label_fmt can be set to NULL for no labels. +template IMPLOT_API void PlotPieChart(const char* const label_ids[], const T* values, int count, double x, double y, double radius, bool normalize=false, const char* label_fmt="%.1f", double angle0=90); + +// Plots a 2D heatmap chart. Values are expected to be in row-major order. #label_fmt can be set to NULL for no labels. 
+template IMPLOT_API void PlotHeatmap(const char* label_id, const T* values, int rows, int cols, double scale_min, double scale_max, const char* label_fmt="%.1f", const ImPlotPoint& bounds_min=ImPlotPoint(0,0), const ImPlotPoint& bounds_max=ImPlotPoint(1,1)); + +// Plots digital data. Digital plots do not respond to y drag or zoom, and are always referenced to the bottom of the plot. +template IMPLOT_API void PlotDigital(const char* label_id, const T* xs, const T* ys, int count, int offset=0, int stride=sizeof(T)); + IMPLOT_API void PlotDigitalG(const char* label_id, ImPlotPoint (*getter)(void* data, int idx), void* data, int count, int offset=0); + +// Plots an axis-aligned image. #bounds_min/bounds_max are in plot coordinatse (y-up) and #uv0/uv1 are in texture coordinates (y-down). +IMPLOT_API void PlotImage(const char* label_id, ImTextureID user_texture_id, const ImPlotPoint& bounds_min, const ImPlotPoint& bounds_max, const ImVec2& uv0=ImVec2(0,0), const ImVec2& uv1=ImVec2(1,1), const ImVec4& tint_col=ImVec4(1,1,1,1)); + +// Plots a centered text label at point x,y with optional pixel offset. Text color can be changed with ImPlot::PushStyleColor(ImPlotCol_InlayText, ...). +IMPLOT_API void PlotText(const char* text, double x, double y, bool vertical=false, const ImVec2& pix_offset=ImVec2(0,0)); + +// Plots an dummy item (i.e. adds a legend entry colored by ImPlotCol_Line) +IMPLOT_API void PlotDummy(const char* label_id); + +//----------------------------------------------------------------------------- +// Plot Utils +//----------------------------------------------------------------------------- + +// The following functions MUST be called BEFORE BeginPlot! + +// Set the axes range limits of the next plot. Call right before BeginPlot(). If ImGuiCond_Always is used, the axes limits will be locked. 
+IMPLOT_API void SetNextPlotLimits(double xmin, double xmax, double ymin, double ymax, ImGuiCond cond = ImGuiCond_Once); +// Set the X axis range limits of the next plot. Call right before BeginPlot(). If ImGuiCond_Always is used, the X axis limits will be locked. +IMPLOT_API void SetNextPlotLimitsX(double xmin, double xmax, ImGuiCond cond = ImGuiCond_Once); +// Set the Y axis range limits of the next plot. Call right before BeginPlot(). If ImGuiCond_Always is used, the Y axis limits will be locked. +IMPLOT_API void SetNextPlotLimitsY(double ymin, double ymax, ImGuiCond cond = ImGuiCond_Once, ImPlotYAxis y_axis = 0); +// Links the next plot limits to external values. Set to NULL for no linkage. The pointer data must remain valid until the matching call EndPlot. +IMPLOT_API void LinkNextPlotLimits(double* xmin, double* xmax, double* ymin, double* ymax, double* ymin2 = NULL, double* ymax2 = NULL, double* ymin3 = NULL, double* ymax3 = NULL); +// Fits the next plot axes to all plotted data if they are unlocked (equivalent to double-clicks). +IMPLOT_API void FitNextPlotAxes(bool x = true, bool y = true, bool y2 = true, bool y3 = true); + +// Set the X axis ticks and optionally the labels for the next plot. +IMPLOT_API void SetNextPlotTicksX(const double* values, int n_ticks, const char* const labels[] = NULL, bool show_default = false); +IMPLOT_API void SetNextPlotTicksX(double x_min, double x_max, int n_ticks, const char* const labels[] = NULL, bool show_default = false); + +// Set the Y axis ticks and optionally the labels for the next plot. +IMPLOT_API void SetNextPlotTicksY(const double* values, int n_ticks, const char* const labels[] = NULL, bool show_default = false, ImPlotYAxis y_axis = 0); +IMPLOT_API void SetNextPlotTicksY(double y_min, double y_max, int n_ticks, const char* const labels[] = NULL, bool show_default = false, ImPlotYAxis y_axis = 0); + +// The following functions MUST be called BETWEEN Begin/EndPlot! 
+ +// Select which Y axis will be used for subsequent plot elements. The default is ImPlotYAxis_1, or the first (left) Y axis. Enable 2nd and 3rd axes with ImPlotFlags_YAxisX. +IMPLOT_API void SetPlotYAxis(ImPlotYAxis y_axis); +// Hides or shows the next plot item (i.e. as if it were toggled from the legend). Use ImGuiCond_Always if you need to forcefully set this every frame. +IMPLOT_API void HideNextItem(bool hidden = true, ImGuiCond cond = ImGuiCond_Once); + +// Convert pixels to a position in the current plot's coordinate system. A negative y_axis uses the current value of SetPlotYAxis (ImPlotYAxis_1 initially). +IMPLOT_API ImPlotPoint PixelsToPlot(const ImVec2& pix, ImPlotYAxis y_axis = IMPLOT_AUTO); +IMPLOT_API ImPlotPoint PixelsToPlot(float x, float y, ImPlotYAxis y_axis = IMPLOT_AUTO); +// Convert a position in the current plot's coordinate system to pixels. A negative y_axis uses the current value of SetPlotYAxis (ImPlotYAxis_1 initially). +IMPLOT_API ImVec2 PlotToPixels(const ImPlotPoint& plt, ImPlotYAxis y_axis = IMPLOT_AUTO); +IMPLOT_API ImVec2 PlotToPixels(double x, double y, ImPlotYAxis y_axis = IMPLOT_AUTO); +// Get the current Plot position (top-left) in pixels. +IMPLOT_API ImVec2 GetPlotPos(); +// Get the curent Plot size in pixels. +IMPLOT_API ImVec2 GetPlotSize(); +// Returns true if the plot area in the current plot is hovered. +IMPLOT_API bool IsPlotHovered(); +// Returns true if the XAxis plot area in the current plot is hovered. +IMPLOT_API bool IsPlotXAxisHovered(); +// Returns true if the YAxis[n] plot area in the current plot is hovered. +IMPLOT_API bool IsPlotYAxisHovered(ImPlotYAxis y_axis = 0); +// Returns the mouse position in x,y coordinates of the current plot. A negative y_axis uses the current value of SetPlotYAxis (ImPlotYAxis_1 initially). +IMPLOT_API ImPlotPoint GetPlotMousePos(ImPlotYAxis y_axis = IMPLOT_AUTO); +// Returns the current plot axis range. 
A negative y_axis uses the current value of SetPlotYAxis (ImPlotYAxis_1 initially). +IMPLOT_API ImPlotLimits GetPlotLimits(ImPlotYAxis y_axis = IMPLOT_AUTO); + +// Returns true if the current plot is being queried. Query must be enabled with ImPlotFlags_Query. +IMPLOT_API bool IsPlotQueried(); +// Returns the current plot query bounds. Query must be enabled with ImPlotFlags_Query. +IMPLOT_API ImPlotLimits GetPlotQuery(ImPlotYAxis y_axis = IMPLOT_AUTO); + +//----------------------------------------------------------------------------- +// Plot Tools +//----------------------------------------------------------------------------- + +// The following functions MUST be called BETWEEN Begin/EndPlot! + +// Shows an annotation callout at a chosen point. +IMPLOT_API void Annotate(double x, double y, const ImVec2& pix_offset, const char* fmt, ...) IM_FMTARGS(4); +IMPLOT_API void Annotate(double x, double y, const ImVec2& pix_offset, const ImVec4& color, const char* fmt, ...) IM_FMTARGS(5); +IMPLOT_API void AnnotateV(double x, double y, const ImVec2& pix_offset, const char* fmt, va_list args) IM_FMTLIST(4); +IMPLOT_API void AnnotateV(double x, double y, const ImVec2& pix_offset, const ImVec4& color, const char* fmt, va_list args) IM_FMTLIST(5); + +// Same as above, but the annotation will always be clamped to stay inside the plot area. +IMPLOT_API void AnnotateClamped(double x, double y, const ImVec2& pix_offset, const char* fmt, ...) IM_FMTARGS(4); +IMPLOT_API void AnnotateClamped(double x, double y, const ImVec2& pix_offset, const ImVec4& color, const char* fmt, ...) IM_FMTARGS(5); +IMPLOT_API void AnnotateClampedV(double x, double y, const ImVec2& pix_offset, const char* fmt, va_list args) IM_FMTLIST(4); +IMPLOT_API void AnnotateClampedV(double x, double y, const ImVec2& pix_offset, const ImVec4& color, const char* fmt, va_list args) IM_FMTLIST(5); + +// Shows a draggable vertical guide line at an x-value. #col defaults to ImGuiCol_Text. 
+IMPLOT_API bool DragLineX(const char* id, double* x_value, bool show_label = true, const ImVec4& col = IMPLOT_AUTO_COL, float thickness = 1); +// Shows a draggable horizontal guide line at a y-value. #col defaults to ImGuiCol_Text. +IMPLOT_API bool DragLineY(const char* id, double* y_value, bool show_label = true, const ImVec4& col = IMPLOT_AUTO_COL, float thickness = 1); +// Shows a draggable point at x,y. #col defaults to ImGuiCol_Text. +IMPLOT_API bool DragPoint(const char* id, double* x, double* y, bool show_label = true, const ImVec4& col = IMPLOT_AUTO_COL, float radius = 4); + +//----------------------------------------------------------------------------- +// Legend Utils and Tools +//----------------------------------------------------------------------------- + +// The following functions MUST be called between Begin/EndPlot! + +// Set the location of the current plot's legend. +IMPLOT_API void SetLegendLocation(ImPlotLocation location, ImPlotOrientation orientation = ImPlotOrientation_Vertical, bool outside = false); +// Set the location of the current plot's mouse position text (default = South|East). +IMPLOT_API void SetMousePosLocation(ImPlotLocation location); +// Returns true if a plot item legend entry is hovered. +IMPLOT_API bool IsLegendEntryHovered(const char* label_id); +// Begin a drag and drop source from a legend entry. The only supported flag is SourceNoPreviewTooltip +IMPLOT_API bool BeginLegendDragDropSource(const char* label_id, ImGuiDragDropFlags flags = 0); +// End legend drag and drop source. +IMPLOT_API void EndLegendDragDropSource(); +// Begin a popup for a legend entry. +IMPLOT_API bool BeginLegendPopup(const char* label_id, ImGuiMouseButton mouse_button = 1); +// End a popup for a legend entry. 
+IMPLOT_API void EndLegendPopup(); + +//----------------------------------------------------------------------------- +// Plot and Item Styling +//----------------------------------------------------------------------------- + +// Provides access to plot style structure for permanent modifications to colors, sizes, etc. +IMPLOT_API ImPlotStyle& GetStyle(); + +// Style colors for current ImGui style (default). +IMPLOT_API void StyleColorsAuto(ImPlotStyle* dst = NULL); +// Style colors for ImGui "Classic". +IMPLOT_API void StyleColorsClassic(ImPlotStyle* dst = NULL); +// Style colors for ImGui "Dark". +IMPLOT_API void StyleColorsDark(ImPlotStyle* dst = NULL); +// Style colors for ImGui "Light". +IMPLOT_API void StyleColorsLight(ImPlotStyle* dst = NULL); + +// Use PushStyleX to temporarily modify your ImPlotStyle. The modification +// will last until the matching call to PopStyleX. You MUST call a pop for +// every push, otherwise you will leak memory! This behaves just like ImGui. + +// Temporarily modify a plot color. Don't forget to call PopStyleColor! +IMPLOT_API void PushStyleColor(ImPlotCol idx, ImU32 col); +IMPLOT_API void PushStyleColor(ImPlotCol idx, const ImVec4& col); +// Undo temporary color modification. Undo multiple pushes at once by increasing count. +IMPLOT_API void PopStyleColor(int count = 1); + +// Temporarily modify a style variable of float type. Don't forget to call PopStyleVar! +IMPLOT_API void PushStyleVar(ImPlotStyleVar idx, float val); +// Temporarily modify a style variable of int type. Don't forget to call PopStyleVar! +IMPLOT_API void PushStyleVar(ImPlotStyleVar idx, int val); +// Temporarily modify a style variable of ImVec2 type. Don't forget to call PopStyleVar! +IMPLOT_API void PushStyleVar(ImPlotStyleVar idx, const ImVec2& val); +// Undo temporary style modification. Undo multiple pushes at once by increasing count. 
+IMPLOT_API void PopStyleVar(int count = 1); + +// The following can be used to modify the style of the next plot item ONLY. They do +// NOT require calls to PopStyleX. Leave style attributes you don't want modified to +// IMPLOT_AUTO or IMPLOT_AUTO_COL. Automatic styles will be deduced from the current +// values in your ImPlotStyle or from Colormap data. + +// Set the line color and weight for the next item only. +IMPLOT_API void SetNextLineStyle(const ImVec4& col = IMPLOT_AUTO_COL, float weight = IMPLOT_AUTO); +// Set the fill color for the next item only. +IMPLOT_API void SetNextFillStyle(const ImVec4& col = IMPLOT_AUTO_COL, float alpha_mod = IMPLOT_AUTO); +// Set the marker style for the next item only. +IMPLOT_API void SetNextMarkerStyle(ImPlotMarker marker = IMPLOT_AUTO, float size = IMPLOT_AUTO, const ImVec4& fill = IMPLOT_AUTO_COL, float weight = IMPLOT_AUTO, const ImVec4& outline = IMPLOT_AUTO_COL); +// Set the error bar style for the next item only. +IMPLOT_API void SetNextErrorBarStyle(const ImVec4& col = IMPLOT_AUTO_COL, float size = IMPLOT_AUTO, float weight = IMPLOT_AUTO); + +// Gets the last item primary color (i.e. its legend icon color) +IMPLOT_API ImVec4 GetLastItemColor(); + +// Returns the null terminated string name for an ImPlotCol. +IMPLOT_API const char* GetStyleColorName(ImPlotCol idx); +// Returns the null terminated string name for an ImPlotMarker. +IMPLOT_API const char* GetMarkerName(ImPlotMarker idx); + +//----------------------------------------------------------------------------- +// Colormaps +//----------------------------------------------------------------------------- + +// Item styling is based on Colormaps when the relevant ImPlotCol_ is set to +// IMPLOT_AUTO_COL (default). Several built in colormaps are available and can be +// toggled in the demo. You can push/pop or set your own colormaps as well. 
+ +// The Colormap data will be ignored and a custom color will be used if you have done one of the following: +// 1) Modified an item style color in your ImPlotStyle to anything other than IMPLOT_AUTO_COL. +// 2) Pushed an item style color using PushStyleColor(). +// 3) Set the next item style with a SetNextXStyle function. + +// Temporarily switch to one of the built-in colormaps. +IMPLOT_API void PushColormap(ImPlotColormap colormap); +// Temporarily switch to your custom colormap. The pointer data must persist until the matching call to PopColormap! +IMPLOT_API void PushColormap(const ImVec4* colormap, int size); +// Undo temporary colormap modification. +IMPLOT_API void PopColormap(int count = 1); + +// Permanently sets a custom colormap. The colors will be copied to internal memory. Typically used on startup. Prefer PushColormap instead of calling this each frame. +IMPLOT_API void SetColormap(const ImVec4* colormap, int size); +// Permanently switch to one of the built-in colormaps. If samples is greater than 1, the map will be linearly resampled. Typically used on startup. Don't call this each frame. +IMPLOT_API void SetColormap(ImPlotColormap colormap, int samples = 0); + +// Returns the size of the current colormap. +IMPLOT_API int GetColormapSize(); +// Returns a color from the Color map given an index >= 0 (modulo will be performed). +IMPLOT_API ImVec4 GetColormapColor(int index); +// Linearly interpolates a color from the current colormap given t between 0 and 1. +IMPLOT_API ImVec4 LerpColormap(float t); +// Returns the next unused colormap color and advances the colormap. Can be used to skip colors if desired. +IMPLOT_API ImVec4 NextColormapColor(); + +// Renders a vertical color scale using the current color map. Call this before or after Begin/EndPlot. +IMPLOT_API void ShowColormapScale(double scale_min, double scale_max, float height); + +// Returns a null terminated string name for a built-in colormap. 
+IMPLOT_API const char* GetColormapName(ImPlotColormap colormap); + +//----------------------------------------------------------------------------- +// Miscellaneous +//----------------------------------------------------------------------------- + +// Get the plot draw list for rendering to the current plot area. +IMPLOT_API ImDrawList* GetPlotDrawList(); +// Push clip rect for rendering to current plot area. +IMPLOT_API void PushPlotClipRect(); +// Pop plot clip rect. +IMPLOT_API void PopPlotClipRect(); + +// Shows ImPlot style selector dropdown menu. +IMPLOT_API bool ShowStyleSelector(const char* label); +// Shows ImPlot colormap selector dropdown menu. +IMPLOT_API bool ShowColormapSelector(const char* label); +// Shows ImPlot style editor block (not a window). +IMPLOT_API void ShowStyleEditor(ImPlotStyle* ref = NULL); +// Add basic help/info block (not a window): how to manipulate ImPlot as an end-user. +IMPLOT_API void ShowUserGuide(); +// Shows ImPlot metrics/debug information. +IMPLOT_API void ShowMetricsWindow(bool* p_popen = NULL); + +// Sets the current _ImGui_ context. This is ONLY necessary if you are compiling +// ImPlot as a DLL (not recommended) separate from your ImGui compilation. It +// sets the global variable GImGui, which is not shared across DLL boundaries. +// See GImGui documentation in imgui.cpp for more details. +IMPLOT_API void SetImGuiContext(ImGuiContext* ctx); + +//----------------------------------------------------------------------------- +// Demo (add implot_demo.cpp to your sources!) +//----------------------------------------------------------------------------- + +// Shows the ImPlot demo. 
+IMPLOT_API void ShowDemoWindow(bool* p_open = NULL); + +} // namespace ImPlot diff --git a/cpp-projects/3d-engine/imgui/extra/implot/implot_internal.h b/cpp-projects/3d-engine/imgui/extra/implot/implot_internal.h new file mode 100644 index 0000000..3ef7ae9 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/implot/implot_internal.h @@ -0,0 +1,993 @@ +// MIT License + +// Copyright (c) 2020 Evan Pezent + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// ImPlot v0.9 WIP + +// You may use this file to debug, understand or extend ImPlot features but we +// don't provide any guarantee of forward compatibility! 
+ +//----------------------------------------------------------------------------- +// [SECTION] Header Mess +//----------------------------------------------------------------------------- + +#pragma once + +#ifndef IMGUI_DEFINE_MATH_OPERATORS +#define IMGUI_DEFINE_MATH_OPERATORS +#endif + +#include +#include "imgui_internal.h" + +#ifndef IMPLOT_VERSION +#error Must include implot.h before implot_internal.h +#endif + +//----------------------------------------------------------------------------- +// [SECTION] Forward Declarations +//----------------------------------------------------------------------------- + +struct ImPlotTick; +struct ImPlotAxis; +struct ImPlotAxisState; +struct ImPlotAxisColor; +struct ImPlotItem; +struct ImPlotLegendData; +struct ImPlotPlot; +struct ImPlotNextPlotData; + +//----------------------------------------------------------------------------- +// [SECTION] Context Pointer +//----------------------------------------------------------------------------- + +extern IMPLOT_API ImPlotContext* GImPlot; // Current implicit context pointer + +//----------------------------------------------------------------------------- +// [SECTION] Macros and Constants +//----------------------------------------------------------------------------- + +// Constants can be changed unless stated otherwise. We may move some of these +// to ImPlotStyleVar_ over time. + +// The maximum number of supported y-axes (DO NOT CHANGE THIS) +#define IMPLOT_Y_AXES 3 +// The number of times to subdivided grid divisions (best if a multiple of 1, 2, and 5) +#define IMPLOT_SUB_DIV 10 +// Zoom rate for scroll (e.g. 
0.1f = 10% plot range every scroll click) +#define IMPLOT_ZOOM_RATE 0.1f +// Mimimum allowable timestamp value 01/01/1970 @ 12:00am (UTC) (DO NOT DECREASE THIS) +#define IMPLOT_MIN_TIME 0 +// Maximum allowable timestamp value 01/01/3000 @ 12:00am (UTC) (DO NOT INCREASE THIS) +#define IMPLOT_MAX_TIME 32503680000 + +//----------------------------------------------------------------------------- +// [SECTION] Generic Helpers +//----------------------------------------------------------------------------- + +// Computes the common (base-10) logarithm +static inline float ImLog10(float x) { return log10f(x); } +static inline double ImLog10(double x) { return log10(x); } +// Returns true if a flag is set +template +inline bool ImHasFlag(TSet set, TFlag flag) { return (set & flag) == flag; } +// Flips a flag in a flagset +template +inline void ImFlipFlag(TSet& set, TFlag flag) { ImHasFlag(set, flag) ? set &= ~flag : set |= flag; } +// Linearly remaps x from [x0 x1] to [y0 y1]. +template +inline T ImRemap(T x, T x0, T x1, T y0, T y1) { return y0 + (x - x0) * (y1 - y0) / (x1 - x0); } +// Returns always positive modulo (assumes r != 0) +inline int ImPosMod(int l, int r) { return (l % r + r) % r; } +// Returns true if val is NAN or INFINITY +inline bool ImNanOrInf(double val) { return val == HUGE_VAL || val == -HUGE_VAL || isnan(val); } +// Turns NANs to 0s +inline double ImConstrainNan(double val) { return isnan(val) ? 0 : val; } +// Turns infinity to floating point maximums +inline double ImConstrainInf(double val) { return val == HUGE_VAL ? DBL_MAX : val == -HUGE_VAL ? - DBL_MAX : val; } +// Turns numbers less than or equal to 0 to 0.001 (sort of arbitrary, is there a better way?) +inline double ImConstrainLog(double val) { return val <= 0 ? 0.001f : val; } +// Turns numbers less than 0 to zero +inline double ImConstrainTime(double val) { return val < IMPLOT_MIN_TIME ? IMPLOT_MIN_TIME : (val > IMPLOT_MAX_TIME ? 
IMPLOT_MAX_TIME : val); } +// True if two numbers are approximately equal using units in the last place. +inline bool ImAlmostEqual(double v1, double v2, int ulp = 2) { return ImAbs(v1-v2) < DBL_EPSILON * ImAbs(v1+v2) * ulp || ImAbs(v1-v2) < DBL_MIN; } + +// Offset calculator helper +template +struct ImOffsetCalculator { + ImOffsetCalculator(const int* sizes) { + Offsets[0] = 0; + for (int i = 1; i < Count; ++i) + Offsets[i] = Offsets[i-1] + sizes[i-1]; + } + int Offsets[Count]; +}; + +// Character buffer writer helper (FIXME: Can't we replace this with ImGuiTextBuffer?) +struct ImBufferWriter +{ + char* Buffer; + int Size; + int Pos; + + ImBufferWriter(char* buffer, int size) { + Buffer = buffer; + Size = size; + Pos = 0; + } + + void Write(const char* fmt, ...) { + va_list args; + va_start(args, fmt); + WriteV(fmt, args); + va_end(args); + } + + void WriteV(const char* fmt, va_list args) { + const int written = ::vsnprintf(&Buffer[Pos], Size - Pos - 1, fmt, args); + if (written > 0) + Pos += ImMin(written, Size-Pos-1); + } +}; + +// Fixed size point array +template +struct ImPlotPointArray { + inline ImPlotPoint& operator[](int i) { return Data[i]; } + inline const ImPlotPoint& operator[](int i) const { return Data[i]; } + inline int Size() { return N; } + ImPlotPoint Data[N]; +}; + +//----------------------------------------------------------------------------- +// [SECTION] ImPlot Enums +//----------------------------------------------------------------------------- + +typedef int ImPlotScale; // -> enum ImPlotScale_ +typedef int ImPlotTimeUnit; // -> enum ImPlotTimeUnit_ +typedef int ImPlotDateFmt; // -> enum ImPlotDateFmt_ +typedef int ImPlotTimeFmt; // -> enum ImPlotTimeFmt_ + +// XY axes scaling combinations +enum ImPlotScale_ { + ImPlotScale_LinLin, // linear x, linear y + ImPlotScale_LogLin, // log x, linear y + ImPlotScale_LinLog, // linear x, log y + ImPlotScale_LogLog // log x, log y +}; + +enum ImPlotTimeUnit_ { + ImPlotTimeUnit_Us, // microsecond + 
ImPlotTimeUnit_Ms, // millisecond + ImPlotTimeUnit_S, // second + ImPlotTimeUnit_Min, // minute + ImPlotTimeUnit_Hr, // hour + ImPlotTimeUnit_Day, // day + ImPlotTimeUnit_Mo, // month + ImPlotTimeUnit_Yr, // year + ImPlotTimeUnit_COUNT +}; + +enum ImPlotDateFmt_ { // default [ ISO 8601 ] + ImPlotDateFmt_None = 0, + ImPlotDateFmt_DayMo, // 10/3 [ --10-03 ] + ImPlotDateFmt_DayMoYr, // 10/3/91 [ 1991-10-03 ] + ImPlotDateFmt_MoYr, // Oct 1991 [ 1991-10 ] + ImPlotDateFmt_Mo, // Oct [ --10 ] + ImPlotDateFmt_Yr // 1991 [ 1991 ] +}; + +enum ImPlotTimeFmt_ { // default [ 24 Hour Clock ] + ImPlotTimeFmt_None = 0, + ImPlotTimeFmt_Us, // .428 552 [ .428 552 ] + ImPlotTimeFmt_SUs, // :29.428 552 [ :29.428 552 ] + ImPlotTimeFmt_SMs, // :29.428 [ :29.428 ] + ImPlotTimeFmt_S, // :29 [ :29 ] + ImPlotTimeFmt_HrMinSMs, // 7:21:29.428pm [ 19:21:29.428 ] + ImPlotTimeFmt_HrMinS, // 7:21:29pm [ 19:21:29 ] + ImPlotTimeFmt_HrMin, // 7:21pm [ 19:21 ] + ImPlotTimeFmt_Hr // 7pm [ 19:00 ] +}; + +// Input mapping structure, default values listed in the comments. 
+struct ImPlotInputMap { + ImGuiMouseButton PanButton; // LMB enables panning when held + ImGuiKeyModFlags PanMod; // none optional modifier that must be held for panning + ImGuiMouseButton FitButton; // LMB fits visible data when double clicked + ImGuiMouseButton ContextMenuButton; // RMB opens plot context menu (if enabled) when clicked + ImGuiMouseButton BoxSelectButton; // RMB begins box selection when pressed and confirms selection when released + ImGuiKeyModFlags BoxSelectMod; // none optional modifier that must be held for box selection + ImGuiMouseButton BoxSelectCancelButton; // LMB cancels active box selection when pressed + ImGuiMouseButton QueryButton; // MMB begins query selection when pressed and end query selection when released + ImGuiKeyModFlags QueryMod; // none optional modifier that must be held for query selection + ImGuiKeyModFlags QueryToggleMod; // Ctrl when held, active box selections turn into queries + ImGuiKeyModFlags HorizontalMod; // Alt expands active box selection/query horizontally to plot edge when held + ImGuiKeyModFlags VerticalMod; // Shift expands active box selection/query vertically to plot edge when held + IMPLOT_API ImPlotInputMap(); +}; + +//----------------------------------------------------------------------------- +// [SECTION] ImPlot Structs +//----------------------------------------------------------------------------- + +// Combined date/time format spec +struct ImPlotDateTimeFmt { + ImPlotDateTimeFmt(ImPlotDateFmt date_fmt, ImPlotTimeFmt time_fmt, bool use_24_hr_clk = false, bool use_iso_8601 = false) { + Date = date_fmt; + Time = time_fmt; + UseISO8601 = use_iso_8601; + Use24HourClock = use_24_hr_clk; + } + ImPlotDateFmt Date; + ImPlotTimeFmt Time; + bool UseISO8601; + bool Use24HourClock; +}; + +// Two part timestamp struct. 
+struct ImPlotTime { + time_t S; // second part + int Us; // microsecond part + ImPlotTime() { S = 0; Us = 0; } + ImPlotTime(time_t s, int us = 0) { S = s + us / 1000000; Us = us % 1000000; } + void RollOver() { S = S + Us / 1000000; Us = Us % 1000000; } + double ToDouble() const { return (double)S + (double)Us / 1000000.0; } + static ImPlotTime FromDouble(double t) { return ImPlotTime((time_t)t, (int)(t * 1000000 - floor(t) * 1000000)); } +}; + +static inline ImPlotTime operator+(const ImPlotTime& lhs, const ImPlotTime& rhs) +{ return ImPlotTime(lhs.S + rhs.S, lhs.Us + rhs.Us); } +static inline ImPlotTime operator-(const ImPlotTime& lhs, const ImPlotTime& rhs) +{ return ImPlotTime(lhs.S - rhs.S, lhs.Us - rhs.Us); } +static inline bool operator==(const ImPlotTime& lhs, const ImPlotTime& rhs) +{ return lhs.S == rhs.S && lhs.Us == rhs.Us; } +static inline bool operator<(const ImPlotTime& lhs, const ImPlotTime& rhs) +{ return lhs.S == rhs.S ? lhs.Us < rhs.Us : lhs.S < rhs.S; } +static inline bool operator>(const ImPlotTime& lhs, const ImPlotTime& rhs) +{ return rhs < lhs; } +static inline bool operator<=(const ImPlotTime& lhs, const ImPlotTime& rhs) +{ return lhs < rhs || lhs == rhs; } +static inline bool operator>=(const ImPlotTime& lhs, const ImPlotTime& rhs) +{ return lhs > rhs || lhs == rhs; } + +// Storage for colormap modifiers +struct ImPlotColormapMod { + ImPlotColormapMod(const ImVec4* colormap, int colormap_size) { + Colormap = colormap; + ColormapSize = colormap_size; + } + const ImVec4* Colormap; + int ColormapSize; +}; + +// ImPlotPoint with positive/negative error values +struct ImPlotPointError +{ + double X, Y, Neg, Pos; + ImPlotPointError(double x, double y, double neg, double pos) { + X = x; Y = y; Neg = neg; Pos = pos; + } +}; + +// Interior plot label/annotation +struct ImPlotAnnotation { + ImVec2 Pos; + ImVec2 Offset; + ImU32 ColorBg; + ImU32 ColorFg; + int TextOffset; + bool Clamp; +}; + +// Collection of plot labels +struct 
ImPlotAnnotationCollection { + + ImVector Annotations; + ImGuiTextBuffer TextBuffer; + int Size; + + ImPlotAnnotationCollection() { Reset(); } + + void AppendV(const ImVec2& pos, const ImVec2& off, ImU32 bg, ImU32 fg, bool clamp, const char* fmt, va_list args) IM_FMTLIST(7) { + ImPlotAnnotation an; + an.Pos = pos; an.Offset = off; + an.ColorBg = bg; an.ColorFg = fg; + an.TextOffset = TextBuffer.size(); + an.Clamp = clamp; + Annotations.push_back(an); + TextBuffer.appendfv(fmt, args); + const char nul[] = ""; + TextBuffer.append(nul,nul+1); + Size++; + } + + void Append(const ImVec2& pos, const ImVec2& off, ImU32 bg, ImU32 fg, bool clamp, const char* fmt, ...) IM_FMTARGS(7) { + va_list args; + va_start(args, fmt); + AppendV(pos, off, bg, fg, clamp, fmt, args); + va_end(args); + } + + const char* GetText(int idx) { + return TextBuffer.Buf.Data + Annotations[idx].TextOffset; + } + + void Reset() { + Annotations.shrink(0); + TextBuffer.Buf.shrink(0); + Size = 0; + } +}; + +// Tick mark info +struct ImPlotTick +{ + double PlotPos; + float PixelPos; + ImVec2 LabelSize; + int TextOffset; + bool Major; + bool ShowLabel; + int Level; + + ImPlotTick(double value, bool major, bool show_label) { + PlotPos = value; + Major = major; + ShowLabel = show_label; + TextOffset = -1; + Level = 0; + } +}; + +// Collection of ticks +struct ImPlotTickCollection { + ImVector Ticks; + ImGuiTextBuffer TextBuffer; + float TotalWidth; + float TotalHeight; + float MaxWidth; + float MaxHeight; + int Size; + + ImPlotTickCollection() { Reset(); } + + void Append(const ImPlotTick& tick) { + if (tick.ShowLabel) { + TotalWidth += tick.ShowLabel ? tick.LabelSize.x : 0; + TotalHeight += tick.ShowLabel ? tick.LabelSize.y : 0; + MaxWidth = tick.LabelSize.x > MaxWidth ? tick.LabelSize.x : MaxWidth; + MaxHeight = tick.LabelSize.y > MaxHeight ? 
tick.LabelSize.y : MaxHeight; + } + Ticks.push_back(tick); + Size++; + } + + void Append(double value, bool major, bool show_label, void (*labeler)(ImPlotTick& tick, ImGuiTextBuffer& buf)) { + ImPlotTick tick(value, major, show_label); + if (labeler) + labeler(tick, TextBuffer); + Append(tick); + } + + const char* GetText(int idx) { + return TextBuffer.Buf.Data + Ticks[idx].TextOffset; + } + + void Reset() { + Ticks.shrink(0); + TextBuffer.Buf.shrink(0); + TotalWidth = TotalHeight = MaxWidth = MaxHeight = 0; + Size = 0; + } +}; + +// Axis state information that must persist after EndPlot +struct ImPlotAxis +{ + ImPlotAxisFlags Flags; + ImPlotAxisFlags PreviousFlags; + ImPlotRange Range; + float Pixels; + ImPlotOrientation Orientation; + bool Dragging; + bool ExtHovered; + bool AllHovered; + bool Present; + bool HasRange; + double* LinkedMin; + double* LinkedMax; + ImPlotTime PickerTimeMin, PickerTimeMax; + int PickerLevel; + ImU32 ColorMaj, ColorMin, ColorTxt; + ImGuiCond RangeCond; + ImRect HoverRect; + + ImPlotAxis() { + Flags = PreviousFlags = ImPlotAxisFlags_None; + Range.Min = 0; + Range.Max = 1; + Dragging = false; + ExtHovered = false; + AllHovered = false; + LinkedMin = LinkedMax = NULL; + PickerLevel = 0; + ColorMaj = ColorMin = ColorTxt = 0; + } + + bool SetMin(double _min) { + _min = ImConstrainNan(ImConstrainInf(_min)); + if (ImHasFlag(Flags, ImPlotAxisFlags_LogScale)) + _min = ImConstrainLog(_min); + if (ImHasFlag(Flags, ImPlotAxisFlags_Time)) + _min = ImConstrainTime(_min); + if (_min >= Range.Max) + return false; + Range.Min = _min; + PickerTimeMin = ImPlotTime::FromDouble(Range.Min); + return true; + }; + + bool SetMax(double _max) { + _max = ImConstrainNan(ImConstrainInf(_max)); + if (ImHasFlag(Flags, ImPlotAxisFlags_LogScale)) + _max = ImConstrainLog(_max); + if (ImHasFlag(Flags, ImPlotAxisFlags_Time)) + _max = ImConstrainTime(_max); + if (_max <= Range.Min) + return false; + Range.Max = _max; + PickerTimeMax = ImPlotTime::FromDouble(Range.Max); + 
return true; + }; + + void SetRange(double _min, double _max) { + Range.Min = _min; + Range.Max = _max; + Constrain(); + PickerTimeMin = ImPlotTime::FromDouble(Range.Min); + PickerTimeMax = ImPlotTime::FromDouble(Range.Max); + } + + void SetRange(const ImPlotRange& range) { + SetRange(range.Min, range.Max); + } + + void SetAspect(double unit_per_pix) { + double new_size = unit_per_pix * Pixels; + double delta = (new_size - Range.Size()) * 0.5f; + if (IsLocked()) + return; + else if (IsLockedMin() && !IsLockedMax()) + SetRange(Range.Min, Range.Max + 2*delta); + else if (!IsLockedMin() && IsLockedMax()) + SetRange(Range.Min - 2*delta, Range.Max); + else + SetRange(Range.Min - delta, Range.Max + delta); + } + + double GetAspect() const { return Range.Size() / Pixels; } + + void Constrain() { + Range.Min = ImConstrainNan(ImConstrainInf(Range.Min)); + Range.Max = ImConstrainNan(ImConstrainInf(Range.Max)); + if (ImHasFlag(Flags, ImPlotAxisFlags_LogScale)) { + Range.Min = ImConstrainLog(Range.Min); + Range.Max = ImConstrainLog(Range.Max); + } + if (ImHasFlag(Flags, ImPlotAxisFlags_Time)) { + Range.Min = ImConstrainTime(Range.Min); + Range.Max = ImConstrainTime(Range.Max); + } + if (Range.Max <= Range.Min) + Range.Max = Range.Min + DBL_EPSILON; + } + + inline bool IsLabeled() const { return !ImHasFlag(Flags, ImPlotAxisFlags_NoTickLabels); } + inline bool IsInverted() const { return ImHasFlag(Flags, ImPlotAxisFlags_Invert); } + inline bool IsAlwaysLocked() const { return HasRange && RangeCond == ImGuiCond_Always; } + inline bool IsLockedMin() const { return ImHasFlag(Flags, ImPlotAxisFlags_LockMin) || IsAlwaysLocked(); } + inline bool IsLockedMax() const { return ImHasFlag(Flags, ImPlotAxisFlags_LockMax) || IsAlwaysLocked(); } + inline bool IsLocked() const { return !Present || ((IsLockedMin() && IsLockedMax()) || IsAlwaysLocked()); } + inline bool IsTime() const { return ImHasFlag(Flags, ImPlotAxisFlags_Time); } + inline bool IsLog() const { return ImHasFlag(Flags, 
ImPlotAxisFlags_LogScale); } +}; + +// State information for Plot items +struct ImPlotItem +{ + ImGuiID ID; + ImVec4 Color; + int NameOffset; + bool Show; + bool LegendHovered; + bool SeenThisFrame; + + ImPlotItem() { + ID = 0; + Color = ImPlot::NextColormapColor(); + NameOffset = -1; + Show = true; + SeenThisFrame = false; + LegendHovered = false; + } + + ~ImPlotItem() { ID = 0; } +}; + +// Holds Legend state labels and item references +struct ImPlotLegendData +{ + ImVector Indices; + ImGuiTextBuffer Labels; + void Reset() { Indices.shrink(0); Labels.Buf.shrink(0); } +}; + +// Holds Plot state information that must persist after EndPlot +struct ImPlotPlot +{ + ImGuiID ID; + ImPlotFlags Flags; + ImPlotFlags PreviousFlags; + ImPlotAxis XAxis; + ImPlotAxis YAxis[IMPLOT_Y_AXES]; + ImPlotLegendData LegendData; + ImPool Items; + ImVec2 SelectStart; + ImVec2 QueryStart; + ImRect QueryRect; + bool Selecting; + bool ContextLocked; + bool Querying; + bool Queried; + bool DraggingQuery; + bool LegendHovered; + bool LegendOutside; + bool LegendFlipSideNextFrame; + bool FrameHovered; + bool PlotHovered; + int ColormapIdx; + int CurrentYAxis; + ImPlotLocation MousePosLocation; + ImPlotLocation LegendLocation; + ImPlotOrientation LegendOrientation; + ImRect FrameRect; + ImRect CanvasRect; + ImRect PlotRect; + ImRect AxesRect; + + ImPlotPlot() { + Flags = PreviousFlags = ImPlotFlags_None; + XAxis.Orientation = ImPlotOrientation_Horizontal; + for (int i = 0; i < IMPLOT_Y_AXES; ++i) + YAxis[i].Orientation = ImPlotOrientation_Vertical; + SelectStart = QueryStart = ImVec2(0,0); + Selecting = ContextLocked = Querying = Queried = DraggingQuery = LegendHovered = LegendOutside = LegendFlipSideNextFrame = false; + ColormapIdx = CurrentYAxis = 0; + LegendLocation = ImPlotLocation_North | ImPlotLocation_West; + LegendOrientation = ImPlotOrientation_Vertical; + MousePosLocation = ImPlotLocation_South | ImPlotLocation_East; + } + + int GetLegendCount() const { return 
LegendData.Indices.size(); } + ImPlotItem* GetLegendItem(int i); + const char* GetLegendLabel(int i); + + inline bool IsLocked() const { return XAxis.IsLocked() && YAxis[0].IsLocked() && YAxis[1].IsLocked() && YAxis[2].IsLocked(); } +}; + +// Temporary data storage for upcoming plot +struct ImPlotNextPlotData +{ + ImGuiCond XRangeCond; + ImGuiCond YRangeCond[IMPLOT_Y_AXES]; + ImPlotRange X; + ImPlotRange Y[IMPLOT_Y_AXES]; + bool HasXRange; + bool HasYRange[IMPLOT_Y_AXES]; + bool ShowDefaultTicksX; + bool ShowDefaultTicksY[IMPLOT_Y_AXES]; + bool FitX; + bool FitY[IMPLOT_Y_AXES]; + double* LinkedXmin; + double* LinkedXmax; + double* LinkedYmin[IMPLOT_Y_AXES]; + double* LinkedYmax[IMPLOT_Y_AXES]; + + ImPlotNextPlotData() { Reset(); } + + void Reset() { + HasXRange = false; + ShowDefaultTicksX = true; + FitX = false; + LinkedXmin = LinkedXmax = NULL; + for (int i = 0; i < IMPLOT_Y_AXES; ++i) { + HasYRange[i] = false; + ShowDefaultTicksY[i] = true; + FitY[i] = false; + LinkedYmin[i] = LinkedYmax[i] = NULL; + } + } + +}; + +// Temporary data storage for upcoming item +struct ImPlotNextItemData { + ImVec4 Colors[5]; // ImPlotCol_Line, ImPlotCol_Fill, ImPlotCol_MarkerOutline, ImPlotCol_MarkerFill, ImPlotCol_ErrorBar + float LineWeight; + ImPlotMarker Marker; + float MarkerSize; + float MarkerWeight; + float FillAlpha; + float ErrorBarSize; + float ErrorBarWeight; + float DigitalBitHeight; + float DigitalBitGap; + bool RenderLine; + bool RenderFill; + bool RenderMarkerLine; + bool RenderMarkerFill; + bool HasHidden; + bool Hidden; + ImGuiCond HiddenCond; + ImPlotNextItemData() { Reset(); } + void Reset() { + for (int i = 0; i < 5; ++i) + Colors[i] = IMPLOT_AUTO_COL; + LineWeight = MarkerSize = MarkerWeight = FillAlpha = ErrorBarSize = ErrorBarWeight = DigitalBitHeight = DigitalBitGap = IMPLOT_AUTO; + Marker = IMPLOT_AUTO; + HasHidden = Hidden = false; + } +}; + +// Holds state information that must persist between calls to BeginPlot()/EndPlot() +struct ImPlotContext { + // 
Plot States + ImPool Plots; + ImPlotPlot* CurrentPlot; + ImPlotItem* CurrentItem; + ImPlotItem* PreviousItem; + + // Tick Marks and Labels + ImPlotTickCollection XTicks; + ImPlotTickCollection YTicks[IMPLOT_Y_AXES]; + float YAxisReference[IMPLOT_Y_AXES]; + + // Annotation and User Labels + ImPlotAnnotationCollection Annotations; + + // Transformations and Data Extents + ImPlotScale Scales[IMPLOT_Y_AXES]; + ImRect PixelRange[IMPLOT_Y_AXES]; + double Mx; + double My[IMPLOT_Y_AXES]; + double LogDenX; + double LogDenY[IMPLOT_Y_AXES]; + ImPlotRange ExtentsX; + ImPlotRange ExtentsY[IMPLOT_Y_AXES]; + + // Data Fitting Flags + bool FitThisFrame; + bool FitX; + bool FitY[IMPLOT_Y_AXES]; + + // Axis Rendering Flags + bool RenderX; + bool RenderY[IMPLOT_Y_AXES]; + + // Axis Locking Flags + bool ChildWindowMade; + + // Style and Colormaps + ImPlotStyle Style; + ImVector ColorModifiers; + ImVector StyleModifiers; + const ImVec4* Colormap; + int ColormapSize; + ImVector ColormapModifiers; + + // Time + tm Tm; + + // Misc + int VisibleItemCount; + int DigitalPlotItemCnt; + int DigitalPlotOffset; + ImPlotNextPlotData NextPlotData; + ImPlotNextItemData NextItemData; + ImPlotInputMap InputMap; + ImPlotPoint MousePos[IMPLOT_Y_AXES]; +}; + +//----------------------------------------------------------------------------- +// [SECTION] Internal API +// No guarantee of forward compatibility here! 
+//----------------------------------------------------------------------------- + +namespace ImPlot { + +//----------------------------------------------------------------------------- +// [SECTION] Context Utils +//----------------------------------------------------------------------------- + +// Initializes an ImPlotContext +IMPLOT_API void Initialize(ImPlotContext* ctx); +// Resets an ImPlot context for the next call to BeginPlot +IMPLOT_API void Reset(ImPlotContext* ctx); + +//----------------------------------------------------------------------------- +// [SECTION] Input Utils +//----------------------------------------------------------------------------- + +// Allows changing how keyboard/mouse interaction works. +IMPLOT_API ImPlotInputMap& GetInputMap(); + +//----------------------------------------------------------------------------- +// [SECTION] Plot Utils +//----------------------------------------------------------------------------- + +// Gets a plot from the current ImPlotContext +IMPLOT_API ImPlotPlot* GetPlot(const char* title); +// Gets the current plot from the current ImPlotContext +IMPLOT_API ImPlotPlot* GetCurrentPlot(); +// Busts the cache for every plot in the current context +IMPLOT_API void BustPlotCache(); + +// Shows a plot's context menu. +IMPLOT_API void ShowPlotContextMenu(ImPlotPlot& plot); + +//----------------------------------------------------------------------------- +// [SECTION] Item Utils +//----------------------------------------------------------------------------- + +// Begins a new item. Returns false if the item should not be plotted. Pushes PlotClipRect. +IMPLOT_API bool BeginItem(const char* label_id, ImPlotCol recolor_from = -1); +// Ends an item (call only if BeginItem returns true). Pops PlotClipRect. +IMPLOT_API void EndItem(); + +// Register or get an existing item from the current plot. 
+IMPLOT_API ImPlotItem* RegisterOrGetItem(const char* label_id, bool* just_created = NULL); +// Get a plot item from the current plot. +IMPLOT_API ImPlotItem* GetItem(const char* label_id); +// Gets the current item. +IMPLOT_API ImPlotItem* GetCurrentItem(); +// Busts the cache for every item for every plot in the current context. +IMPLOT_API void BustItemCache(); + +//----------------------------------------------------------------------------- +// [SECTION] Axis Utils +//----------------------------------------------------------------------------- + +// Gets the current y-axis for the current plot +inline int GetCurrentYAxis() { return GImPlot->CurrentPlot->CurrentYAxis; } +// Updates axis ticks, lins, and label colors +IMPLOT_API void UpdateAxisColors(int axis_flag, ImPlotAxis* axis); + +// Updates plot-to-pixel space transformation variables for the current plot. +IMPLOT_API void UpdateTransformCache(); +// Gets the XY scale for the current plot and y-axis +inline ImPlotScale GetCurrentScale() { return GImPlot->Scales[GetCurrentYAxis()]; } + +// Returns true if the user has requested data to be fit. +inline bool FitThisFrame() { return GImPlot->FitThisFrame; } +// Extends the current plot's axes so that it encompasses point p +IMPLOT_API void FitPoint(const ImPlotPoint& p); +// Extends the current plot's axes so that it encompasses a vertical line at x +IMPLOT_API void FitPointX(double x); +// Extends the current plot's axes so that it encompasses a horizontal line at y +IMPLOT_API void FitPointY(double y); + +// Returns true if two ranges overlap +inline bool RangesOverlap(const ImPlotRange& r1, const ImPlotRange& r2) +{ return r1.Min <= r2.Max && r2.Min <= r1.Max; } + +// Updates pointers for linked axes from axis internal range. +IMPLOT_API void PushLinkedAxis(ImPlotAxis& axis); +// Updates axis internal range from points for linked axes. +IMPLOT_API void PullLinkedAxis(ImPlotAxis& axis); + +// Shows an axis's context menu. 
+IMPLOT_API void ShowAxisContextMenu(ImPlotAxis& axis, ImPlotAxis* equal_axis, bool time_allowed = false); + +//----------------------------------------------------------------------------- +// [SECTION] Legend Utils +//----------------------------------------------------------------------------- + +// Gets the position of an inner rect that is located inside of an outer rect according to an ImPlotLocation and padding amount. +IMPLOT_API ImVec2 GetLocationPos(const ImRect& outer_rect, const ImVec2& inner_size, ImPlotLocation location, const ImVec2& pad = ImVec2(0,0)); +// Calculates the bounding box size of a legend +IMPLOT_API ImVec2 CalcLegendSize(ImPlotPlot& plot, const ImVec2& pad, const ImVec2& spacing, ImPlotOrientation orientation); +// Renders legend entries into a bounding box +IMPLOT_API void ShowLegendEntries(ImPlotPlot& plot, const ImRect& legend_bb, bool interactable, const ImVec2& pad, const ImVec2& spacing, ImPlotOrientation orientation, ImDrawList& DrawList); +// Shows an alternate legend for the plot identified by #title_id, outside of the plot frame (can be called before or after of Begin/EndPlot but must occur in the same ImGui window!). +IMPLOT_API void ShowAltLegend(const char* title_id, ImPlotOrientation orientation = ImPlotOrientation_Vertical, const ImVec2 size = ImVec2(0,0), bool interactable = true); + +//----------------------------------------------------------------------------- +// [SECTION] Tick Utils +//----------------------------------------------------------------------------- + +// Label a tick with default formatting. +IMPLOT_API void LabelTickDefault(ImPlotTick& tick, ImGuiTextBuffer& buffer); +// Label a tick with scientific formating. +IMPLOT_API void LabelTickScientific(ImPlotTick& tick, ImGuiTextBuffer& buffer); +// Label a tick with time formatting. 
+IMPLOT_API void LabelTickTime(ImPlotTick& tick, ImGuiTextBuffer& buffer, const ImPlotTime& t, ImPlotDateTimeFmt fmt); + +// Populates a list of ImPlotTicks with normal spaced and formatted ticks +IMPLOT_API void AddTicksDefault(const ImPlotRange& range, int nMajor, int nMinor, ImPlotTickCollection& ticks); +// Populates a list of ImPlotTicks with logarithmic space and formatted ticks +IMPLOT_API void AddTicksLogarithmic(const ImPlotRange& range, int nMajor, ImPlotTickCollection& ticks); +// Populates a list of ImPlotTicks with time formatted ticks. +IMPLOT_API void AddTicksTime(const ImPlotRange& range, float plot_width, ImPlotTickCollection& ticks); +// Populates a list of ImPlotTicks with custom spaced and labeled ticks +IMPLOT_API void AddTicksCustom(const double* values, const char* const labels[], int n, ImPlotTickCollection& ticks); + +// Create a a string label for a an axis value +IMPLOT_API int LabelAxisValue(const ImPlotAxis& axis, const ImPlotTickCollection& ticks, double value, char* buff, int size); + +//----------------------------------------------------------------------------- +// [SECTION] Styling Utils +//----------------------------------------------------------------------------- + +// Get styling data for next item (call between Begin/EndItem) +inline const ImPlotNextItemData& GetItemData() { return GImPlot->NextItemData; } + +// Returns true if a color is set to be automatically determined +inline bool IsColorAuto(const ImVec4& col) { return col.w == -1; } +// Returns true if a style color is set to be automaticaly determined +inline bool IsColorAuto(ImPlotCol idx) { return IsColorAuto(GImPlot->Style.Colors[idx]); } +// Returns the automatically deduced style color +IMPLOT_API ImVec4 GetAutoColor(ImPlotCol idx); + +// Returns the style color whether it is automatic or custom set +inline ImVec4 GetStyleColorVec4(ImPlotCol idx) { return IsColorAuto(idx) ? 
GetAutoColor(idx) : GImPlot->Style.Colors[idx]; } +inline ImU32 GetStyleColorU32(ImPlotCol idx) { return ImGui::ColorConvertFloat4ToU32(GetStyleColorVec4(idx)); } + +// Get built-in colormap data and size +IMPLOT_API const ImVec4* GetColormap(ImPlotColormap colormap, int* size_out); +// Linearly interpolates a color from the current colormap given t between 0 and 1. +IMPLOT_API ImVec4 LerpColormap(const ImVec4* colormap, int size, float t); +// Resamples a colormap. #size_out must be greater than 1. +IMPLOT_API void ResampleColormap(const ImVec4* colormap_in, int size_in, ImVec4* colormap_out, int size_out); + +// Draws vertical text. The position is the bottom left of the text rect. +IMPLOT_API void AddTextVertical(ImDrawList *DrawList, ImVec2 pos, ImU32 col, const char* text_begin, const char* text_end = NULL); +// Calculates the size of vertical text +inline ImVec2 CalcTextSizeVertical(const char *text) { ImVec2 sz = ImGui::CalcTextSize(text); return ImVec2(sz.y, sz.x); } +// Returns white or black text given background color +inline ImU32 CalcTextColor(const ImVec4& bg) { return (bg.x * 0.299 + bg.y * 0.587 + bg.z * 0.114) > 0.5 ? IM_COL32_BLACK : IM_COL32_WHITE; } + +// Clamps a label position so that it fits a rect defined by Min/Max +inline ImVec2 ClampLabelPos(ImVec2 pos, const ImVec2& size, const ImVec2& Min, const ImVec2& Max) { + if (pos.x < Min.x) pos.x = Min.x; + if (pos.y < Min.y) pos.y = Min.y; + if ((pos.x + size.x) > Max.x) pos.x = Max.x - size.x; + if ((pos.y + size.y) > Max.y) pos.y = Max.y - size.y; + return pos; +} + +//----------------------------------------------------------------------------- +// [SECTION] Math and Misc Utils +//----------------------------------------------------------------------------- + +// Rounds x to powers of 2,5 and 10 for generating axis labels (from Graphics Gems 1 Chapter 11.2) +IMPLOT_API double NiceNum(double x, bool round); +// Computes order of magnitude of double. 
+inline int OrderOfMagnitude(double val) { return val == 0 ? 0 : (int)(floor(log10(fabs(val)))); } +// Returns the precision required for a order of magnitude. +inline int OrderToPrecision(int order) { return order > 0 ? 0 : 1 - order; } +// Returns a floating point precision to use given a value +inline int Precision(double val) { return OrderToPrecision(OrderOfMagnitude(val)); } + +// Returns the intersection point of two lines A and B (assumes they are not parallel!) +inline ImVec2 Intersection(const ImVec2& a1, const ImVec2& a2, const ImVec2& b1, const ImVec2& b2) { + float v1 = (a1.x * a2.y - a1.y * a2.x); float v2 = (b1.x * b2.y - b1.y * b2.x); + float v3 = ((a1.x - a2.x) * (b1.y - b2.y) - (a1.y - a2.y) * (b1.x - b2.x)); + return ImVec2((v1 * (b1.x - b2.x) - v2 * (a1.x - a2.x)) / v3, (v1 * (b1.y - b2.y) - v2 * (a1.y - a2.y)) / v3); +} + +// Fills a buffer with n samples linear interpolated from vmin to vmax +template +void FillRange(ImVector& buffer, int n, T vmin, T vmax) { + buffer.resize(n); + T step = (vmax - vmin) / (n - 1); + for (int i = 0; i < n; ++i) { + buffer[i] = vmin + i * step; + } +} + +// Offsets and strides a data buffer +template +inline T OffsetAndStride(const T* data, int idx, int count, int offset, int stride) { + idx = ImPosMod(offset + idx, count); + return *(const T*)(const void*)((const unsigned char*)data + (size_t)idx * stride); +} + +//----------------------------------------------------------------------------- +// Time Utils +//----------------------------------------------------------------------------- + +// Returns true if year is leap year (366 days long) +inline bool IsLeapYear(int year) { + return year % 4 == 0 && (year % 100 != 0 || year % 400 == 0); +} +// Returns the number of days in a month, accounting for Feb. leap years. #month is zero indexed. 
+inline int GetDaysInMonth(int year, int month) { + static const int days[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + return days[month] + (int)(month == 1 && IsLeapYear(year)); +} + +// Make a UNIX timestamp from a tm struct expressed in UTC time (i.e. GMT timezone). +IMPLOT_API ImPlotTime MkGmtTime(struct tm *ptm); +// Make a tm struct expressed in UTC time (i.e. GMT timezone) from a UNIX timestamp. +IMPLOT_API tm* GetGmtTime(const ImPlotTime& t, tm* ptm); + +// Make a UNIX timestamp from a tm struct expressed in local time. +IMPLOT_API ImPlotTime MkLocTime(struct tm *ptm); +// Make a tm struct expressed in local time from a UNIX timestamp. +IMPLOT_API tm* GetLocTime(const ImPlotTime& t, tm* ptm); + +// NB: The following functions only work if there is a current ImPlotContext because the +// internal tm struct is owned by the context! They are aware of ImPlotStyle.UseLocalTime. + +// Make a timestamp from time components. +// year[1970-3000], month[0-11], day[1-31], hour[0-23], min[0-59], sec[0-59], us[0,999999] +IMPLOT_API ImPlotTime MakeTime(int year, int month = 0, int day = 1, int hour = 0, int min = 0, int sec = 0, int us = 0); +// Get year component from timestamp [1970-3000] +IMPLOT_API int GetYear(const ImPlotTime& t); + +// Adds or subtracts time from a timestamp. #count > 0 to add, < 0 to subtract. +IMPLOT_API ImPlotTime AddTime(const ImPlotTime& t, ImPlotTimeUnit unit, int count); +// Rounds a timestamp down to nearest unit. +IMPLOT_API ImPlotTime FloorTime(const ImPlotTime& t, ImPlotTimeUnit unit); +// Rounds a timestamp up to the nearest unit. +IMPLOT_API ImPlotTime CeilTime(const ImPlotTime& t, ImPlotTimeUnit unit); +// Rounds a timestamp up or down to the nearest unit. +IMPLOT_API ImPlotTime RoundTime(const ImPlotTime& t, ImPlotTimeUnit unit); +// Combines the date of one timestamp with the time-of-day of another timestamp. 
+IMPLOT_API ImPlotTime CombineDateTime(const ImPlotTime& date_part, const ImPlotTime& time_part); + +// Formats the time part of timestamp t into a buffer according to #fmt +IMPLOT_API int FormatTime(const ImPlotTime& t, char* buffer, int size, ImPlotTimeFmt fmt, bool use_24_hr_clk); +// Formats the date part of timestamp t into a buffer according to #fmt +IMPLOT_API int FormatDate(const ImPlotTime& t, char* buffer, int size, ImPlotDateFmt fmt, bool use_iso_8601); +// Formats the time and/or date parts of a timestamp t into a buffer according to #fmt +IMPLOT_API int FormatDateTime(const ImPlotTime& t, char* buffer, int size, ImPlotDateTimeFmt fmt); + +// Shows a date picker widget block (year/month/day). +// #level = 0 for day, 1 for month, 2 for year. Modified by user interaction. +// #t will be set when a day is clicked and the function will return true. +// #t1 and #t2 are optional dates to highlight. +IMPLOT_API bool ShowDatePicker(const char* id, int* level, ImPlotTime* t, const ImPlotTime* t1 = NULL, const ImPlotTime* t2 = NULL); +// Shows a time picker widget block (hour/min/sec). +// #t will be set when a new hour, minute, or sec is selected or am/pm is toggled, and the function will return true. +IMPLOT_API bool ShowTimePicker(const char* id, ImPlotTime* t); + +//----------------------------------------------------------------------------- +// [SECTION] Internal / Experimental Plotters +// No guarantee of forward compatibility here! +//----------------------------------------------------------------------------- + +// Plots axis-aligned, filled rectangles. Every two consecutive points defines opposite corners of a single rectangle. 
+IMPLOT_API void PlotRects(const char* label_id, const float* xs, const float* ys, int count, int offset = 0, int stride = sizeof(float)); +IMPLOT_API void PlotRects(const char* label_id, const double* xs, const double* ys, int count, int offset = 0, int stride = sizeof(double)); +IMPLOT_API void PlotRects(const char* label_id, ImPlotPoint (*getter)(void* data, int idx), void* data, int count, int offset = 0); + +} // namespace ImPlot diff --git a/cpp-projects/3d-engine/imgui/extra/implot/implot_items.cpp b/cpp-projects/3d-engine/imgui/extra/implot/implot_items.cpp new file mode 100644 index 0000000..a18296e --- /dev/null +++ b/cpp-projects/3d-engine/imgui/extra/implot/implot_items.cpp @@ -0,0 +1,2032 @@ +// MIT License + +// Copyright (c) 2020 Evan Pezent + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +// ImPlot v0.9 WIP + +#include "implot.h" +#include "implot_internal.h" + +#ifdef _MSC_VER +#define sprintf sprintf_s +#endif + +#define SQRT_1_2 0.70710678118f +#define SQRT_3_2 0.86602540378f + +#define IMPLOT_NORMALIZE2F_OVER_ZERO(VX, VY) \ + { \ + float d2 = VX * VX + VY * VY; \ + if (d2 > 0.0f) { \ + float inv_len = 1.0f / ImSqrt(d2); \ + VX *= inv_len; \ + VY *= inv_len; \ + } \ + } + +namespace ImPlot { + +//----------------------------------------------------------------------------- +// Item Utils +//----------------------------------------------------------------------------- + +ImPlotItem* RegisterOrGetItem(const char* label_id, bool* just_created) { + ImPlotContext& gp = *GImPlot; + ImGuiID id = ImGui::GetID(label_id); + if (just_created != NULL) + *just_created = gp.CurrentPlot->Items.GetByKey(id) == NULL; + ImPlotItem* item = gp.CurrentPlot->Items.GetOrAddByKey(id); + if (item->SeenThisFrame) + return item; + item->SeenThisFrame = true; + int idx = gp.CurrentPlot->Items.GetIndex(item); + item->ID = id; + if (ImGui::FindRenderedTextEnd(label_id, NULL) != label_id) { + gp.CurrentPlot->LegendData.Indices.push_back(idx); + item->NameOffset = gp.CurrentPlot->LegendData.Labels.size(); + gp.CurrentPlot->LegendData.Labels.append(label_id, label_id + strlen(label_id) + 1); + } + else { + item->Show = true; + } + if (item->Show) + gp.VisibleItemCount++; + return item; +} + +ImPlotItem* GetItem(const char* label_id) { + ImPlotContext& gp = *GImPlot; + ImGuiID id = ImGui::GetID(label_id); + return gp.CurrentPlot->Items.GetByKey(id); +} + +ImPlotItem* GetCurrentItem() { + ImPlotContext& gp = *GImPlot; + return gp.CurrentItem; +} + +void SetNextLineStyle(const ImVec4& col, float weight) { + ImPlotContext& gp = *GImPlot; + gp.NextItemData.Colors[ImPlotCol_Line] = col; + gp.NextItemData.LineWeight = weight; +} + +void SetNextFillStyle(const ImVec4& col, float alpha) { + ImPlotContext& gp = *GImPlot; + gp.NextItemData.Colors[ImPlotCol_Fill] = col; + 
gp.NextItemData.FillAlpha = alpha; +} + +void SetNextMarkerStyle(ImPlotMarker marker, float size, const ImVec4& fill, float weight, const ImVec4& outline) { + ImPlotContext& gp = *GImPlot; + gp.NextItemData.Marker = marker; + gp.NextItemData.Colors[ImPlotCol_MarkerFill] = fill; + gp.NextItemData.MarkerSize = size; + gp.NextItemData.Colors[ImPlotCol_MarkerOutline] = outline; + gp.NextItemData.MarkerWeight = weight; +} + +void SetNextErrorBarStyle(const ImVec4& col, float size, float weight) { + ImPlotContext& gp = *GImPlot; + gp.NextItemData.Colors[ImPlotCol_ErrorBar] = col; + gp.NextItemData.ErrorBarSize = size; + gp.NextItemData.ErrorBarWeight = weight; +} + +ImVec4 GetLastItemColor() { + ImPlotContext& gp = *GImPlot; + if (gp.PreviousItem) + return gp.PreviousItem->Color; + return ImVec4(); +} + +void HideNextItem(bool hidden, ImGuiCond cond) { + ImPlotContext& gp = *GImPlot; + gp.NextItemData.HasHidden = true; + gp.NextItemData.Hidden = hidden; + gp.NextItemData.HiddenCond = cond; +} + +void BustItemCache() { + ImPlotContext& gp = *GImPlot; + for (int p = 0; p < gp.Plots.GetSize(); ++p) { + ImPlotPlot& plot = *gp.Plots.GetByIndex(p); + plot.ColormapIdx = 0; + plot.Items.Clear(); + plot.LegendData.Reset(); + } +} + +//----------------------------------------------------------------------------- +// Begin/EndItem +//----------------------------------------------------------------------------- + +// Begins a new item. Returns false if the item should not be plotted. 
+bool BeginItem(const char* label_id, ImPlotCol recolor_from) { + ImPlotContext& gp = *GImPlot; + IM_ASSERT_USER_ERROR(gp.CurrentPlot != NULL, "PlotX() needs to be called between BeginPlot() and EndPlot()!"); + bool just_created; + ImPlotItem* item = RegisterOrGetItem(label_id, &just_created); + // set current item + gp.CurrentItem = item; + ImPlotNextItemData& s = gp.NextItemData; + // override item color + if (recolor_from != -1) { + if (!IsColorAuto(s.Colors[recolor_from])) + item->Color = s.Colors[recolor_from]; + else if (!IsColorAuto(gp.Style.Colors[recolor_from])) + item->Color = gp.Style.Colors[recolor_from]; + } + // hide/show item + if (gp.NextItemData.HasHidden) { + if (just_created || gp.NextItemData.HiddenCond == ImGuiCond_Always) + item->Show = !gp.NextItemData.Hidden; + } + if (!item->Show) { + // reset next item data + gp.NextItemData.Reset(); + gp.PreviousItem = item; + gp.CurrentItem = NULL; + return false; + } + else { + // stage next item colors + s.Colors[ImPlotCol_Line] = IsColorAuto(s.Colors[ImPlotCol_Line]) ? ( IsColorAuto(ImPlotCol_Line) ? item->Color : gp.Style.Colors[ImPlotCol_Line] ) : s.Colors[ImPlotCol_Line]; + s.Colors[ImPlotCol_Fill] = IsColorAuto(s.Colors[ImPlotCol_Fill]) ? ( IsColorAuto(ImPlotCol_Fill) ? item->Color : gp.Style.Colors[ImPlotCol_Fill] ) : s.Colors[ImPlotCol_Fill]; + s.Colors[ImPlotCol_MarkerOutline] = IsColorAuto(s.Colors[ImPlotCol_MarkerOutline]) ? ( IsColorAuto(ImPlotCol_MarkerOutline) ? s.Colors[ImPlotCol_Line] : gp.Style.Colors[ImPlotCol_MarkerOutline] ) : s.Colors[ImPlotCol_MarkerOutline]; + s.Colors[ImPlotCol_MarkerFill] = IsColorAuto(s.Colors[ImPlotCol_MarkerFill]) ? ( IsColorAuto(ImPlotCol_MarkerFill) ? s.Colors[ImPlotCol_Line] : gp.Style.Colors[ImPlotCol_MarkerFill] ) : s.Colors[ImPlotCol_MarkerFill]; + s.Colors[ImPlotCol_ErrorBar] = IsColorAuto(s.Colors[ImPlotCol_ErrorBar]) ? 
( GetStyleColorVec4(ImPlotCol_ErrorBar) ) : s.Colors[ImPlotCol_ErrorBar]; + // stage next item style vars + s.LineWeight = s.LineWeight < 0 ? gp.Style.LineWeight : s.LineWeight; + s.Marker = s.Marker < 0 ? gp.Style.Marker : s.Marker; + s.MarkerSize = s.MarkerSize < 0 ? gp.Style.MarkerSize : s.MarkerSize; + s.MarkerWeight = s.MarkerWeight < 0 ? gp.Style.MarkerWeight : s.MarkerWeight; + s.FillAlpha = s.FillAlpha < 0 ? gp.Style.FillAlpha : s.FillAlpha; + s.ErrorBarSize = s.ErrorBarSize < 0 ? gp.Style.ErrorBarSize : s.ErrorBarSize; + s.ErrorBarWeight = s.ErrorBarWeight < 0 ? gp.Style.ErrorBarWeight : s.ErrorBarWeight; + s.DigitalBitHeight = s.DigitalBitHeight < 0 ? gp.Style.DigitalBitHeight : s.DigitalBitHeight; + s.DigitalBitGap = s.DigitalBitGap < 0 ? gp.Style.DigitalBitGap : s.DigitalBitGap; + // apply alpha modifier(s) + s.Colors[ImPlotCol_Fill].w *= s.FillAlpha; + // s.Colors[ImPlotCol_MarkerFill].w *= s.FillAlpha; // TODO: this should be separate, if it at all + // apply highlight mods + if (item->LegendHovered && !ImHasFlag(gp.CurrentPlot->Flags, ImPlotFlags_NoHighlight)) { + s.LineWeight *= 2; + s.MarkerWeight *= 2; + // TODO: highlight fills? 
+ } + // set render flags + s.RenderLine = s.Colors[ImPlotCol_Line].w > 0 && s.LineWeight > 0; + s.RenderFill = s.Colors[ImPlotCol_Fill].w > 0; + s.RenderMarkerLine = s.Colors[ImPlotCol_MarkerOutline].w > 0 && s.MarkerWeight > 0; + s.RenderMarkerFill = s.Colors[ImPlotCol_MarkerFill].w > 0; + // push rendering clip rect + PushPlotClipRect(); + return true; + } +} + +// Ends an item (call only if BeginItem returns true) +void EndItem() { + ImPlotContext& gp = *GImPlot; + // pop rendering clip rect + PopPlotClipRect(); + // reset next item data + gp.NextItemData.Reset(); + // set current item + gp.PreviousItem = gp.CurrentItem; + gp.CurrentItem = NULL; +} + +//----------------------------------------------------------------------------- +// GETTERS +//----------------------------------------------------------------------------- + +// Getters can be thought of as iterators that convert user data (e.g. raw arrays) +// to ImPlotPoints + +// Interprets an array of Y points as ImPlotPoints where the X value is the index +template +struct GetterYs { + GetterYs(const T* ys, int count, double xscale, double x0, int offset, int stride) : + Ys(ys), + Count(count), + XScale(xscale), + X0(x0), + Offset(count ? ImPosMod(offset, count) : 0), + Stride(stride) + { } + inline ImPlotPoint operator()(int idx) const { + return ImPlotPoint(X0 + XScale * idx, (double)OffsetAndStride(Ys, idx, Count, Offset, Stride)); + } + const T* const Ys; + const int Count; + const double XScale; + const double X0; + const int Offset; + const int Stride; +}; + +// Interprets separate arrays for X and Y points as ImPlotPoints +template +struct GetterXsYs { + GetterXsYs(const T* xs, const T* ys, int count, int offset, int stride) : + Xs(xs), + Ys(ys), + Count(count), + Offset(count ? 
ImPosMod(offset, count) : 0), + Stride(stride) + { } + inline ImPlotPoint operator()(int idx) const { + return ImPlotPoint((double)OffsetAndStride(Xs, idx, Count, Offset, Stride), (double)OffsetAndStride(Ys, idx, Count, Offset, Stride)); + } + const T* const Xs; + const T* const Ys; + const int Count; + const int Offset; + const int Stride; +}; + +// Always returns a constant Y reference value where the X value is the index +struct GetterYRef { + GetterYRef(double y_ref, int count, double xscale, double x0) : + YRef(y_ref), + Count(count), + XScale(xscale), + X0(x0) + { } + inline ImPlotPoint operator()(int idx) const { + return ImPlotPoint(X0 + XScale*idx, YRef); + } + const double YRef; + const int Count; + const double XScale; + const double X0; +}; + +// Interprets an array of X points as ImPlotPoints where the Y value is a constant reference value +template +struct GetterXsYRef { + GetterXsYRef(const T* xs, double y_ref, int count, int offset, int stride) : + Xs(xs), + YRef(y_ref), + Count(count), + Offset(count ? ImPosMod(offset, count) : 0), + Stride(stride) + { } + inline ImPlotPoint operator()(int idx) const { + return ImPlotPoint((double)OffsetAndStride(Xs, idx, Count, Offset, Stride), YRef); + } + const T* const Xs; + const double YRef; + const int Count; + const int Offset; + const int Stride; +}; + +// Interprets an array of Y points as ImPlotPoints where the X value is a constant reference value +template +struct GetterXRefYs { + GetterXRefYs(double x_ref, const T* ys, int count, int offset, int stride) : + XRef(x_ref), + Ys(ys), + Count(count), + Offset(count ? 
ImPosMod(offset, count) : 0), + Stride(stride) + { } + inline ImPlotPoint operator()(int idx) const { + return ImPlotPoint(XRef, (double)OffsetAndStride(Ys, idx, Count, Offset, Stride)); + } + const double XRef; + const T* const Ys; + const int Count; + const int Offset; + const int Stride; +}; + +/// Interprets a user's function pointer as ImPlotPoints +struct GetterFuncPtr { + GetterFuncPtr(ImPlotPoint (*getter)(void* data, int idx), void* data, int count, int offset) : + Getter(getter), + Data(data), + Count(count), + Offset(count ? ImPosMod(offset, count) : 0) + { } + inline ImPlotPoint operator()(int idx) const { + idx = ImPosMod(Offset + idx, Count); + return Getter(Data, idx); + } + ImPlotPoint (* const Getter)(void* data, int idx); + void* const Data; + const int Count; + const int Offset; +}; + +template +struct GetterBarV { + const T* Ys; double XShift; int Count; int Offset; int Stride; + GetterBarV(const T* ys, double xshift, int count, int offset, int stride) { Ys = ys; XShift = xshift; Count = count; Offset = offset; Stride = stride; } + inline ImPlotPoint operator()(int idx) const { return ImPlotPoint((double)idx + (double)XShift, (double)OffsetAndStride(Ys, idx, Count, Offset, Stride)); } +}; + +template +struct GetterBarH { + const T* Xs; double YShift; int Count; int Offset; int Stride; + GetterBarH(const T* xs, double yshift, int count, int offset, int stride) { Xs = xs; YShift = yshift; Count = count; Offset = offset; Stride = stride; } + inline ImPlotPoint operator()(int idx) const { return ImPlotPoint((double)OffsetAndStride(Xs, idx, Count, Offset, Stride), (double)idx + (double)YShift); } +}; + +template +struct GetterError { + GetterError(const T* xs, const T* ys, const T* neg, const T* pos, int count, int offset, int stride) : + Xs(xs), + Ys(ys), + Neg(neg), + Pos(pos), + Count(count), + Offset(count ? 
ImPosMod(offset, count) : 0), + Stride(stride) + { } + inline ImPlotPointError operator()(int idx) const { + return ImPlotPointError((double)OffsetAndStride(Xs, idx, Count, Offset, Stride), + (double)OffsetAndStride(Ys, idx, Count, Offset, Stride), + (double)OffsetAndStride(Neg, idx, Count, Offset, Stride), + (double)OffsetAndStride(Pos, idx, Count, Offset, Stride)); + } + const T* const Xs; + const T* const Ys; + const T* const Neg; + const T* const Pos; + const int Count; + const int Offset; + const int Stride; +}; + +//----------------------------------------------------------------------------- +// TRANSFORMERS +//----------------------------------------------------------------------------- + +// Transforms convert points in plot space (i.e. ImPlotPoint) to pixel space (i.e. ImVec2) + +// Transforms points for linear x and linear y space +struct TransformerLinLin { + TransformerLinLin() : YAxis(GetCurrentYAxis()) {} + // inline ImVec2 operator()(const ImPlotPoint& plt) const { return (*this)(plt.x, plt.y); } + inline ImVec2 operator()(const ImPlotPoint& plt) const { + ImPlotContext& gp = *GImPlot; + return ImVec2( (float)(gp.PixelRange[YAxis].Min.x + gp.Mx * (plt.x - gp.CurrentPlot->XAxis.Range.Min)), + (float)(gp.PixelRange[YAxis].Min.y + gp.My[YAxis] * (plt.y - gp.CurrentPlot->YAxis[YAxis].Range.Min)) ); + } + const int YAxis; +}; + +// Transforms points for log x and linear y space +struct TransformerLogLin { + TransformerLogLin() : YAxis(GetCurrentYAxis()) {} + inline ImVec2 operator()(const ImPlotPoint& plt) const { + ImPlotContext& gp = *GImPlot; + double t = ImLog10(plt.x / gp.CurrentPlot->XAxis.Range.Min) / gp.LogDenX; + double x = ImLerp(gp.CurrentPlot->XAxis.Range.Min, gp.CurrentPlot->XAxis.Range.Max, (float)t); + return ImVec2( (float)(gp.PixelRange[YAxis].Min.x + gp.Mx * (x - gp.CurrentPlot->XAxis.Range.Min)), + (float)(gp.PixelRange[YAxis].Min.y + gp.My[YAxis] * (plt.y - gp.CurrentPlot->YAxis[YAxis].Range.Min)) ); + } + const int YAxis; +}; + +// 
Transforms points for linear x and log y space +struct TransformerLinLog { + TransformerLinLog() : YAxis(GetCurrentYAxis()) {} + inline ImVec2 operator()(const ImPlotPoint& plt) const { + ImPlotContext& gp = *GImPlot; + double t = ImLog10(plt.y / gp.CurrentPlot->YAxis[YAxis].Range.Min) / gp.LogDenY[YAxis]; + double y = ImLerp(gp.CurrentPlot->YAxis[YAxis].Range.Min, gp.CurrentPlot->YAxis[YAxis].Range.Max, (float)t); + return ImVec2( (float)(gp.PixelRange[YAxis].Min.x + gp.Mx * (plt.x - gp.CurrentPlot->XAxis.Range.Min)), + (float)(gp.PixelRange[YAxis].Min.y + gp.My[YAxis] * (y - gp.CurrentPlot->YAxis[YAxis].Range.Min)) ); + } + const int YAxis; +}; + +// Transforms points for log x and log y space +struct TransformerLogLog { + TransformerLogLog() : YAxis(GetCurrentYAxis()) {} + inline ImVec2 operator()(const ImPlotPoint& plt) const { + ImPlotContext& gp = *GImPlot; + double t = ImLog10(plt.x / gp.CurrentPlot->XAxis.Range.Min) / gp.LogDenX; + double x = ImLerp(gp.CurrentPlot->XAxis.Range.Min, gp.CurrentPlot->XAxis.Range.Max, (float)t); + t = ImLog10(plt.y / gp.CurrentPlot->YAxis[YAxis].Range.Min) / gp.LogDenY[YAxis]; + double y = ImLerp(gp.CurrentPlot->YAxis[YAxis].Range.Min, gp.CurrentPlot->YAxis[YAxis].Range.Max, (float)t); + return ImVec2( (float)(gp.PixelRange[YAxis].Min.x + gp.Mx * (x - gp.CurrentPlot->XAxis.Range.Min)), + (float)(gp.PixelRange[YAxis].Min.y + gp.My[YAxis] * (y - gp.CurrentPlot->YAxis[YAxis].Range.Min)) ); + } + const int YAxis; +}; + +//----------------------------------------------------------------------------- +// PRIMITIVE RENDERERS +//----------------------------------------------------------------------------- + +inline void AddLine(const ImVec2& P1, const ImVec2& P2, float weight, ImU32 col, ImDrawList& DrawList, ImVec2 uv) { + float dx = P2.x - P1.x; + float dy = P2.y - P1.y; + IMPLOT_NORMALIZE2F_OVER_ZERO(dx, dy); + dx *= (weight * 0.5f); + dy *= (weight * 0.5f); + DrawList._VtxWritePtr[0].pos.x = P1.x + dy; + 
DrawList._VtxWritePtr[0].pos.y = P1.y - dx; + DrawList._VtxWritePtr[0].uv = uv; + DrawList._VtxWritePtr[0].col = col; + DrawList._VtxWritePtr[1].pos.x = P2.x + dy; + DrawList._VtxWritePtr[1].pos.y = P2.y - dx; + DrawList._VtxWritePtr[1].uv = uv; + DrawList._VtxWritePtr[1].col = col; + DrawList._VtxWritePtr[2].pos.x = P2.x - dy; + DrawList._VtxWritePtr[2].pos.y = P2.y + dx; + DrawList._VtxWritePtr[2].uv = uv; + DrawList._VtxWritePtr[2].col = col; + DrawList._VtxWritePtr[3].pos.x = P1.x - dy; + DrawList._VtxWritePtr[3].pos.y = P1.y + dx; + DrawList._VtxWritePtr[3].uv = uv; + DrawList._VtxWritePtr[3].col = col; + DrawList._VtxWritePtr += 4; + DrawList._IdxWritePtr[0] = (ImDrawIdx)(DrawList._VtxCurrentIdx); + DrawList._IdxWritePtr[1] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 1); + DrawList._IdxWritePtr[2] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 2); + DrawList._IdxWritePtr[3] = (ImDrawIdx)(DrawList._VtxCurrentIdx); + DrawList._IdxWritePtr[4] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 2); + DrawList._IdxWritePtr[5] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 3); + DrawList._IdxWritePtr += 6; + DrawList._VtxCurrentIdx += 4; +} + +inline void AddRectFilled(const ImVec2& Pmin, const ImVec2& Pmax, ImU32 col, ImDrawList& DrawList, ImVec2 uv) { + DrawList._VtxWritePtr[0].pos = Pmin; + DrawList._VtxWritePtr[0].uv = uv; + DrawList._VtxWritePtr[0].col = col; + DrawList._VtxWritePtr[1].pos = Pmax; + DrawList._VtxWritePtr[1].uv = uv; + DrawList._VtxWritePtr[1].col = col; + DrawList._VtxWritePtr[2].pos.x = Pmin.x; + DrawList._VtxWritePtr[2].pos.y = Pmax.y; + DrawList._VtxWritePtr[2].uv = uv; + DrawList._VtxWritePtr[2].col = col; + DrawList._VtxWritePtr[3].pos.x = Pmax.x; + DrawList._VtxWritePtr[3].pos.y = Pmin.y; + DrawList._VtxWritePtr[3].uv = uv; + DrawList._VtxWritePtr[3].col = col; + DrawList._VtxWritePtr += 4; + DrawList._IdxWritePtr[0] = (ImDrawIdx)(DrawList._VtxCurrentIdx); + DrawList._IdxWritePtr[1] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 1); + DrawList._IdxWritePtr[2] = 
(ImDrawIdx)(DrawList._VtxCurrentIdx + 2); + DrawList._IdxWritePtr[3] = (ImDrawIdx)(DrawList._VtxCurrentIdx); + DrawList._IdxWritePtr[4] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 1); + DrawList._IdxWritePtr[5] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 3); + DrawList._IdxWritePtr += 6; + DrawList._VtxCurrentIdx += 4; +} + +template +struct LineStripRenderer { + inline LineStripRenderer(const TGetter& getter, const TTransformer& transformer, ImU32 col, float weight) : + Getter(getter), + Transformer(transformer), + Prims(Getter.Count - 1), + Col(col), + Weight(weight) + { + P1 = Transformer(Getter(0)); + } + inline bool operator()(ImDrawList& DrawList, const ImRect& cull_rect, const ImVec2& uv, int prim) const { + ImVec2 P2 = Transformer(Getter(prim + 1)); + if (!cull_rect.Overlaps(ImRect(ImMin(P1, P2), ImMax(P1, P2)))) { + P1 = P2; + return false; + } + AddLine(P1,P2,Weight,Col,DrawList,uv); + P1 = P2; + return true; + } + const TGetter& Getter; + const TTransformer& Transformer; + const int Prims; + const ImU32 Col; + const float Weight; + mutable ImVec2 P1; + static const int IdxConsumed = 6; + static const int VtxConsumed = 4; +}; + +template +struct StairsRenderer { + inline StairsRenderer(const TGetter& getter, const TTransformer& transformer, ImU32 col, float weight) : + Getter(getter), + Transformer(transformer), + Prims(Getter.Count - 1), + Col(col), + HalfWeight(weight * 0.5f) + { + P1 = Transformer(Getter(0)); + } + inline bool operator()(ImDrawList& DrawList, const ImRect& cull_rect, const ImVec2& uv, int prim) const { + ImVec2 P2 = Transformer(Getter(prim + 1)); + if (!cull_rect.Overlaps(ImRect(ImMin(P1, P2), ImMax(P1, P2)))) { + P1 = P2; + return false; + } + AddRectFilled(ImVec2(P1.x, P1.y + HalfWeight), ImVec2(P2.x, P1.y - HalfWeight), Col, DrawList, uv); + AddRectFilled(ImVec2(P2.x - HalfWeight, P2.y), ImVec2(P2.x + HalfWeight, P1.y), Col, DrawList, uv); + + // AddLine(P1, P12, Weight, Col, DrawList, uv); + // AddLine(P12, P2, Weight, Col, DrawList, uv); 
+ P1 = P2; + return true; + } + const TGetter& Getter; + const TTransformer& Transformer; + const int Prims; + const ImU32 Col; + const float HalfWeight; + mutable ImVec2 P1; + static const int IdxConsumed = 12; + static const int VtxConsumed = 8; +}; + +template +struct LineSegmentsRenderer { + inline LineSegmentsRenderer(const TGetter1& getter1, const TGetter2& getter2, const TTransformer& transformer, ImU32 col, float weight) : + Getter1(getter1), + Getter2(getter2), + Transformer(transformer), + Prims(ImMin(Getter1.Count, Getter2.Count)), + Col(col), + Weight(weight) + {} + inline bool operator()(ImDrawList& DrawList, const ImRect& cull_rect, const ImVec2& uv, int prim) const { + ImVec2 P1 = Transformer(Getter1(prim)); + ImVec2 P2 = Transformer(Getter2(prim)); + if (!cull_rect.Overlaps(ImRect(ImMin(P1, P2), ImMax(P1, P2)))) + return false; + AddLine(P1,P2,Weight,Col,DrawList,uv); + return true; + } + const TGetter1& Getter1; + const TGetter2& Getter2; + const TTransformer& Transformer; + const int Prims; + const ImU32 Col; + const float Weight; + static const int IdxConsumed = 6; + static const int VtxConsumed = 4; +}; + +template +struct ShadedRenderer { + ShadedRenderer(const TGetter1& getter1, const TGetter2& getter2, const TTransformer& transformer, ImU32 col) : + Getter1(getter1), + Getter2(getter2), + Transformer(transformer), + Prims(ImMin(Getter1.Count, Getter2.Count) - 1), + Col(col) + { + P11 = Transformer(Getter1(0)); + P12 = Transformer(Getter2(0)); + } + + inline bool operator()(ImDrawList& DrawList, const ImRect& /*cull_rect*/, const ImVec2& uv, int prim) const { + // TODO: Culling + ImVec2 P21 = Transformer(Getter1(prim+1)); + ImVec2 P22 = Transformer(Getter2(prim+1)); + const int intersect = (P11.y > P12.y && P22.y > P21.y) || (P12.y > P11.y && P21.y > P22.y); + ImVec2 intersection = Intersection(P11,P21,P12,P22); + DrawList._VtxWritePtr[0].pos = P11; + DrawList._VtxWritePtr[0].uv = uv; + DrawList._VtxWritePtr[0].col = Col; + 
DrawList._VtxWritePtr[1].pos = P21; + DrawList._VtxWritePtr[1].uv = uv; + DrawList._VtxWritePtr[1].col = Col; + DrawList._VtxWritePtr[2].pos = intersection; + DrawList._VtxWritePtr[2].uv = uv; + DrawList._VtxWritePtr[2].col = Col; + DrawList._VtxWritePtr[3].pos = P12; + DrawList._VtxWritePtr[3].uv = uv; + DrawList._VtxWritePtr[3].col = Col; + DrawList._VtxWritePtr[4].pos = P22; + DrawList._VtxWritePtr[4].uv = uv; + DrawList._VtxWritePtr[4].col = Col; + DrawList._VtxWritePtr += 5; + DrawList._IdxWritePtr[0] = (ImDrawIdx)(DrawList._VtxCurrentIdx); + DrawList._IdxWritePtr[1] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 1 + intersect); + DrawList._IdxWritePtr[2] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 3); + DrawList._IdxWritePtr[3] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 1); + DrawList._IdxWritePtr[4] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 3 - intersect); + DrawList._IdxWritePtr[5] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 4); + DrawList._IdxWritePtr += 6; + DrawList._VtxCurrentIdx += 5; + P11 = P21; + P12 = P22; + return true; + } + const TGetter1& Getter1; + const TGetter2& Getter2; + const TTransformer& Transformer; + const int Prims; + const ImU32 Col; + mutable ImVec2 P11; + mutable ImVec2 P12; + static const int IdxConsumed = 6; + static const int VtxConsumed = 5; +}; + +template +struct RectRenderer { + inline RectRenderer(const TGetter& getter, const TTransformer& transformer, ImU32 col) : + Getter(getter), + Transformer(transformer), + Prims(Getter.Count / 2), + Col(col) + {} + inline bool operator()(ImDrawList& DrawList, const ImRect& /*cull_rect*/, const ImVec2& uv, int prim) const { + // TODO: Culling + ImVec2 P1 = Transformer(Getter(2*prim)); + ImVec2 P2 = Transformer(Getter(2*prim+1)); + DrawList._VtxWritePtr[0].pos = P1; + DrawList._VtxWritePtr[0].uv = uv; + DrawList._VtxWritePtr[0].col = Col; + DrawList._VtxWritePtr[1].pos.x = P1.x; + DrawList._VtxWritePtr[1].pos.y = P2.y; + DrawList._VtxWritePtr[1].uv = uv; + DrawList._VtxWritePtr[1].col = Col; + 
DrawList._VtxWritePtr[2].pos = P2; + DrawList._VtxWritePtr[2].uv = uv; + DrawList._VtxWritePtr[2].col = Col; + DrawList._VtxWritePtr[3].pos.x = P2.x; + DrawList._VtxWritePtr[3].pos.y = P1.y; + DrawList._VtxWritePtr[3].uv = uv; + DrawList._VtxWritePtr[3].col = Col; + DrawList._VtxWritePtr += 4; + DrawList._IdxWritePtr[0] = (ImDrawIdx)(DrawList._VtxCurrentIdx); + DrawList._IdxWritePtr[1] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 1); + DrawList._IdxWritePtr[2] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 3); + DrawList._IdxWritePtr[3] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 1); + DrawList._IdxWritePtr[4] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 2); + DrawList._IdxWritePtr[5] = (ImDrawIdx)(DrawList._VtxCurrentIdx + 3); + DrawList._IdxWritePtr += 6; + DrawList._VtxCurrentIdx += 4; + return true; + } + const TGetter& Getter; + const TTransformer& Transformer; + const int Prims; + const ImU32 Col; + static const int IdxConsumed = 6; + static const int VtxConsumed = 4; +}; + +// Stupid way of calculating maximum index size of ImDrawIdx without integer overflow issues +template +struct MaxIdx { static const unsigned int Value; }; +template <> const unsigned int MaxIdx::Value = 65535; +template <> const unsigned int MaxIdx::Value = 4294967295; + +/// Renders primitive shapes in bulk as efficiently as possible. 
+template +inline void RenderPrimitives(const Renderer& renderer, ImDrawList& DrawList, const ImRect& cull_rect) { + unsigned int prims = renderer.Prims; + unsigned int prims_culled = 0; + unsigned int idx = 0; + const ImVec2 uv = DrawList._Data->TexUvWhitePixel; + while (prims) { + // find how many can be reserved up to end of current draw command's limit + unsigned int cnt = ImMin(prims, (MaxIdx::Value - DrawList._VtxCurrentIdx) / Renderer::VtxConsumed); + // make sure at least this many elements can be rendered to avoid situations where at the end of buffer this slow path is not taken all the time + if (cnt >= ImMin(64u, prims)) { + if (prims_culled >= cnt) + prims_culled -= cnt; // reuse previous reservation + else { + DrawList.PrimReserve((cnt - prims_culled) * Renderer::IdxConsumed, (cnt - prims_culled) * Renderer::VtxConsumed); // add more elements to previous reservation + prims_culled = 0; + } + } + else + { + if (prims_culled > 0) { + DrawList.PrimUnreserve(prims_culled * Renderer::IdxConsumed, prims_culled * Renderer::VtxConsumed); + prims_culled = 0; + } + cnt = ImMin(prims, (MaxIdx::Value - 0/*DrawList._VtxCurrentIdx*/) / Renderer::VtxConsumed); + DrawList.PrimReserve(cnt * Renderer::IdxConsumed, cnt * Renderer::VtxConsumed); // reserve new draw command + } + prims -= cnt; + for (unsigned int ie = idx + cnt; idx != ie; ++idx) { + if (!renderer(DrawList, cull_rect, uv, idx)) + prims_culled++; + } + } + if (prims_culled > 0) + DrawList.PrimUnreserve(prims_culled * Renderer::IdxConsumed, prims_culled * Renderer::VtxConsumed); +} + +template +inline void RenderLineStrip(const Getter& getter, const Transformer& transformer, ImDrawList& DrawList, float line_weight, ImU32 col) { + ImPlotContext& gp = *GImPlot; + if (ImHasFlag(gp.CurrentPlot->Flags, ImPlotFlags_AntiAliased) || gp.Style.AntiAliasedLines) { + ImVec2 p1 = transformer(getter(0)); + for (int i = 1; i < getter.Count; ++i) { + ImVec2 p2 = transformer(getter(i)); + if 
(gp.CurrentPlot->PlotRect.Overlaps(ImRect(ImMin(p1, p2), ImMax(p1, p2)))) + DrawList.AddLine(p1, p2, col, line_weight); + p1 = p2; + } + } + else { + RenderPrimitives(LineStripRenderer(getter, transformer, col, line_weight), DrawList, gp.CurrentPlot->PlotRect); + } +} + +template +inline void RenderLineSegments(const Getter1& getter1, const Getter2& getter2, const Transformer& transformer, ImDrawList& DrawList, float line_weight, ImU32 col) { + ImPlotContext& gp = *GImPlot; + if (ImHasFlag(gp.CurrentPlot->Flags, ImPlotFlags_AntiAliased) || gp.Style.AntiAliasedLines) { + int I = ImMin(getter1.Count, getter2.Count); + for (int i = 0; i < I; ++i) { + ImVec2 p1 = transformer(getter1(i)); + ImVec2 p2 = transformer(getter2(i)); + if (gp.CurrentPlot->PlotRect.Overlaps(ImRect(ImMin(p1, p2), ImMax(p1, p2)))) + DrawList.AddLine(p1, p2, col, line_weight); + } + } + else { + RenderPrimitives(LineSegmentsRenderer(getter1, getter2, transformer, col, line_weight), DrawList, gp.CurrentPlot->PlotRect); + } +} + +template +inline void RenderStairs(const Getter& getter, const Transformer& transformer, ImDrawList& DrawList, float line_weight, ImU32 col) { + ImPlotContext& gp = *GImPlot; + if (ImHasFlag(gp.CurrentPlot->Flags, ImPlotFlags_AntiAliased) || gp.Style.AntiAliasedLines) { + ImVec2 p1 = transformer(getter(0)); + for (int i = 1; i < getter.Count; ++i) { + ImVec2 p2 = transformer(getter(i)); + if (gp.CurrentPlot->PlotRect.Overlaps(ImRect(ImMin(p1, p2), ImMax(p1, p2)))) { + ImVec2 p12(p2.x, p1.y); + DrawList.AddLine(p1, p12, col, line_weight); + DrawList.AddLine(p12, p2, col, line_weight); + } + p1 = p2; + } + } + else { + RenderPrimitives(StairsRenderer(getter, transformer, col, line_weight), DrawList, gp.CurrentPlot->PlotRect); + } +} + +//----------------------------------------------------------------------------- +// MARKER RENDERERS +//----------------------------------------------------------------------------- + +inline void TransformMarker(ImVec2* points, int n, const 
ImVec2& c, float s) { + for (int i = 0; i < n; ++i) { + points[i].x = c.x + points[i].x * s; + points[i].y = c.y + points[i].y * s; + } +} + +inline void RenderMarkerGeneral(ImDrawList& DrawList, ImVec2* points, int n, const ImVec2& c, float s, bool outline, ImU32 col_outline, bool fill, ImU32 col_fill, float weight) { + TransformMarker(points, n, c, s); + if (fill) + DrawList.AddConvexPolyFilled(points, n, col_fill); + if (outline && !(fill && col_outline == col_fill)) { + for (int i = 0; i < n; ++i) + DrawList.AddLine(points[i], points[(i+1)%n], col_outline, weight); + } +} + +inline void RenderMarkerCircle(ImDrawList& DrawList, const ImVec2& c, float s, bool outline, ImU32 col_outline, bool fill, ImU32 col_fill, float weight) { + ImVec2 marker[10] = {ImVec2(1.0f, 0.0f), + ImVec2(0.809017f, 0.58778524f), + ImVec2(0.30901697f, 0.95105654f), + ImVec2(-0.30901703f, 0.9510565f), + ImVec2(-0.80901706f, 0.5877852f), + ImVec2(-1.0f, 0.0f), + ImVec2(-0.80901694f, -0.58778536f), + ImVec2(-0.3090171f, -0.9510565f), + ImVec2(0.30901712f, -0.9510565f), + ImVec2(0.80901694f, -0.5877853f)}; + RenderMarkerGeneral(DrawList, marker, 10, c, s, outline, col_outline, fill, col_fill, weight); +} + +inline void RenderMarkerDiamond(ImDrawList& DrawList, const ImVec2& c, float s, bool outline, ImU32 col_outline, bool fill, ImU32 col_fill, float weight) { + ImVec2 marker[4] = {ImVec2(1, 0), ImVec2(0, -1), ImVec2(-1, 0), ImVec2(0, 1)}; + RenderMarkerGeneral(DrawList, marker, 4, c, s, outline, col_outline, fill, col_fill, weight); +} + +inline void RenderMarkerSquare(ImDrawList& DrawList, const ImVec2& c, float s, bool outline, ImU32 col_outline, bool fill, ImU32 col_fill, float weight) { + ImVec2 marker[4] = {ImVec2(SQRT_1_2,SQRT_1_2),ImVec2(SQRT_1_2,-SQRT_1_2),ImVec2(-SQRT_1_2,-SQRT_1_2),ImVec2(-SQRT_1_2,SQRT_1_2)}; + RenderMarkerGeneral(DrawList, marker, 4, c, s, outline, col_outline, fill, col_fill, weight); +} + +inline void RenderMarkerUp(ImDrawList& DrawList, const ImVec2& c, float 
s, bool outline, ImU32 col_outline, bool fill, ImU32 col_fill, float weight) { + ImVec2 marker[3] = {ImVec2(SQRT_3_2,0.5f),ImVec2(0,-1),ImVec2(-SQRT_3_2,0.5f)}; + RenderMarkerGeneral(DrawList, marker, 3, c, s, outline, col_outline, fill, col_fill, weight); +} + +inline void RenderMarkerDown(ImDrawList& DrawList, const ImVec2& c, float s, bool outline, ImU32 col_outline, bool fill, ImU32 col_fill, float weight) { + ImVec2 marker[3] = {ImVec2(SQRT_3_2,-0.5f),ImVec2(0,1),ImVec2(-SQRT_3_2,-0.5f)}; + RenderMarkerGeneral(DrawList, marker, 3, c, s, outline, col_outline, fill, col_fill, weight); +} + +inline void RenderMarkerLeft(ImDrawList& DrawList, const ImVec2& c, float s, bool outline, ImU32 col_outline, bool fill, ImU32 col_fill, float weight) { + ImVec2 marker[3] = {ImVec2(-1,0), ImVec2(0.5, SQRT_3_2), ImVec2(0.5, -SQRT_3_2)}; + RenderMarkerGeneral(DrawList, marker, 3, c, s, outline, col_outline, fill, col_fill, weight); +} + +inline void RenderMarkerRight(ImDrawList& DrawList, const ImVec2& c, float s, bool outline, ImU32 col_outline, bool fill, ImU32 col_fill, float weight) { + ImVec2 marker[3] = {ImVec2(1,0), ImVec2(-0.5, SQRT_3_2), ImVec2(-0.5, -SQRT_3_2)}; + RenderMarkerGeneral(DrawList, marker, 3, c, s, outline, col_outline, fill, col_fill, weight); +} + +inline void RenderMarkerAsterisk(ImDrawList& DrawList, const ImVec2& c, float s, bool /*outline*/, ImU32 col_outline, bool /*fill*/, ImU32 /*col_fill*/, float weight) { + ImVec2 marker[6] = {ImVec2(SQRT_3_2, 0.5f), ImVec2(0, -1), ImVec2(-SQRT_3_2, 0.5f), ImVec2(SQRT_3_2, -0.5f), ImVec2(0, 1), ImVec2(-SQRT_3_2, -0.5f)}; + TransformMarker(marker, 6, c, s); + DrawList.AddLine(marker[0], marker[5], col_outline, weight); + DrawList.AddLine(marker[1], marker[4], col_outline, weight); + DrawList.AddLine(marker[2], marker[3], col_outline, weight); +} + +inline void RenderMarkerPlus(ImDrawList& DrawList, const ImVec2& c, float s, bool /*outline*/, ImU32 col_outline, bool /*fill*/, ImU32 /*col_fill*/, float weight) { + 
ImVec2 marker[4] = {ImVec2(1, 0), ImVec2(0, -1), ImVec2(-1, 0), ImVec2(0, 1)}; + TransformMarker(marker, 4, c, s); + DrawList.AddLine(marker[0], marker[2], col_outline, weight); + DrawList.AddLine(marker[1], marker[3], col_outline, weight); +} + +inline void RenderMarkerCross(ImDrawList& DrawList, const ImVec2& c, float s, bool /*outline*/, ImU32 col_outline, bool /*fill*/, ImU32 /*col_fill*/, float weight) { + ImVec2 marker[4] = {ImVec2(SQRT_1_2,SQRT_1_2),ImVec2(SQRT_1_2,-SQRT_1_2),ImVec2(-SQRT_1_2,-SQRT_1_2),ImVec2(-SQRT_1_2,SQRT_1_2)}; + TransformMarker(marker, 4, c, s); + DrawList.AddLine(marker[0], marker[2], col_outline, weight); + DrawList.AddLine(marker[1], marker[3], col_outline, weight); +} + +template +inline void RenderMarkers(Getter getter, Transformer transformer, ImDrawList& DrawList, ImPlotMarker marker, float size, bool rend_mk_line, ImU32 col_mk_line, float weight, bool rend_mk_fill, ImU32 col_mk_fill) { + static void (*marker_table[ImPlotMarker_COUNT])(ImDrawList&, const ImVec2&, float s, bool, ImU32, bool, ImU32, float) = { + RenderMarkerCircle, + RenderMarkerSquare, + RenderMarkerDiamond , + RenderMarkerUp , + RenderMarkerDown , + RenderMarkerLeft, + RenderMarkerRight, + RenderMarkerCross, + RenderMarkerPlus, + RenderMarkerAsterisk + }; + ImPlotContext& gp = *GImPlot; + for (int i = 0; i < getter.Count; ++i) { + ImVec2 c = transformer(getter(i)); + if (gp.CurrentPlot->PlotRect.Contains(c)) + marker_table[marker](DrawList, c, size, rend_mk_line, col_mk_line, rend_mk_fill, col_mk_fill, weight); + } +} + +//----------------------------------------------------------------------------- +// PLOT LINE +//----------------------------------------------------------------------------- + +template +inline void PlotLineEx(const char* label_id, const Getter& getter) { + if (BeginItem(label_id, ImPlotCol_Line)) { + if (FitThisFrame()) { + for (int i = 0; i < getter.Count; ++i) { + ImPlotPoint p = getter(i); + FitPoint(p); + } + } + const ImPlotNextItemData& s 
= GetItemData(); + ImDrawList& DrawList = *GetPlotDrawList(); + if (getter.Count > 1 && s.RenderLine) { + const ImU32 col_line = ImGui::GetColorU32(s.Colors[ImPlotCol_Line]); + switch (GetCurrentScale()) { + case ImPlotScale_LinLin: RenderLineStrip(getter, TransformerLinLin(), DrawList, s.LineWeight, col_line); break; + case ImPlotScale_LogLin: RenderLineStrip(getter, TransformerLogLin(), DrawList, s.LineWeight, col_line); break; + case ImPlotScale_LinLog: RenderLineStrip(getter, TransformerLinLog(), DrawList, s.LineWeight, col_line); break; + case ImPlotScale_LogLog: RenderLineStrip(getter, TransformerLogLog(), DrawList, s.LineWeight, col_line); break; + } + } + // render markers + if (s.Marker != ImPlotMarker_None) { + const ImU32 col_line = ImGui::GetColorU32(s.Colors[ImPlotCol_MarkerOutline]); + const ImU32 col_fill = ImGui::GetColorU32(s.Colors[ImPlotCol_MarkerFill]); + switch (GetCurrentScale()) { + case ImPlotScale_LinLin: RenderMarkers(getter, TransformerLinLin(), DrawList, s.Marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break; + case ImPlotScale_LogLin: RenderMarkers(getter, TransformerLogLin(), DrawList, s.Marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break; + case ImPlotScale_LinLog: RenderMarkers(getter, TransformerLinLog(), DrawList, s.Marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break; + case ImPlotScale_LogLog: RenderMarkers(getter, TransformerLogLog(), DrawList, s.Marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break; + } + } + EndItem(); + } +} + +template +void PlotLine(const char* label_id, const T* values, int count, double xscale, double x0, int offset, int stride) { + GetterYs getter(values,count,xscale,x0,offset,stride); + PlotLineEx(label_id, getter); +} + +template IMPLOT_API void PlotLine (const char* label_id, const ImS8* values, 
int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotLine (const char* label_id, const ImU8* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const ImS16* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const ImU16* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const ImS32* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const ImU32* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const ImS64* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const ImU64* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const float* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const double* values, int count, double xscale, double x0, int offset, int stride); + +template +void PlotLine(const char* label_id, const T* xs, const T* ys, int count, int offset, int stride) { + GetterXsYs getter(xs,ys,count,offset,stride); + return PlotLineEx(label_id, getter); +} + +template IMPLOT_API void PlotLine(const char* label_id, const ImS8* xs, const ImS8* ys, int count, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const ImU8* xs, const ImU8* ys, int count, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const ImS16* xs, const ImS16* ys, int count, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const ImU16* xs, const ImU16* ys, int 
count, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const ImS32* xs, const ImS32* ys, int count, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const ImU32* xs, const ImU32* ys, int count, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const ImS64* xs, const ImS64* ys, int count, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const ImU64* xs, const ImU64* ys, int count, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const float* xs, const float* ys, int count, int offset, int stride); +template IMPLOT_API void PlotLine(const char* label_id, const double* xs, const double* ys, int count, int offset, int stride); + +// custom +void PlotLineG(const char* label_id, ImPlotPoint (*getter_func)(void* data, int idx), void* data, int count, int offset) { + GetterFuncPtr getter(getter_func,data, count, offset); + return PlotLineEx(label_id, getter); +} + +//----------------------------------------------------------------------------- +// PLOT SCATTER +//----------------------------------------------------------------------------- + +template +inline void PlotScatterEx(const char* label_id, const Getter& getter) { + if (BeginItem(label_id, ImPlotCol_MarkerOutline)) { + if (FitThisFrame()) { + for (int i = 0; i < getter.Count; ++i) { + ImPlotPoint p = getter(i); + FitPoint(p); + } + } + const ImPlotNextItemData& s = GetItemData(); + ImDrawList& DrawList = *GetPlotDrawList(); + // render markers + ImPlotMarker marker = s.Marker == ImPlotMarker_None ? 
ImPlotMarker_Circle : s.Marker; + if (marker != ImPlotMarker_None) { + const ImU32 col_line = ImGui::GetColorU32(s.Colors[ImPlotCol_MarkerOutline]); + const ImU32 col_fill = ImGui::GetColorU32(s.Colors[ImPlotCol_MarkerFill]); + switch (GetCurrentScale()) { + case ImPlotScale_LinLin: RenderMarkers(getter, TransformerLinLin(), DrawList, marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break; + case ImPlotScale_LogLin: RenderMarkers(getter, TransformerLogLin(), DrawList, marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break; + case ImPlotScale_LinLog: RenderMarkers(getter, TransformerLinLog(), DrawList, marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break; + case ImPlotScale_LogLog: RenderMarkers(getter, TransformerLogLog(), DrawList, marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break; + } + } + EndItem(); + } +} + +template +void PlotScatter(const char* label_id, const T* values, int count, double xscale, double x0, int offset, int stride) { + GetterYs getter(values,count,xscale,x0,offset,stride); + PlotScatterEx(label_id, getter); +} + +template IMPLOT_API void PlotScatter(const char* label_id, const ImS8* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, const ImU8* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, const ImS16* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, const ImU16* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, const ImS32* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void 
PlotScatter(const char* label_id, const ImU32* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, const ImS64* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, const ImU64* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, const float* values, int count, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, const double* values, int count, double xscale, double x0, int offset, int stride); + +template +void PlotScatter(const char* label_id, const T* xs, const T* ys, int count, int offset, int stride) { + GetterXsYs getter(xs,ys,count,offset,stride); + return PlotScatterEx(label_id, getter); +} + +template IMPLOT_API void PlotScatter(const char* label_id, const ImS8* xs, const ImS8* ys, int count, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, const ImU8* xs, const ImU8* ys, int count, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, const ImS16* xs, const ImS16* ys, int count, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, const ImU16* xs, const ImU16* ys, int count, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, const ImS32* xs, const ImS32* ys, int count, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, const ImU32* xs, const ImU32* ys, int count, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, const ImS64* xs, const ImS64* ys, int count, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, const ImU64* xs, const ImU64* ys, int count, int offset, int stride); +template IMPLOT_API void PlotScatter(const char* label_id, 
const float* xs, const float* ys, int count, int offset, int stride);
+template IMPLOT_API void PlotScatter(const char* label_id, const double* xs, const double* ys, int count, int offset, int stride);
+
+// custom
+// Scatter plot whose points are produced by a user callback: wraps
+// (getter_func, data, count, offset) in a GetterFuncPtr and forwards to PlotScatterEx.
+void PlotScatterG(const char* label_id, ImPlotPoint (*getter_func)(void* data, int idx), void* data, int count, int offset) {
+    GetterFuncPtr getter(getter_func,data, count, offset);
+    return PlotScatterEx(label_id, getter);
+}
+
+//-----------------------------------------------------------------------------
+// PLOT STAIRS
+//-----------------------------------------------------------------------------
+
+// Shared implementation behind every PlotStairs* entry point.
+// Does nothing unless BeginItem(label_id, ImPlotCol_Line) succeeds; in that case it
+//   1. passes every point of `getter` to FitPoint when FitThisFrame() is true,
+//   2. draws the stair line with RenderStairs when there are at least two points and
+//      s.RenderLine is set,
+//   3. draws markers with RenderMarkers when s.Marker is not ImPlotMarker_None,
+// and finally calls EndItem(). The transformer passed to the render helpers is chosen from
+// GetCurrentScale().
+// Getter must expose an int-comparable `Count` and be callable as getter(i) yielding a value
+// convertible to ImPlotPoint.
+template <typename Getter>
+inline void PlotStairsEx(const char* label_id, const Getter& getter) {
+    if (BeginItem(label_id, ImPlotCol_Line)) {
+        if (FitThisFrame()) {
+            for (int i = 0; i < getter.Count; ++i) {
+                ImPlotPoint p = getter(i);
+                FitPoint(p);
+            }
+        }
+        const ImPlotNextItemData& s = GetItemData();
+        ImDrawList& DrawList = *GetPlotDrawList();
+        // a stair segment needs two points, so a single point draws no line
+        if (getter.Count > 1 && s.RenderLine) {
+            const ImU32 col_line = ImGui::GetColorU32(s.Colors[ImPlotCol_Line]);
+            switch (GetCurrentScale()) {
+                case ImPlotScale_LinLin: RenderStairs(getter, TransformerLinLin(), DrawList, s.LineWeight, col_line); break;
+                case ImPlotScale_LogLin: RenderStairs(getter, TransformerLogLin(), DrawList, s.LineWeight, col_line); break;
+                case ImPlotScale_LinLog: RenderStairs(getter, TransformerLinLog(), DrawList, s.LineWeight, col_line); break;
+                case ImPlotScale_LogLog: RenderStairs(getter, TransformerLogLog(), DrawList, s.LineWeight, col_line); break;
+            }
+        }
+        // render markers
+        if (s.Marker != ImPlotMarker_None) {
+            const ImU32 col_line = ImGui::GetColorU32(s.Colors[ImPlotCol_MarkerOutline]);
+            const ImU32 col_fill = ImGui::GetColorU32(s.Colors[ImPlotCol_MarkerFill]);
+            switch (GetCurrentScale()) {
+                case ImPlotScale_LinLin: RenderMarkers(getter, TransformerLinLin(), DrawList, s.Marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break;
+                case ImPlotScale_LogLin: RenderMarkers(getter, TransformerLogLin(), DrawList, s.Marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break;
+                case ImPlotScale_LinLog: RenderMarkers(getter, TransformerLinLog(), DrawList, s.Marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break;
+                case ImPlotScale_LogLog: RenderMarkers(getter, TransformerLogLog(), DrawList, s.Marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break;
+            }
+        }
+        EndItem();
+    }
+}
+
+// Stair plot from a single value array: packs (values, count, xscale, x0, offset, stride)
+// into a GetterYs and forwards to PlotStairsEx.
+// NOTE(review): GetterYs is named without template arguments; confirm it is a non-template or
+// deducible via C++17 class template argument deduction.
+template <typename T>
+void PlotStairs(const char* label_id, const T* values, int count, double xscale, double x0, int offset, int stride) {
+    GetterYs getter(values,count,xscale,x0,offset,stride);
+    PlotStairsEx(label_id, getter);
+}
+
+// Explicit instantiations of the value-array overload for the listed element types.
+template IMPLOT_API void PlotStairs (const char* label_id, const ImS8* values, int count, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotStairs (const char* label_id, const ImU8* values, int count, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const ImS16* values, int count, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const ImU16* values, int count, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const ImS32* values, int count, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const ImU32* values, int count, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const ImS64* values, int count, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const ImU64* values, int count, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotStairs(const
char* label_id, const float* values, int count, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const double* values, int count, double xscale, double x0, int offset, int stride);
+
+// Stair plot from separate x and y arrays: packs (xs, ys, count, offset, stride) into a
+// GetterXsYs and forwards to PlotStairsEx.
+// NOTE(review): GetterXsYs is named without template arguments; confirm it is a non-template or
+// deducible via C++17 class template argument deduction.
+template <typename T>
+void PlotStairs(const char* label_id, const T* xs, const T* ys, int count, int offset, int stride) {
+    GetterXsYs getter(xs,ys,count,offset,stride);
+    return PlotStairsEx(label_id, getter);
+}
+
+// Explicit instantiations of the xs/ys overload for the listed element types.
+template IMPLOT_API void PlotStairs(const char* label_id, const ImS8* xs, const ImS8* ys, int count, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const ImU8* xs, const ImU8* ys, int count, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const ImS16* xs, const ImS16* ys, int count, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const ImU16* xs, const ImU16* ys, int count, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const ImS32* xs, const ImS32* ys, int count, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const ImU32* xs, const ImU32* ys, int count, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const ImS64* xs, const ImS64* ys, int count, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const ImU64* xs, const ImU64* ys, int count, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const float* xs, const float* ys, int count, int offset, int stride);
+template IMPLOT_API void PlotStairs(const char* label_id, const double* xs, const double* ys, int count, int offset, int stride);
+
+// custom
+// Stair plot whose points are produced by a user callback: wraps
+// (getter_func, data, count, offset) in a GetterFuncPtr and forwards to PlotStairsEx.
+void PlotStairsG(const char* label_id, ImPlotPoint (*getter_func)(void* data, int idx), void* data, int count, int offset) {
+    GetterFuncPtr getter(getter_func,data, count, offset);
+    return PlotStairsEx(label_id, getter);
+}
+
+//-----------------------------------------------------------------------------
+// PLOT SHADED
+//-----------------------------------------------------------------------------
+
+// Shared implementation behind every PlotShaded* entry point.
+// Does nothing unless BeginItem(label_id, ImPlotCol_Fill) succeeds. When FitThisFrame() is
+// true, every point of getter1 is passed to FitPoint, and the points of getter2 as well but
+// only if fit2 is true. When s.RenderFill is set, a ShadedRenderer built from both getters,
+// the transformer matching GetCurrentScale() and the fill colour is handed to
+// RenderPrimitives together with the current plot's PlotRect. EndItem() closes the item.
+// NOTE(review): ShadedRenderer is named without template arguments; confirm it is deducible
+// via C++17 class template argument deduction.
+template <typename Getter1, typename Getter2>
+inline void PlotShadedEx(const char* label_id, const Getter1& getter1, const Getter2& getter2, bool fit2) {
+    if (BeginItem(label_id, ImPlotCol_Fill)) {
+        if (FitThisFrame()) {
+            for (int i = 0; i < getter1.Count; ++i)
+                FitPoint(getter1(i));
+            if (fit2) {
+                for (int i = 0; i < getter2.Count; ++i)
+                    FitPoint(getter2(i));
+            }
+        }
+        const ImPlotNextItemData& s = GetItemData();
+        ImDrawList & DrawList = *GetPlotDrawList();
+        if (s.RenderFill) {
+            ImU32 col = ImGui::GetColorU32(s.Colors[ImPlotCol_Fill]);
+            switch (GetCurrentScale()) {
+                case ImPlotScale_LinLin: RenderPrimitives(ShadedRenderer(getter1,getter2,TransformerLinLin(), col), DrawList, GImPlot->CurrentPlot->PlotRect); break;
+                case ImPlotScale_LogLin: RenderPrimitives(ShadedRenderer(getter1,getter2,TransformerLogLin(), col), DrawList, GImPlot->CurrentPlot->PlotRect); break;
+                case ImPlotScale_LinLog: RenderPrimitives(ShadedRenderer(getter1,getter2,TransformerLinLog(), col), DrawList, GImPlot->CurrentPlot->PlotRect); break;
+                case ImPlotScale_LogLog: RenderPrimitives(ShadedRenderer(getter1,getter2,TransformerLogLog(), col), DrawList, GImPlot->CurrentPlot->PlotRect); break;
+            }
+        }
+        EndItem();
+    }
+}
+
+// Shaded plot between a value array and a constant reference level y_ref.
+// y_ref == -HUGE_VAL / +HUGE_VAL is replaced by the current plot limits' Y.Min / Y.Max, and
+// in that case the reference getter is excluded from fitting (fit2 = false) so the
+// substituted limit does not feed back into the fit.
+// NOTE(review): GetterYs / GetterYRef are named without template arguments; confirm each is a
+// non-template or deducible via C++17 class template argument deduction.
+template <typename T>
+void PlotShaded(const char* label_id, const T* values, int count, double y_ref, double xscale, double x0, int offset, int stride) {
+    bool fit2 = true;
+    if (y_ref == -HUGE_VAL) {
+        fit2 = false;
+        y_ref = GetPlotLimits().Y.Min;
+    }
+    if (y_ref == HUGE_VAL) {
+        fit2 = false;
+        y_ref = GetPlotLimits().Y.Max;
+    }
+    GetterYs getter1(values,count,xscale,x0,offset,stride);
+    GetterYRef getter2(y_ref,count,xscale,x0);
+    PlotShadedEx(label_id, getter1, getter2, fit2);
+}
+
+// Explicit instantiations of the value-array overload for the listed element types.
+template IMPLOT_API void PlotShaded(const char* label_id, const ImS8* values, int count, double y_ref, double xscale, double x0, int offset, int
stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImU8* values, int count, double y_ref, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImS16* values, int count, double y_ref, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImU16* values, int count, double y_ref, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImS32* values, int count, double y_ref, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImU32* values, int count, double y_ref, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImS64* values, int count, double y_ref, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImU64* values, int count, double y_ref, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const float* values, int count, double y_ref, double xscale, double x0, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const double* values, int count, double y_ref, double xscale, double x0, int offset, int stride);
+
+// Shaded plot between an (xs, ys) series and a constant reference level y_ref.
+// y_ref == -HUGE_VAL / +HUGE_VAL is replaced by the current plot limits' Y.Min / Y.Max, and
+// in that case the reference getter is excluded from fitting (fit2 = false).
+// NOTE(review): GetterXsYs / GetterXsYRef are named without template arguments; confirm each is
+// a non-template or deducible via C++17 class template argument deduction.
+template <typename T>
+void PlotShaded(const char* label_id, const T* xs, const T* ys, int count, double y_ref, int offset, int stride) {
+    bool fit2 = true;
+    if (y_ref == -HUGE_VAL) {
+        fit2 = false;
+        y_ref = GetPlotLimits().Y.Min;
+    }
+    if (y_ref == HUGE_VAL) {
+        fit2 = false;
+        y_ref = GetPlotLimits().Y.Max;
+    }
+    GetterXsYs getter1(xs, ys, count, offset, stride);
+    GetterXsYRef getter2(xs, y_ref, count, offset, stride);
+    PlotShadedEx(label_id, getter1, getter2, fit2);
+}
+
+// Explicit instantiations of the xs/ys/y_ref overload for the listed element types.
+template IMPLOT_API void PlotShaded(const char* label_id, const ImS8* xs, const ImS8* ys, int count, double
y_ref, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImU8* xs, const ImU8* ys, int count, double y_ref, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImS16* xs, const ImS16* ys, int count, double y_ref, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImU16* xs, const ImU16* ys, int count, double y_ref, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImS32* xs, const ImS32* ys, int count, double y_ref, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImU32* xs, const ImU32* ys, int count, double y_ref, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImS64* xs, const ImS64* ys, int count, double y_ref, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImU64* xs, const ImU64* ys, int count, double y_ref, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const float* xs, const float* ys, int count, double y_ref, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const double* xs, const double* ys, int count, double y_ref, int offset, int stride);
+
+// Shaded plot between two series that share the same x array: (xs, ys1) and (xs, ys2).
+// Both series are wrapped in a GetterXsYs and both take part in fitting (fit2 = true).
+// NOTE(review): GetterXsYs is named without template arguments; confirm it is a non-template or
+// deducible via C++17 class template argument deduction.
+template <typename T>
+void PlotShaded(const char* label_id, const T* xs, const T* ys1, const T* ys2, int count, int offset, int stride) {
+    GetterXsYs getter1(xs, ys1, count, offset, stride);
+    GetterXsYs getter2(xs, ys2, count, offset, stride);
+    PlotShadedEx(label_id, getter1, getter2, true);
+}
+
+// Explicit instantiations of the xs/ys1/ys2 overload for the listed element types.
+template IMPLOT_API void PlotShaded(const char* label_id, const ImS8* xs, const ImS8* ys1, const ImS8* ys2, int count, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImU8* xs, const ImU8* ys1, const ImU8* ys2, int count, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImS16* xs, const ImS16* ys1,
const ImS16* ys2, int count, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImU16* xs, const ImU16* ys1, const ImU16* ys2, int count, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImS32* xs, const ImS32* ys1, const ImS32* ys2, int count, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImU32* xs, const ImU32* ys1, const ImU32* ys2, int count, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImS64* xs, const ImS64* ys1, const ImS64* ys2, int count, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const ImU64* xs, const ImU64* ys1, const ImU64* ys2, int count, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const float* xs, const float* ys1, const float* ys2, int count, int offset, int stride);
+template IMPLOT_API void PlotShaded(const char* label_id, const double* xs, const double* ys1, const double* ys2, int count, int offset, int stride);
+
+// custom
+// Shaded plot between two callback-driven series: each (callback, data) pair is wrapped in
+// a GetterFuncPtr with the shared count and offset, and both take part in fitting.
+void PlotShadedG(const char* label_id, ImPlotPoint (*g1)(void* data, int idx), void* data1, ImPlotPoint (*g2)(void* data, int idx), void* data2, int count, int offset) {
+    GetterFuncPtr getter1(g1, data1, count, offset);
+    GetterFuncPtr getter2(g2, data2, count, offset);
+    PlotShadedEx(label_id, getter1, getter2, true);
+}
+
+//-----------------------------------------------------------------------------
+// PLOT BAR
+//-----------------------------------------------------------------------------
+
+// TODO: Migrate to RenderPrimitives
+
+// Shared implementation behind every vertical PlotBars* entry point.
+// Does nothing unless BeginItem(label_id, ImPlotCol_Fill) succeeds. For each point p the bar
+// is the rectangle with corners (p.x - width/2, p.y) and (p.x + width/2, 0) in plot
+// coordinates; both corners are passed to FitPoint when FitThisFrame() is true. Points with
+// p.y == 0 are not drawn. The fill is drawn when s.RenderFill is set; the outline is drawn
+// when s.RenderLine is set, unless the fill is drawn with the very same colour.
+template <typename Getter>
+void PlotBarsEx(const char* label_id, const Getter& getter, double width) {
+    if (BeginItem(label_id, ImPlotCol_Fill)) {
+        const double half_width = width / 2;
+        if (FitThisFrame()) {
+            for (int i = 0; i < getter.Count; ++i) {
+                ImPlotPoint p = getter(i);
+                FitPoint(ImPlotPoint(p.x - half_width, p.y));
+                FitPoint(ImPlotPoint(p.x + half_width, 0));
+            }
+        }
+        const ImPlotNextItemData& s = GetItemData();
+        ImDrawList& DrawList = *GetPlotDrawList();
+        ImU32 col_line = ImGui::GetColorU32(s.Colors[ImPlotCol_Line]);
+        ImU32 col_fill = ImGui::GetColorU32(s.Colors[ImPlotCol_Fill]);
+        bool rend_line = s.RenderLine;
+        // an outline in the fill colour would be invisible, so skip it
+        if (s.RenderFill && col_line == col_fill)
+            rend_line = false;
+        for (int i = 0; i < getter.Count; ++i) {
+            ImPlotPoint p = getter(i);
+            if (p.y == 0)
+                continue;
+            ImVec2 a = PlotToPixels(p.x - half_width, p.y);
+            ImVec2 b = PlotToPixels(p.x + half_width, 0);
+            if (s.RenderFill)
+                DrawList.AddRectFilled(a, b, col_fill);
+            if (rend_line)
+                DrawList.AddRect(a, b, col_line, 0, ImDrawCornerFlags_All, s.LineWeight);
+        }
+        EndItem();
+    }
+}
+
+// Vertical bars from a single value array: packs (values, shift, count, offset, stride)
+// into a GetterBarV and forwards it with `width` to PlotBarsEx.
+// NOTE(review): GetterBarV is named without template arguments; confirm it is a non-template or
+// deducible via C++17 class template argument deduction.
+template <typename T>
+void PlotBars(const char* label_id, const T* values, int count, double width, double shift, int offset, int stride) {
+    GetterBarV getter(values,shift,count,offset,stride);
+    PlotBarsEx(label_id, getter, width);
+}
+
+// Explicit instantiations of the value-array overload for the listed element types.
+template IMPLOT_API void PlotBars(const char* label_id, const ImS8* values, int count, double width, double shift, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const ImU8* values, int count, double width, double shift, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const ImS16* values, int count, double width, double shift, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const ImU16* values, int count, double width, double shift, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const ImS32* values, int count, double width, double shift, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const ImU32* values, int count, double width, double shift, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const ImS64* values, int count, double width, double shift, int offset, int stride);
+template IMPLOT_API void PlotBars(const char*
label_id, const ImU64* values, int count, double width, double shift, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const float* values, int count, double width, double shift, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const double* values, int count, double width, double shift, int offset, int stride);
+
+// Vertical bars from separate x and y arrays: packs (xs, ys, count, offset, stride) into a
+// GetterXsYs and forwards it with `width` to PlotBarsEx.
+// NOTE(review): GetterXsYs is named without template arguments; confirm it is a non-template or
+// deducible via C++17 class template argument deduction.
+template <typename T>
+void PlotBars(const char* label_id, const T* xs, const T* ys, int count, double width, int offset, int stride) {
+    GetterXsYs getter(xs,ys,count,offset,stride);
+    PlotBarsEx(label_id, getter, width);
+}
+
+// Explicit instantiations of the xs/ys overload for the listed element types.
+template IMPLOT_API void PlotBars(const char* label_id, const ImS8* xs, const ImS8* ys, int count, double width, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const ImU8* xs, const ImU8* ys, int count, double width, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const ImS16* xs, const ImS16* ys, int count, double width, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const ImU16* xs, const ImU16* ys, int count, double width, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const ImS32* xs, const ImS32* ys, int count, double width, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const ImU32* xs, const ImU32* ys, int count, double width, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const ImS64* xs, const ImS64* ys, int count, double width, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const ImU64* xs, const ImU64* ys, int count, double width, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const float* xs, const float* ys, int count, double width, int offset, int stride);
+template IMPLOT_API void PlotBars(const char* label_id, const double* xs, const double* ys, int count, double width, int offset, int
stride);
+
+// custom
+// Vertical bars whose points are produced by a user callback: wraps
+// (getter_func, data, count, offset) in a GetterFuncPtr and forwards it with `width` to
+// PlotBarsEx.
+void PlotBarsG(const char* label_id, ImPlotPoint (*getter_func)(void* data, int idx), void* data, int count, double width, int offset) {
+    GetterFuncPtr getter(getter_func, data, count, offset);
+    PlotBarsEx(label_id, getter, width);
+}
+
+//-----------------------------------------------------------------------------
+// PLOT BAR H
+//-----------------------------------------------------------------------------
+
+// TODO: Migrate to RenderPrimitives
+
+// Shared implementation behind every horizontal PlotBarsH* entry point.
+// Does nothing unless BeginItem(label_id, ImPlotCol_Fill) succeeds. For each point p the bar
+// is the rectangle with corners (0, p.y - height/2) and (p.x, p.y + height/2) in plot
+// coordinates; both corners are passed to FitPoint when FitThisFrame() is true. Points with
+// p.x == 0 are not drawn. The fill is drawn when s.RenderFill is set; the outline is drawn
+// when s.RenderLine is set, unless the fill is drawn with the very same colour.
+// THeight is the arithmetic type of `height`; half_height is computed in that same type.
+template <typename Getter, typename THeight>
+void PlotBarsHEx(const char* label_id, const Getter& getter, THeight height) {
+    if (BeginItem(label_id, ImPlotCol_Fill)) {
+        const THeight half_height = height / 2;
+        if (FitThisFrame()) {
+            for (int i = 0; i < getter.Count; ++i) {
+                ImPlotPoint p = getter(i);
+                FitPoint(ImPlotPoint(0, p.y - half_height));
+                FitPoint(ImPlotPoint(p.x, p.y + half_height));
+            }
+        }
+        const ImPlotNextItemData& s = GetItemData();
+        ImDrawList& DrawList = *GetPlotDrawList();
+        ImU32 col_line = ImGui::GetColorU32(s.Colors[ImPlotCol_Line]);
+        ImU32 col_fill = ImGui::GetColorU32(s.Colors[ImPlotCol_Fill]);
+        bool rend_line = s.RenderLine;
+        // an outline in the fill colour would be invisible, so skip it
+        if (s.RenderFill && col_line == col_fill)
+            rend_line = false;
+        for (int i = 0; i < getter.Count; ++i) {
+            ImPlotPoint p = getter(i);
+            if (p.x == 0)
+                continue;
+            ImVec2 a = PlotToPixels(0, p.y - half_height);
+            ImVec2 b = PlotToPixels(p.x, p.y + half_height);
+            if (s.RenderFill)
+                DrawList.AddRectFilled(a, b, col_fill);
+            if (rend_line)
+                DrawList.AddRect(a, b, col_line, 0, ImDrawCornerFlags_All, s.LineWeight);
+        }
+        EndItem();
+    }
+}
+
+// Horizontal bars from a single value array: packs (values, shift, count, offset, stride)
+// into a GetterBarH and forwards it with `height` to PlotBarsHEx.
+// NOTE(review): GetterBarH is named without template arguments; confirm it is a non-template or
+// deducible via C++17 class template argument deduction.
+template <typename T>
+void PlotBarsH(const char* label_id, const T* values, int count, double height, double shift, int offset, int stride) {
+    GetterBarH getter(values,shift,count,offset,stride);
+    PlotBarsHEx(label_id, getter, height);
+}
+
+// Explicit instantiations of the value-array overload for the listed element types.
+template IMPLOT_API void PlotBarsH(const char* label_id, const ImS8* values, int count, double height, double shift, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const
ImU8* values, int count, double height, double shift, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const ImS16* values, int count, double height, double shift, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const ImU16* values, int count, double height, double shift, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const ImS32* values, int count, double height, double shift, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const ImU32* values, int count, double height, double shift, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const ImS64* values, int count, double height, double shift, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const ImU64* values, int count, double height, double shift, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const float* values, int count, double height, double shift, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const double* values, int count, double height, double shift, int offset, int stride);
+
+// Horizontal bars from separate x and y arrays: packs (xs, ys, count, offset, stride) into
+// a GetterXsYs and forwards it with `height` to PlotBarsHEx.
+// NOTE(review): GetterXsYs is named without template arguments; confirm it is a non-template or
+// deducible via C++17 class template argument deduction.
+template <typename T>
+void PlotBarsH(const char* label_id, const T* xs, const T* ys, int count, double height, int offset, int stride) {
+    GetterXsYs getter(xs,ys,count,offset,stride);
+    PlotBarsHEx(label_id, getter, height);
+}
+
+// Explicit instantiations of the xs/ys overload for the listed element types.
+template IMPLOT_API void PlotBarsH(const char* label_id, const ImS8* xs, const ImS8* ys, int count, double height, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const ImU8* xs, const ImU8* ys, int count, double height, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const ImS16* xs, const ImS16* ys, int count, double height, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const ImU16* xs, const ImU16* ys, int count, double height,
int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const ImS32* xs, const ImS32* ys, int count, double height, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const ImU32* xs, const ImU32* ys, int count, double height, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const ImS64* xs, const ImS64* ys, int count, double height, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const ImU64* xs, const ImU64* ys, int count, double height, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const float* xs, const float* ys, int count, double height, int offset, int stride);
+template IMPLOT_API void PlotBarsH(const char* label_id, const double* xs, const double* ys, int count, double height, int offset, int stride);
+
+// custom
+// Horizontal bars whose points are produced by a user callback: wraps
+// (getter_func, data, count, offset) in a GetterFuncPtr and forwards it with `height` to
+// PlotBarsHEx.
+void PlotBarsHG(const char* label_id, ImPlotPoint (*getter_func)(void* data, int idx), void* data, int count, double height, int offset) {
+    GetterFuncPtr getter(getter_func, data, count, offset);
+    PlotBarsHEx(label_id, getter, height);
+}
+
+//-----------------------------------------------------------------------------
+// PLOT ERROR BARS
+//-----------------------------------------------------------------------------
+
+// Shared implementation behind the vertical PlotErrorBars overloads.
+// Does nothing unless BeginItem(label_id) succeeds. Each element e of `getter` is read as an
+// ImPlotPointError and drawn as a line from (e.X, e.Y - e.Neg) to (e.X, e.Y + e.Pos); both
+// end points are passed to FitPoint when FitThisFrame() is true. When s.ErrorBarSize > 0 a
+// horizontal whisker is added at each end, extending half of s.ErrorBarSize to either side
+// of the end point in the coordinates returned by PlotToPixels.
+template <typename Getter>
+void PlotErrorBarsEx(const char* label_id, const Getter& getter) {
+    if (BeginItem(label_id)) {
+        if (FitThisFrame()) {
+            for (int i = 0; i < getter.Count; ++i) {
+                ImPlotPointError e = getter(i);
+                FitPoint(ImPlotPoint(e.X , e.Y - e.Neg));
+                FitPoint(ImPlotPoint(e.X , e.Y + e.Pos ));
+            }
+        }
+        const ImPlotNextItemData& s = GetItemData();
+        ImDrawList& DrawList = *GetPlotDrawList();
+        const ImU32 col = ImGui::GetColorU32(s.Colors[ImPlotCol_ErrorBar]);
+        const bool rend_whisker = s.ErrorBarSize > 0;
+        const float half_whisker = s.ErrorBarSize * 0.5f;
+        for (int i = 0; i < getter.Count; ++i) {
+            ImPlotPointError e = getter(i);
+            ImVec2 p1 = PlotToPixels(e.X, e.Y - e.Neg);
+            ImVec2 p2 = PlotToPixels(e.X, e.Y + e.Pos);
+            DrawList.AddLine(p1,p2,col, s.ErrorBarWeight);
+            if (rend_whisker) {
+                DrawList.AddLine(p1 - ImVec2(half_whisker, 0), p1 + ImVec2(half_whisker, 0), col, s.ErrorBarWeight);
+                DrawList.AddLine(p2 - ImVec2(half_whisker, 0), p2 + ImVec2(half_whisker, 0), col, s.ErrorBarWeight);
+            }
+        }
+        EndItem();
+    }
+}
+
+// Vertical error bars with a symmetric error: the same `err` array is passed to GetterError
+// as both the negative and the positive extent.
+// NOTE(review): GetterError is named without template arguments; confirm it is a non-template or
+// deducible via C++17 class template argument deduction.
+template <typename T>
+void PlotErrorBars(const char* label_id, const T* xs, const T* ys, const T* err, int count, int offset, int stride) {
+    GetterError getter(xs, ys, err, err, count, offset, stride);
+    PlotErrorBarsEx(label_id, getter);
+}
+
+// Explicit instantiations of the symmetric-error overload for the listed element types.
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImS8* xs, const ImS8* ys, const ImS8* err, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImU8* xs, const ImU8* ys, const ImU8* err, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImS16* xs, const ImS16* ys, const ImS16* err, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImU16* xs, const ImU16* ys, const ImU16* err, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImS32* xs, const ImS32* ys, const ImS32* err, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImU32* xs, const ImU32* ys, const ImU32* err, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImS64* xs, const ImS64* ys, const ImS64* err, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImU64* xs, const ImU64* ys, const ImU64* err, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const float* xs, const float* ys, const float* err, int count, int offset, int stride);
+template IMPLOT_API void
PlotErrorBars(const char* label_id, const double* xs, const double* ys, const double* err, int count, int offset, int stride);
+
+// Vertical error bars with separate negative and positive extents: packs
+// (xs, ys, neg, pos, count, offset, stride) into a GetterError and forwards it to
+// PlotErrorBarsEx.
+// NOTE(review): GetterError is named without template arguments; confirm it is a non-template or
+// deducible via C++17 class template argument deduction.
+template <typename T>
+void PlotErrorBars(const char* label_id, const T* xs, const T* ys, const T* neg, const T* pos, int count, int offset, int stride) {
+    GetterError getter(xs, ys, neg, pos, count, offset, stride);
+    PlotErrorBarsEx(label_id, getter);
+}
+// Explicit instantiations of the neg/pos overload for the listed element types.
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImS8* xs, const ImS8* ys, const ImS8* neg, const ImS8* pos, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImU8* xs, const ImU8* ys, const ImU8* neg, const ImU8* pos, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImS16* xs, const ImS16* ys, const ImS16* neg, const ImS16* pos, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImU16* xs, const ImU16* ys, const ImU16* neg, const ImU16* pos, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImS32* xs, const ImS32* ys, const ImS32* neg, const ImS32* pos, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImU32* xs, const ImU32* ys, const ImU32* neg, const ImU32* pos, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImS64* xs, const ImS64* ys, const ImS64* neg, const ImS64* pos, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const ImU64* xs, const ImU64* ys, const ImU64* neg, const ImU64* pos, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const float* xs, const float* ys, const float* neg, const float* pos, int count, int offset, int stride);
+template IMPLOT_API void PlotErrorBars(const char* label_id, const double* xs, const double* ys, const double* neg,
const double* pos, int count, int offset, int stride); + +//----------------------------------------------------------------------------- +// PLOT ERROR BARS H +//----------------------------------------------------------------------------- + +template +void PlotErrorBarsHEx(const char* label_id, const Getter& getter) { + if (BeginItem(label_id)) { + if (FitThisFrame()) { + for (int i = 0; i < getter.Count; ++i) { + ImPlotPointError e = getter(i); + FitPoint(ImPlotPoint(e.X - e.Neg, e.Y)); + FitPoint(ImPlotPoint(e.X + e.Pos, e.Y)); + } + } + const ImPlotNextItemData& s = GetItemData(); + ImDrawList& DrawList = *GetPlotDrawList(); + const ImU32 col = ImGui::GetColorU32(s.Colors[ImPlotCol_ErrorBar]); + const bool rend_whisker = s.ErrorBarSize > 0; + const float half_whisker = s.ErrorBarSize * 0.5f; + for (int i = 0; i < getter.Count; ++i) { + ImPlotPointError e = getter(i); + ImVec2 p1 = PlotToPixels(e.X - e.Neg, e.Y); + ImVec2 p2 = PlotToPixels(e.X + e.Pos, e.Y); + DrawList.AddLine(p1, p2, col, s.ErrorBarWeight); + if (rend_whisker) { + DrawList.AddLine(p1 - ImVec2(0, half_whisker), p1 + ImVec2(0, half_whisker), col, s.ErrorBarWeight); + DrawList.AddLine(p2 - ImVec2(0, half_whisker), p2 + ImVec2(0, half_whisker), col, s.ErrorBarWeight); + } + } + EndItem(); + } +} + +template +void PlotErrorBarsH(const char* label_id, const T* xs, const T* ys, const T* err, int count, int offset, int stride) { + GetterError getter(xs, ys, err, err, count, offset, stride); + PlotErrorBarsHEx(label_id, getter); +} + +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const ImS8* xs, const ImS8* ys, const ImS8* err, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const ImU8* xs, const ImU8* ys, const ImU8* err, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const ImS16* xs, const ImS16* ys, const ImS16* err, int count, int offset, int stride); +template IMPLOT_API void 
PlotErrorBarsH(const char* label_id, const ImU16* xs, const ImU16* ys, const ImU16* err, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const ImS32* xs, const ImS32* ys, const ImS32* err, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const ImU32* xs, const ImU32* ys, const ImU32* err, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const ImS64* xs, const ImS64* ys, const ImS64* err, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const ImU64* xs, const ImU64* ys, const ImU64* err, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const float* xs, const float* ys, const float* err, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const double* xs, const double* ys, const double* err, int count, int offset, int stride); + +template +void PlotErrorBarsH(const char* label_id, const T* xs, const T* ys, const T* neg, const T* pos, int count, int offset, int stride) { + GetterError getter(xs, ys, neg, pos, count, offset, stride); + PlotErrorBarsHEx(label_id, getter); +} + +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const ImS8* xs, const ImS8* ys, const ImS8* neg, const ImS8* pos, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const ImU8* xs, const ImU8* ys, const ImU8* neg, const ImU8* pos, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const ImS16* xs, const ImS16* ys, const ImS16* neg, const ImS16* pos, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const ImU16* xs, const ImU16* ys, const ImU16* neg, const ImU16* pos, int count, int offset, int stride); +template IMPLOT_API void 
PlotErrorBarsH(const char* label_id, const ImS32* xs, const ImS32* ys, const ImS32* neg, const ImS32* pos, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const ImU32* xs, const ImU32* ys, const ImU32* neg, const ImU32* pos, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const ImS64* xs, const ImS64* ys, const ImS64* neg, const ImS64* pos, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const ImU64* xs, const ImU64* ys, const ImU64* neg, const ImU64* pos, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const float* xs, const float* ys, const float* neg, const float* pos, int count, int offset, int stride); +template IMPLOT_API void PlotErrorBarsH(const char* label_id, const double* xs, const double* ys, const double* neg, const double* pos, int count, int offset, int stride); + +//----------------------------------------------------------------------------- +// PLOT STEMS +//----------------------------------------------------------------------------- + +template +inline void PlotStemsEx(const char* label_id, const GetterM& get_mark, const GetterB& get_base) { /* Fits every get_mark(i)/get_base(i) point when a fit is requested, then draws the stems (only if s.RenderLine) followed by the markers. */ + if (BeginItem(label_id, ImPlotCol_Line)) { + if (FitThisFrame()) { + for (int i = 0; i < get_base.Count; ++i) { + FitPoint(get_mark(i)); + FitPoint(get_base(i)); + } + } + const ImPlotNextItemData& s = GetItemData(); + ImDrawList& DrawList = *GetPlotDrawList(); + // render stems (line segments built from the get_mark / get_base getter pair) + if (s.RenderLine) { + const ImU32 col_line = ImGui::GetColorU32(s.Colors[ImPlotCol_Line]); + switch (GetCurrentScale()) { + case ImPlotScale_LinLin: RenderLineSegments(get_mark, get_base, TransformerLinLin(), DrawList, s.LineWeight, col_line); break; + case ImPlotScale_LogLin: RenderLineSegments(get_mark, get_base, TransformerLogLin(), DrawList, s.LineWeight, col_line); break; + case ImPlotScale_LinLog: 
RenderLineSegments(get_mark, get_base, TransformerLinLog(), DrawList, s.LineWeight, col_line); break; + case ImPlotScale_LogLog: RenderLineSegments(get_mark, get_base, TransformerLogLog(), DrawList, s.LineWeight, col_line); break; + } + } + // render markers (ImPlotMarker_None falls back to ImPlotMarker_Circle, so the check below is always true) + ImPlotMarker marker = s.Marker == ImPlotMarker_None ? ImPlotMarker_Circle : s.Marker; + if (marker != ImPlotMarker_None) { + const ImU32 col_line = ImGui::GetColorU32(s.Colors[ImPlotCol_MarkerOutline]); + const ImU32 col_fill = ImGui::GetColorU32(s.Colors[ImPlotCol_MarkerFill]); + switch (GetCurrentScale()) { + case ImPlotScale_LinLin: RenderMarkers(get_mark, TransformerLinLin(), DrawList, marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break; + case ImPlotScale_LogLin: RenderMarkers(get_mark, TransformerLogLin(), DrawList, marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break; + case ImPlotScale_LinLog: RenderMarkers(get_mark, TransformerLinLog(), DrawList, marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break; + case ImPlotScale_LogLog: RenderMarkers(get_mark, TransformerLogLog(), DrawList, marker, s.MarkerSize, s.RenderMarkerLine, col_line, s.MarkerWeight, s.RenderMarkerFill, col_fill); break; + } + } + EndItem(); + } +} + +template +void PlotStems(const char* label_id, const T* values, int count, double y_ref, double xscale, double x0, int offset, int stride) { + GetterYs get_mark(values,count,xscale,x0,offset,stride); + GetterYRef get_base(y_ref,count,xscale,x0); + PlotStemsEx(label_id, get_mark, get_base); +} + +template IMPLOT_API void PlotStems(const char* label_id, const ImS8* values, int count, double y_ref, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const ImU8* values, int count, double y_ref, double xscale, double x0, int offset, int stride); +template IMPLOT_API void 
PlotStems(const char* label_id, const ImS16* values, int count, double y_ref, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const ImU16* values, int count, double y_ref, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const ImS32* values, int count, double y_ref, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const ImU32* values, int count, double y_ref, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const ImS64* values, int count, double y_ref, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const ImU64* values, int count, double y_ref, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const float* values, int count, double y_ref, double xscale, double x0, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const double* values, int count, double y_ref, double xscale, double x0, int offset, int stride); + +template +void PlotStems(const char* label_id, const T* xs, const T* ys, int count, double y_ref, int offset, int stride) { + GetterXsYs get_mark(xs,ys,count,offset,stride); + GetterXsYRef get_base(xs,y_ref,count,offset,stride); + PlotStemsEx(label_id, get_mark, get_base); +} + +template IMPLOT_API void PlotStems(const char* label_id, const ImS8* xs, const ImS8* ys, int count, double y_ref, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const ImU8* xs, const ImU8* ys, int count, double y_ref, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const ImS16* xs, const ImS16* ys, int count, double y_ref, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const ImU16* xs, const 
ImU16* ys, int count, double y_ref, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const ImS32* xs, const ImS32* ys, int count, double y_ref, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const ImU32* xs, const ImU32* ys, int count, double y_ref, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const ImS64* xs, const ImS64* ys, int count, double y_ref, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const ImU64* xs, const ImU64* ys, int count, double y_ref, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const float* xs, const float* ys, int count, double y_ref, int offset, int stride); +template IMPLOT_API void PlotStems(const char* label_id, const double* xs, const double* ys, int count, double y_ref, int offset, int stride); + +//----------------------------------------------------------------------------- +// INFINITE LINES +//----------------------------------------------------------------------------- + +template +void PlotVLines(const char* label_id, const T* xs, int count, int offset, int stride) { /* One vertical line per xs entry, running between the current plot limits lims.Y.Min and lims.Y.Max; only the X extent takes part in fitting. */ + if (BeginItem(label_id, ImPlotCol_Line)) { + const ImPlotLimits lims = GetPlotLimits(); + GetterXsYRef get_min(xs,lims.Y.Min,count,offset,stride); + GetterXsYRef get_max(xs,lims.Y.Max,count,offset,stride); + if (FitThisFrame()) { + for (int i = 0; i < get_min.Count; ++i) + FitPointX(get_min(i).x); + } + const ImPlotNextItemData& s = GetItemData(); + ImDrawList& DrawList = *GetPlotDrawList(); + // render the vertical lines (get_min -> get_max) + if (s.RenderLine) { + const ImU32 col_line = ImGui::GetColorU32(s.Colors[ImPlotCol_Line]); + switch (GetCurrentScale()) { + case ImPlotScale_LinLin: RenderLineSegments(get_min, get_max, TransformerLinLin(), DrawList, s.LineWeight, col_line); break; + case ImPlotScale_LogLin: RenderLineSegments(get_min, get_max, TransformerLogLin(), DrawList, s.LineWeight, col_line); break; + case 
ImPlotScale_LinLog: RenderLineSegments(get_min, get_max, TransformerLinLog(), DrawList, s.LineWeight, col_line); break; + case ImPlotScale_LogLog: RenderLineSegments(get_min, get_max, TransformerLogLog(), DrawList, s.LineWeight, col_line); break; + } + } + EndItem(); + } +} + +template IMPLOT_API void PlotVLines(const char* label_id, const ImS8* xs, int count, int offset, int stride); +template IMPLOT_API void PlotVLines(const char* label_id, const ImU8* xs, int count, int offset, int stride); +template IMPLOT_API void PlotVLines(const char* label_id, const ImS16* xs, int count, int offset, int stride); +template IMPLOT_API void PlotVLines(const char* label_id, const ImU16* xs, int count, int offset, int stride); +template IMPLOT_API void PlotVLines(const char* label_id, const ImS32* xs, int count, int offset, int stride); +template IMPLOT_API void PlotVLines(const char* label_id, const ImU32* xs, int count, int offset, int stride); +template IMPLOT_API void PlotVLines(const char* label_id, const ImS64* xs, int count, int offset, int stride); +template IMPLOT_API void PlotVLines(const char* label_id, const ImU64* xs, int count, int offset, int stride); +template IMPLOT_API void PlotVLines(const char* label_id, const float* xs, int count, int offset, int stride); +template IMPLOT_API void PlotVLines(const char* label_id, const double* xs, int count, int offset, int stride); + + +template +void PlotHLines(const char* label_id, const T* ys, int count, int offset, int stride) { /* One horizontal line per ys entry, running between the current plot limits lims.X.Min and lims.X.Max; only the Y extent takes part in fitting. */ + if (BeginItem(label_id, ImPlotCol_Line)) { + const ImPlotLimits lims = GetPlotLimits(); + GetterXRefYs get_min(lims.X.Min,ys,count,offset,stride); + GetterXRefYs get_max(lims.X.Max,ys,count,offset,stride); + if (FitThisFrame()) { + for (int i = 0; i < get_min.Count; ++i) + FitPointY(get_min(i).y); + } + const ImPlotNextItemData& s = GetItemData(); + ImDrawList& DrawList = *GetPlotDrawList(); + // render the horizontal lines (get_min -> get_max) + if (s.RenderLine) { + const ImU32 col_line = 
ImGui::GetColorU32(s.Colors[ImPlotCol_Line]); + switch (GetCurrentScale()) { + case ImPlotScale_LinLin: RenderLineSegments(get_min, get_max, TransformerLinLin(), DrawList, s.LineWeight, col_line); break; + case ImPlotScale_LogLin: RenderLineSegments(get_min, get_max, TransformerLogLin(), DrawList, s.LineWeight, col_line); break; + case ImPlotScale_LinLog: RenderLineSegments(get_min, get_max, TransformerLinLog(), DrawList, s.LineWeight, col_line); break; + case ImPlotScale_LogLog: RenderLineSegments(get_min, get_max, TransformerLogLog(), DrawList, s.LineWeight, col_line); break; + } + } + EndItem(); + } +} + +template IMPLOT_API void PlotHLines(const char* label_id, const ImS8* ys, int count, int offset, int stride); +template IMPLOT_API void PlotHLines(const char* label_id, const ImU8* ys, int count, int offset, int stride); +template IMPLOT_API void PlotHLines(const char* label_id, const ImS16* ys, int count, int offset, int stride); +template IMPLOT_API void PlotHLines(const char* label_id, const ImU16* ys, int count, int offset, int stride); +template IMPLOT_API void PlotHLines(const char* label_id, const ImS32* ys, int count, int offset, int stride); +template IMPLOT_API void PlotHLines(const char* label_id, const ImU32* ys, int count, int offset, int stride); +template IMPLOT_API void PlotHLines(const char* label_id, const ImS64* ys, int count, int offset, int stride); +template IMPLOT_API void PlotHLines(const char* label_id, const ImU64* ys, int count, int offset, int stride); +template IMPLOT_API void PlotHLines(const char* label_id, const float* ys, int count, int offset, int stride); +template IMPLOT_API void PlotHLines(const char* label_id, const double* ys, int count, int offset, int stride); + +//----------------------------------------------------------------------------- +// PLOT PIE CHART +//----------------------------------------------------------------------------- + +inline void RenderPieSlice(ImDrawList& DrawList, const ImPlotPoint& center, 
double radius, double a0, double a1, ImU32 col) { /* Fills one pie wedge as a fan: buffer[0] is the center, followed by n points on the arc from a0 to a1 (radians). */ + static const float resolution = 50 / (2 * IM_PI); + static ImVec2 buffer[50]; + buffer[0] = PlotToPixels(center); + int n = ImMin(49, ImMax(3, (int)((a1 - a0) * resolution))); /* capped at 49: buffer[50] holds the center plus n arc points, so a sweep of a full turn or more must not overrun it */ + double da = (a1 - a0) / (n - 1); + for (int i = 0; i < n; ++i) { + double a = a0 + i * da; + buffer[i + 1] = PlotToPixels(center.x + radius * cos(a), center.y + radius * sin(a)); + } + DrawList.AddConvexPolyFilled(buffer, n + 1, col); +} + +template +void PlotPieChart(const char* const label_ids[], const T* values, int count, double x, double y, double radius, bool normalize, const char* fmt, double angle0) { /* Draws one slice per value around (x,y), starting at angle0 (degrees). Values are divided by their sum when normalize is set or the sum exceeds 1. When fmt is non-NULL every shown slice is labelled with its raw value formatted through fmt. */ + IM_ASSERT_USER_ERROR(GImPlot->CurrentPlot != NULL, "PlotPieChart() needs to be called between BeginPlot() and EndPlot()!"); + ImDrawList & DrawList = *GetPlotDrawList(); + double sum = 0; + for (int i = 0; i < count; ++i) + sum += (double)values[i]; + normalize = normalize || sum > 1.0; + ImPlotPoint center(x,y); + PushPlotClipRect(); + double a0 = angle0 * 2 * IM_PI / 360.0; + double a1 = angle0 * 2 * IM_PI / 360.0; + for (int i = 0; i < count; ++i) { + double percent = normalize ? (double)values[i] / sum : (double)values[i]; + a1 = a0 + 2 * IM_PI * percent; + if (BeginItem(label_ids[i])) { + ImU32 col = ImGui::GetColorU32(GetCurrentItem()->Color); + if (percent < 0.5) { + RenderPieSlice(DrawList, center, radius, a0, a1, col); + } + else { /* slices of half a turn or more are drawn as two wedges - presumably to keep each filled polygon convex */ + RenderPieSlice(DrawList, center, radius, a0, a0 + (a1 - a0) * 0.5, col); + RenderPieSlice(DrawList, center, radius, a0 + (a1 - a0) * 0.5, a1, col); + } + EndItem(); + } + a0 = a1; + } + if (fmt != NULL) { + a0 = angle0 * 2 * IM_PI / 360.0; + a1 = angle0 * 2 * IM_PI / 360.0; + char buffer[32]; + for (int i = 0; i < count; ++i) { + ImPlotItem* item = GetItem(label_ids[i]); + double percent = normalize ? 
(double)values[i] / sum : (double)values[i]; + a1 = a0 + 2 * IM_PI * percent; + if (item->Show) { + snprintf(buffer, sizeof(buffer), fmt, (double)values[i]); /* bounded write: fmt is caller-supplied and buffer holds only 32 bytes */ + ImVec2 size = ImGui::CalcTextSize(buffer); + double angle = a0 + (a1 - a0) * 0.5; + ImVec2 pos = PlotToPixels(center.x + 0.5 * radius * cos(angle), center.y + 0.5 * radius * sin(angle)); + ImU32 col = CalcTextColor(item->Color); + DrawList.AddText(pos - size * 0.5f, col, buffer); + } + a0 = a1; + } + } + PopPlotClipRect(); +} + +template IMPLOT_API void PlotPieChart(const char* const label_ids[], const ImS8* values, int count, double x, double y, double radius, bool normalize, const char* fmt, double angle0); +template IMPLOT_API void PlotPieChart(const char* const label_ids[], const ImU8* values, int count, double x, double y, double radius, bool normalize, const char* fmt, double angle0); +template IMPLOT_API void PlotPieChart(const char* const label_ids[], const ImS16* values, int count, double x, double y, double radius, bool normalize, const char* fmt, double angle0); +template IMPLOT_API void PlotPieChart(const char* const label_ids[], const ImU16* values, int count, double x, double y, double radius, bool normalize, const char* fmt, double angle0); +template IMPLOT_API void PlotPieChart(const char* const label_ids[], const ImS32* values, int count, double x, double y, double radius, bool normalize, const char* fmt, double angle0); +template IMPLOT_API void PlotPieChart(const char* const label_ids[], const ImU32* values, int count, double x, double y, double radius, bool normalize, const char* fmt, double angle0); +template IMPLOT_API void PlotPieChart(const char* const label_ids[], const ImS64* values, int count, double x, double y, double radius, bool normalize, const char* fmt, double angle0); +template IMPLOT_API void PlotPieChart(const char* const label_ids[], const ImU64* values, int count, double x, double y, double radius, bool normalize, const char* fmt, double angle0); +template IMPLOT_API void 
PlotPieChart(const char* const label_ids[], const float* values, int count, double x, double y, double radius, bool normalize, const char* fmt, double angle0); +template IMPLOT_API void PlotPieChart(const char* const label_ids[], const double* values, int count, double x, double y, double radius, bool normalize, const char* fmt, double angle0); + +//----------------------------------------------------------------------------- +// PLOT HEATMAP +//----------------------------------------------------------------------------- + +template +void RenderHeatmap(Transformer transformer, ImDrawList& DrawList, const T* values, int rows, int cols, double scale_min, double scale_max, const char* fmt, const ImPlotPoint& bounds_min, const ImPlotPoint& bounds_max) { /* Fills a rows x cols grid of cells spanning bounds_min..bounds_max. values is read row by row with row 0 placed at bounds_max.y; each cell color comes from values[i] remapped from [scale_min, scale_max] to [0, 1]. When fmt is non-NULL each cell is also labelled with its value. */ + ImPlotContext& gp = *GImPlot; + const double w = (bounds_max.x - bounds_min.x) / cols; + const double h = (bounds_max.y - bounds_min.y) / rows; + const ImPlotPoint half_size(w*0.5,h*0.5); + int i = 0; + for (int r = 0; r < rows; ++r) { + for (int c = 0; c < cols; ++c) { + ImPlotPoint p; + p.x = bounds_min.x + 0.5*w + c*w; + p.y = bounds_max.y - (0.5*h + r*h); + ImVec2 a = transformer(ImPlotPoint(p.x - half_size.x, p.y - half_size.y)); + ImVec2 b = transformer(ImPlotPoint(p.x + half_size.x, p.y + half_size.y)); + double t = ImRemap((double)values[i], scale_min, scale_max, 0.0, 1.0); + ImVec4 color = LerpColormap((float)t); + color.w *= gp.Style.FillAlpha; + ImU32 col = ImGui::GetColorU32(color); + DrawList.AddRectFilled(a, b, col); + i++; + } + } + if (fmt != NULL) { + i = 0; + for (int r = 0; r < rows; ++r) { + for (int c = 0; c < cols; ++c) { + ImPlotPoint p; + p.x = bounds_min.x + 0.5*w + c*w; + p.y = bounds_max.y - (0.5*h + r*h); + ImVec2 px = transformer(p); + char buff[32]; + snprintf(buff, sizeof(buff), fmt, values[i]); /* bounded write: fmt is caller-supplied and buff holds only 32 bytes */ + ImVec2 size = ImGui::CalcTextSize(buff); + double t = ImRemap((double)values[i], scale_min, scale_max, 0.0, 1.0); + ImVec4 color = LerpColormap((float)t); + ImU32 col = CalcTextColor(color); + 
DrawList.AddText(px - size * 0.5f, col, buff); + i++; + } + } + } +} + +template +void PlotHeatmap(const char* label_id, const T* values, int rows, int cols, double scale_min, double scale_max, const char* fmt, const ImPlotPoint& bounds_min, const ImPlotPoint& bounds_max) { + IM_ASSERT_USER_ERROR(scale_min != scale_max, "Scale values must be different!"); + if (BeginItem(label_id)) { + if (FitThisFrame()) { + FitPoint(bounds_min); + FitPoint(bounds_max); + } + ImDrawList& DrawList = *GetPlotDrawList(); + switch (GetCurrentScale()) { + case ImPlotScale_LinLin: RenderHeatmap(TransformerLinLin(), DrawList, values, rows, cols, scale_min, scale_max, fmt, bounds_min, bounds_max); break; + case ImPlotScale_LogLin: RenderHeatmap(TransformerLogLin(), DrawList, values, rows, cols, scale_min, scale_max, fmt, bounds_min, bounds_max); break; + case ImPlotScale_LinLog: RenderHeatmap(TransformerLinLog(), DrawList, values, rows, cols, scale_min, scale_max, fmt, bounds_min, bounds_max); break; + case ImPlotScale_LogLog: RenderHeatmap(TransformerLogLog(), DrawList, values, rows, cols, scale_min, scale_max, fmt, bounds_min, bounds_max); break; + } + EndItem(); + } +} + +template IMPLOT_API void PlotHeatmap(const char* label_id, const ImS8* values, int rows, int cols, double scale_min, double scale_max, const char* fmt, const ImPlotPoint& bounds_min, const ImPlotPoint& bounds_max); +template IMPLOT_API void PlotHeatmap(const char* label_id, const ImU8* values, int rows, int cols, double scale_min, double scale_max, const char* fmt, const ImPlotPoint& bounds_min, const ImPlotPoint& bounds_max); +template IMPLOT_API void PlotHeatmap(const char* label_id, const ImS16* values, int rows, int cols, double scale_min, double scale_max, const char* fmt, const ImPlotPoint& bounds_min, const ImPlotPoint& bounds_max); +template IMPLOT_API void PlotHeatmap(const char* label_id, const ImU16* values, int rows, int cols, double scale_min, double scale_max, const char* fmt, const ImPlotPoint& 
bounds_min, const ImPlotPoint& bounds_max); +template IMPLOT_API void PlotHeatmap(const char* label_id, const ImS32* values, int rows, int cols, double scale_min, double scale_max, const char* fmt, const ImPlotPoint& bounds_min, const ImPlotPoint& bounds_max); +template IMPLOT_API void PlotHeatmap(const char* label_id, const ImU32* values, int rows, int cols, double scale_min, double scale_max, const char* fmt, const ImPlotPoint& bounds_min, const ImPlotPoint& bounds_max); +template IMPLOT_API void PlotHeatmap(const char* label_id, const ImS64* values, int rows, int cols, double scale_min, double scale_max, const char* fmt, const ImPlotPoint& bounds_min, const ImPlotPoint& bounds_max); +template IMPLOT_API void PlotHeatmap(const char* label_id, const ImU64* values, int rows, int cols, double scale_min, double scale_max, const char* fmt, const ImPlotPoint& bounds_min, const ImPlotPoint& bounds_max); +template IMPLOT_API void PlotHeatmap(const char* label_id, const float* values, int rows, int cols, double scale_min, double scale_max, const char* fmt, const ImPlotPoint& bounds_min, const ImPlotPoint& bounds_max); +template IMPLOT_API void PlotHeatmap(const char* label_id, const double* values, int rows, int cols, double scale_min, double scale_max, const char* fmt, const ImPlotPoint& bounds_min, const ImPlotPoint& bounds_max); + +//----------------------------------------------------------------------------- +// PLOT DIGITAL +//----------------------------------------------------------------------------- + +// TODO: Make this behave like all the other plot types (i.e. 
not fixed in y axis) + +template +inline void PlotDigitalEx(const char* label_id, Getter getter) { /* Draws a digital signal as filled rectangles positioned from gp.PixelRange[y_axis].Min.y rather than from the plotted y coordinate. Runs of samples with an equal y are merged into one rectangle, and gp.DigitalPlotOffset is advanced afterwards so that later digital items are offset from this one. Nothing is drawn unless there are at least two samples and s.RenderFill is set. */ + if (BeginItem(label_id, ImPlotCol_Fill)) { + ImPlotContext& gp = *GImPlot; + ImDrawList& DrawList = *GetPlotDrawList(); + const ImPlotNextItemData& s = GetItemData(); + if (getter.Count > 1 && s.RenderFill) { + const int y_axis = GetCurrentYAxis(); + int pixYMax = 0; + ImPlotPoint itemData1 = getter(0); + for (int i = 0; i < getter.Count; ++i) { + ImPlotPoint itemData2 = getter(i); + if (ImNanOrInf(itemData1.y)) { + itemData1 = itemData2; + continue; + } + if (ImNanOrInf(itemData2.y)) itemData2.y = ImConstrainNan(ImConstrainInf(itemData2.y)); + int pixY_0 = (int)(s.LineWeight); + itemData1.y = ImMax(0.0, itemData1.y); + float pixY_1_float = s.DigitalBitHeight * (float)itemData1.y; + int pixY_1 = (int)(pixY_1_float); //bar height in whole pixels; only positive values are allowed (itemData1.y was clamped to >= 0 above) + int pixY_chPosOffset = (int)(ImMax(s.DigitalBitHeight, pixY_1_float) + s.DigitalBitGap); + pixYMax = ImMax(pixYMax, pixY_chPosOffset); + ImVec2 pMin = PlotToPixels(itemData1); + ImVec2 pMax = PlotToPixels(itemData2); + int pixY_Offset = 20; //20 pixel from bottom due to mouse cursor label + pMin.y = (gp.PixelRange[y_axis].Min.y) + ((-gp.DigitalPlotOffset) - pixY_Offset); + pMax.y = (gp.PixelRange[y_axis].Min.y) + ((-gp.DigitalPlotOffset) - pixY_0 - pixY_1 - pixY_Offset); + //plot only one rectangle for same digital state: keep extending pMax.x while the following samples have an equal y + while (((i+2) < getter.Count) && (itemData1.y == itemData2.y)) { + const int in = (i + 1); + itemData2 = getter(in); + if (ImNanOrInf(itemData2.y)) break; + pMax.x = PlotToPixels(itemData2).x; + i++; + } + //do not extend plot outside plot range + if (pMin.x < gp.PixelRange[y_axis].Min.x) pMin.x = gp.PixelRange[y_axis].Min.x; + if (pMax.x < gp.PixelRange[y_axis].Min.x) pMax.x = gp.PixelRange[y_axis].Min.x; + if (pMin.x > gp.PixelRange[y_axis].Max.x) pMin.x = gp.PixelRange[y_axis].Max.x; + if (pMax.x > gp.PixelRange[y_axis].Max.x) pMax.x = gp.PixelRange[y_axis].Max.x; + //plot a rectangle that extends up 
to x2 with y1 height + if ((pMax.x > pMin.x) && (gp.CurrentPlot->PlotRect.Contains(pMin) || gp.CurrentPlot->PlotRect.Contains(pMax))) { + // ImVec4 colAlpha = item->Color; + // colAlpha.w = item->Highlight ? 1.0f : 0.9f; + DrawList.AddRectFilled(pMin, pMax, ImGui::GetColorU32(s.Colors[ImPlotCol_Fill])); + } + itemData1 = itemData2; + } + gp.DigitalPlotItemCnt++; + gp.DigitalPlotOffset += pixYMax; /* offsets the next digital item by this item's tallest bar height plus the bit gap */ + } + EndItem(); + } +} + + +template +void PlotDigital(const char* label_id, const T* xs, const T* ys, int count, int offset, int stride) { + GetterXsYs getter(xs,ys,count,offset,stride); + return PlotDigitalEx(label_id, getter); +} + +template IMPLOT_API void PlotDigital(const char* label_id, const ImS8* xs, const ImS8* ys, int count, int offset, int stride); +template IMPLOT_API void PlotDigital(const char* label_id, const ImU8* xs, const ImU8* ys, int count, int offset, int stride); +template IMPLOT_API void PlotDigital(const char* label_id, const ImS16* xs, const ImS16* ys, int count, int offset, int stride); +template IMPLOT_API void PlotDigital(const char* label_id, const ImU16* xs, const ImU16* ys, int count, int offset, int stride); +template IMPLOT_API void PlotDigital(const char* label_id, const ImS32* xs, const ImS32* ys, int count, int offset, int stride); +template IMPLOT_API void PlotDigital(const char* label_id, const ImU32* xs, const ImU32* ys, int count, int offset, int stride); +template IMPLOT_API void PlotDigital(const char* label_id, const ImS64* xs, const ImS64* ys, int count, int offset, int stride); +template IMPLOT_API void PlotDigital(const char* label_id, const ImU64* xs, const ImU64* ys, int count, int offset, int stride); +template IMPLOT_API void PlotDigital(const char* label_id, const float* xs, const float* ys, int count, int offset, int stride); +template IMPLOT_API void PlotDigital(const char* label_id, const double* xs, const double* ys, int count, int offset, int stride); + +// custom +void PlotDigitalG(const char* label_id, ImPlotPoint 
(*getter_func)(void* data, int idx), void* data, int count, int offset) { + GetterFuncPtr getter(getter_func,data,count,offset); + return PlotDigitalEx(label_id, getter); +} + +//----------------------------------------------------------------------------- +// PLOT RECTS +//----------------------------------------------------------------------------- +template +void PlotRectsEx(const char* label_id, const Getter& getter) { + if (BeginItem(label_id, ImPlotCol_Fill)) { + if (FitThisFrame()) { + for (int i = 0; i < getter.Count; ++i) { + ImPlotPoint p = getter(i); + FitPoint(p); + } + } + const ImPlotNextItemData& s = GetItemData(); + if (s.RenderFill) { + ImDrawList& DrawList = *GetPlotDrawList(); + ImU32 col = ImGui::GetColorU32(s.Colors[ImPlotCol_Fill]); + switch (GetCurrentScale()) { + case ImPlotScale_LinLin: RenderPrimitives(RectRenderer(getter, TransformerLinLin(), col), DrawList, GImPlot->CurrentPlot->PlotRect); break; + case ImPlotScale_LogLin: RenderPrimitives(RectRenderer(getter, TransformerLogLin(), col), DrawList, GImPlot->CurrentPlot->PlotRect); break; + case ImPlotScale_LinLog: RenderPrimitives(RectRenderer(getter, TransformerLinLog(), col), DrawList, GImPlot->CurrentPlot->PlotRect); break; + case ImPlotScale_LogLog: RenderPrimitives(RectRenderer(getter, TransformerLogLog(), col), DrawList, GImPlot->CurrentPlot->PlotRect); break; + } + } + EndItem(); + } +} + +// float +void PlotRects(const char* label_id, const float* xs, const float* ys, int count, int offset, int stride) { + GetterXsYs getter(xs,ys,count,offset,stride); + PlotRectsEx(label_id, getter); +} + +// double +void PlotRects(const char* label_id, const double* xs, const double* ys, int count, int offset, int stride) { + GetterXsYs getter(xs,ys,count,offset,stride); + PlotRectsEx(label_id, getter); +} + +// custom +void PlotRects(const char* label_id, ImPlotPoint (*getter_func)(void* data, int idx), void* data, int count, int offset) { + GetterFuncPtr getter(getter_func,data,count,offset); + 
return PlotRectsEx(label_id, getter); +} + +//----------------------------------------------------------------------------- +// PLOT IMAGE +//----------------------------------------------------------------------------- + +void PlotImage(const char* label_id, ImTextureID user_texture_id, const ImPlotPoint& bmin, const ImPlotPoint& bmax, const ImVec2& uv0, const ImVec2& uv1, const ImVec4& tint_col) { + if (BeginItem(label_id)) { + if (FitThisFrame()) { + FitPoint(bmin); + FitPoint(bmax); + } + GetCurrentItem()->Color = tint_col; + ImDrawList& DrawList = *GetPlotDrawList(); + ImVec2 p1 = PlotToPixels(bmin.x, bmax.y); + ImVec2 p2 = PlotToPixels(bmax.x, bmin.y); + PushPlotClipRect(); + DrawList.AddImage(user_texture_id, p1, p2, uv0, uv1, ImGui::ColorConvertFloat4ToU32(tint_col)); + PopPlotClipRect(); + EndItem(); + } +} + +//----------------------------------------------------------------------------- +// PLOT TEXT +//----------------------------------------------------------------------------- + +// double +void PlotText(const char* text, double x, double y, bool vertical, const ImVec2& pixel_offset) { + IM_ASSERT_USER_ERROR(GImPlot->CurrentPlot != NULL, "PlotText() needs to be called between BeginPlot() and EndPlot()!"); + ImDrawList & DrawList = *GetPlotDrawList(); + PushPlotClipRect(); + ImU32 colTxt = GetStyleColorU32(ImPlotCol_InlayText); + if (vertical) { + ImVec2 ctr = CalcTextSizeVertical(text) * 0.5f; + ImVec2 pos = PlotToPixels(ImPlotPoint(x,y)) + ImVec2(-ctr.x, ctr.y) + pixel_offset; + AddTextVertical(&DrawList, pos, colTxt, text); + } + else { + ImVec2 pos = PlotToPixels(ImPlotPoint(x,y)) - ImGui::CalcTextSize(text) * 0.5f + pixel_offset; + DrawList.AddText(pos, colTxt, text); + } + PopPlotClipRect(); +} + +//----------------------------------------------------------------------------- +// PLOT DUMMY +//----------------------------------------------------------------------------- + +void PlotDummy(const char* label_id) { + if (BeginItem(label_id, 
ImPlotCol_Line)) + EndItem(); +} + +} // namespace ImPlot \ No newline at end of file diff --git a/cpp-projects/3d-engine/imgui/imconfig.h b/cpp-projects/3d-engine/imgui/imconfig.h new file mode 100644 index 0000000..18901f8 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/imconfig.h @@ -0,0 +1,150 @@ +//----------------------------------------------------------------------------- +// COMPILE-TIME OPTIONS FOR DEAR IMGUI +// Runtime options (clipboard callbacks, enabling various features, etc.) can generally be set via the ImGuiIO structure. +// You can use ImGui::SetAllocatorFunctions() before calling ImGui::CreateContext() to rewire memory allocation functions. +//----------------------------------------------------------------------------- +// A) You may edit imconfig.h (and not overwrite it when updating Dear ImGui, or maintain a patch/rebased branch with your modifications to it) +// B) or '#define IMGUI_USER_CONFIG "my_imgui_config.h"' in your project and then add directives in your own file without touching this template. +//----------------------------------------------------------------------------- +// You need to make sure that configuration settings are defined consistently _everywhere_ Dear ImGui is used, which include the imgui*.cpp +// files but also _any_ of your code that uses Dear ImGui. This is because some compile-time options have an affect on data structures. +// Defining those options in imconfig.h will ensure every compilation unit gets to see the same data structure layouts. +// Call IMGUI_CHECKVERSION() from your .cpp files to verify that the data structures your files are using are matching the ones imgui.cpp is using. +//----------------------------------------------------------------------------- + +#pragma once + +//---- Define assertion handler. Defaults to calling assert(). +// If your macro uses multiple statements, make sure is enclosed in a 'do { .. } while (0)' block so it can be used as a single statement. 
+//#define IM_ASSERT(_EXPR) MyAssert(_EXPR) +//#define IM_ASSERT(_EXPR) ((void)(_EXPR)) // Disable asserts + +//---- Define attributes of all API symbols declarations, e.g. for DLL under Windows +// Using dear imgui via a shared library is not recommended, because of function call overhead and because we don't guarantee backward nor forward ABI compatibility. +//#define IMGUI_API __declspec( dllexport ) +//#define IMGUI_API __declspec( dllimport ) + +//---- Don't define obsolete functions/enums/behaviors. Consider enabling from time to time after updating to avoid using soon-to-be obsolete function/names. +//#define IMGUI_DISABLE_OBSOLETE_FUNCTIONS + +//---- Disable all of Dear ImGui or don't implement standard windows. +// It is very strongly recommended to NOT disable the demo windows during development. Please read comments in imgui_demo.cpp. +//#define IMGUI_DISABLE // Disable everything: all headers and source files will be empty. +//#define IMGUI_DISABLE_DEMO_WINDOWS // Disable demo windows: ShowDemoWindow()/ShowStyleEditor() will be empty. Not recommended. +//#define IMGUI_DISABLE_METRICS_WINDOW // Disable metrics/debugger window: ShowMetricsWindow() will be empty. + +//---- Don't implement some functions to reduce linkage requirements. +//#define IMGUI_DISABLE_WIN32_DEFAULT_CLIPBOARD_FUNCTIONS // [Win32] Don't implement default clipboard handler. Won't use and link with OpenClipboard/GetClipboardData/CloseClipboard etc. +//#define IMGUI_DISABLE_WIN32_DEFAULT_IME_FUNCTIONS // [Win32] Don't implement default IME handler. Won't use and link with ImmGetContext/ImmSetCompositionWindow. +//#define IMGUI_DISABLE_WIN32_FUNCTIONS // [Win32] Won't use and link with any Win32 function (clipboard, ime). +//#define IMGUI_ENABLE_OSX_DEFAULT_CLIPBOARD_FUNCTIONS // [OSX] Implement default OSX clipboard handler (need to link with '-framework ApplicationServices', this is why this is not the default). 
+//#define IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS // Don't implement ImFormatString/ImFormatStringV so you can implement them yourself (e.g. if you don't want to link with vsnprintf) +//#define IMGUI_DISABLE_DEFAULT_MATH_FUNCTIONS // Don't implement ImFabs/ImSqrt/ImPow/ImFmod/ImCos/ImSin/ImAcos/ImAtan2 so you can implement them yourself. +//#define IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS // Don't implement ImFileOpen/ImFileClose/ImFileRead/ImFileWrite so you can implement them yourself if you don't want to link with fopen/fclose/fread/fwrite. This will also disable the LogToTTY() function. +//#define IMGUI_DISABLE_DEFAULT_ALLOCATORS // Don't implement default allocators calling malloc()/free() to avoid linking with them. You will need to call ImGui::SetAllocatorFunctions(). + +//---- Include imgui_user.h at the end of imgui.h as a convenience +//#define IMGUI_INCLUDE_IMGUI_USER_H + +//---- Pack colors to BGRA8 instead of RGBA8 (to avoid converting from one to another) +//#define IMGUI_USE_BGRA_PACKED_COLOR + +//---- Use 32-bit for ImWchar (default is 16-bit) to support unicode planes 1-16. (e.g. point beyond 0xFFFF like emoticons, dingbats, symbols, shapes, ancient languages, etc...) +//#define IMGUI_USE_WCHAR32 + +//---- Avoid multiple STB libraries implementations, or redefine path/filenames to prioritize another version +// By default the embedded implementations are declared static and not available outside of Dear ImGui sources files. +//#define IMGUI_STB_TRUETYPE_FILENAME "my_folder/stb_truetype.h" +//#define IMGUI_STB_RECT_PACK_FILENAME "my_folder/stb_rect_pack.h" +//#define IMGUI_DISABLE_STB_TRUETYPE_IMPLEMENTATION +//#define IMGUI_DISABLE_STB_RECT_PACK_IMPLEMENTATION + +//---- Use stb_printf's faster implementation of vsnprintf instead of the one from libc (unless IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS is defined) +// Requires 'stb_sprintf.h' to be available in the include path. 
Compatibility checks of arguments and formats done by clang and GCC will be disabled in order to support the extra formats provided by STB sprintf. +// #define IMGUI_USE_STB_SPRINTF + +//---- Use FreeType to build and rasterize the font atlas (instead of stb_truetype which is embedded by default in Dear ImGui) +// Requires FreeType headers to be available in the include path. Requires program to be compiled with 'misc/freetype/imgui_freetype.cpp' (in this repository) + the FreeType library (not provided). +// On Windows you may use vcpkg with 'vcpkg install freetype' + 'vcpkg integrate install'. +//#define IMGUI_ENABLE_FREETYPE + +//---- Use stb_truetype to build and rasterize the font atlas (default) +// The only purpose of this define is if you want force compilation of the stb_truetype backend ALONG with the FreeType backend. +//#define IMGUI_ENABLE_STB_TRUETYPE + +//---- Define constructor and implicit cast operators to convert back<>forth between your math types and ImVec2/ImVec4. +// This will be inlined as part of ImVec2 and ImVec4 class declarations. +/* +#define IM_VEC2_CLASS_EXTRA \ + ImVec2(const MyVec2& f) { x = f.x; y = f.y; } \ + operator MyVec2() const { return MyVec2(x,y); } + +#define IM_VEC4_CLASS_EXTRA \ + ImVec4(const MyVec4& f) { x = f.x; y = f.y; z = f.z; w = f.w; } \ + operator MyVec4() const { return MyVec4(x,y,z,w); } +*/ + +//---- Use 32-bit vertex indices (default is 16-bit) is one way to allow large meshes with more than 64K vertices. +// Your renderer backend will need to support it (most example renderer backends support both 16/32-bit indices). +// Another way to allow large meshes while keeping 16-bit indices is to handle ImDrawCmd::VtxOffset in your renderer. +// Read about ImGuiBackendFlags_RendererHasVtxOffset for details. 
+//#define ImDrawIdx unsigned int + +//---- Override ImDrawCallback signature (will need to modify renderer backends accordingly) +//struct ImDrawList; +//struct ImDrawCmd; +//typedef void (*MyImDrawCallback)(const ImDrawList* draw_list, const ImDrawCmd* cmd, void* my_renderer_user_data); +//#define ImDrawCallback MyImDrawCallback + +//---- Debug Tools: Macro to break in Debugger +// (use 'Metrics->Tools->Item Picker' to pick widgets with the mouse and break into them for easy debugging.) +//#define IM_DEBUG_BREAK IM_ASSERT(0) +//#define IM_DEBUG_BREAK __debugbreak() + +//---- Debug Tools: Have the Item Picker break in the ItemAdd() function instead of ItemHoverable(), +// (which comes earlier in the code, will catch a few extra items, allow picking items other than Hovered one.) +// This adds a small runtime cost which is why it is not enabled by default. +//#define IMGUI_DEBUG_TOOL_ITEM_PICKER_EX + +//---- Debug Tools: Enable slower asserts +//#define IMGUI_DEBUG_PARANOID + +//---- Tip: You can add extra functions within the ImGui:: namespace, here or in your own headers files. 
+/* +namespace ImGui +{ + void MyFunction(const char* name, const MyMatrix44& v); +} +*/ + +#include +#include + +#include "imgui-sfml/imgui-SFML_export.h" + +#define IM_VEC2_CLASS_EXTRA \ + template \ + ImVec2(const sf::Vector2& v) { \ + x = static_cast(v.x); \ + y = static_cast(v.y); \ + } \ + \ + template \ + operator sf::Vector2() const { \ + return sf::Vector2(x, y); \ + } + +#define IM_VEC4_CLASS_EXTRA \ + ImVec4(const sf::Color & c) \ + : x(c.r / 255.f), y(c.g / 255.f), z(c.b / 255.f), w(c.a / 255.f)\ + {} \ + operator sf::Color() const { \ + return sf::Color( \ + static_cast(x * 255.f), \ + static_cast(y * 255.f), \ + static_cast(z * 255.f), \ + static_cast(w * 255.f)); \ + } + +#define ImTextureID unsigned int + diff --git a/cpp-projects/3d-engine/imgui/imgui.cpp b/cpp-projects/3d-engine/imgui/imgui.cpp new file mode 100644 index 0000000..380df57 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/imgui.cpp @@ -0,0 +1,11177 @@ +// dear imgui, v1.81 WIP +// (main code and documentation) + +// Help: +// - Read FAQ at http://dearimgui.org/faq +// - Newcomers, read 'Programmer guide' below for notes on how to setup Dear ImGui in your codebase. +// - Call and read ImGui::ShowDemoWindow() in imgui_demo.cpp. All applications in examples/ are doing that. +// Read imgui.cpp for details, links and comments. + +// Resources: +// - FAQ http://dearimgui.org/faq +// - Homepage & latest https://github.com/ocornut/imgui +// - Releases & changelog https://github.com/ocornut/imgui/releases +// - Gallery https://github.com/ocornut/imgui/issues/3488 (please post your screenshots/video there!) +// - Glossary https://github.com/ocornut/imgui/wiki/Glossary +// - Wiki https://github.com/ocornut/imgui/wiki +// - Issues & support https://github.com/ocornut/imgui/issues + +// Developed by Omar Cornut and every direct or indirect contributors to the GitHub. +// See LICENSE.txt for copyright and licensing details (standard MIT License). 
+// This library is free but needs your support to sustain development and maintenance. +// Businesses: you can support continued development via invoiced technical support, maintenance and sponsoring contracts. Please reach out to "contact AT dearimgui.org". +// Individuals: you can support continued development via donations. See docs/README or web page. + +// It is recommended that you don't modify imgui.cpp! It will become difficult for you to update the library. +// Note that 'ImGui::' being a namespace, you can add functions into the namespace from your own source files, without +// modifying imgui.h or imgui.cpp. You may include imgui_internal.h to access internal data structures, but it doesn't +// come with any guarantee of forward compatibility. Discussing your changes on the GitHub Issue Tracker may lead you +// to a better solution or official support for them. + +/* + +Index of this file: + +DOCUMENTATION + +- MISSION STATEMENT +- END-USER GUIDE +- PROGRAMMER GUIDE + - READ FIRST + - HOW TO UPDATE TO A NEWER VERSION OF DEAR IMGUI + - GETTING STARTED WITH INTEGRATING DEAR IMGUI IN YOUR CODE/ENGINE + - HOW A SIMPLE APPLICATION MAY LOOK LIKE + - HOW A SIMPLE RENDERING FUNCTION MAY LOOK LIKE + - USING GAMEPAD/KEYBOARD NAVIGATION CONTROLS +- API BREAKING CHANGES (read me when you update!) 
+- FREQUENTLY ASKED QUESTIONS (FAQ) + - Read all answers online: https://www.dearimgui.org/faq, or in docs/FAQ.md (with a Markdown viewer) + +CODE +(search for "[SECTION]" in the code to find them) + +// [SECTION] INCLUDES +// [SECTION] FORWARD DECLARATIONS +// [SECTION] CONTEXT AND MEMORY ALLOCATORS +// [SECTION] USER FACING STRUCTURES (ImGuiStyle, ImGuiIO) +// [SECTION] MISC HELPERS/UTILITIES (Geometry functions) +// [SECTION] MISC HELPERS/UTILITIES (String, Format, Hash functions) +// [SECTION] MISC HELPERS/UTILITIES (File functions) +// [SECTION] MISC HELPERS/UTILITIES (ImText* functions) +// [SECTION] MISC HELPERS/UTILITIES (Color functions) +// [SECTION] ImGuiStorage +// [SECTION] ImGuiTextFilter +// [SECTION] ImGuiTextBuffer +// [SECTION] ImGuiListClipper +// [SECTION] STYLING +// [SECTION] RENDER HELPERS +// [SECTION] MAIN CODE (most of the code! lots of stuff, needs tidying up!) +// [SECTION] ERROR CHECKING +// [SECTION] LAYOUT +// [SECTION] SCROLLING +// [SECTION] TOOLTIPS +// [SECTION] POPUPS +// [SECTION] KEYBOARD/GAMEPAD NAVIGATION +// [SECTION] DRAG AND DROP +// [SECTION] LOGGING/CAPTURING +// [SECTION] SETTINGS +// [SECTION] PLATFORM DEPENDENT HELPERS +// [SECTION] METRICS/DEBUGGER WINDOW + +*/ + +//----------------------------------------------------------------------------- +// DOCUMENTATION +//----------------------------------------------------------------------------- + +/* + + MISSION STATEMENT + ================= + + - Easy to use to create code-driven and data-driven tools. + - Easy to use to create ad hoc short-lived tools and long-lived, more elaborate tools. + - Easy to hack and improve. + - Minimize setup and maintenance. + - Minimize state storage on user side. + - Portable, minimize dependencies, run on target (consoles, phones, etc.). + - Efficient runtime and memory consumption. + + Designed for developers and content-creators, not the typical end-user! 
Some of the current weaknesses include: + + - Doesn't look fancy, doesn't animate. + - Limited layout features, intricate layouts are typically crafted in code. + + + END-USER GUIDE + ============== + + - Double-click on title bar to collapse window. + - Click upper right corner to close a window, available when 'bool* p_open' is passed to ImGui::Begin(). + - Click and drag on lower right corner to resize window (double-click to auto fit window to its contents). + - Click and drag on any empty space to move window. + - TAB/SHIFT+TAB to cycle through keyboard editable fields. + - CTRL+Click on a slider or drag box to input value as text. + - Use mouse wheel to scroll. + - Text editor: + - Hold SHIFT or use mouse to select text. + - CTRL+Left/Right to word jump. + - CTRL+Shift+Left/Right to select words. + - CTRL+A or Double-Click to select all. + - CTRL+X,CTRL+C,CTRL+V to use OS clipboard. + - CTRL+Z,CTRL+Y to undo/redo. + - ESCAPE to revert text to its original value. + - You can apply arithmetic operators +,*,/ on numerical values. Use +- to subtract (because - would set a negative value!) + - Controls are automatically adjusted for OSX to match standard OSX text editing operations. + - General Keyboard controls: enable with ImGuiConfigFlags_NavEnableKeyboard. + - General Gamepad controls: enable with ImGuiConfigFlags_NavEnableGamepad. See suggested mappings in imgui.h ImGuiNavInput_ + download PNG/PSD at http://dearimgui.org/controls_sheets + + + PROGRAMMER GUIDE + ================ + + READ FIRST + ---------- + - Remember to read the FAQ (https://www.dearimgui.org/faq) + - Your code creates the UI, if your code doesn't run the UI is gone! The UI can be highly dynamic, there are no construction + or destruction steps, less superfluous data retention on your side, less state duplication, less state synchronization, fewer bugs. + - Call and read ImGui::ShowDemoWindow() for demo code demonstrating most features. + - The library is designed to be built from sources.
Avoid pre-compiled binaries and packaged versions. See imconfig.h to configure your build. + - Dear ImGui is an implementation of the IMGUI paradigm (immediate-mode graphical user interface, a term coined by Casey Muratori). + You can learn about IMGUI principles at http://www.johno.se/book/imgui.html, http://mollyrocket.com/861 & more links in the FAQ. + - Dear ImGui is a "single pass" rasterizing implementation of the IMGUI paradigm, aimed at ease of use and high performance. + For every application frame your UI code will be called only once. This is in contrast to e.g. Unity's own implementation of an IMGUI, + where the UI code is called multiple times ("multiple passes") from a single entry point. There are pros and cons to both approaches. + - Our origin is at the top-left. In axis aligned bounding boxes, Min = top-left, Max = bottom-right. + - This codebase is also optimized to yield decent performance with typical "Debug" build settings. + - Please make sure you have asserts enabled (IM_ASSERT redirects to assert() by default, but can be redirected). + If you get an assert, read the messages and comments around the assert. + - C++: this is a very C-ish codebase: we don't rely on C++11, we don't include any C++ headers, and ImGui:: is a namespace. + - C++: ImVec2/ImVec4 do not expose math operators by default, because it is expected that you use your own math types. + See FAQ "How can I use my own math types instead of ImVec2/ImVec4?" for details about setting up imconfig.h for that. + However, imgui_internal.h can optionally export math operators for ImVec2/ImVec4, which we use in this codebase. + - C++: pay attention that ImVector<> manipulates plain-old-data and does not honor construction/destruction (avoid using it in your code!).
+ + + HOW TO UPDATE TO A NEWER VERSION OF DEAR IMGUI + ---------------------------------------------- + - Overwrite all the sources files except for imconfig.h (if you have made modification to your copy of imconfig.h) + - Or maintain your own branch where you have imconfig.h modified as a top-most commit which you can regularly rebase over master. + - You can also use '#define IMGUI_USER_CONFIG "my_config_file.h" to redirect configuration to your own file. + - Read the "API BREAKING CHANGES" section (below). This is where we list occasional API breaking changes. + If a function/type has been renamed / or marked obsolete, try to fix the name in your code before it is permanently removed + from the public API. If you have a problem with a missing function/symbols, search for its name in the code, there will + likely be a comment about it. Please report any issue to the GitHub page! + - To find out usage of old API, you can add '#define IMGUI_DISABLE_OBSOLETE_FUNCTIONS' in your configuration file. + - Try to keep your copy of Dear ImGui reasonably up to date. + + + GETTING STARTED WITH INTEGRATING DEAR IMGUI IN YOUR CODE/ENGINE + --------------------------------------------------------------- + - Run and study the examples and demo in imgui_demo.cpp to get acquainted with the library. + - In the majority of cases you should be able to use unmodified backends files available in the backends/ folder. + - Add the Dear ImGui source files + selected backend source files to your projects or using your preferred build system. + It is recommended you build and statically link the .cpp files as part of your project and NOT as shared library (DLL). + - You can later customize the imconfig.h file to tweak some compile-time behavior, such as integrating Dear ImGui types with your own maths types. + - When using Dear ImGui, your programming IDE is your friend: follow the declaration of variables, functions and types to find comments about them. 
+ - Dear ImGui never touches or knows about your GPU state. The only function that knows about GPU is the draw function that you provide. + Effectively it means you can create widgets at any time in your code, regardless of considerations of being in "update" vs "render" + phases of your own application. All rendering information are stored into command-lists that you will retrieve after calling ImGui::Render(). + - Refer to the backends and demo applications in the examples/ folder for instruction on how to setup your code. + - If you are running over a standard OS with a common graphics API, you should be able to use unmodified imgui_impl_*** files from the examples/ folder. + + + HOW A SIMPLE APPLICATION MAY LOOK LIKE + -------------------------------------- + EXHIBIT 1: USING THE EXAMPLE BACKENDS (= imgui_impl_XXX.cpp files from the backends/ folder). + The sub-folders in examples/ contains examples applications following this structure. + + // Application init: create a dear imgui context, setup some options, load fonts + ImGui::CreateContext(); + ImGuiIO& io = ImGui::GetIO(); + // TODO: Set optional io.ConfigFlags values, e.g. 'io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard' to enable keyboard controls. + // TODO: Fill optional fields of the io structure later. + // TODO: Load TTF/OTF fonts if you don't want to use the default font. 
+ + // Initialize helper Platform and Renderer backends (here we are using imgui_impl_win32.cpp and imgui_impl_dx11.cpp) + ImGui_ImplWin32_Init(hwnd); + ImGui_ImplDX11_Init(g_pd3dDevice, g_pd3dDeviceContext); + + // Application main loop + while (true) + { + // Feed inputs to dear imgui, start new frame + ImGui_ImplDX11_NewFrame(); + ImGui_ImplWin32_NewFrame(); + ImGui::NewFrame(); + + // Any application code here + ImGui::Text("Hello, world!"); + + // Render dear imgui into screen + ImGui::Render(); + ImGui_ImplDX11_RenderDrawData(ImGui::GetDrawData()); + g_pSwapChain->Present(1, 0); + } + + // Shutdown + ImGui_ImplDX11_Shutdown(); + ImGui_ImplWin32_Shutdown(); + ImGui::DestroyContext(); + + EXHIBIT 2: IMPLEMENTING CUSTOM BACKEND / CUSTOM ENGINE + + // Application init: create a dear imgui context, setup some options, load fonts + ImGui::CreateContext(); + ImGuiIO& io = ImGui::GetIO(); + // TODO: Set optional io.ConfigFlags values, e.g. 'io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard' to enable keyboard controls. + // TODO: Fill optional fields of the io structure later. + // TODO: Load TTF/OTF fonts if you don't want to use the default font. + + // Build and load the texture atlas into a texture + // (In the examples/ app this is usually done within the ImGui_ImplXXX_Init() function from one of the demo Renderer) + int width, height; + unsigned char* pixels = NULL; + io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); + + // At this point you've got the texture data and you need to upload it to your graphics system: + // After we have created the texture, store its pointer/identifier (_in whichever format your engine uses_) in 'io.Fonts->TexID'. + // This will be passed back to you via the renderer. Basically ImTextureID == void*. Read FAQ for details about ImTextureID.
+ MyTexture* texture = MyEngine::CreateTextureFromMemoryPixels(pixels, width, height, TEXTURE_TYPE_RGBA32); + io.Fonts->TexID = (void*)texture; + + // Application main loop + while (true) + { + // Setup low-level inputs, e.g. on Win32: calling GetKeyboardState(), or write to those fields from your Windows message handlers, etc. + // (In the examples/ app this is usually done within the ImGui_ImplXXX_NewFrame() function from one of the demo Platform Backends) + io.DeltaTime = 1.0f/60.0f; // set the time elapsed since the previous frame (in seconds) + io.DisplaySize.x = 1920.0f; // set the current display width + io.DisplaySize.y = 1280.0f; // set the current display height here + io.MousePos = my_mouse_pos; // set the mouse position + io.MouseDown[0] = my_mouse_buttons[0]; // set the mouse button states + io.MouseDown[1] = my_mouse_buttons[1]; + + // Call NewFrame(), after this point you can use ImGui::* functions anytime + // (So you want to try calling NewFrame() as early as you can in your mainloop to be able to use Dear ImGui everywhere) + ImGui::NewFrame(); + + // Most of your application code here + ImGui::Text("Hello, world!"); + MyGameUpdate(); // may use any Dear ImGui functions, e.g. ImGui::Begin("My window"); ImGui::Text("Hello, world!"); ImGui::End(); + MyGameRender(); // may use any Dear ImGui functions as well! + + // Render dear imgui, swap buffers + // (You want to try calling EndFrame/Render as late as you can, to be able to use Dear ImGui in your own game rendering code) + ImGui::EndFrame(); + ImGui::Render(); + ImDrawData* draw_data = ImGui::GetDrawData(); + MyImGuiRenderFunction(draw_data); + SwapBuffers(); + } + + // Shutdown + ImGui::DestroyContext(); + + To decide whether to dispatch mouse/keyboard inputs to Dear ImGui or to the rest of your application, + you should read the 'io.WantCaptureMouse', 'io.WantCaptureKeyboard' and 'io.WantTextInput' flags! + Please read the FAQ and example applications for details about this!
+ + + HOW A SIMPLE RENDERING FUNCTION MAY LOOK LIKE + --------------------------------------------- + The backends in imgui_impl_XXX.cpp files contain many working implementations of a rendering function. + + void MyImGuiRenderFunction(ImDrawData* draw_data) + { + // TODO: Setup render state: alpha-blending enabled, no face culling, no depth testing, scissor enabled + // TODO: Setup viewport covering draw_data->DisplayPos to draw_data->DisplayPos + draw_data->DisplaySize + // TODO: Setup orthographic projection matrix cover draw_data->DisplayPos to draw_data->DisplayPos + draw_data->DisplaySize + // TODO: Setup shader: vertex { float2 pos, float2 uv, u32 color }, fragment shader sample color from 1 texture, multiply by vertex color. + for (int n = 0; n < draw_data->CmdListsCount; n++) + { + const ImDrawList* cmd_list = draw_data->CmdLists[n]; + const ImDrawVert* vtx_buffer = cmd_list->VtxBuffer.Data; // vertex buffer generated by Dear ImGui + const ImDrawIdx* idx_buffer = cmd_list->IdxBuffer.Data; // index buffer generated by Dear ImGui + for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) + { + const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; + if (pcmd->UserCallback) + { + pcmd->UserCallback(cmd_list, pcmd); + } + else + { + // The texture for the draw call is specified by pcmd->TextureId. + // The vast majority of draw calls will use the Dear ImGui texture atlas, which value you have set yourself during initialization. + MyEngineBindTexture((MyTexture*)pcmd->TextureId); + + // We are using scissoring to clip some objects. All low-level graphics API should support it. + // - If your engine doesn't support scissoring yet, you may ignore this at first. You will get some small glitches + // (some elements visible outside their bounds) but you can fix that once everything else works!
+ // - Clipping coordinates are provided in imgui coordinates space (from draw_data->DisplayPos to draw_data->DisplayPos + draw_data->DisplaySize) + // In a single viewport application, draw_data->DisplayPos will always be (0,0) and draw_data->DisplaySize will always be == io.DisplaySize. + // However, in the interest of supporting multi-viewport applications in the future (see 'viewport' branch on github), + // always subtract draw_data->DisplayPos from clipping bounds to convert them to your viewport space. + // - Note that pcmd->ClipRect contains Min+Max bounds. Some graphics API may use Min+Max, other may use Min+Size (size being Max-Min) + ImVec2 pos = draw_data->DisplayPos; + MyEngineScissor((int)(pcmd->ClipRect.x - pos.x), (int)(pcmd->ClipRect.y - pos.y), (int)(pcmd->ClipRect.z - pos.x), (int)(pcmd->ClipRect.w - pos.y)); + + // Render 'pcmd->ElemCount/3' indexed triangles. + // By default the indices ImDrawIdx are 16-bit, you can change them to 32-bit in imconfig.h if your engine doesn't support 16-bit indices. + MyEngineDrawIndexedTriangles(pcmd->ElemCount, sizeof(ImDrawIdx) == 2 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_INT, idx_buffer, vtx_buffer); + } + idx_buffer += pcmd->ElemCount; + } + } + } + + + USING GAMEPAD/KEYBOARD NAVIGATION CONTROLS + ------------------------------------------ + - The gamepad/keyboard navigation is fairly functional and keeps being improved. + - Gamepad support is particularly useful to use Dear ImGui on a console system (e.g. PS4, Switch, XB1) without a mouse! + - You can ask questions and report issues at https://github.com/ocornut/imgui/issues/787 + - The initial focus was to support game controllers, but keyboard is becoming increasingly and decently usable. + - Keyboard: + - Set io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard to enable. + NewFrame() will automatically fill io.NavInputs[] based on your io.KeysDown[] + io.KeyMap[] arrays. 
+ - When keyboard navigation is active (io.NavActive + ImGuiConfigFlags_NavEnableKeyboard), the io.WantCaptureKeyboard flag + will be set. For more advanced uses, you may want to read from: + - io.NavActive: true when a window is focused and it doesn't have the ImGuiWindowFlags_NoNavInputs flag set. + - io.NavVisible: true when the navigation cursor is visible (and usually goes false when mouse is used). + - or query focus information with e.g. IsWindowFocused(ImGuiFocusedFlags_AnyWindow), IsItemFocused() etc. functions. + Please reach out if you think the game vs navigation input sharing could be improved. + - Gamepad: + - Set io.ConfigFlags |= ImGuiConfigFlags_NavEnableGamepad to enable. + - Backend: Set io.BackendFlags |= ImGuiBackendFlags_HasGamepad + fill the io.NavInputs[] fields before calling NewFrame(). + Note that io.NavInputs[] is cleared by EndFrame(). + - See 'enum ImGuiNavInput_' in imgui.h for a description of inputs. For each entry of io.NavInputs[], set the following values: + 0.0f= not held. 1.0f= fully held. Pass intermediate 0.0f..1.0f values for analog triggers/sticks. + - We use a simple >0.0f test for activation testing, and won't attempt to test for a dead-zone. + Your code will probably need to transform your raw inputs (such as e.g. remapping your 0.2..0.9 raw input range to 0.0..1.0 imgui range, etc.). + - You can download PNG/PSD files depicting the gamepad controls for common controllers at: http://dearimgui.org/controls_sheets + - If you need to share inputs between your game and the imgui parts, the easiest approach is to go all-or-nothing, with a buttons combo + to toggle the target. Please reach out if you think the game vs navigation input sharing could be improved. + - Mouse: + - PS4 users: Consider emulating a mouse cursor with DualShock4 touch pad or a spare analog stick as a mouse-emulation fallback.
+ - Consoles/Tablet/Phone users: Consider using a Synergy 1.x server (on your PC) + uSynergy.c (on your console/tablet/phone app) to share your PC mouse/keyboard. + - On a TV/console system where readability may be lower or mouse inputs may be awkward, you may want to set the ImGuiConfigFlags_NavEnableSetMousePos flag. + Enabling ImGuiConfigFlags_NavEnableSetMousePos + ImGuiBackendFlags_HasSetMousePos instructs dear imgui to move your mouse cursor along with navigation movements. + When enabled, the NewFrame() function may alter 'io.MousePos' and set 'io.WantSetMousePos' to notify you that it wants the mouse cursor to be moved. + When that happens your backend NEEDS to move the OS or underlying mouse cursor on the next frame. Some of the backends in examples/ do that. + (If you set the NavEnableSetMousePos flag but don't honor 'io.WantSetMousePos' properly, imgui will misbehave as it will see your mouse as moving back and forth!) + (In a setup when you may not have easy control over the mouse cursor, e.g. uSynergy.c doesn't expose moving remote mouse cursor, you may want + to set a boolean to ignore your other external mouse positions until the external source is moved again.) + + + API BREAKING CHANGES + ==================== + + Occasionally introducing changes that are breaking the API. We try to make the breakage minor and easy to fix. + Below is a change-log of API breaking changes only. If you are using one of the functions listed, expect to have to fix some code. + When you are not sure about a old symbol or function name, try using the Search/Find function of your IDE to look for comments or references in all imgui files. + You can read releases logs https://github.com/ocornut/imgui/releases for more details. + + - 2021/01/26 (1.81) - imgui_freetype: removed ImGuiFreeType::BuildFontAtlas(). Kept inline redirection function. Prefer using '#define IMGUI_ENABLE_FREETYPE', but there's a runtime selection path available too. 
The shared extra flags parameters (very rarely used) are now stored in ImFontAtlas::FontBuilderFlags. + - imgui_freetype: renamed ImFontConfig::RasterizerFlags (used by FreeType) to ImFontConfig::FontBuilderFlags. + - imgui_freetype: renamed ImGuiFreeType::XXX flags to ImGuiFreeTypeBuilderFlags_XXX for consistency with other API. + - 2020/10/12 (1.80) - removed redirecting functions/enums that were marked obsolete in 1.63 (August 2018): + - ImGui::IsItemDeactivatedAfterChange() -> use ImGui::IsItemDeactivatedAfterEdit(). + - ImGuiCol_ModalWindowDarkening -> use ImGuiCol_ModalWindowDimBg + - ImGuiTextEditCallback -> use ImGuiInputTextCallback + - ImGuiTextEditCallbackData -> use ImGuiInputTextCallbackData + - 2020/12/21 (1.80) - renamed ImDrawList::AddBezierCurve() to AddBezierCubic(), and PathBezierCurveTo() to PathBezierCubicCurveTo(). Kept inline redirection function (will obsolete). + - 2020/12/04 (1.80) - added imgui_tables.cpp file! Manually constructed project files will need the new file added! + - 2020/11/18 (1.80) - renamed undocumented/internals ImGuiColumnsFlags_* to ImGuiOldColumnFlags_* in anticipation of the incoming Tables API. + - 2020/11/03 (1.80) - renamed io.ConfigWindowsMemoryCompactTimer to io.ConfigMemoryCompactTimer as the feature will apply to other data structures + - 2020/10/14 (1.80) - backends: moved all backends files (imgui_impl_XXXX.cpp, imgui_impl_XXXX.h) from examples/ to backends/.
+ - 2020/10/12 (1.80) - removed redirecting functions/enums that were marked obsolete in 1.60 (April 2018): + - io.RenderDrawListsFn pointer -> use ImGui::GetDrawData() value and call the render function of your backend + - ImGui::IsAnyWindowFocused() -> use ImGui::IsWindowFocused(ImGuiFocusedFlags_AnyWindow) + - ImGui::IsAnyWindowHovered() -> use ImGui::IsWindowHovered(ImGuiHoveredFlags_AnyWindow) + - ImGuiStyleVar_Count_ -> use ImGuiStyleVar_COUNT + - ImGuiMouseCursor_Count_ -> use ImGuiMouseCursor_COUNT + - removed redirecting functions names that were marked obsolete in 1.61 (May 2018): + - InputFloat (... int decimal_precision ...) -> use InputFloat (... const char* format ...) with format = "%.Xf" where X is your value for decimal_precision. + - same for InputFloat2()/InputFloat3()/InputFloat4() variants taking a `int decimal_precision` parameter. + - 2020/10/05 (1.79) - removed ImGuiListClipper: Renamed constructor parameters which created an ambiguous alternative to using the ImGuiListClipper::Begin() function, with misleading edge cases (note: imgui_memory_editor <0.40 from imgui_club/ used this old clipper API. Update your copy if needed). + - 2020/09/25 (1.79) - renamed ImGuiSliderFlags_ClampOnInput to ImGuiSliderFlags_AlwaysClamp. Kept redirection enum (will obsolete sooner because previous name was added recently). + - 2020/09/25 (1.79) - renamed style.TabMinWidthForUnselectedCloseButton to style.TabMinWidthForCloseButton. + - 2020/09/21 (1.79) - renamed OpenPopupContextItem() back to OpenPopupOnItemClick(), reverting the change from 1.77. For a variety of reasons this is more self-explanatory. + - 2020/09/21 (1.79) - removed return value from OpenPopupOnItemClick() - returned true on mouse release on item - because it is inconsistent with other popup APIs and makes others misleading. It's also unnecessary: you can use IsWindowAppearing() after BeginPopup() for a similar result.
+ - 2020/09/17 (1.79) - removed ImFont::DisplayOffset in favor of ImFontConfig::GlyphOffset. DisplayOffset was applied after scaling and not very meaningful/useful outside of being needed by the default ProggyClean font. If you scaled this value after calling AddFontDefault(), this is now done automatically. It was also getting in the way of better font scaling, so let's get rid of it now! + - 2020/08/17 (1.78) - obsoleted use of the trailing 'float power=1.0f' parameter for DragFloat(), DragFloat2(), DragFloat3(), DragFloat4(), DragFloatRange2(), DragScalar(), DragScalarN(), SliderFloat(), SliderFloat2(), SliderFloat3(), SliderFloat4(), SliderScalar(), SliderScalarN(), VSliderFloat() and VSliderScalar(). + replaced the 'float power=1.0f' argument with integer-based flags defaulting to 0 (as with all our flags). + worked out a backward-compatibility scheme so hopefully most C++ codebases should not be affected. in short, when calling those functions: + - if you omitted the 'power' parameter (likely!), you are not affected. + - if you set the 'power' parameter to 1.0f (same as previous default value): 1/ your compiler may warn on float>int conversion, 2/ everything else will work. 3/ you can replace the 1.0f value with 0 to fix the warning, and be technically correct. + - if you set the 'power' parameter to >1.0f (to enable non-linear editing): 1/ your compiler may warn on float>int conversion, 2/ code will assert at runtime, 3/ in case asserts are disabled, the code will not crash and enable the _Logarithmic flag. 4/ you can replace the >1.0f value with ImGuiSliderFlags_Logarithmic to fix the warning/assert and get a _similar_ effect as previous uses of power >1.0f. + see https://github.com/ocornut/imgui/issues/3361 for all details. + kept inline redirection functions (will obsolete) apart from: DragFloatRange2(), VSliderFloat(), VSliderScalar(). For those three the 'float power=1.0f' versions were removed directly as they were most unlikely ever used.
+ for shared code, you can version check at compile-time with `#if IMGUI_VERSION_NUM >= 17704`. + - obsoleted use of v_min > v_max in DragInt, DragFloat, DragScalar to lock edits (introduced in 1.73, was not demoed nor documented very well), will be replaced by a more generic ReadOnly feature. You may use the ImGuiSliderFlags_ReadOnly internal flag in the meantime. + - 2020/06/23 (1.77) - removed BeginPopupContextWindow(const char*, int mouse_button, bool also_over_items) in favor of BeginPopupContextWindow(const char*, ImGuiPopupFlags flags) with ImGuiPopupFlags_NoOverItems. + - 2020/06/15 (1.77) - renamed OpenPopupOnItemClick() to OpenPopupContextItem(). Kept inline redirection function (will obsolete). [NOTE: THIS WAS REVERTED IN 1.79] + - 2020/06/15 (1.77) - removed CalcItemRectClosestPoint() entry point which was made obsolete and asserting in December 2017. + - 2020/04/23 (1.77) - removed unnecessary ID (first arg) of ImFontAtlas::AddCustomRectRegular(). + - 2020/01/22 (1.75) - ImDrawList::AddCircle()/AddCircleFilled() functions don't accept negative radius any more. + - 2019/12/17 (1.75) - [undid this change in 1.76] made Columns() limited to 64 columns by asserting above that limit. While the current code technically supports it, future code may not so we're putting the restriction ahead. + - 2019/12/13 (1.75) - [imgui_internal.h] changed ImRect() default constructor initializes all fields to 0.0f instead of (FLT_MAX,FLT_MAX,-FLT_MAX,-FLT_MAX). If you used ImRect::Add() to create bounding boxes by adding multiple points into it, you may need to fix your initial value.
+ - 2019/12/08 (1.75) - removed redirecting functions/enums that were marked obsolete in 1.53 (December 2017): + - ShowTestWindow() -> use ShowDemoWindow() + - IsRootWindowFocused() -> use IsWindowFocused(ImGuiFocusedFlags_RootWindow) + - IsRootWindowOrAnyChildFocused() -> use IsWindowFocused(ImGuiFocusedFlags_RootAndChildWindows) + - SetNextWindowContentWidth(w) -> use SetNextWindowContentSize(ImVec2(w, 0.0f)) + - GetItemsLineHeightWithSpacing() -> use GetFrameHeightWithSpacing() + - ImGuiCol_ChildWindowBg -> use ImGuiCol_ChildBg + - ImGuiStyleVar_ChildWindowRounding -> use ImGuiStyleVar_ChildRounding + - ImGuiTreeNodeFlags_AllowOverlapMode -> use ImGuiTreeNodeFlags_AllowItemOverlap + - IMGUI_DISABLE_TEST_WINDOWS -> use IMGUI_DISABLE_DEMO_WINDOWS + - 2019/12/08 (1.75) - obsoleted calling ImDrawList::PrimReserve() with a negative count (which was vaguely documented and rarely if ever used). Instead we added an explicit PrimUnreserve() API. + - 2019/12/06 (1.75) - removed implicit default parameter to IsMouseDragging(int button = 0) to be consistent with other mouse functions (none of the other functions have it). + - 2019/11/21 (1.74) - ImFontAtlas::AddCustomRectRegular() now requires an ID larger than 0x110000 (instead of 0x10000) to conform with supporting Unicode planes 1-16 in a future update. ID below 0x110000 will now assert. + - 2019/11/19 (1.74) - renamed IMGUI_DISABLE_FORMAT_STRING_FUNCTIONS to IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS for consistency. + - 2019/11/19 (1.74) - renamed IMGUI_DISABLE_MATH_FUNCTIONS to IMGUI_DISABLE_DEFAULT_MATH_FUNCTIONS for consistency.
+ - 2019/10/22 (1.74) - removed redirecting functions/enums that were marked obsolete in 1.52 (October 2017): + - Begin() [old 5 args version] -> use Begin() [3 args], use SetNextWindowSize() SetNextWindowBgAlpha() if needed + - IsRootWindowOrAnyChildHovered() -> use IsWindowHovered(ImGuiHoveredFlags_RootAndChildWindows) + - AlignFirstTextHeightToWidgets() -> use AlignTextToFramePadding() + - SetNextWindowPosCenter() -> use SetNextWindowPos() with a pivot of (0.5f, 0.5f) + - ImFont::Glyph -> use ImFontGlyph + - 2019/10/14 (1.74) - inputs: Fixed a miscalculation in the keyboard/mouse "typematic" repeat delay/rate calculation, used by keys and e.g. repeating mouse buttons as well as the GetKeyPressedAmount() function. + if you were using a non-default value for io.KeyRepeatRate (previous default was 0.250), you can add +io.KeyRepeatDelay to it to compensate for the fix. + The function was triggering on: 0.0 and (delay+rate*N) where (N>=1). Fixed formula responds to (N>=0). Effectively it made io.KeyRepeatRate behave like it was set to (io.KeyRepeatRate + io.KeyRepeatDelay). + If you never altered io.KeyRepeatRate nor used GetKeyPressedAmount() this won't affect you. + - 2019/07/15 (1.72) - removed TreeAdvanceToLabelPos() which is rarely used and only does SetCursorPosX(GetCursorPosX() + GetTreeNodeToLabelSpacing()). Kept redirection function (will obsolete). + - 2019/07/12 (1.72) - renamed ImFontAtlas::CustomRect to ImFontAtlasCustomRect. Kept redirection typedef (will obsolete). + - 2019/06/14 (1.72) - removed redirecting functions/enums names that were marked obsolete in 1.51 (June 2017): ImGuiCol_Column*, ImGuiSetCond_*, IsItemHoveredRect(), IsPosHoveringAnyWindow(), IsMouseHoveringAnyWindow(), IsMouseHoveringWindow(), IMGUI_ONCE_UPON_A_FRAME. Grep this log for details and new names, or see how they were implemented until 1.71. 
+ - 2019/06/07 (1.71) - rendering of child window outer decorations (bg color, border, scrollbars) is now performed as part of the parent window. If you have + overlapping child windows in the same parent, and relied on their relative z-order to be mapped to their submission order, this will affect your rendering. + This optimization is disabled if the parent window has no visual output, because it appears to be the most common situation leading to the creation of overlapping child windows. + Please reach out if you are affected. + - 2019/05/13 (1.71) - renamed SetNextTreeNodeOpen() to SetNextItemOpen(). Kept inline redirection function (will obsolete). + - 2019/05/11 (1.71) - changed io.AddInputCharacter(unsigned short c) signature to io.AddInputCharacter(unsigned int c). + - 2019/04/29 (1.70) - improved ImDrawList thick strokes (>1.0f) preserving correct thickness up to 90 degrees angles (e.g. rectangles). If you have custom rendering using thick lines, they will appear thicker now. + - 2019/04/29 (1.70) - removed GetContentRegionAvailWidth(), use GetContentRegionAvail().x instead. Kept inline redirection function (will obsolete). + - 2019/03/04 (1.69) - renamed GetOverlayDrawList() to GetForegroundDrawList(). Kept redirection function (will obsolete). + - 2019/02/26 (1.69) - renamed ImGuiColorEditFlags_RGB/ImGuiColorEditFlags_HSV/ImGuiColorEditFlags_HEX to ImGuiColorEditFlags_DisplayRGB/ImGuiColorEditFlags_DisplayHSV/ImGuiColorEditFlags_DisplayHex. Kept redirection enums (will obsolete). + - 2019/02/14 (1.68) - made it illegal/assert when io.DeltaTime == 0.0f (with an exception for the first frame). If for some reason your time step calculation gives you a zero value, replace it with an arbitrary small value! + - 2019/02/01 (1.68) - removed io.DisplayVisibleMin/DisplayVisibleMax (which were marked obsolete and removed from viewport/docking branch already). + - 2019/01/06 (1.67) - renamed io.InputCharacters[], marked internal as was always intended.
Please don't access directly, and use AddInputCharacter() instead! + - 2019/01/06 (1.67) - renamed ImFontAtlas::GlyphRangesBuilder to ImFontGlyphRangesBuilder. Kept redirection typedef (will obsolete). + - 2018/12/20 (1.67) - made it illegal to call Begin("") with an empty string. This somehow half-worked before but had various undesirable side-effects. + - 2018/12/10 (1.67) - renamed io.ConfigResizeWindowsFromEdges to io.ConfigWindowsResizeFromEdges as we are doing a large pass on configuration flags. + - 2018/10/12 (1.66) - renamed misc/stl/imgui_stl.* to misc/cpp/imgui_stdlib.* in prevision for other C++ helper files. + - 2018/09/28 (1.66) - renamed SetScrollHere() to SetScrollHereY(). Kept redirection function (will obsolete). + - 2018/09/06 (1.65) - renamed stb_truetype.h to imstb_truetype.h, stb_textedit.h to imstb_textedit.h, and stb_rect_pack.h to imstb_rectpack.h. + If you were conveniently using the imgui copy of those STB headers in your project you will have to update your include paths. + - 2018/09/05 (1.65) - renamed io.OptCursorBlink/io.ConfigCursorBlink to io.ConfigInputTextCursorBlink. (#1427) + - 2018/08/31 (1.64) - added imgui_widgets.cpp file, extracted and moved widgets code out of imgui.cpp into imgui_widgets.cpp. Re-ordered some of the code remaining in imgui.cpp. + NONE OF THE FUNCTIONS HAVE CHANGED. THE CODE IS SEMANTICALLY 100% IDENTICAL, BUT _EVERY_ FUNCTION HAS BEEN MOVED. + Because of this, any local modifications to imgui.cpp will likely conflict when you update. Read docs/CHANGELOG.txt for suggestions. + - 2018/08/22 (1.63) - renamed IsItemDeactivatedAfterChange() to IsItemDeactivatedAfterEdit() for consistency with new IsItemEdited() API. Kept redirection function (will obsolete soonish as IsItemDeactivatedAfterChange() is very recent). + - 2018/08/21 (1.63) - renamed ImGuiTextEditCallback to ImGuiInputTextCallback, ImGuiTextEditCallbackData to ImGuiInputTextCallbackData for consistency. Kept redirection types (will obsolete). 
+ - 2018/08/21 (1.63) - removed ImGuiInputTextCallbackData::ReadOnly since it is a duplication of (ImGuiInputTextCallbackData::Flags & ImGuiInputTextFlags_ReadOnly). + - 2018/08/01 (1.63) - removed per-window ImGuiWindowFlags_ResizeFromAnySide beta flag in favor of a global io.ConfigResizeWindowsFromEdges [update 1.67 renamed to ConfigWindowsResizeFromEdges] to enable the feature. + - 2018/08/01 (1.63) - renamed io.OptCursorBlink to io.ConfigCursorBlink [-> io.ConfigInputTextCursorBlink in 1.65], io.OptMacOSXBehaviors to ConfigMacOSXBehaviors for consistency. + - 2018/07/22 (1.63) - changed ImGui::GetTime() return value from float to double to avoid accumulating floating point imprecisions over time. + - 2018/07/08 (1.63) - style: renamed ImGuiCol_ModalWindowDarkening to ImGuiCol_ModalWindowDimBg for consistency with other features. Kept redirection enum (will obsolete). + - 2018/06/08 (1.62) - examples: the imgui_impl_XXX files have been split to separate platform (Win32, GLFW, SDL2, etc.) from renderer (DX11, OpenGL, Vulkan, etc.). + old backends will still work as is, however prefer using the separated backends as they will be updated to support multi-viewports. + when adopting new backends follow the main.cpp code of your preferred examples/ folder to know which functions to call. + in particular, note that old backends called ImGui::NewFrame() at the end of their ImGui_ImplXXXX_NewFrame() function. + - 2018/06/06 (1.62) - renamed GetGlyphRangesChinese() to GetGlyphRangesChineseFull() to distinguish other variants and discourage using the full set. + - 2018/06/06 (1.62) - TreeNodeEx()/TreeNodeBehavior(): the ImGuiTreeNodeFlags_CollapsingHeader helper now includes the ImGuiTreeNodeFlags_NoTreePushOnOpen flag. See Changelog for details. + - 2018/05/03 (1.61) - DragInt(): the default compile-time format string has been changed from "%.0f" to "%d", as we are not using floats internally any more.
+ If you used DragInt() with custom format strings, make sure you change them to use %d or an integer-compatible format. + To honor backward-compatibility, the DragInt() code will currently parse and modify format strings to replace %*f with %d, giving time to users to upgrade their code. + If you have IMGUI_DISABLE_OBSOLETE_FUNCTIONS enabled, the code will instead assert! You may run a reg-exp search on your codebase for e.g. "DragInt.*%f" to help you find them. + - 2018/04/28 (1.61) - obsoleted InputFloat() functions taking an optional "int decimal_precision" in favor of an equivalent and more flexible "const char* format", + consistent with other functions. Kept redirection functions (will obsolete). + - 2018/04/09 (1.61) - IM_DELETE() helper function added in 1.60 doesn't clear the input _pointer_ reference, more consistent with expectation and allows passing r-value. + - 2018/03/20 (1.60) - renamed io.WantMoveMouse to io.WantSetMousePos for consistency and ease of understanding (was added in 1.52, _not_ used by core and only honored by some backends ahead of merging the Nav branch). + - 2018/03/12 (1.60) - removed ImGuiCol_CloseButton, ImGuiCol_CloseButtonActive, ImGuiCol_CloseButtonHovered as the closing cross uses regular button colors now. + - 2018/03/08 (1.60) - changed ImFont::DisplayOffset.y to default to 0 instead of +1. Fixed rounding of Ascent/Descent to match TrueType renderer. If you were adding or subtracting to ImFont::DisplayOffset check if your fonts are correctly aligned vertically. + - 2018/03/03 (1.60) - renamed ImGuiStyleVar_Count_ to ImGuiStyleVar_COUNT and ImGuiMouseCursor_Count_ to ImGuiMouseCursor_COUNT for consistency with other public enums. + - 2018/02/18 (1.60) - BeginDragDropSource(): temporarily removed the optional mouse_button=0 parameter because it is not really usable in many situations at the moment.
+ - 2018/02/16 (1.60) - obsoleted the io.RenderDrawListsFn callback, you can call your graphics engine render function after ImGui::Render(). Use ImGui::GetDrawData() to retrieve the ImDrawData* to display. + - 2018/02/07 (1.60) - reorganized context handling to be more explicit, + - YOU NOW NEED TO CALL ImGui::CreateContext() AT THE BEGINNING OF YOUR APP, AND CALL ImGui::DestroyContext() AT THE END. + - removed Shutdown() function, as DestroyContext() serves this purpose. + - you may pass an ImFontAtlas* pointer to CreateContext() to share a font atlas between contexts. Otherwise CreateContext() will create its own font atlas instance. + - removed allocator parameters from CreateContext(), they are now set up with SetAllocatorFunctions(), and shared by all contexts. + - removed the default global context and font atlas instance, which were confusing for users of DLL reloading and users of multiple contexts. + - 2018/01/31 (1.60) - moved sample TTF files from extra_fonts/ to misc/fonts/. If you loaded files directly from the imgui repo you may need to update your paths. + - 2018/01/11 (1.60) - obsoleted IsAnyWindowHovered() in favor of IsWindowHovered(ImGuiHoveredFlags_AnyWindow). Kept redirection function (will obsolete). + - 2018/01/11 (1.60) - obsoleted IsAnyWindowFocused() in favor of IsWindowFocused(ImGuiFocusedFlags_AnyWindow). Kept redirection function (will obsolete). + - 2018/01/03 (1.60) - renamed ImGuiSizeConstraintCallback to ImGuiSizeCallback, ImGuiSizeConstraintCallbackData to ImGuiSizeCallbackData. + - 2017/12/29 (1.60) - removed CalcItemRectClosestPoint() which was weird and not really used by anyone except demo code. If you need it, it's easy to replicate on your side. + - 2017/12/24 (1.53) - renamed the emblematic ShowTestWindow() function to ShowDemoWindow(). Kept redirection function (will obsolete).
+ - 2017/12/21 (1.53) - ImDrawList: renamed style.AntiAliasedShapes to style.AntiAliasedFill for consistency and as a way to explicitly break code that manipulates those flags at runtime. You can now manipulate ImDrawList::Flags. + - 2017/12/21 (1.53) - ImDrawList: removed 'bool anti_aliased = true' final parameter of ImDrawList::AddPolyline() and ImDrawList::AddConvexPolyFilled(). Prefer manipulating ImDrawList::Flags if you need to toggle them during the frame. + - 2017/12/14 (1.53) - using the ImGuiWindowFlags_NoScrollWithMouse flag on a child window forwards the mouse wheel event to the parent window, unless either ImGuiWindowFlags_NoInputs or ImGuiWindowFlags_NoScrollbar are also set. + - 2017/12/13 (1.53) - renamed GetItemsLineHeightWithSpacing() to GetFrameHeightWithSpacing(). Kept redirection function (will obsolete). + - 2017/12/13 (1.53) - obsoleted IsRootWindowFocused() in favor of using IsWindowFocused(ImGuiFocusedFlags_RootWindow). Kept redirection function (will obsolete). + - obsoleted IsRootWindowOrAnyChildFocused() in favor of using IsWindowFocused(ImGuiFocusedFlags_RootAndChildWindows). Kept redirection function (will obsolete). + - 2017/12/12 (1.53) - renamed ImGuiTreeNodeFlags_AllowOverlapMode to ImGuiTreeNodeFlags_AllowItemOverlap. Kept redirection enum (will obsolete). + - 2017/12/10 (1.53) - removed SetNextWindowContentWidth(), prefer using SetNextWindowContentSize(). Kept redirection function (will obsolete). + - 2017/11/27 (1.53) - renamed ImGuiTextBuffer::append() helper to appendf(), appendv() to appendfv(). If you copied the 'Log' demo in your code, it uses appendv() so that needs to be renamed. + - 2017/11/18 (1.53) - Style, Begin: removed ImGuiWindowFlags_ShowBorders window flag. Borders are now fully set up in the ImGuiStyle structure (see e.g. style.FrameBorderSize, style.WindowBorderSize). Use ImGui::ShowStyleEditor() to look them up.
+ Please note that the style system will keep evolving (hopefully stabilizing in Q1 2018), and so custom styles will probably subtly break over time. It is recommended you use the StyleColorsClassic(), StyleColorsDark(), StyleColorsLight() functions. + - 2017/11/18 (1.53) - Style: removed ImGuiCol_ComboBg in favor of combo boxes using ImGuiCol_PopupBg for consistency. + - 2017/11/18 (1.53) - Style: renamed ImGuiCol_ChildWindowBg to ImGuiCol_ChildBg. + - 2017/11/18 (1.53) - Style: renamed style.ChildWindowRounding to style.ChildRounding, ImGuiStyleVar_ChildWindowRounding to ImGuiStyleVar_ChildRounding. + - 2017/11/02 (1.53) - obsoleted IsRootWindowOrAnyChildHovered() in favor of using IsWindowHovered(ImGuiHoveredFlags_RootAndChildWindows); + - 2017/10/24 (1.52) - renamed IMGUI_DISABLE_WIN32_DEFAULT_CLIPBOARD_FUNCS/IMGUI_DISABLE_WIN32_DEFAULT_IME_FUNCS to IMGUI_DISABLE_WIN32_DEFAULT_CLIPBOARD_FUNCTIONS/IMGUI_DISABLE_WIN32_DEFAULT_IME_FUNCTIONS for consistency. + - 2017/10/20 (1.52) - changed IsWindowHovered() default parameters behavior to return false if an item is active in another window (e.g. click-dragging item from another window to this window). You can use the newly introduced IsWindowHovered() flags to request this specific behavior if you need it. + - 2017/10/20 (1.52) - marked IsItemHoveredRect()/IsMouseHoveringWindow() as obsolete, in favor of using the newly introduced flags for IsItemHovered() and IsWindowHovered(). See https://github.com/ocornut/imgui/issues/1382 for details. + removed the IsItemRectHovered()/IsWindowRectHovered() names introduced in 1.51 since they were merely more consistent names for the two functions we are now obsoleting.
+ IsItemHoveredRect() --> IsItemHovered(ImGuiHoveredFlags_RectOnly) + IsMouseHoveringAnyWindow() --> IsWindowHovered(ImGuiHoveredFlags_AnyWindow) + IsMouseHoveringWindow() --> IsWindowHovered(ImGuiHoveredFlags_AllowWhenBlockedByPopup | ImGuiHoveredFlags_AllowWhenBlockedByActiveItem) [weird, old behavior] + - 2017/10/17 (1.52) - marked the old 5-parameters version of Begin() as obsolete (still available). Use SetNextWindowSize()+Begin() instead! + - 2017/10/11 (1.52) - renamed AlignFirstTextHeightToWidgets() to AlignTextToFramePadding(). Kept inline redirection function (will obsolete). + - 2017/09/26 (1.52) - renamed ImFont::Glyph to ImFontGlyph. Kept redirection typedef (will obsolete). + - 2017/09/25 (1.52) - removed SetNextWindowPosCenter() because SetNextWindowPos() now has the optional pivot information to do the same and more. Kept redirection function (will obsolete). + - 2017/08/25 (1.52) - io.MousePos needs to be set to ImVec2(-FLT_MAX,-FLT_MAX) when mouse is unavailable/missing. Previously ImVec2(-1,-1) was enough but we now accept negative mouse coordinates. In your backend if you need to support unavailable mouse, make sure to replace "io.MousePos = ImVec2(-1,-1)" with "io.MousePos = ImVec2(-FLT_MAX,-FLT_MAX)". + - 2017/08/22 (1.51) - renamed IsItemHoveredRect() to IsItemRectHovered(). Kept inline redirection function (will obsolete). -> (1.52) use IsItemHovered(ImGuiHoveredFlags_RectOnly)! + - renamed IsMouseHoveringAnyWindow() to IsAnyWindowHovered() for consistency. Kept inline redirection function (will obsolete). + - renamed IsMouseHoveringWindow() to IsWindowRectHovered() for consistency. Kept inline redirection function (will obsolete). + - 2017/08/20 (1.51) - renamed GetStyleColName() to GetStyleColorName() for consistency. + - 2017/08/20 (1.51) - added PushStyleColor(ImGuiCol idx, ImU32 col) overload, which _might_ cause an "ambiguous call" compilation error if you are using ImColor() with implicit cast. 
Cast to ImU32 or ImVec4 explicitly to fix. + - 2017/08/15 (1.51) - marked the weird IMGUI_ONCE_UPON_A_FRAME helper macro as obsolete. prefer using the more explicit ImGuiOnceUponAFrame type. + - 2017/08/15 (1.51) - changed parameter order for BeginPopupContextWindow() from (bool also_over_items,const char*,int buttons) to (const char*,int buttons,bool also_over_items). Note that most calls relied on default parameters completely. + - 2017/08/13 (1.51) - renamed ImGuiCol_Column to ImGuiCol_Separator, ImGuiCol_ColumnHovered to ImGuiCol_SeparatorHovered, ImGuiCol_ColumnActive to ImGuiCol_SeparatorActive. Kept redirection enums (will obsolete). + - 2017/08/11 (1.51) - renamed ImGuiSetCond_Always to ImGuiCond_Always, ImGuiSetCond_Once to ImGuiCond_Once, ImGuiSetCond_FirstUseEver to ImGuiCond_FirstUseEver, ImGuiSetCond_Appearing to ImGuiCond_Appearing. Kept redirection enums (will obsolete). + - 2017/08/09 (1.51) - removed ValueColor() helpers, they are equivalent to calling Text(label) + SameLine() + ColorButton(). + - 2017/08/08 (1.51) - removed ColorEditMode() and ImGuiColorEditMode in favor of ImGuiColorEditFlags and parameters to the various Color*() functions. The SetColorEditOptions() allows to initialize default but the user can still change them with right-click context menu. + - changed prototype of 'ColorEdit4(const char* label, float col[4], bool show_alpha = true)' to 'ColorEdit4(const char* label, float col[4], ImGuiColorEditFlags flags = 0)', where passing flags = 0x01 is a safe no-op (hello dodgy backward compatibility!). - check and run the demo window, under "Color/Picker Widgets", to understand the various new options. + - changed prototype of rarely used 'ColorButton(ImVec4 col, bool small_height = false, bool outline_border = true)' to 'ColorButton(const char* desc_id, ImVec4 col, ImGuiColorEditFlags flags = 0, ImVec2 size = ImVec2(0, 0))' + - 2017/07/20 (1.51) - removed IsPosHoveringAnyWindow(ImVec2), which was partly broken and misleading.
ASSERT + redirect user to io.WantCaptureMouse + - 2017/05/26 (1.50) - removed ImFontConfig::MergeGlyphCenterV in favor of a more multipurpose ImFontConfig::GlyphOffset. + - 2017/05/01 (1.50) - renamed ImDrawList::PathFill() (rarely used directly) to ImDrawList::PathFillConvex() for clarity. + - 2016/11/06 (1.50) - BeginChild(const char*) now applies the stack id to the provided label, consistently with other functions as it should always have been. It shouldn't affect you unless (extremely unlikely) you were appending multiple times to a same child from different locations of the stack id. If that's the case, generate an id with GetId() and use it instead of passing string to BeginChild(). + - 2016/10/15 (1.50) - avoid 'void* user_data' parameter to io.SetClipboardTextFn/io.GetClipboardTextFn pointers. We pass io.ClipboardUserData to it. + - 2016/09/25 (1.50) - style.WindowTitleAlign is now a ImVec2 (ImGuiAlign enum was removed). set to (0.5f,0.5f) for horizontal+vertical centering, (0.0f,0.0f) for upper-left, etc. + - 2016/07/30 (1.50) - SameLine(x) with x>0.0f is now relative to left of column/group if any, and not always to left of window. This was sort of always the intent and hopefully breakage should be minimal. + - 2016/05/12 (1.49) - title bar (using ImGuiCol_TitleBg/ImGuiCol_TitleBgActive colors) isn't rendered over a window background (ImGuiCol_WindowBg color) anymore. + If your TitleBg/TitleBgActive alpha was 1.0f or you are using the default theme it will not affect you, otherwise if <1.0f you need to tweak your custom theme to readjust for the fact that we don't draw a WindowBg background behind the title bar.
+ This helper function will convert an old TitleBg/TitleBgActive color into a new one with the same visual output, given the OLD color and the OLD WindowBg color: + ImVec4 ConvertTitleBgCol(const ImVec4& win_bg_col, const ImVec4& title_bg_col) { float new_a = 1.0f - ((1.0f - win_bg_col.w) * (1.0f - title_bg_col.w)), k = title_bg_col.w / new_a; return ImVec4((win_bg_col.x * win_bg_col.w + title_bg_col.x) * k, (win_bg_col.y * win_bg_col.w + title_bg_col.y) * k, (win_bg_col.z * win_bg_col.w + title_bg_col.z) * k, new_a); } + If this is confusing, pick the RGB value from title bar from an old screenshot and apply this as TitleBg/TitleBgActive. Or you may just create TitleBgActive from a tweaked TitleBg color. + - 2016/05/07 (1.49) - removed confusing set of GetInternalState(), GetInternalStateSize(), SetInternalState() functions. Now using CreateContext(), DestroyContext(), GetCurrentContext(), SetCurrentContext(). + - 2016/05/02 (1.49) - renamed SetNextTreeNodeOpened() to SetNextTreeNodeOpen(), no redirection. + - 2016/05/01 (1.49) - obsoleted old signature of CollapsingHeader(const char* label, const char* str_id = NULL, bool display_frame = true, bool default_open = false) as extra parameters were badly designed and rarely used. You can replace the "default_open = true" flag in new API with CollapsingHeader(label, ImGuiTreeNodeFlags_DefaultOpen). + - 2016/04/26 (1.49) - changed ImDrawList::PushClipRect(ImVec4 rect) to ImDrawList::PushClipRect(ImVec2 min,ImVec2 max,bool intersect_with_current_clip_rect=false). Note that higher-level ImGui::PushClipRect() is preferable because it will clip at logic/widget level, whereas ImDrawList::PushClipRect() only affects your renderer. + - 2016/04/03 (1.48) - removed style.WindowFillAlphaDefault setting which was redundant. Bake default BG alpha inside style.Colors[ImGuiCol_WindowBg] and all other Bg color values. (ref github issue #337).
+ - 2016/04/03 (1.48) - renamed ImGuiCol_TooltipBg to ImGuiCol_PopupBg, used by popups/menus and tooltips. popups/menus were previously using ImGuiCol_WindowBg. (ref github issue #337) + - 2016/03/21 (1.48) - renamed GetWindowFont() to GetFont(), GetWindowFontSize() to GetFontSize(). Kept inline redirection function (will obsolete). + - 2016/03/02 (1.48) - InputText() completion/history/always callbacks: if you modify the text buffer manually (without using DeleteChars()/InsertChars() helper) you need to maintain the BufTextLen field. added an assert. + - 2016/01/23 (1.48) - fixed not honoring exact width passed to PushItemWidth(), previously it would add extra FramePadding.x*2 over that width. if you had manual pixel-perfect alignment in place it might affect you. + - 2015/12/27 (1.48) - fixed ImDrawList::AddRect() which used to render a rectangle 1 px too large on each axis. + - 2015/12/04 (1.47) - renamed Color() helpers to ValueColor() - dangerously named, rarely used and probably to be made obsolete. + - 2015/08/29 (1.45) - with the addition of horizontal scrollbar we made various fixes to inconsistencies with dealing with cursor position. + GetCursorPos()/SetCursorPos() functions now include the scrolled amount. It shouldn't affect the majority of users, but take note that SetCursorPosX(100.0f) puts you at +100 from the starting x position which may include scrolling, not at +100 from the window left side. + GetContentRegionMax()/GetWindowContentRegionMin()/GetWindowContentRegionMax() functions also include the scrolled amount. Typically those were used in cases where no scrolling would happen so it may not be a problem, but watch out! + - 2015/08/29 (1.45) - renamed style.ScrollbarWidth to style.ScrollbarSize + - 2015/08/05 (1.44) - split imgui.cpp into extra files: imgui_demo.cpp imgui_draw.cpp imgui_internal.h that you need to add to your project.
+ - 2015/07/18 (1.44) - fixed angles in ImDrawList::PathArcTo(), PathArcToFast() (introduced in 1.43) being off by an extra PI for no justifiable reason + - 2015/07/14 (1.43) - add new ImFontAtlas::AddFont() API. For the old AddFont***, moved the 'font_no' parameter of ImFontAtlas::AddFont** functions to the ImFontConfig structure. + you need to render your textured triangles with bilinear filtering to benefit from sub-pixel positioning of text. + - 2015/07/08 (1.43) - switched rendering data to use indexed rendering. this is saving a fair amount of CPU/GPU and enables us to get anti-aliasing for a marginal cost. + this necessary change will break your rendering function! the fix should be very easy. sorry for that :( + - if you are using a vanilla copy of one of the imgui_impl_XXX.cpp provided in the example, you just need to update your copy and you can ignore the rest. + - the signature of the io.RenderDrawListsFn handler has changed! + old: ImGui_XXXX_RenderDrawLists(ImDrawList** const cmd_lists, int cmd_lists_count) + new: ImGui_XXXX_RenderDrawLists(ImDrawData* draw_data). + parameters: 'cmd_lists' becomes 'draw_data->CmdLists', 'cmd_lists_count' becomes 'draw_data->CmdListsCount' + ImDrawList: 'commands' becomes 'CmdBuffer', 'vtx_buffer' becomes 'VtxBuffer', 'IdxBuffer' is new. + ImDrawCmd: 'vtx_count' becomes 'ElemCount', 'clip_rect' becomes 'ClipRect', 'user_callback' becomes 'UserCallback', 'texture_id' becomes 'TextureId'. + - each ImDrawList now contains both a vertex buffer and an index buffer. For each command, render ElemCount/3 triangles using indices from the index buffer. + - if you REALLY cannot render indexed primitives, you can call the draw_data->DeIndexAllBuffers() method to de-index the buffers. This is slow and a waste of CPU/GPU. Prefer using indexed rendering! + - refer to code in the examples/ folder or ask on the GitHub if you are unsure of how to upgrade. please upgrade! 
+ - 2015/07/10 (1.43) - changed SameLine() parameters from int to float. + - 2015/07/02 (1.42) - renamed SetScrollPosHere() to SetScrollFromCursorPos(). Kept inline redirection function (will obsolete). + - 2015/07/02 (1.42) - renamed GetScrollPosY() to GetScrollY(). Necessary to reduce confusion along with other scrolling functions, because positions (e.g. cursor position) are not equivalent to scrolling amount. + - 2015/06/14 (1.41) - changed ImageButton() default bg_col parameter from (0,0,0,1) (black) to (0,0,0,0) (transparent) - makes a difference when the texture has transparency + - 2015/06/14 (1.41) - changed Selectable() API from (label, selected, size) to (label, selected, flags, size). Size override should have been rarely used. Sorry! + - 2015/05/31 (1.40) - renamed GetWindowCollapsed() to IsWindowCollapsed() for consistency. Kept inline redirection function (will obsolete). + - 2015/05/31 (1.40) - renamed IsRectClipped() to IsRectVisible() for consistency. Note that return value is opposite! Kept inline redirection function (will obsolete). + - 2015/05/27 (1.40) - removed the third 'repeat_if_held' parameter from Button() - sorry! it was rarely used and inconsistent. Use PushButtonRepeat(true) / PopButtonRepeat() to enable repeat on desired buttons. + - 2015/05/11 (1.40) - changed BeginPopup() API, takes a string identifier instead of a bool. ImGui needs to manage the open/closed state of popups. Call OpenPopup() to actually set the "open" state of a popup. BeginPopup() returns true if the popup is opened. + - 2015/05/03 (1.40) - removed style.AutoFitPadding, using style.WindowPadding makes more sense (the default values were already the same). + - 2015/04/13 (1.38) - renamed IsClipped() to IsRectClipped(). Kept inline redirection function until 1.50.
+ - 2015/04/09 (1.38) - renamed ImDrawList::AddArc() to ImDrawList::AddArcFast() for compatibility with future API + - 2015/04/03 (1.38) - removed ImGuiCol_CheckHovered, ImGuiCol_CheckActive, replaced with the more general ImGuiCol_FrameBgHovered, ImGuiCol_FrameBgActive. + - 2015/04/03 (1.38) - removed support for passing -FLT_MAX..+FLT_MAX as the range for a SliderFloat(). Use DragFloat() or InputFloat() instead. + - 2015/03/17 (1.36) - renamed GetItemBoxMin()/GetItemBoxMax()/IsMouseHoveringBox() to GetItemRectMin()/GetItemRectMax()/IsMouseHoveringRect(). Kept inline redirection function until 1.50. + - 2015/03/15 (1.36) - renamed style.TreeNodeSpacing to style.IndentSpacing, ImGuiStyleVar_TreeNodeSpacing to ImGuiStyleVar_IndentSpacing + - 2015/03/13 (1.36) - renamed GetWindowIsFocused() to IsWindowFocused(). Kept inline redirection function until 1.50. + - 2015/03/08 (1.35) - renamed style.ScrollBarWidth to style.ScrollbarWidth (casing) + - 2015/02/27 (1.34) - renamed OpenNextNode(bool) to SetNextTreeNodeOpened(bool, ImGuiSetCond). Kept inline redirection function until 1.50. + - 2015/02/27 (1.34) - renamed ImGuiSetCondition_*** to ImGuiSetCond_***, and _FirstUseThisSession becomes _Once. + - 2015/02/11 (1.32) - changed text input callback ImGuiTextEditCallback return type from void-->int. reserved for future use, return 0 for now. + - 2015/02/10 (1.32) - renamed GetItemWidth() to CalcItemWidth() to clarify its evolving behavior + - 2015/02/08 (1.31) - renamed GetTextLineSpacing() to GetTextLineHeightWithSpacing() + - 2015/02/01 (1.31) - removed IO.MemReallocFn (unused) + - 2015/01/19 (1.30) - renamed ImGuiStorage::GetIntPtr()/GetFloatPtr() to GetIntRef()/GetFloatRef() because Ptr was conflicting with actual pointer storage functions. + - 2015/01/11 (1.30) - big font/image API change! now loads TTF file. allow for multiple fonts. no need for a PNG loader. + - 2015/01/11 (1.30) - removed GetDefaultFontData().
uses io.Fonts->GetTexData*() API to retrieve uncompressed pixels. + - old: const void* png_data; unsigned int png_size; ImGui::GetDefaultFontData(NULL, NULL, &png_data, &png_size); [..Upload texture to GPU..]; + - new: unsigned char* pixels; int width, height; io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); [..Upload texture to GPU..]; io.Fonts->TexID = YourTexIdentifier; + you now have more flexibility to load multiple TTF fonts and manage the texture buffer for internal needs. It is now recommended that you sample the font texture with bilinear interpolation. + - 2015/01/11 (1.30) - added texture identifier in ImDrawCmd passed to your render function (we can now render images). make sure to set io.Fonts->TexID. + - 2015/01/11 (1.30) - removed IO.PixelCenterOffset (unnecessary, can be handled in user projection matrix) + - 2015/01/11 (1.30) - removed ImGui::IsItemFocused() in favor of ImGui::IsItemActive() which handles all widgets + - 2014/12/10 (1.18) - removed SetNewWindowDefaultPos() in favor of new generic API SetNextWindowPos(pos, ImGuiSetCondition_FirstUseEver) + - 2014/11/28 (1.17) - moved IO.Font*** options to inside the IO.Font-> structure (FontYOffset, FontTexUvForWhite, FontBaseScale, FontFallbackGlyph) + - 2014/11/26 (1.17) - reworked syntax of IMGUI_ONCE_UPON_A_FRAME helper macro to increase compiler compatibility + - 2014/11/07 (1.15) - renamed IsHovered() to IsItemHovered() + - 2014/10/02 (1.14) - renamed IMGUI_INCLUDE_IMGUI_USER_CPP to IMGUI_INCLUDE_IMGUI_USER_INL and imgui_user.cpp to imgui_user.inl (more IDE friendly) + - 2014/09/25 (1.13) - removed 'text_end' parameter from IO.SetClipboardTextFn (the string is now always zero-terminated for simplicity) + - 2014/09/24 (1.12) - renamed SetFontScale() to SetWindowFontScale() + - 2014/09/24 (1.12) - moved IM_MALLOC/IM_REALLOC/IM_FREE preprocessor defines to IO.MemAllocFn/IO.MemReallocFn/IO.MemFreeFn + - 2014/08/30 (1.09) - removed IO.FontHeight (now computed automatically) + -
2014/08/30 (1.09) - moved IMGUI_FONT_TEX_UV_FOR_WHITE preprocessor define to IO.FontTexUvForWhite + - 2014/08/28 (1.09) - changed the behavior of IO.PixelCenterOffset following various rendering fixes + + + FREQUENTLY ASKED QUESTIONS (FAQ) + ================================ + + Read all answers online: + https://www.dearimgui.org/faq or https://github.com/ocornut/imgui/blob/master/docs/FAQ.md (same url) + Read all answers locally (with a text editor or ideally a Markdown viewer): + docs/FAQ.md + Some answers are copied down here to facilitate searching in code. + + Q&A: Basics + =========== + + Q: Where is the documentation? + A: This library is poorly documented at the moment and expects of the user to be acquainted with C/C++. + - Run the examples/ and explore them. + - See demo code in imgui_demo.cpp and particularly the ImGui::ShowDemoWindow() function. + - The demo covers most features of Dear ImGui, so you can read the code and see its output. + - See documentation and comments at the top of imgui.cpp + effectively imgui.h. + - Dozens of standalone example applications using e.g. OpenGL/DirectX are provided in the + examples/ folder to explain how to integrate Dear ImGui with your own engine/application. + - The Wiki (https://github.com/ocornut/imgui/wiki) has many resources and links. + - The Glossary (https://github.com/ocornut/imgui/wiki/Glossary) page also may be useful. + - Your programming IDE is your friend, find the type or function declaration to find comments + associated to it. + + Q: What is this library called? + Q: Which version should I get? + >> This library is called "Dear ImGui", please don't call it "ImGui" :) + >> See https://www.dearimgui.org/faq for details. + + Q&A: Integration + ================ + + Q: How to get started? + A: Read 'PROGRAMMER GUIDE' above. Read examples/README.txt. + + Q: How can I tell whether to dispatch mouse/keyboard to Dear ImGui or to my application? 
+ A: You should read the 'io.WantCaptureMouse', 'io.WantCaptureKeyboard' and 'io.WantTextInput' flags! + >> See https://www.dearimgui.org/faq for fully detailed answer. You really want to read this. + + Q: How can I enable keyboard controls? + Q: How can I use this without a mouse, without a keyboard or without a screen? (gamepad, input share, remote display) + Q: I integrated Dear ImGui in my engine and little squares are showing instead of text.. + Q: I integrated Dear ImGui in my engine and some elements are clipping or disappearing when I move windows around.. + Q: I integrated Dear ImGui in my engine and some elements are displaying outside their expected windows boundaries.. + >> See https://www.dearimgui.org/faq + + Q&A: Usage + ========== + + Q: Why is my widget not reacting when I click on it? + Q: How can I have widgets with an empty label? + Q: How can I have multiple widgets with the same label? + Q: How can I display an image? What is ImTextureID, how does it work? + Q: How can I use my own math types instead of ImVec2/ImVec4? + Q: How can I interact with standard C++ types (such as std::string and std::vector)? + Q: How can I display custom shapes? (using low-level ImDrawList API) + >> See https://www.dearimgui.org/faq + + Q&A: Fonts, Text + ================ + + Q: How should I handle DPI in my application? + Q: How can I load a different font than the default? + Q: How can I easily use icons in my application? + Q: How can I load multiple fonts? + Q: How can I display and input non-Latin characters such as Chinese, Japanese, Korean, Cyrillic? + >> See https://www.dearimgui.org/faq and https://github.com/ocornut/imgui/blob/master/docs/FONTS.md + + Q&A: Concerns + ============= + + Q: Who uses Dear ImGui? + Q: Can you create elaborate/serious tools with Dear ImGui? + Q: Can you reskin the look of Dear ImGui? + Q: Why using C++ (as opposed to C)? + >> See https://www.dearimgui.org/faq + + Q&A: Community + ============== + + Q: How can I help?
+ A: - Businesses: please reach out to "contact AT dearimgui.org" if you work in a place using Dear ImGui! + We can discuss ways for your company to fund development via invoiced technical support, maintenance or sponsoring contracts. + This is among the most useful things you can do for Dear ImGui. With increased funding we can hire more people working on this project. + - Individuals: you can support continued development via PayPal donations. See README. + - If you are experienced with Dear ImGui and C++, look at the github issues, look at the Wiki, read docs/TODO.txt + and see how you want to help and can help! + - Disclose your usage of Dear ImGui via a dev blog post, a tweet, a screenshot, a mention somewhere etc. + You may post screenshot or links in the gallery threads (github.com/ocornut/imgui/issues/3488). Visuals are ideal as they inspire other programmers. + But even without visuals, disclosing your use of dear imgui helps the library grow credibility, and helps other teams and programmers with taking decisions. + - If you have issues or if you need to hack into the library, even if you don't expect any support it is useful that you share your issues (on github or privately).
+ +*/ + +//------------------------------------------------------------------------- +// [SECTION] INCLUDES +//------------------------------------------------------------------------- + +#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS) +#define _CRT_SECURE_NO_WARNINGS +#endif + +#include "imgui.h" +#ifndef IMGUI_DISABLE + +#ifndef IMGUI_DEFINE_MATH_OPERATORS +#define IMGUI_DEFINE_MATH_OPERATORS +#endif +#include "imgui_internal.h" + +// System includes +#include // toupper +#include // vsnprintf, sscanf, printf +#if defined(_MSC_VER) && _MSC_VER <= 1500 // MSVC 2008 or earlier +#include // intptr_t +#else +#include // intptr_t +#endif + +// [Windows] OS specific includes (optional) +#if defined(_WIN32) && defined(IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS) && defined(IMGUI_DISABLE_WIN32_DEFAULT_CLIPBOARD_FUNCTIONS) && defined(IMGUI_DISABLE_WIN32_DEFAULT_IME_FUNCTIONS) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) +#define IMGUI_DISABLE_WIN32_FUNCTIONS +#endif +#if defined(_WIN32) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#ifndef NOMINMAX +#define NOMINMAX +#endif +#ifndef __MINGW32__ +#include // _wfopen, OpenClipboard +#else +#include +#endif +#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP) // UWP doesn't have all Win32 functions +#define IMGUI_DISABLE_WIN32_DEFAULT_CLIPBOARD_FUNCTIONS +#define IMGUI_DISABLE_WIN32_DEFAULT_IME_FUNCTIONS +#endif +#endif + +// [Apple] OS specific includes +#if defined(__APPLE__) +#include +#endif + +// Visual Studio warnings +#ifdef _MSC_VER +#pragma warning (disable: 4127) // condition expression is constant +#pragma warning (disable: 4996) // 'This function or variable may be unsafe': strcpy, strdup, sprintf, vsnprintf, sscanf, fopen +#if defined(_MSC_VER) && _MSC_VER >= 1922 // MSVC 2019 16.2 or later +#pragma warning (disable: 5054) // operator '|': deprecated between enumerations of different types +#endif +#endif + +// Clang/GCC warnings 
with -Weverything +#if defined(__clang__) +#if __has_warning("-Wunknown-warning-option") +#pragma clang diagnostic ignored "-Wunknown-warning-option" // warning: unknown warning group 'xxx' // not all warnings are known by all Clang versions and they tend to be rename-happy.. so ignoring warnings triggers new warnings on some configuration. Great! +#endif +#pragma clang diagnostic ignored "-Wunknown-pragmas" // warning: unknown warning group 'xxx' +#pragma clang diagnostic ignored "-Wold-style-cast" // warning: use of old-style cast // yes, they are more terse. +#pragma clang diagnostic ignored "-Wfloat-equal" // warning: comparing floating point with == or != is unsafe // storing and comparing against same constants (typically 0.0f) is ok. +#pragma clang diagnostic ignored "-Wformat-nonliteral" // warning: format string is not a string literal // passing non-literal to vsnformat(). yes, user passing incorrect format strings can crash the code. +#pragma clang diagnostic ignored "-Wexit-time-destructors" // warning: declaration requires an exit-time destructor // exit-time destruction order is undefined. if MemFree() leads to users code that has been disabled before exit it might cause problems. ImGui coding style welcomes static/globals. +#pragma clang diagnostic ignored "-Wglobal-constructors" // warning: declaration requires a global destructor // similar to above, not sure what the exact difference is. +#pragma clang diagnostic ignored "-Wsign-conversion" // warning: implicit conversion changes signedness +#pragma clang diagnostic ignored "-Wformat-pedantic" // warning: format specifies type 'void *' but the argument has type 'xxxx *' // unreasonable, would lead to casting every %p arg to void*. probably enabled by -pedantic. 
+#pragma clang diagnostic ignored "-Wint-to-void-pointer-cast" // warning: cast to 'void *' from smaller integer type 'int' +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" // warning: zero as null pointer constant // some standard header variations use #define NULL 0 +#pragma clang diagnostic ignored "-Wdouble-promotion" // warning: implicit conversion from 'float' to 'double' when passing argument to function // using printf() is a misery with this as C++ va_arg ellipsis changes float to double. +#pragma clang diagnostic ignored "-Wimplicit-int-float-conversion" // warning: implicit conversion from 'xxx' to 'float' may lose precision +#elif defined(__GNUC__) +// We disable -Wpragmas because GCC doesn't provide an has_warning equivalent and some forks/patches may not following the warning/version association. +#pragma GCC diagnostic ignored "-Wpragmas" // warning: unknown option after '#pragma GCC diagnostic' kind +#pragma GCC diagnostic ignored "-Wunused-function" // warning: 'xxxx' defined but not used +#pragma GCC diagnostic ignored "-Wint-to-pointer-cast" // warning: cast to pointer from integer of different size +#pragma GCC diagnostic ignored "-Wformat" // warning: format '%p' expects argument of type 'void*', but argument 6 has type 'ImGuiWindow*' +#pragma GCC diagnostic ignored "-Wdouble-promotion" // warning: implicit conversion from 'float' to 'double' when passing argument to function +#pragma GCC diagnostic ignored "-Wconversion" // warning: conversion to 'xxxx' from 'xxxx' may alter its value +#pragma GCC diagnostic ignored "-Wformat-nonliteral" // warning: format not a string literal, format string not checked +#pragma GCC diagnostic ignored "-Wstrict-overflow" // warning: assuming signed overflow does not occur when assuming that (X - c) > X is always false +#pragma GCC diagnostic ignored "-Wclass-memaccess" // [__GNUC__ >= 8] warning: 'memset/memcpy' clearing/writing an object of type 'xxxx' with no trivial copy-assignment; use 
assignment or value-initialization instead +#endif + +// Debug options +#define IMGUI_DEBUG_NAV_SCORING 0 // Display navigation scoring preview when hovering items. Display last moving direction matches when holding CTRL +#define IMGUI_DEBUG_NAV_RECTS 0 // Display the reference navigation rectangle for each window +#define IMGUI_DEBUG_INI_SETTINGS 0 // Save additional comments in .ini file (particularly helps for Docking, but makes saving slower) + +// When using CTRL+TAB (or Gamepad Square+L/R) we delay the visual a little in order to reduce visual noise doing a fast switch. +static const float NAV_WINDOWING_HIGHLIGHT_DELAY = 0.20f; // Time before the highlight and screen dimming starts fading in +static const float NAV_WINDOWING_LIST_APPEAR_DELAY = 0.15f; // Time before the window list starts to appear + +// Window resizing from edges (when io.ConfigWindowsResizeFromEdges = true and ImGuiBackendFlags_HasMouseCursors is set in io.BackendFlags by backend) +static const float WINDOWS_RESIZE_FROM_EDGES_HALF_THICKNESS = 4.0f; // Extend outside and inside windows. Affect FindHoveredWindow(). +static const float WINDOWS_RESIZE_FROM_EDGES_FEEDBACK_TIMER = 0.04f; // Reduce visual noise by only highlighting the border after a certain time. +static const float WINDOWS_MOUSE_WHEEL_SCROLL_LOCK_TIMER = 2.00f; // Lock scrolled window (so it doesn't pick child windows that are scrolling through) for a certain time, unless mouse moved. 
+ +//------------------------------------------------------------------------- +// [SECTION] FORWARD DECLARATIONS +//------------------------------------------------------------------------- + +static void SetCurrentWindow(ImGuiWindow* window); +static void FindHoveredWindow(); +static ImGuiWindow* CreateNewWindow(const char* name, ImGuiWindowFlags flags); +static ImVec2 CalcNextScrollFromScrollTargetAndClamp(ImGuiWindow* window); + +static void AddDrawListToDrawData(ImVector* out_list, ImDrawList* draw_list); +static void AddWindowToSortBuffer(ImVector* out_sorted_windows, ImGuiWindow* window); + +static ImRect GetViewportRect(); + +// Settings +static void WindowSettingsHandler_ClearAll(ImGuiContext*, ImGuiSettingsHandler*); +static void* WindowSettingsHandler_ReadOpen(ImGuiContext*, ImGuiSettingsHandler*, const char* name); +static void WindowSettingsHandler_ReadLine(ImGuiContext*, ImGuiSettingsHandler*, void* entry, const char* line); +static void WindowSettingsHandler_ApplyAll(ImGuiContext*, ImGuiSettingsHandler*); +static void WindowSettingsHandler_WriteAll(ImGuiContext*, ImGuiSettingsHandler*, ImGuiTextBuffer* buf); + +// Platform Dependents default implementation for IO functions +static const char* GetClipboardTextFn_DefaultImpl(void* user_data); +static void SetClipboardTextFn_DefaultImpl(void* user_data, const char* text); +static void ImeSetInputScreenPosFn_DefaultImpl(int x, int y); + +namespace ImGui +{ +// Navigation +static void NavUpdate(); +static void NavUpdateWindowing(); +static void NavUpdateWindowingOverlay(); +static void NavUpdateMoveResult(); +static void NavUpdateInitResult(); +static float NavUpdatePageUpPageDown(); +static inline void NavUpdateAnyRequestFlag(); +static void NavEndFrame(); +static bool NavScoreItem(ImGuiNavMoveResult* result, ImRect cand); +static void NavApplyItemToResult(ImGuiNavMoveResult* result, ImGuiWindow* window, ImGuiID id, const ImRect& nav_bb_rel); +static void NavProcessItem(ImGuiWindow* window, const ImRect& 
nav_bb, ImGuiID id); +static ImVec2 NavCalcPreferredRefPos(); +static void NavSaveLastChildNavWindowIntoParent(ImGuiWindow* nav_window); +static ImGuiWindow* NavRestoreLastChildNavWindow(ImGuiWindow* window); +static int FindWindowFocusIndex(ImGuiWindow* window); + +// Error Checking +static void ErrorCheckNewFrameSanityChecks(); +static void ErrorCheckEndFrameSanityChecks(); + +// Misc +static void UpdateSettings(); +static void UpdateMouseInputs(); +static void UpdateMouseWheel(); +static void UpdateTabFocus(); +static void UpdateDebugToolItemPicker(); +static bool UpdateWindowManualResize(ImGuiWindow* window, const ImVec2& size_auto_fit, int* border_held, int resize_grip_count, ImU32 resize_grip_col[4], const ImRect& visibility_rect); +static void RenderWindowOuterBorders(ImGuiWindow* window); +static void RenderWindowDecorations(ImGuiWindow* window, const ImRect& title_bar_rect, bool title_bar_is_highlight, int resize_grip_count, const ImU32 resize_grip_col[4], float resize_grip_draw_size); +static void RenderWindowTitleBarContents(ImGuiWindow* window, const ImRect& title_bar_rect, const char* name, bool* p_open); + +} + +//----------------------------------------------------------------------------- +// [SECTION] CONTEXT AND MEMORY ALLOCATORS +//----------------------------------------------------------------------------- + +// Current context pointer. Implicitly used by all Dear ImGui functions. Always assumed to be != NULL. +// ImGui::CreateContext() will automatically set this pointer if it is NULL. Change to a different context by calling ImGui::SetCurrentContext(). +// 1) Important: globals are not shared across DLL boundaries! If you use DLLs or any form of hot-reloading: you will need to call +// SetCurrentContext() (with the pointer you got from CreateContext) from each unique static/DLL boundary, and after each hot-reloading. 
+// In your debugger, add GImGui to your watch window and notice how its value changes depending on which location you are currently stepping into. +// 2) Important: Dear ImGui functions are not thread-safe because of this pointer. +// If you want thread-safety to allow N threads to access N different contexts, you can: +// - Change this variable to use thread local storage so each thread can refer to a different context, in imconfig.h: +// struct ImGuiContext; +// extern thread_local ImGuiContext* MyImGuiTLS; +// #define GImGui MyImGuiTLS +// And then define MyImGuiTLS in one of your cpp file. Note that thread_local is a C++11 keyword, earlier C++ uses compiler-specific keyword. +// - Future development aim to make this context pointer explicit to all calls. Also read https://github.com/ocornut/imgui/issues/586 +// - If you need a finite number of contexts, you may compile and use multiple instances of the ImGui code from different namespace. +#ifndef GImGui +ImGuiContext* GImGui = NULL; +#endif + +// Memory Allocator functions. Use SetAllocatorFunctions() to change them. +// If you use DLL hotreloading you might need to call SetAllocatorFunctions() after reloading code from this file. +// Otherwise, you probably don't want to modify them mid-program, and if you use global/static e.g. ImVector<> instances you may need to keep them accessible during program destruction. 
+#ifndef IMGUI_DISABLE_DEFAULT_ALLOCATORS +static void* MallocWrapper(size_t size, void* user_data) { IM_UNUSED(user_data); return malloc(size); } +static void FreeWrapper(void* ptr, void* user_data) { IM_UNUSED(user_data); free(ptr); } +#else +static void* MallocWrapper(size_t size, void* user_data) { IM_UNUSED(user_data); IM_UNUSED(size); IM_ASSERT(0); return NULL; } +static void FreeWrapper(void* ptr, void* user_data) { IM_UNUSED(user_data); IM_UNUSED(ptr); IM_ASSERT(0); } +#endif + +static void* (*GImAllocatorAllocFunc)(size_t size, void* user_data) = MallocWrapper; +static void (*GImAllocatorFreeFunc)(void* ptr, void* user_data) = FreeWrapper; +static void* GImAllocatorUserData = NULL; + +//----------------------------------------------------------------------------- +// [SECTION] USER FACING STRUCTURES (ImGuiStyle, ImGuiIO) +//----------------------------------------------------------------------------- + +// Sets the default style metrics listed below, then applies the default color theme via ImGui::StyleColorsDark(this). +ImGuiStyle::ImGuiStyle() +{ + Alpha = 1.0f; // Global alpha applies to everything in ImGui + WindowPadding = ImVec2(8,8); // Padding within a window + WindowRounding = 0.0f; // Radius of window corners rounding. Set to 0.0f to have rectangular windows. Large values tend to lead to a variety of artifacts and are not recommended. + WindowBorderSize = 1.0f; // Thickness of border around windows. Generally set to 0.0f or 1.0f. Other values not well tested. + WindowMinSize = ImVec2(32,32); // Minimum window size + WindowTitleAlign = ImVec2(0.0f,0.5f);// Alignment for title bar text + WindowMenuButtonPosition= ImGuiDir_Left; // Position of the collapsing/docking button in the title bar (left/right). Defaults to ImGuiDir_Left. + ChildRounding = 0.0f; // Radius of child window corners rounding. Set to 0.0f to have rectangular child windows + ChildBorderSize = 1.0f; // Thickness of border around child windows. Generally set to 0.0f or 1.0f. Other values not well tested. + PopupRounding = 0.0f; // Radius of popup window corners rounding.
Set to 0.0f to have rectangular child windows + PopupBorderSize = 1.0f; // Thickness of border around popup or tooltip windows. Generally set to 0.0f or 1.0f. Other values not well tested. + FramePadding = ImVec2(4,3); // Padding within a framed rectangle (used by most widgets) + FrameRounding = 0.0f; // Radius of frame corners rounding. Set to 0.0f to have rectangular frames (used by most widgets). + FrameBorderSize = 0.0f; // Thickness of border around frames. Generally set to 0.0f or 1.0f. Other values not well tested. + ItemSpacing = ImVec2(8,4); // Horizontal and vertical spacing between widgets/lines + ItemInnerSpacing = ImVec2(4,4); // Horizontal and vertical spacing between elements within a composed widget (e.g. a slider and its label) + CellPadding = ImVec2(4,2); // Padding within a table cell + TouchExtraPadding = ImVec2(0,0); // Expand reactive bounding box for touch-based system where touch position is not accurate enough. Unfortunately we don't sort widgets so priority on overlap will always be given to the first widget. So don't grow this too much! + IndentSpacing = 21.0f; // Horizontal spacing when e.g. entering a tree node. Generally == (FontSize + FramePadding.x*2). + ColumnsMinSpacing = 6.0f; // Minimum horizontal spacing between two columns. Preferably > (FramePadding.x + 1). + ScrollbarSize = 14.0f; // Width of the vertical scrollbar, Height of the horizontal scrollbar + ScrollbarRounding = 9.0f; // Radius of grab corners rounding for scrollbar + GrabMinSize = 10.0f; // Minimum width/height of a grab box for slider/scrollbar + GrabRounding = 0.0f; // Radius of grabs corners rounding. Set to 0.0f to have rectangular slider grabs. + LogSliderDeadzone = 4.0f; // The size in pixels of the dead-zone around zero on logarithmic sliders that cross zero. + TabRounding = 4.0f; // Radius of upper corners of a tab. Set to 0.0f to have rectangular tabs. + TabBorderSize = 0.0f; // Thickness of border around tabs.
+ TabMinWidthForCloseButton = 0.0f; // Minimum width for close button to appear on an unselected tab when hovered. Set to 0.0f to always show when hovering, set to FLT_MAX to never show close button unless selected. + ColorButtonPosition = ImGuiDir_Right; // Side of the color button in the ColorEdit4 widget (left/right). Defaults to ImGuiDir_Right. + ButtonTextAlign = ImVec2(0.5f,0.5f);// Alignment of button text when button is larger than text. + SelectableTextAlign = ImVec2(0.0f,0.0f);// Alignment of selectable text. Defaults to (0.0f, 0.0f) (top-left aligned). It's generally important to keep this left-aligned if you want to lay multiple items on the same line. + DisplayWindowPadding = ImVec2(19,19); // Window positions are clamped to be visible within the display area or monitors by at least this amount. Only applies to regular windows. + DisplaySafeAreaPadding = ImVec2(3,3); // If you cannot see the edge of your screen (e.g. on a TV) increase the safe area padding. Covers popups/tooltips as well as regular windows. + MouseCursorScale = 1.0f; // Scale software rendered mouse cursor (when io.MouseDrawCursor is enabled). May be removed later. + AntiAliasedLines = true; // Enable anti-aliased lines/borders. Disable if you are really tight on CPU/GPU. + AntiAliasedLinesUseTex = true; // Enable anti-aliased lines/borders using textures where possible. Requires the backend to render with bilinear filtering. + AntiAliasedFill = true; // Enable anti-aliased filled shapes (rounded rectangles, circles, etc.). + CurveTessellationTol = 1.25f; // Tessellation tolerance when using PathBezierCurveTo() without a specific number of segments. Decrease for highly tessellated curves (higher quality, more polygons), increase to reduce quality. + CircleSegmentMaxError = 1.60f; // Maximum error (in pixels) allowed when using AddCircle()/AddCircleFilled() or drawing rounded corner rectangles with no explicit segment count specified. Decrease for higher quality but more geometry.
+ + // Default theme + ImGui::StyleColorsDark(this); +} + +// To scale your entire UI (e.g. if you want your app to use High DPI or generally be DPI aware) you may use this helper function. Scaling the fonts is done separately and is up to you. +// Important: This operation is lossy because we round all sizes to integer. If you need to change your scale multiples, call this over a freshly initialized ImGuiStyle structure rather than scaling multiple times. +// Note: the *BorderSize fields are not modified by this function. TabMinWidthForCloseButton is left untouched when it equals FLT_MAX. +// NOTE(review): MouseCursorScale (default 1.0f) is also passed through ImFloor() below; assuming ImFloor() rounds down, a factor such as 1.5 leaves it at 1.0 and a factor below 1.0 yields 0 - confirm this is intended. +void ImGuiStyle::ScaleAllSizes(float scale_factor) +{ + WindowPadding = ImFloor(WindowPadding * scale_factor); + WindowRounding = ImFloor(WindowRounding * scale_factor); + WindowMinSize = ImFloor(WindowMinSize * scale_factor); + ChildRounding = ImFloor(ChildRounding * scale_factor); + PopupRounding = ImFloor(PopupRounding * scale_factor); + FramePadding = ImFloor(FramePadding * scale_factor); + FrameRounding = ImFloor(FrameRounding * scale_factor); + ItemSpacing = ImFloor(ItemSpacing * scale_factor); + ItemInnerSpacing = ImFloor(ItemInnerSpacing * scale_factor); + CellPadding = ImFloor(CellPadding * scale_factor); + TouchExtraPadding = ImFloor(TouchExtraPadding * scale_factor); + IndentSpacing = ImFloor(IndentSpacing * scale_factor); + ColumnsMinSpacing = ImFloor(ColumnsMinSpacing * scale_factor); + ScrollbarSize = ImFloor(ScrollbarSize * scale_factor); + ScrollbarRounding = ImFloor(ScrollbarRounding * scale_factor); + GrabMinSize = ImFloor(GrabMinSize * scale_factor); + GrabRounding = ImFloor(GrabRounding * scale_factor); + LogSliderDeadzone = ImFloor(LogSliderDeadzone * scale_factor); + TabRounding = ImFloor(TabRounding * scale_factor); + TabMinWidthForCloseButton = (TabMinWidthForCloseButton != FLT_MAX) ?
ImFloor(TabMinWidthForCloseButton * scale_factor) : FLT_MAX; + DisplayWindowPadding = ImFloor(DisplayWindowPadding * scale_factor); + DisplaySafeAreaPadding = ImFloor(DisplaySafeAreaPadding * scale_factor); + MouseCursorScale = ImFloor(MouseCursorScale * scale_factor); +} + +ImGuiIO::ImGuiIO() +{ + // Most fields are initialized with zero; the assignments that follow set the defaults explicitly + memset(this, 0, sizeof(*this)); + IM_ASSERT(IM_ARRAYSIZE(ImGuiIO::MouseDown) == ImGuiMouseButton_COUNT && IM_ARRAYSIZE(ImGuiIO::MouseClicked) == ImGuiMouseButton_COUNT); // Our pre-C++11 IM_STATIC_ASSERT() macro triggers a warning on modern compilers so we don't use it here. + + // Settings + ConfigFlags = ImGuiConfigFlags_None; + BackendFlags = ImGuiBackendFlags_None; + DisplaySize = ImVec2(-1.0f, -1.0f); + DeltaTime = 1.0f / 60.0f; + IniSavingRate = 5.0f; + IniFilename = "imgui.ini"; + LogFilename = "imgui_log.txt"; + MouseDoubleClickTime = 0.30f; + MouseDoubleClickMaxDist = 6.0f; + for (int i = 0; i < ImGuiKey_COUNT; i++) + KeyMap[i] = -1; + KeyRepeatDelay = 0.275f; + KeyRepeatRate = 0.050f; + UserData = NULL; + + Fonts = NULL; + FontGlobalScale = 1.0f; + FontDefault = NULL; + FontAllowUserScaling = false; + DisplayFramebufferScale = ImVec2(1.0f, 1.0f); + + // Miscellaneous options + MouseDrawCursor = false; +#ifdef __APPLE__ + ConfigMacOSXBehaviors = true; // Set Mac OS X style defaults based on __APPLE__ compile time flag +#else + ConfigMacOSXBehaviors = false; +#endif + ConfigInputTextCursorBlink = true; + ConfigWindowsResizeFromEdges = true; + ConfigWindowsMoveFromTitleBarOnly = false; + ConfigMemoryCompactTimer = 60.0f; + + // Platform Functions + BackendPlatformName = BackendRendererName = NULL; + BackendPlatformUserData = BackendRendererUserData = BackendLanguageUserData = NULL; + GetClipboardTextFn = GetClipboardTextFn_DefaultImpl; // Platform dependent default implementations + SetClipboardTextFn = SetClipboardTextFn_DefaultImpl; + ClipboardUserData = NULL; + ImeSetInputScreenPosFn =
ImeSetInputScreenPosFn_DefaultImpl; + ImeWindowHandle = NULL; + + // Input (NB: the entire structure has already been zeroed by the memset above!) + MousePos = ImVec2(-FLT_MAX, -FLT_MAX); + MousePosPrev = ImVec2(-FLT_MAX, -FLT_MAX); + MouseDragThreshold = 6.0f; + for (int i = 0; i < IM_ARRAYSIZE(MouseDownDuration); i++) MouseDownDuration[i] = MouseDownDurationPrev[i] = -1.0f; + for (int i = 0; i < IM_ARRAYSIZE(KeysDownDuration); i++) KeysDownDuration[i] = KeysDownDurationPrev[i] = -1.0f; + for (int i = 0; i < IM_ARRAYSIZE(NavInputsDownDuration); i++) NavInputsDownDuration[i] = -1.0f; +} + +// Pass in translated ASCII characters for text input. +// - with glfw you can get those from the callback set in glfwSetCharCallback() +// - on Windows you can get those using ToAscii+keyboard state, or via the WM_CHAR message +// A value of 0 is ignored; a value above IM_UNICODE_CODEPOINT_MAX is queued as IM_UNICODE_CODEPOINT_INVALID. +void ImGuiIO::AddInputCharacter(unsigned int c) +{ + if (c != 0) + InputQueueCharacters.push_back(c <= IM_UNICODE_CODEPOINT_MAX ? (ImWchar)c : IM_UNICODE_CODEPOINT_INVALID); +} + +// UTF16 strings use surrogate pairs to encode codepoints >= 0x10000, so +// we should save the high surrogate.
+void ImGuiIO::AddInputCharacterUTF16(ImWchar16 c) +{ + if (c == 0 && InputQueueSurrogate == 0) + return; + + if ((c & 0xFC00) == 0xD800) // High surrogate, must save + { + if (InputQueueSurrogate != 0) + InputQueueCharacters.push_back(IM_UNICODE_CODEPOINT_INVALID); + InputQueueSurrogate = c; + return; + } + + ImWchar cp = c; + if (InputQueueSurrogate != 0) + { + if ((c & 0xFC00) != 0xDC00) // Invalid low surrogate + InputQueueCharacters.push_back(IM_UNICODE_CODEPOINT_INVALID); + else if (IM_UNICODE_CODEPOINT_MAX == (0xFFFF)) // Codepoint will not fit in ImWchar (extra parenthesis around 0xFFFF somehow fixes -Wunreachable-code with Clang) + cp = IM_UNICODE_CODEPOINT_INVALID; + else + cp = (ImWchar)(((InputQueueSurrogate - 0xD800) << 10) + (c - 0xDC00) + 0x10000); + InputQueueSurrogate = 0; + } + InputQueueCharacters.push_back(cp); +} + +void ImGuiIO::AddInputCharactersUTF8(const char* utf8_chars) +{ + while (*utf8_chars != 0) + { + unsigned int c = 0; + utf8_chars += ImTextCharFromUtf8(&c, utf8_chars, NULL); + if (c != 0) + InputQueueCharacters.push_back((ImWchar)c); + } +} + +void ImGuiIO::ClearInputCharacters() +{ + InputQueueCharacters.resize(0); +} + +//----------------------------------------------------------------------------- +// [SECTION] MISC HELPERS/UTILITIES (Geometry functions) +//----------------------------------------------------------------------------- + +ImVec2 ImBezierCubicClosestPoint(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, const ImVec2& p, int num_segments) +{ + IM_ASSERT(num_segments > 0); // Use ImBezierCubicClosestPointCasteljau() + ImVec2 p_last = p1; + ImVec2 p_closest; + float p_closest_dist2 = FLT_MAX; + float t_step = 1.0f / (float)num_segments; + for (int i_step = 1; i_step <= num_segments; i_step++) + { + ImVec2 p_current = ImBezierCubicCalc(p1, p2, p3, p4, t_step * i_step); + ImVec2 p_line = ImLineClosestPoint(p_last, p_current, p); + float dist2 = ImLengthSqr(p - p_line); + if (dist2 < 
p_closest_dist2) + { + p_closest = p_line; + p_closest_dist2 = dist2; + } + p_last = p_current; + } + return p_closest; +} + +// Closely mimics PathBezierToCasteljau() in imgui_draw.cpp +static void ImBezierCubicClosestPointCasteljauStep(const ImVec2& p, ImVec2& p_closest, ImVec2& p_last, float& p_closest_dist2, float x1, float y1, float x2, float y2, float x3, float y3, float x4, float y4, float tess_tol, int level) +{ + float dx = x4 - x1; + float dy = y4 - y1; + float d2 = ((x2 - x4) * dy - (y2 - y4) * dx); + float d3 = ((x3 - x4) * dy - (y3 - y4) * dx); + d2 = (d2 >= 0) ? d2 : -d2; + d3 = (d3 >= 0) ? d3 : -d3; + if ((d2 + d3) * (d2 + d3) < tess_tol * (dx * dx + dy * dy)) + { + ImVec2 p_current(x4, y4); + ImVec2 p_line = ImLineClosestPoint(p_last, p_current, p); + float dist2 = ImLengthSqr(p - p_line); + if (dist2 < p_closest_dist2) + { + p_closest = p_line; + p_closest_dist2 = dist2; + } + p_last = p_current; + } + else if (level < 10) + { + float x12 = (x1 + x2)*0.5f, y12 = (y1 + y2)*0.5f; + float x23 = (x2 + x3)*0.5f, y23 = (y2 + y3)*0.5f; + float x34 = (x3 + x4)*0.5f, y34 = (y3 + y4)*0.5f; + float x123 = (x12 + x23)*0.5f, y123 = (y12 + y23)*0.5f; + float x234 = (x23 + x34)*0.5f, y234 = (y23 + y34)*0.5f; + float x1234 = (x123 + x234)*0.5f, y1234 = (y123 + y234)*0.5f; + ImBezierCubicClosestPointCasteljauStep(p, p_closest, p_last, p_closest_dist2, x1, y1, x12, y12, x123, y123, x1234, y1234, tess_tol, level + 1); + ImBezierCubicClosestPointCasteljauStep(p, p_closest, p_last, p_closest_dist2, x1234, y1234, x234, y234, x34, y34, x4, y4, tess_tol, level + 1); + } +} + +// tess_tol is generally the same value you would find in ImGui::GetStyle().CurveTessellationTol +// Because those ImXXX functions are lower-level than ImGui:: we cannot access this value automatically. 
+ImVec2 ImBezierCubicClosestPointCasteljau(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, const ImVec2& p, float tess_tol) +{ + IM_ASSERT(tess_tol > 0.0f); + ImVec2 p_last = p1; + ImVec2 p_closest; + float p_closest_dist2 = FLT_MAX; + ImBezierCubicClosestPointCasteljauStep(p, p_closest, p_last, p_closest_dist2, p1.x, p1.y, p2.x, p2.y, p3.x, p3.y, p4.x, p4.y, tess_tol, 0); + return p_closest; +} + +ImVec2 ImLineClosestPoint(const ImVec2& a, const ImVec2& b, const ImVec2& p) +{ + ImVec2 ap = p - a; + ImVec2 ab_dir = b - a; + float dot = ap.x * ab_dir.x + ap.y * ab_dir.y; + if (dot < 0.0f) + return a; + float ab_len_sqr = ab_dir.x * ab_dir.x + ab_dir.y * ab_dir.y; + if (dot > ab_len_sqr) + return b; + return a + ab_dir * dot / ab_len_sqr; +} + +bool ImTriangleContainsPoint(const ImVec2& a, const ImVec2& b, const ImVec2& c, const ImVec2& p) +{ + bool b1 = ((p.x - b.x) * (a.y - b.y) - (p.y - b.y) * (a.x - b.x)) < 0.0f; + bool b2 = ((p.x - c.x) * (b.y - c.y) - (p.y - c.y) * (b.x - c.x)) < 0.0f; + bool b3 = ((p.x - a.x) * (c.y - a.y) - (p.y - a.y) * (c.x - a.x)) < 0.0f; + return ((b1 == b2) && (b2 == b3)); +} + +void ImTriangleBarycentricCoords(const ImVec2& a, const ImVec2& b, const ImVec2& c, const ImVec2& p, float& out_u, float& out_v, float& out_w) +{ + ImVec2 v0 = b - a; + ImVec2 v1 = c - a; + ImVec2 v2 = p - a; + const float denom = v0.x * v1.y - v1.x * v0.y; + out_v = (v2.x * v1.y - v1.x * v2.y) / denom; + out_w = (v0.x * v2.y - v2.x * v0.y) / denom; + out_u = 1.0f - out_v - out_w; +} + +ImVec2 ImTriangleClosestPoint(const ImVec2& a, const ImVec2& b, const ImVec2& c, const ImVec2& p) +{ + ImVec2 proj_ab = ImLineClosestPoint(a, b, p); + ImVec2 proj_bc = ImLineClosestPoint(b, c, p); + ImVec2 proj_ca = ImLineClosestPoint(c, a, p); + float dist2_ab = ImLengthSqr(p - proj_ab); + float dist2_bc = ImLengthSqr(p - proj_bc); + float dist2_ca = ImLengthSqr(p - proj_ca); + float m = ImMin(dist2_ab, ImMin(dist2_bc, dist2_ca)); + if (m == dist2_ab) + 
return proj_ab; + if (m == dist2_bc) + return proj_bc; + return proj_ca; +} + +//----------------------------------------------------------------------------- +// [SECTION] MISC HELPERS/UTILITIES (String, Format, Hash functions) +//----------------------------------------------------------------------------- + +// Consider using _stricmp/_strnicmp under Windows or strcasecmp/strncasecmp. We don't actually use either ImStricmp/ImStrnicmp in the codebase any more. +int ImStricmp(const char* str1, const char* str2) +{ + int d; + while ((d = toupper(*str2) - toupper(*str1)) == 0 && *str1) { str1++; str2++; } + return d; +} + +int ImStrnicmp(const char* str1, const char* str2, size_t count) +{ + int d = 0; + while (count > 0 && (d = toupper(*str2) - toupper(*str1)) == 0 && *str1) { str1++; str2++; count--; } + return d; +} + +void ImStrncpy(char* dst, const char* src, size_t count) +{ + if (count < 1) + return; + if (count > 1) + strncpy(dst, src, count - 1); + dst[count - 1] = 0; +} + +char* ImStrdup(const char* str) +{ + size_t len = strlen(str); + void* buf = IM_ALLOC(len + 1); + return (char*)memcpy(buf, (const void*)str, len + 1); +} + +char* ImStrdupcpy(char* dst, size_t* p_dst_size, const char* src) +{ + size_t dst_buf_size = p_dst_size ? *p_dst_size : strlen(dst) + 1; + size_t src_size = strlen(src) + 1; + if (dst_buf_size < src_size) + { + IM_FREE(dst); + dst = (char*)IM_ALLOC(src_size); + if (p_dst_size) + *p_dst_size = src_size; + } + return (char*)memcpy(dst, (const void*)src, src_size); +} + +const char* ImStrchrRange(const char* str, const char* str_end, char c) +{ + const char* p = (const char*)memchr(str, (int)c, str_end - str); + return p; +} + +int ImStrlenW(const ImWchar* str) +{ + //return (int)wcslen((const wchar_t*)str); // FIXME-OPT: Could use this when wchar_t are 16-bit + int n = 0; + while (*str++) n++; + return n; +} + +// Find end-of-line. Return pointer will point to either first \n, either str_end. 
+const char* ImStreolRange(const char* str, const char* str_end) +{ + const char* p = (const char*)memchr(str, '\n', str_end - str); + return p ? p : str_end; +} + +const ImWchar* ImStrbolW(const ImWchar* buf_mid_line, const ImWchar* buf_begin) // find beginning-of-line +{ + while (buf_mid_line > buf_begin && buf_mid_line[-1] != '\n') + buf_mid_line--; + return buf_mid_line; +} + +const char* ImStristr(const char* haystack, const char* haystack_end, const char* needle, const char* needle_end) +{ + if (!needle_end) + needle_end = needle + strlen(needle); + + const char un0 = (char)toupper(*needle); + while ((!haystack_end && *haystack) || (haystack_end && haystack < haystack_end)) + { + if (toupper(*haystack) == un0) + { + const char* b = needle + 1; + for (const char* a = haystack + 1; b < needle_end; a++, b++) + if (toupper(*a) != toupper(*b)) + break; + if (b == needle_end) + return haystack; + } + haystack++; + } + return NULL; +} + +// Trim str by offsetting contents when there's leading data + writing a \0 at the trailing position. We use this in situation where the cost is negligible. +void ImStrTrimBlanks(char* buf) +{ + char* p = buf; + while (p[0] == ' ' || p[0] == '\t') // Leading blanks + p++; + char* p_start = p; + while (*p != 0) // Find end of string + p++; + while (p > p_start && (p[-1] == ' ' || p[-1] == '\t')) // Trailing blanks + p--; + if (p_start != buf) // Copy memory if we had leading blanks + memmove(buf, p_start, p - p_start); + buf[p - p_start] = 0; // Zero terminate +} + +const char* ImStrSkipBlank(const char* str) +{ + while (str[0] == ' ' || str[0] == '\t') + str++; + return str; +} + +// A) MSVC version appears to return -1 on overflow, whereas glibc appears to return total count (which may be >= buf_size). +// Ideally we would test for only one of those limits at runtime depending on the behavior the vsnprintf(), but trying to deduct it at compile time sounds like a pandora can of worm. 
// B) When buf==NULL vsnprintf() will return the output size.
#ifndef IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS

// We support stb_sprintf which is much faster (see: https://github.com/nothings/stb/blob/master/stb_sprintf.h)
// You may set IMGUI_USE_STB_SPRINTF to use our default wrapper, or set IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS
// and setup the wrapper yourself. (FIXME-OPT: Some of our high-level operations such as ImGuiTextBuffer::appendfv() are
// designed using two-passes worst case, which probably could be improved using the stbsp_vsprintfcb() function.)
#ifdef IMGUI_USE_STB_SPRINTF
#define STB_SPRINTF_IMPLEMENTATION
#include "stb_sprintf.h"
#endif

#if defined(_MSC_VER) && !defined(vsnprintf)
#define vsnprintf _vsnprintf
#endif

// Format into 'buf' (capacity 'buf_size' bytes, terminator included) from a va_list.
// - buf == NULL: nothing is written; returns whatever the underlying vsnprintf() reports as the output size.
// - buf != NULL: the result is always zero-terminated and truncated to fit; returns the number of characters
//   stored, excluding the terminator. With buf_size == 0 nothing can be stored, so nothing is written and 0 is returned.
int ImFormatStringV(char* buf, size_t buf_size, const char* fmt, va_list args)
{
    // A real buffer with zero capacity cannot even hold the terminator: writing buf[buf_size - 1] would be buf[-1].
    if (buf != NULL && buf_size == 0)
        return 0;
#ifdef IMGUI_USE_STB_SPRINTF
    int w = stbsp_vsnprintf(buf, (int)buf_size, fmt, args);
#else
    int w = vsnprintf(buf, buf_size, fmt, args);
#endif
    if (buf == NULL)
        return w;
    // Any negative value (MSVC overflow, or an encoding error) and any count that does not fit are clamped to a full buffer.
    if (w < 0 || (size_t)w >= buf_size)
        w = (int)(buf_size - 1);
    buf[w] = 0;
    return w;
}

// Variadic front-end of ImFormatStringV(); same contract.
int ImFormatString(char* buf, size_t buf_size, const char* fmt, ...)
{
    va_list args;
    va_start(args, fmt);
    const int w = ImFormatStringV(buf, buf_size, fmt, args);
    va_end(args);
    return w;
}
#endif // #ifndef IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS

// CRC32 needs a 1KB lookup table (not cache friendly)
// Although the code to generate the table is simple and shorter than the table itself, using a const table allows us to easily:
// - avoid an unnecessary branch/memory tap, - keep the ImHashXXX functions usable by static constructors, - make it thread-safe.
+static const ImU32 GCrc32LookupTable[256] = +{ + 0x00000000,0x77073096,0xEE0E612C,0x990951BA,0x076DC419,0x706AF48F,0xE963A535,0x9E6495A3,0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,0x09B64C2B,0x7EB17CBD,0xE7B82D07,0x90BF1D91, + 0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,0x1ADAD47D,0x6DDDE4EB,0xF4D4B551,0x83D385C7,0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC,0x14015C4F,0x63066CD9,0xFA0F3D63,0x8D080DF5, + 0x3B6E20C8,0x4C69105E,0xD56041E4,0xA2677172,0x3C03E4D1,0x4B04D447,0xD20D85FD,0xA50AB56B,0x35B5A8FA,0x42B2986C,0xDBBBC9D6,0xACBCF940,0x32D86CE3,0x45DF5C75,0xDCD60DCF,0xABD13D59, + 0x26D930AC,0x51DE003A,0xC8D75180,0xBFD06116,0x21B4F4B5,0x56B3C423,0xCFBA9599,0xB8BDA50F,0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,0x2F6F7C87,0x58684C11,0xC1611DAB,0xB6662D3D, + 0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,0x71B18589,0x06B6B51F,0x9FBFE4A5,0xE8B8D433,0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,0x7F6A0DBB,0x086D3D2D,0x91646C97,0xE6635C01, + 0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E,0x6C0695ED,0x1B01A57B,0x8208F4C1,0xF50FC457,0x65B0D9C6,0x12B7E950,0x8BBEB8EA,0xFCB9887C,0x62DD1DDF,0x15DA2D49,0x8CD37CF3,0xFBD44C65, + 0x4DB26158,0x3AB551CE,0xA3BC0074,0xD4BB30E2,0x4ADFA541,0x3DD895D7,0xA4D1C46D,0xD3D6F4FB,0x4369E96A,0x346ED9FC,0xAD678846,0xDA60B8D0,0x44042D73,0x33031DE5,0xAA0A4C5F,0xDD0D7CC9, + 0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,0x5768B525,0x206F85B3,0xB966D409,0xCE61E49F,0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,0x59B33D17,0x2EB40D81,0xB7BD5C3B,0xC0BA6CAD, + 0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,0xEAD54739,0x9DD277AF,0x04DB2615,0x73DC1683,0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8,0xE40ECF0B,0x9309FF9D,0x0A00AE27,0x7D079EB1, + 0xF00F9344,0x8708A3D2,0x1E01F268,0x6906C2FE,0xF762575D,0x806567CB,0x196C3671,0x6E6B06E7,0xFED41B76,0x89D32BE0,0x10DA7A5A,0x67DD4ACC,0xF9B9DF6F,0x8EBEEFF9,0x17B7BE43,0x60B08ED5, + 
0xD6D6A3E8,0xA1D1937E,0x38D8C2C4,0x4FDFF252,0xD1BB67F1,0xA6BC5767,0x3FB506DD,0x48B2364B,0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,0xDF60EFC3,0xA867DF55,0x316E8EEF,0x4669BE79, + 0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,0xCC0C7795,0xBB0B4703,0x220216B9,0x5505262F,0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,0xC2D7FFA7,0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D, + 0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A,0x9C0906A9,0xEB0E363F,0x72076785,0x05005713,0x95BF4A82,0xE2B87A14,0x7BB12BAE,0x0CB61B38,0x92D28E9B,0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21, + 0x86D3D2D4,0xF1D4E242,0x68DDB3F8,0x1FDA836E,0x81BE16CD,0xF6B9265B,0x6FB077E1,0x18B74777,0x88085AE6,0xFF0F6A70,0x66063BCA,0x11010B5C,0x8F659EFF,0xF862AE69,0x616BFFD3,0x166CCF45, + 0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,0xA7672661,0xD06016F7,0x4969474D,0x3E6E77DB,0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,0xA9BCAE53,0xDEBB9EC5,0x47B2CF7F,0x30B5FFE9, + 0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,0xBAD03605,0xCDD70693,0x54DE5729,0x23D967BF,0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94,0xB40BBE37,0xC30C8EA1,0x5A05DF1B,0x2D02EF8D, +}; + +// Known size hash +// It is ok to call ImHashData on a string with known length but the ### operator won't be supported. +// FIXME-OPT: Replace with e.g. FNV1a hash? CRC32 pretty much randomly access 1KB. Need to do proper measurements. +ImGuiID ImHashData(const void* data_p, size_t data_size, ImU32 seed) +{ + ImU32 crc = ~seed; + const unsigned char* data = (const unsigned char*)data_p; + const ImU32* crc32_lut = GCrc32LookupTable; + while (data_size-- != 0) + crc = (crc >> 8) ^ crc32_lut[(crc & 0xFF) ^ *data++]; + return ~crc; +} + +// Zero-terminated string hash, with support for ### to reset back to seed value +// We support a syntax of "label###id" where only "###id" is included in the hash, and only "label" gets displayed. +// Because this syntax is rarely used we are optimizing for the common case. +// - If we reach ### in the string we discard the hash so far and reset to the seed. 
+// - We don't do 'current += 2; continue;' after handling ### to keep the code smaller/faster (measured ~10% diff in Debug build) +// FIXME-OPT: Replace with e.g. FNV1a hash? CRC32 pretty much randomly access 1KB. Need to do proper measurements. +ImGuiID ImHashStr(const char* data_p, size_t data_size, ImU32 seed) +{ + seed = ~seed; + ImU32 crc = seed; + const unsigned char* data = (const unsigned char*)data_p; + const ImU32* crc32_lut = GCrc32LookupTable; + if (data_size != 0) + { + while (data_size-- != 0) + { + unsigned char c = *data++; + if (c == '#' && data_size >= 2 && data[0] == '#' && data[1] == '#') + crc = seed; + crc = (crc >> 8) ^ crc32_lut[(crc & 0xFF) ^ c]; + } + } + else + { + while (unsigned char c = *data++) + { + if (c == '#' && data[0] == '#' && data[1] == '#') + crc = seed; + crc = (crc >> 8) ^ crc32_lut[(crc & 0xFF) ^ c]; + } + } + return ~crc; +} + +//----------------------------------------------------------------------------- +// [SECTION] MISC HELPERS/UTILITIES (File functions) +//----------------------------------------------------------------------------- + +// Default file functions +#ifndef IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS + +ImFileHandle ImFileOpen(const char* filename, const char* mode) +{ +#if defined(_WIN32) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) && !defined(__CYGWIN__) && !defined(__GNUC__) + // We need a fopen() wrapper because MSVC/Windows fopen doesn't handle UTF-8 filenames. + // Previously we used ImTextCountCharsFromUtf8/ImTextStrFromUtf8 here but we now need to support ImWchar16 and ImWchar32! 
+ const int filename_wsize = ::MultiByteToWideChar(CP_UTF8, 0, filename, -1, NULL, 0); + const int mode_wsize = ::MultiByteToWideChar(CP_UTF8, 0, mode, -1, NULL, 0); + ImVector buf; + buf.resize(filename_wsize + mode_wsize); + ::MultiByteToWideChar(CP_UTF8, 0, filename, -1, (wchar_t*)&buf[0], filename_wsize); + ::MultiByteToWideChar(CP_UTF8, 0, mode, -1, (wchar_t*)&buf[filename_wsize], mode_wsize); + return ::_wfopen((const wchar_t*)&buf[0], (const wchar_t*)&buf[filename_wsize]); +#else + return fopen(filename, mode); +#endif +} + +// We should in theory be using fseeko()/ftello() with off_t and _fseeki64()/_ftelli64() with __int64, waiting for the PR that does that in a very portable pre-C++11 zero-warnings way. +bool ImFileClose(ImFileHandle f) { return fclose(f) == 0; } +ImU64 ImFileGetSize(ImFileHandle f) { long off = 0, sz = 0; return ((off = ftell(f)) != -1 && !fseek(f, 0, SEEK_END) && (sz = ftell(f)) != -1 && !fseek(f, off, SEEK_SET)) ? (ImU64)sz : (ImU64)-1; } +ImU64 ImFileRead(void* data, ImU64 sz, ImU64 count, ImFileHandle f) { return fread(data, (size_t)sz, (size_t)count, f); } +ImU64 ImFileWrite(const void* data, ImU64 sz, ImU64 count, ImFileHandle f) { return fwrite(data, (size_t)sz, (size_t)count, f); } +#endif // #ifndef IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS + +// Helper: Load file content into memory +// Memory allocated with IM_ALLOC(), must be freed by user using IM_FREE() == ImGui::MemFree() +// This can't really be used with "rt" because fseek size won't match read size. 
// - filename, mode: forwarded to ImFileOpen(); both must be non-NULL.
// - out_file_size (optional): set to 0 on entry, then to the file size (padding excluded) on success.
// - padding_bytes: extra bytes allocated after the file content; when > 0 they are zero-filled.
// - Returns NULL when the file cannot be opened, sized, allocated or fully read.
void* ImFileLoadToMemory(const char* filename, const char* mode, size_t* out_file_size, int padding_bytes)
{
    IM_ASSERT(filename && mode);
    if (out_file_size)
        *out_file_size = 0;

    ImFileHandle f;
    if ((f = ImFileOpen(filename, mode)) == NULL)
        return NULL;

    // ImFileGetSize() reports errors as (ImU64)-1
    size_t file_size = (size_t)ImFileGetSize(f);
    if (file_size == (size_t)-1)
    {
        ImFileClose(f);
        return NULL;
    }

    void* file_data = IM_ALLOC(file_size + padding_bytes);
    if (file_data == NULL)
    {
        ImFileClose(f);
        return NULL;
    }
    // A short read is treated as a failure
    if (ImFileRead(file_data, 1, file_size, f) != file_size)
    {
        ImFileClose(f);
        IM_FREE(file_data);
        return NULL;
    }
    if (padding_bytes > 0)
        memset((void*)(((char*)file_data) + file_size), 0, (size_t)padding_bytes);

    ImFileClose(f);
    if (out_file_size)
        *out_file_size = file_size;

    return file_data;
}

//-----------------------------------------------------------------------------
// [SECTION] MISC HELPERS/UTILITIES (ImText* functions)
//-----------------------------------------------------------------------------

// Convert UTF-8 to 32-bit character, process single character input.
// A nearly-branchless UTF-8 decoder, based on work of Christopher Wellons (https://github.com/skeeto/branchless-utf8).
// We handle UTF-8 decoding error by skipping forward.
// - out_char receives the decoded codepoint, or IM_UNICODE_CODEPOINT_INVALID when an error was detected.
// - in_text_end may be NULL; the read window is then limited to the length announced by the first byte.
// - Returns the number of input bytes consumed.
int ImTextCharFromUtf8(unsigned int* out_char, const char* in_text, const char* in_text_end)
{
    // Tables indexed by sequence length (0 = invalid lead byte); 'lengths' is indexed by the top 5 bits of the lead byte.
    static const char lengths[32] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0 };
    static const int masks[]  = { 0x00, 0x7f, 0x1f, 0x0f, 0x07 };
    static const uint32_t mins[] = { 0x400000, 0, 0x80, 0x800, 0x10000 };
    static const int shiftc[] = { 0, 18, 12, 6, 0 };
    static const int shifte[] = { 0, 6, 4, 2, 0 };
    int len = lengths[*(const unsigned char*)in_text >> 3];
    int wanted = len + !len; // An invalid lead byte (len == 0) still wants 1 byte

    if (in_text_end == NULL)
        in_text_end = in_text + wanted; // Max length, nulls will be taken into account.

    // Copy at most 'len' bytes, stop copying at 0 or past in_text_end. Branch predictor does a good job here,
    // so it is fast even with excessive branching.
    // NOTE(review): the loads below are bounded by in_text_end only; a zero byte inside the window does not stop them - confirm this is intended.
    unsigned char s[4];
    s[0] = in_text + 0 < in_text_end ? in_text[0] : 0;
    s[1] = in_text + 1 < in_text_end ? in_text[1] : 0;
    s[2] = in_text + 2 < in_text_end ? in_text[2] : 0;
    s[3] = in_text + 3 < in_text_end ? in_text[3] : 0;

    // Assume a four-byte character and load four bytes. Unused bits are shifted out.
    *out_char  = (uint32_t)(s[0] & masks[len]) << 18;
    *out_char |= (uint32_t)(s[1] & 0x3f) << 12;
    *out_char |= (uint32_t)(s[2] & 0x3f) <<  6;
    *out_char |= (uint32_t)(s[3] & 0x3f) <<  0;
    *out_char >>= shiftc[len];

    // Accumulate the various error conditions.
    int e = 0;
    e  = (*out_char < mins[len]) << 6; // non-canonical encoding
    e |= ((*out_char >> 11) == 0x1b) << 7;  // surrogate half?
    e |= (*out_char > IM_UNICODE_CODEPOINT_MAX) << 8;  // out of range?
    e |= (s[1] & 0xc0) >> 2;
    e |= (s[2] & 0xc0) >> 4;
    e |= (s[3]       ) >> 6;
    e ^= 0x2a; // top two bits of each tail byte correct?
    e >>= shifte[len]; // Discard the checks on tail bytes that are not part of this sequence

    if (e)
    {
        // No bytes are consumed when *in_text == 0 || in_text == in_text_end.
        // One byte is consumed in case of invalid first byte of in_text.
        // All available bytes (at most `len` bytes) are consumed on incomplete/invalid second to last bytes.
        // Invalid or incomplete input may consume less bytes than wanted, therefore every byte has to be inspected in s.
        wanted = ImMin(wanted, !!s[0] + !!s[1] + !!s[2] + !!s[3]);
        *out_char = IM_UNICODE_CODEPOINT_INVALID;
    }

    return wanted;
}

// Decode UTF-8 into 'buf' (capacity 'buf_size' ImWchar, terminator included) and zero-terminate it.
// - in_text_end may be NULL, in which case decoding stops at the zero terminator of 'in_text'.
// - in_text_remaining (optional) receives the position in 'in_text' where decoding stopped.
// - Returns the number of ImWchar written, terminator excluded.
int ImTextStrFromUtf8(ImWchar* buf, int buf_size, const char* in_text, const char* in_text_end, const char** in_text_remaining)
{
    ImWchar* buf_out = buf;
    ImWchar* buf_end = buf + buf_size;
    while (buf_out < buf_end - 1 && (!in_text_end || in_text < in_text_end) && *in_text)
    {
        unsigned int c;
        in_text += ImTextCharFromUtf8(&c, in_text, in_text_end);
        if (c == 0)
            break;
        *buf_out++ = (ImWchar)c;
    }
    *buf_out = 0;
    if (in_text_remaining)
        *in_text_remaining = in_text;
    return (int)(buf_out - buf);
}

// Count the codepoints in a UTF-8 string. in_text_end may be NULL for a zero-terminated string.
int ImTextCountCharsFromUtf8(const char* in_text, const char* in_text_end)
{
    int char_count = 0;
    while ((!in_text_end || in_text < in_text_end) && *in_text)
    {
        unsigned int c;
        in_text += ImTextCharFromUtf8(&c, in_text, in_text_end);
        if (c == 0)
            break;
        char_count++;
    }
    return char_count;
}

// Based on stb_to_utf8() from github.com/nothings/stb/
// Encode codepoint 'c' as UTF-8 into 'buf' (no terminator is written). Returns the number of bytes written,
// or 0 when a multi-byte sequence does not fit in 'buf_size' or 'c' is above 0x10FFFF.
// Note that the single-byte case (c < 0x80) writes without checking 'buf_size'.
static inline int ImTextCharToUtf8(char* buf, int buf_size, unsigned int c)
{
    if (c < 0x80)
    {
        buf[0] = (char)c;
        return 1;
    }
    if (c < 0x800)
    {
        if (buf_size < 2) return 0;
        buf[0] = (char)(0xc0 + (c >> 6));
        buf[1] = (char)(0x80 + (c & 0x3f));
        return 2;
    }
    if (c < 0x10000)
    {
        if (buf_size < 3) return 0;
        buf[0] = (char)(0xe0 + (c >> 12));
        buf[1] = (char)(0x80 + ((c >> 6) & 0x3f));
        buf[2] = (char)(0x80 + ((c ) & 0x3f));
        return 3;
    }
    if (c <= 0x10FFFF)
    {
        if (buf_size < 4) return 0;
        buf[0] = (char)(0xf0 + (c >> 18));
        buf[1] = (char)(0x80 + ((c >> 12) & 0x3f));
        buf[2] = (char)(0x80 + ((c >> 6) & 0x3f));
        buf[3] = (char)(0x80 + ((c ) & 0x3f));
        return 4;
    }
    // Invalid code point, the max unicode is 0x10FFFF
    return 0;
}

// Not optimal but we very rarely use this function.
+int ImTextCountUtf8BytesFromChar(const char* in_text, const char* in_text_end) +{ + unsigned int unused = 0; + return ImTextCharFromUtf8(&unused, in_text, in_text_end); +} + +static inline int ImTextCountUtf8BytesFromChar(unsigned int c) +{ + if (c < 0x80) return 1; + if (c < 0x800) return 2; + if (c < 0x10000) return 3; + if (c <= 0x10FFFF) return 4; + return 3; +} + +int ImTextStrToUtf8(char* buf, int buf_size, const ImWchar* in_text, const ImWchar* in_text_end) +{ + char* buf_out = buf; + const char* buf_end = buf + buf_size; + while (buf_out < buf_end - 1 && (!in_text_end || in_text < in_text_end) && *in_text) + { + unsigned int c = (unsigned int)(*in_text++); + if (c < 0x80) + *buf_out++ = (char)c; + else + buf_out += ImTextCharToUtf8(buf_out, (int)(buf_end - buf_out - 1), c); + } + *buf_out = 0; + return (int)(buf_out - buf); +} + +int ImTextCountUtf8BytesFromStr(const ImWchar* in_text, const ImWchar* in_text_end) +{ + int bytes_count = 0; + while ((!in_text_end || in_text < in_text_end) && *in_text) + { + unsigned int c = (unsigned int)(*in_text++); + if (c < 0x80) + bytes_count++; + else + bytes_count += ImTextCountUtf8BytesFromChar(c); + } + return bytes_count; +} + +//----------------------------------------------------------------------------- +// [SECTION] MISC HELPERS/UTILITIES (Color functions) +// Note: The Convert functions are early design which are not consistent with other API. 
+//----------------------------------------------------------------------------- + +IMGUI_API ImU32 ImAlphaBlendColors(ImU32 col_a, ImU32 col_b) +{ + float t = ((col_b >> IM_COL32_A_SHIFT) & 0xFF) / 255.f; + int r = ImLerp((int)(col_a >> IM_COL32_R_SHIFT) & 0xFF, (int)(col_b >> IM_COL32_R_SHIFT) & 0xFF, t); + int g = ImLerp((int)(col_a >> IM_COL32_G_SHIFT) & 0xFF, (int)(col_b >> IM_COL32_G_SHIFT) & 0xFF, t); + int b = ImLerp((int)(col_a >> IM_COL32_B_SHIFT) & 0xFF, (int)(col_b >> IM_COL32_B_SHIFT) & 0xFF, t); + return IM_COL32(r, g, b, 0xFF); +} + +ImVec4 ImGui::ColorConvertU32ToFloat4(ImU32 in) +{ + float s = 1.0f / 255.0f; + return ImVec4( + ((in >> IM_COL32_R_SHIFT) & 0xFF) * s, + ((in >> IM_COL32_G_SHIFT) & 0xFF) * s, + ((in >> IM_COL32_B_SHIFT) & 0xFF) * s, + ((in >> IM_COL32_A_SHIFT) & 0xFF) * s); +} + +ImU32 ImGui::ColorConvertFloat4ToU32(const ImVec4& in) +{ + ImU32 out; + out = ((ImU32)IM_F32_TO_INT8_SAT(in.x)) << IM_COL32_R_SHIFT; + out |= ((ImU32)IM_F32_TO_INT8_SAT(in.y)) << IM_COL32_G_SHIFT; + out |= ((ImU32)IM_F32_TO_INT8_SAT(in.z)) << IM_COL32_B_SHIFT; + out |= ((ImU32)IM_F32_TO_INT8_SAT(in.w)) << IM_COL32_A_SHIFT; + return out; +} + +// Convert rgb floats ([0-1],[0-1],[0-1]) to hsv floats ([0-1],[0-1],[0-1]), from Foley & van Dam p592 +// Optimized http://lolengine.net/blog/2013/01/13/fast-rgb-to-hsv +void ImGui::ColorConvertRGBtoHSV(float r, float g, float b, float& out_h, float& out_s, float& out_v) +{ + float K = 0.f; + if (g < b) + { + ImSwap(g, b); + K = -1.f; + } + if (r < g) + { + ImSwap(r, g); + K = -2.f / 6.f - K; + } + + const float chroma = r - (g < b ? 
g : b); + out_h = ImFabs(K + (g - b) / (6.f * chroma + 1e-20f)); + out_s = chroma / (r + 1e-20f); + out_v = r; +} + +// Convert hsv floats ([0-1],[0-1],[0-1]) to rgb floats ([0-1],[0-1],[0-1]), from Foley & van Dam p593 +// also http://en.wikipedia.org/wiki/HSL_and_HSV +void ImGui::ColorConvertHSVtoRGB(float h, float s, float v, float& out_r, float& out_g, float& out_b) +{ + if (s == 0.0f) + { + // gray + out_r = out_g = out_b = v; + return; + } + + h = ImFmod(h, 1.0f) / (60.0f / 360.0f); + int i = (int)h; + float f = h - (float)i; + float p = v * (1.0f - s); + float q = v * (1.0f - s * f); + float t = v * (1.0f - s * (1.0f - f)); + + switch (i) + { + case 0: out_r = v; out_g = t; out_b = p; break; + case 1: out_r = q; out_g = v; out_b = p; break; + case 2: out_r = p; out_g = v; out_b = t; break; + case 3: out_r = p; out_g = q; out_b = v; break; + case 4: out_r = t; out_g = p; out_b = v; break; + case 5: default: out_r = v; out_g = p; out_b = q; break; + } +} + +//----------------------------------------------------------------------------- +// [SECTION] ImGuiStorage +// Helper: Key->value storage +//----------------------------------------------------------------------------- + +// std::lower_bound but without the bullshit +static ImGuiStorage::ImGuiStoragePair* LowerBound(ImVector& data, ImGuiID key) +{ + ImGuiStorage::ImGuiStoragePair* first = data.Data; + ImGuiStorage::ImGuiStoragePair* last = data.Data + data.Size; + size_t count = (size_t)(last - first); + while (count > 0) + { + size_t count2 = count >> 1; + ImGuiStorage::ImGuiStoragePair* mid = first + count2; + if (mid->key < key) + { + first = ++mid; + count -= count2 + 1; + } + else + { + count = count2; + } + } + return first; +} + +// For quicker full rebuild of a storage (instead of an incremental one), you may add all your contents and then sort once. 
+void ImGuiStorage::BuildSortByKey()
+{
+    struct StaticFunc
+    {
+        static int IMGUI_CDECL PairCompareByID(const void* lhs, const void* rhs)
+        {
+            // We can't just do a subtraction because qsort uses signed integers and subtracting our ID doesn't play well with that.
+            if (((const ImGuiStoragePair*)lhs)->key > ((const ImGuiStoragePair*)rhs)->key) return +1;
+            if (((const ImGuiStoragePair*)lhs)->key < ((const ImGuiStoragePair*)rhs)->key) return -1;
+            return 0;
+        }
+    };
+    // Zero or one element is already sorted
+    if (Data.Size > 1)
+        ImQsort(Data.Data, (size_t)Data.Size, sizeof(ImGuiStoragePair), StaticFunc::PairCompareByID);
+}
+
+// Return the int stored under 'key', or 'default_val' when the key is absent. Never inserts.
+int ImGuiStorage::GetInt(ImGuiID key, int default_val) const
+{
+    // LowerBound() takes a non-const vector; the const_cast is only used for lookup here.
+    ImGuiStoragePair* it = LowerBound(const_cast<ImVector<ImGuiStoragePair>&>(Data), key);
+    if (it == Data.end() || it->key != key)
+        return default_val;
+    return it->val_i;
+}
+
+// Booleans are stored as ints (0 or 1) and read back through GetInt().
+bool ImGuiStorage::GetBool(ImGuiID key, bool default_val) const
+{
+    return GetInt(key, default_val ? 1 : 0) != 0;
+}
+
+// Return the float stored under 'key', or 'default_val' when the key is absent. Never inserts.
+float ImGuiStorage::GetFloat(ImGuiID key, float default_val) const
+{
+    ImGuiStoragePair* it = LowerBound(const_cast<ImVector<ImGuiStoragePair>&>(Data), key);
+    if (it == Data.end() || it->key != key)
+        return default_val;
+    return it->val_f;
+}
+
+// Return the pointer stored under 'key', or NULL when the key is absent. Never inserts.
+void* ImGuiStorage::GetVoidPtr(ImGuiID key) const
+{
+    ImGuiStoragePair* it = LowerBound(const_cast<ImVector<ImGuiStoragePair>&>(Data), key);
+    if (it == Data.end() || it->key != key)
+        return NULL;
+    return it->val_p;
+}
+
+// References are only valid until a new value is added to the storage. Calling a Set***() function or a Get***Ref() function invalidates the pointer.
+// Unlike the Get***() functions, the Get***Ref() functions insert 'default_val' at the sorted position when the key is absent.
+int* ImGuiStorage::GetIntRef(ImGuiID key, int default_val)
+{
+    ImGuiStoragePair* it = LowerBound(Data, key);
+    if (it == Data.end() || it->key != key)
+        it = Data.insert(it, ImGuiStoragePair(key, default_val));
+    return &it->val_i;
+}
+
+// Returns the int slot reinterpreted as a bool*, since booleans share the int storage.
+bool* ImGuiStorage::GetBoolRef(ImGuiID key, bool default_val)
+{
+    return (bool*)GetIntRef(key, default_val ? 1 : 0);
+}
+
+float* ImGuiStorage::GetFloatRef(ImGuiID key, float default_val)
+{
+    ImGuiStoragePair* it = LowerBound(Data, key);
+    if (it == Data.end() || it->key != key)
+        it = Data.insert(it, ImGuiStoragePair(key, default_val));
+    return &it->val_f;
+}
+
+void** ImGuiStorage::GetVoidPtrRef(ImGuiID key, void* default_val)
+{
+    ImGuiStoragePair* it = LowerBound(Data, key);
+    if (it == Data.end() || it->key != key)
+        it = Data.insert(it, ImGuiStoragePair(key, default_val));
+    return &it->val_p;
+}
+
+// FIXME-OPT: Need a way to reuse the result of lower_bound when doing GetInt()/SetInt() - not too bad because it only happens on explicit interaction (maximum one a frame)
+// The Set***() functions overwrite the value of an existing key, or insert a new pair at the sorted position.
+void ImGuiStorage::SetInt(ImGuiID key, int val)
+{
+    ImGuiStoragePair* it = LowerBound(Data, key);
+    if (it == Data.end() || it->key != key)
+    {
+        Data.insert(it, ImGuiStoragePair(key, val));
+        return;
+    }
+    it->val_i = val;
+}
+
+void ImGuiStorage::SetBool(ImGuiID key, bool val)
+{
+    SetInt(key, val ? 1 : 0);
+}
+
+void ImGuiStorage::SetFloat(ImGuiID key, float val)
+{
+    ImGuiStoragePair* it = LowerBound(Data, key);
+    if (it == Data.end() || it->key != key)
+    {
+        Data.insert(it, ImGuiStoragePair(key, val));
+        return;
+    }
+    it->val_f = val;
+}
+
+void ImGuiStorage::SetVoidPtr(ImGuiID key, void* val)
+{
+    ImGuiStoragePair* it = LowerBound(Data, key);
+    if (it == Data.end() || it->key != key)
+    {
+        Data.insert(it, ImGuiStoragePair(key, val));
+        return;
+    }
+    it->val_p = val;
+}
+
+// Write 'v' into the int field of every stored pair, whatever type the pair was stored as.
+void ImGuiStorage::SetAllInt(int v)
+{
+    for (int i = 0; i < Data.Size; i++)
+        Data[i].val_i = v;
+}
+
+//-----------------------------------------------------------------------------
+// [SECTION] ImGuiTextFilter
+//-----------------------------------------------------------------------------
+
+// Helper: Parse and apply text filters. In format "aaaaa[,bbbb][,ccccc]"
+// A NULL 'default_filter' leaves the filter empty; otherwise it is copied into InputBuf and parsed by Build().
+ImGuiTextFilter::ImGuiTextFilter(const char* default_filter)
+{
+    if (default_filter)
+    {
+        ImStrncpy(InputBuf, default_filter, IM_ARRAYSIZE(InputBuf));
+        Build();
+    }
+    else
+    {
+        InputBuf[0] = 0;
+        CountGrep = 0;
+    }
+}
+
+// Submit an InputText bound to InputBuf and re-parse the filters when it changes.
+// A 'width' of 0.0f leaves the next item width untouched. Returns true when the text was edited.
+bool ImGuiTextFilter::Draw(const char* label, float width)
+{
+    if (width != 0.0f)
+        ImGui::SetNextItemWidth(width);
+    bool value_changed = ImGui::InputText(label, InputBuf, IM_ARRAYSIZE(InputBuf));
+    if (value_changed)
+        Build();
+    return value_changed;
+}
+
+// Split the range [b, e) on 'separator' into sub-ranges written to 'out' (which is cleared first).
+// Empty pieces between two separators are kept; an empty trailing piece is dropped.
+void ImGuiTextFilter::ImGuiTextRange::split(char separator, ImVector<ImGuiTextRange>* out) const
+{
+    out->resize(0);
+    const char* wb = b;
+    const char* we = wb;
+    while (we < e)
+    {
+        if (*we == separator)
+        {
+            out->push_back(ImGuiTextRange(wb, we));
+            wb = we + 1;
+        }
+        we++;
+    }
+    if (wb != we)
+        out->push_back(ImGuiTextRange(wb, we));
+}
+
+// Re-parse InputBuf: split on ',', trim blanks around each entry and count the inclusive entries.
+// CountGrep is the number of non-empty entries that do not start with '-'.
+void ImGuiTextFilter::Build()
+{
+    Filters.resize(0);
+    ImGuiTextRange input_range(InputBuf, InputBuf + strlen(InputBuf));
+    input_range.split(',', &Filters);
+
+    CountGrep = 0;
+    for (int i = 0; i != Filters.Size; i++)
+    {
+        ImGuiTextRange& f = Filters[i];
+        while (f.b < f.e && ImCharIsBlankA(f.b[0]))
+            f.b++;
+        while (f.e > f.b && ImCharIsBlankA(f.e[-1]))
+            f.e--;
+        if (f.empty())
+            continue;
+        if (Filters[i].b[0] != '-')
+            CountGrep += 1;
+    }
+}
+
+// Test 'text' (NULL is treated as "") against the parsed filters, in order:
+// - an entry starting with '-' rejects the text when the remainder of the entry is found in it,
+// - any other entry accepts the text as soon as it is found in it.
+// The first entry that decides wins. When no entry decides, the text passes only if there is no inclusive entry.
+bool ImGuiTextFilter::PassFilter(const char* text, const char* text_end) const
+{
+    if (Filters.empty())
+        return true;
+
+    if (text == NULL)
+        text = "";
+
+    for (int i = 0; i != Filters.Size; i++)
+    {
+        const ImGuiTextRange& f = Filters[i];
+        if (f.empty())
+            continue;
+        if (f.b[0] == '-')
+        {
+            // Subtract
+            if (ImStristr(text, text_end, f.b + 1, f.e) != NULL)
+                return false;
+        }
+        else
+        {
+            // Grep
+            if (ImStristr(text, text_end, f.b, f.e) != NULL)
+                return true;
+        }
+    }
+
+    // Implicit * grep
+    if (CountGrep == 0)
+        return true;
+
+    return false;
+}
+
+//-----------------------------------------------------------------------------
+// [SECTION] ImGuiTextBuffer
+//-----------------------------------------------------------------------------
+
+// On some platform vsnprintf() takes va_list by reference and modifies it.
+// va_copy is the 'correct' way to copy a va_list but Visual Studio prior to 2013 doesn't have it.
+#ifndef va_copy
+#if defined(__GNUC__) || defined(__clang__)
+#define va_copy(dest, src) __builtin_va_copy(dest, src)
+#else
+#define va_copy(dest, src) (dest = src)
+#endif
+#endif
+
+char ImGuiTextBuffer::EmptyString[1] = { 0 };
+
+// Append [str, str_end) to the buffer, or the whole zero-terminated 'str' when 'str_end' is NULL.
+// Buf always ends with a zero terminator once something has been appended; new text overwrites the
+// previous terminator and a new one is written after it.
+void ImGuiTextBuffer::append(const char* str, const char* str_end)
+{
+    int len = str_end ? (int)(str_end - str) : (int)strlen(str);
+
+    // Add zero-terminator the first time
+    const int write_off = (Buf.Size != 0) ? Buf.Size : 1;
+    const int needed_sz = write_off + len;
+    if (write_off + len >= Buf.Capacity)
+    {
+        // Grow to at least double the current capacity
+        int new_capacity = Buf.Capacity * 2;
+        Buf.reserve(needed_sz > new_capacity ? needed_sz : new_capacity);
+    }
+
+    Buf.resize(needed_sz);
+    memcpy(&Buf[write_off - 1], str, (size_t)len);
+    Buf[write_off - 1 + len] = 0;
+}
+
+// printf-style append; forwards to appendfv().
+void ImGuiTextBuffer::appendf(const char* fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    appendfv(fmt, args);
+    va_end(args);
+}
+
+// Helper: Text buffer for logging/accumulating text
+// Formats in two passes: the first call only measures the output length (consuming 'args'), the second
+// writes into the resized buffer using the copy taken beforehand. Nothing is appended when the measured length is <= 0.
+void ImGuiTextBuffer::appendfv(const char* fmt, va_list args)
+{
+    va_list args_copy;
+    va_copy(args_copy, args);
+
+    int len = ImFormatStringV(NULL, 0, fmt, args); // FIXME-OPT: could do a first pass write attempt, likely successful on first pass.
+    if (len <= 0)
+    {
+        va_end(args_copy);
+        return;
+    }
+
+    // Add zero-terminator the first time
+    const int write_off = (Buf.Size != 0) ? Buf.Size : 1;
+    const int needed_sz = write_off + len;
+    if (write_off + len >= Buf.Capacity)
+    {
+        // Grow to at least double the current capacity
+        int new_capacity = Buf.Capacity * 2;
+        Buf.reserve(needed_sz > new_capacity ? needed_sz : new_capacity);
+    }
+
+    Buf.resize(needed_sz);
+    ImFormatStringV(&Buf[write_off - 1], (size_t)len + 1, fmt, args_copy);
+    va_end(args_copy);
+}
+
+//-----------------------------------------------------------------------------
+// [SECTION] ImGuiListClipper
+// This is currently not as flexible/powerful as it should be and really confusing/spaghetti, mostly because we changed
+// the API mid-way through development and support two ways to using the clipper, needs some rework (see TODO)
+//-----------------------------------------------------------------------------
+
+// FIXME-TABLE: This prevents us from using ImGuiListClipper _inside_ a table cell.
+// The problem we have is that without a Begin/End scheme for rows using the clipper is ambiguous.
+// Returns the current table's HostSkipItems when a table is active, otherwise the current window's SkipItems.
+static bool GetSkipItemForListClipping()
+{
+    ImGuiContext& g = *GImGui;
+    return (g.CurrentTable ? g.CurrentTable->HostSkipItems : g.CurrentWindow->SkipItems);
+}
+
+// Helper to calculate coarse clipping of large list of evenly sized items.
+// NB: Prefer using the ImGuiListClipper higher-level helper if you can! Read comments and instructions there on how those use this sort of pattern.
+// NB: 'items_count' is only used to clamp the result, if you don't know your count you can use INT_MAX
+// Outputs the half-open item range [*out_items_display_start, *out_items_display_end):
+// - the full range [0, items_count) while logging is enabled,
+// - the empty range [0, 0) when items are being skipped,
+// - otherwise the items overlapping the window clip rect, extended by the navigation rectangles below.
+void ImGui::CalcListClipping(int items_count, float items_height, int* out_items_display_start, int* out_items_display_end)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    if (g.LogEnabled)
+    {
+        // If logging is active, do not perform any clipping
+        *out_items_display_start = 0;
+        *out_items_display_end = items_count;
+        return;
+    }
+    if (GetSkipItemForListClipping())
+    {
+        *out_items_display_start = *out_items_display_end = 0;
+        return;
+    }
+
+    // We create the union of the ClipRect and the NavScoringRect which at worst should be 1 page away from ClipRect
+    ImRect unclipped_rect = window->ClipRect;
+    if (g.NavMoveRequest)
+        unclipped_rect.Add(g.NavScoringRect);
+    if (g.NavJustMovedToId && window->NavLastIds[0] == g.NavJustMovedToId)
+        unclipped_rect.Add(ImRect(window->Pos + window->NavRectRel[0].Min, window->Pos + window->NavRectRel[0].Max));
+
+    // Convert the rectangle's vertical extent into item indices relative to the current cursor position
+    const ImVec2 pos = window->DC.CursorPos;
+    int start = (int)((unclipped_rect.Min.y - pos.y) / items_height);
+    int end = (int)((unclipped_rect.Max.y - pos.y) / items_height);
+
+    // When performing a navigation request, ensure we have one item extra in the direction we are moving to
+    if (g.NavMoveRequest && g.NavMoveClipDir == ImGuiDir_Up)
+        start--;
+    if (g.NavMoveRequest && g.NavMoveClipDir == ImGuiDir_Down)
+        end++;
+
+    start = ImClamp(start, 0, items_count);
+    end = ImClamp(end + 1, start, items_count);
+    *out_items_display_start = start;
+    *out_items_display_end = end;
+}
+
+// Move the window cursor to 'pos_y' and patch the previous-line, columns and table state to match,
+// as if items of height 'line_height' had been submitted up to that position.
+static void SetCursorPosYAndSetupForPrevLine(float pos_y, float line_height)
+{
+    // Set cursor position and a few other things so that SetScrollHereY() and Columns() can work when seeking cursor.
+    // FIXME: It is problematic that we have to do that here, because custom/equivalent end-user code would stumble on the same issue.
+    // The clipper should probably have a 4th step to display the last item in a regular manner.
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    float off_y = pos_y - window->DC.CursorPos.y;
+    window->DC.CursorPos.y = pos_y;
+    window->DC.CursorMaxPos.y = ImMax(window->DC.CursorMaxPos.y, pos_y);
+    window->DC.CursorPosPrevLine.y = window->DC.CursorPos.y - line_height;  // Setting those fields so that SetScrollHereY() can properly function after the end of our clipper usage.
+    window->DC.PrevLineSize.y = (line_height - g.Style.ItemSpacing.y);      // If we end up needing more accurate data (to e.g. use SameLine) we may as well make the clipper have a fourth step to let user process and display the last item in their list.
+    if (ImGuiOldColumns* columns = window->DC.CurrentColumns)
+        columns->LineMinY = window->DC.CursorPos.y;                         // Setting this so that cell Y position are set properly
+    if (ImGuiTable* table = g.CurrentTable)
+    {
+        if (table->IsInsideRow)
+            ImGui::TableEndRow(table);
+        table->RowPosY2 = window->DC.CursorPos.y;
+        // Number of rows skipped by the seek, rounded to nearest
+        const int row_increase = (int)((off_y / line_height) + 0.5f);
+        //table->CurrentRow += row_increase; // Can't do without fixing TableEndRow()
+        table->RowBgColorCounter += row_increase;
+    }
+}
+
+// Zero every field, then set ItemsCount to -1, the value End() and the destructor treat as "not in use".
+ImGuiListClipper::ImGuiListClipper()
+{
+    memset(this, 0, sizeof(*this));
+    ItemsCount = -1;
+}
+
+// Asserts that the clipper has been ended (ItemsCount reset to -1) before it is destroyed.
+ImGuiListClipper::~ImGuiListClipper()
+{
+    IM_ASSERT(ItemsCount == -1 && "Forgot to call End(), or to Step() until false?");
+}
+
+// Use case A: Begin() called from constructor with items_height<0, then called again from Step() in StepNo 1
+// Use case B: Begin() called from constructor with items_height>0
+// FIXME-LEGACY: Ideally we should remove the Begin/End functions but they are part of the legacy API we still support. This is why some of the code in Step() calling Begin() and reassign some fields, spaghetti style.
+void ImGuiListClipper::Begin(int items_count, float items_height)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+
+    // Close any table row left open so the cursor position read below is final
+    if (ImGuiTable* table = g.CurrentTable)
+        if (table->IsInsideRow)
+            ImGui::TableEndRow(table);
+
+    StartPosY = window->DC.CursorPos.y;
+    ItemsHeight = items_height;
+    ItemsCount = items_count;
+    ItemsFrozen = 0;
+    StepNo = 0;
+    DisplayStart = -1;
+    DisplayEnd = 0;
+}
+
+// Finish clipping: seek the cursor past the whole list when the count is known and a range was computed,
+// then mark the clipper as ended (ItemsCount = -1). Calling it again is a no-op.
+void ImGuiListClipper::End()
+{
+    if (ItemsCount < 0) // Already ended
+        return;
+
+    // In theory here we should assert that ImGui::GetCursorPosY() == StartPosY + DisplayEnd * ItemsHeight, but it feels saner to just seek at the end and not assert/crash the user.
+    if (ItemsCount < INT_MAX && DisplayStart >= 0)
+        SetCursorPosYAndSetupForPrevLine(StartPosY + (ItemsCount - ItemsFrozen) * ItemsHeight, ItemsHeight);
+    ItemsCount = -1;
+    StepNo = 3;
+}
+
+// Advance the clipper state machine. Returns true when the caller should submit items
+// [DisplayStart, DisplayEnd), and false when the list is finished.
+bool ImGuiListClipper::Step()
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+
+    ImGuiTable* table = g.CurrentTable;
+    if (table && table->IsInsideRow)
+        ImGui::TableEndRow(table);
+
+    // No items
+    if (ItemsCount == 0 || GetSkipItemForListClipping())
+    {
+        End();
+        return false;
+    }
+
+    // Step 0: Let you process the first element (regardless of it being visible or not, so we can measure the element height)
+    if (StepNo == 0)
+    {
+        // While we are in frozen row state, keep displaying items one by one, unclipped
+        // FIXME: Could be stored as a table-agnostic state.
+        if (table != NULL && !table->IsUnfrozenRows)
+        {
+            DisplayStart = ItemsFrozen;
+            DisplayEnd = ItemsFrozen + 1;
+            ItemsFrozen++;
+            return true;
+        }
+
+        StartPosY = window->DC.CursorPos.y;
+        if (ItemsHeight <= 0.0f)
+        {
+            // Submit the first item so we can measure its height (generally it is 0..1)
+            DisplayStart = ItemsFrozen;
+            DisplayEnd = ItemsFrozen + 1;
+            StepNo = 1;
+            return true;
+        }
+
+        // Already has item height (given by user in Begin): skip to calculating step
+        DisplayStart = DisplayEnd;
+        StepNo = 2;
+    }
+
+    // Step 1: the clipper infer height from first element
+    if (StepNo == 1)
+    {
+        IM_ASSERT(ItemsHeight <= 0.0f);
+        if (table)
+        {
+            const float pos_y1 = table->RowPosY1;   // Using this instead of StartPosY to handle clipper straddling the frozen row
+            const float pos_y2 = table->RowPosY2;   // Using this instead of CursorPos.y to take account of tallest cell.
+            ItemsHeight = pos_y2 - pos_y1;
+            window->DC.CursorPos.y = pos_y2;
+        }
+        else
+        {
+            ItemsHeight = window->DC.CursorPos.y - StartPosY;
+        }
+        IM_ASSERT(ItemsHeight > 0.0f && "Unable to calculate item height! First item hasn't moved the cursor vertically!");
+        StepNo = 2;
+    }
+
+    // Reached end of list
+    if (DisplayEnd >= ItemsCount)
+    {
+        End();
+        return false;
+    }
+
+    // Step 2: calculate the actual range of elements to display, and position the cursor before the first element
+    if (StepNo == 2)
+    {
+        IM_ASSERT(ItemsHeight > 0.0f);
+
+        // CalcListClipping() works relative to the cursor, so offset its result by what was already submitted
+        int already_submitted = DisplayEnd;
+        ImGui::CalcListClipping(ItemsCount - already_submitted, ItemsHeight, &DisplayStart, &DisplayEnd);
+        DisplayStart += already_submitted;
+        DisplayEnd += already_submitted;
+
+        // Seek cursor
+        if (DisplayStart > already_submitted)
+            SetCursorPosYAndSetupForPrevLine(StartPosY + (DisplayStart - ItemsFrozen) * ItemsHeight, ItemsHeight);
+
+        StepNo = 3;
+        return true;
+    }
+
+    // Step 3: the clipper validate that we have reached the expected Y position (corresponding to element DisplayEnd),
+    // Advance the cursor to the end of the list and then returns 'false' to end the loop.
+    if (StepNo == 3)
+    {
+        // Seek cursor
+        if (ItemsCount < INT_MAX)
+            SetCursorPosYAndSetupForPrevLine(StartPosY + (ItemsCount - ItemsFrozen) * ItemsHeight, ItemsHeight); // advance cursor
+        ItemsCount = -1;
+        return false;
+    }
+
+    IM_ASSERT(0);
+    return false;
+}
+
+//-----------------------------------------------------------------------------
+// [SECTION] STYLING
+//-----------------------------------------------------------------------------
+
+// Access the style of the current context. Asserts when no context is set.
+ImGuiStyle& ImGui::GetStyle()
+{
+    IM_ASSERT(GImGui != NULL && "No current context. Did you call ImGui::CreateContext() and ImGui::SetCurrentContext() ?");
+    return GImGui->Style;
+}
+
+// Style color 'idx' packed as ImU32, with its alpha multiplied by style.Alpha and 'alpha_mul'.
+ImU32 ImGui::GetColorU32(ImGuiCol idx, float alpha_mul)
+{
+    ImGuiStyle& style = GImGui->Style;
+    ImVec4 c = style.Colors[idx];
+    c.w *= style.Alpha * alpha_mul;
+    return ColorConvertFloat4ToU32(c);
+}
+
+// 'col' packed as ImU32, with its alpha multiplied by style.Alpha.
+ImU32 ImGui::GetColorU32(const ImVec4& col)
+{
+    ImGuiStyle& style = GImGui->Style;
+    ImVec4 c = col;
+    c.w *= style.Alpha;
+    return ColorConvertFloat4ToU32(c);
+}
+
+// Style color 'idx' as stored in the style (style.Alpha is not applied).
+const ImVec4& ImGui::GetStyleColorVec4(ImGuiCol idx)
+{
+    ImGuiStyle& style = GImGui->Style;
+    return style.Colors[idx];
+}
+
+// Packed color 'col' with its alpha channel multiplied by style.Alpha; returned unchanged when style.Alpha >= 1.
+ImU32 ImGui::GetColorU32(ImU32 col)
+{
+    ImGuiStyle& style = GImGui->Style;
+    if (style.Alpha >= 1.0f)
+        return col;
+    ImU32 a = (col & IM_COL32_A_MASK) >> IM_COL32_A_SHIFT;
+    a = (ImU32)(a * style.Alpha); // We don't need to clamp 0..255 because Style.Alpha is in 0..1 range.
+    return (col & ~IM_COL32_A_MASK) | (a << IM_COL32_A_SHIFT);
+}
+
+// FIXME: This may incur a round-trip (if the end user got their data from a float4) but eventually we aim to store the in-flight colors as ImU32
+// Save the current value of style color 'idx' on the color stack, then override it with 'col'.
+void ImGui::PushStyleColor(ImGuiCol idx, ImU32 col)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiColorMod backup;
+    backup.Col = idx;
+    backup.BackupValue = g.Style.Colors[idx];
+    g.ColorStack.push_back(backup);
+    g.Style.Colors[idx] = ColorConvertU32ToFloat4(col);
+}
+
+// Same as the ImU32 overload, taking the new color as an ImVec4.
+void ImGui::PushStyleColor(ImGuiCol idx, const ImVec4& col)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiColorMod backup;
+    backup.Col = idx;
+    backup.BackupValue = g.Style.Colors[idx];
+    g.ColorStack.push_back(backup);
+    g.Style.Colors[idx] = col;
+}
+
+// Restore the last 'count' colors pushed with PushStyleColor(), most recent first.
+void ImGui::PopStyleColor(int count)
+{
+    ImGuiContext& g = *GImGui;
+    while (count > 0)
+    {
+        ImGuiColorMod& backup = g.ColorStack.back();
+        g.Style.Colors[backup.Col] = backup.BackupValue;
+        g.ColorStack.pop_back();
+        count--;
+    }
+}
+
+// Describes where a style variable lives inside ImGuiStyle: element type, element count and byte offset.
+struct ImGuiStyleVarInfo
+{
+    ImGuiDataType   Type;
+    ImU32           Count;
+    ImU32           Offset;
+    void*           GetVarPtr(ImGuiStyle* style) const { return (void*)((unsigned char*)style + Offset); }
+};
+
+// One entry per ImGuiStyleVar value, in enum order (GetStyleVarInfo() asserts that the sizes match).
+static const ImGuiStyleVarInfo GStyleVarInfo[] =
+{
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImGuiStyle, Alpha) },               // ImGuiStyleVar_Alpha
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImGuiStyle, WindowPadding) },       // ImGuiStyleVar_WindowPadding
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImGuiStyle, WindowRounding) },      // ImGuiStyleVar_WindowRounding
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImGuiStyle, WindowBorderSize) },    // ImGuiStyleVar_WindowBorderSize
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImGuiStyle, WindowMinSize) },       // ImGuiStyleVar_WindowMinSize
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImGuiStyle, WindowTitleAlign) },    // ImGuiStyleVar_WindowTitleAlign
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImGuiStyle, ChildRounding) },       // ImGuiStyleVar_ChildRounding
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImGuiStyle, ChildBorderSize) },     // ImGuiStyleVar_ChildBorderSize
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImGuiStyle, PopupRounding) },       // ImGuiStyleVar_PopupRounding
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImGuiStyle, PopupBorderSize) },     // ImGuiStyleVar_PopupBorderSize
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImGuiStyle, FramePadding) },        // ImGuiStyleVar_FramePadding
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImGuiStyle, FrameRounding) },       // ImGuiStyleVar_FrameRounding
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImGuiStyle, FrameBorderSize) },     // ImGuiStyleVar_FrameBorderSize
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImGuiStyle, ItemSpacing) },         // ImGuiStyleVar_ItemSpacing
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImGuiStyle, ItemInnerSpacing) },    // ImGuiStyleVar_ItemInnerSpacing
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImGuiStyle, IndentSpacing) },       // ImGuiStyleVar_IndentSpacing
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImGuiStyle, CellPadding) },         // ImGuiStyleVar_CellPadding
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImGuiStyle, ScrollbarSize) },       // ImGuiStyleVar_ScrollbarSize
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImGuiStyle, ScrollbarRounding) },   // ImGuiStyleVar_ScrollbarRounding
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImGuiStyle, GrabMinSize) },         // ImGuiStyleVar_GrabMinSize
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImGuiStyle, GrabRounding) },        // ImGuiStyleVar_GrabRounding
+    { ImGuiDataType_Float, 1, (ImU32)IM_OFFSETOF(ImGuiStyle, TabRounding) },         // ImGuiStyleVar_TabRounding
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImGuiStyle, ButtonTextAlign) },     // ImGuiStyleVar_ButtonTextAlign
+    { ImGuiDataType_Float, 2, (ImU32)IM_OFFSETOF(ImGuiStyle, SelectableTextAlign) }, // ImGuiStyleVar_SelectableTextAlign
+};
+
+// Look up the table entry for 'idx'. Asserts that 'idx' is in range and that the table covers the whole enum.
+static const ImGuiStyleVarInfo* GetStyleVarInfo(ImGuiStyleVar idx)
+{
+    IM_ASSERT(idx >= 0 && idx < ImGuiStyleVar_COUNT);
+    IM_ASSERT(IM_ARRAYSIZE(GStyleVarInfo) == ImGuiStyleVar_COUNT);
+    return &GStyleVarInfo[idx];
+}
+
+// Save the current value of a single-float style variable on the stack, then override it with 'val'.
+// Asserts when 'idx' does not designate a single float.
+void ImGui::PushStyleVar(ImGuiStyleVar idx, float val)
+{
+    const ImGuiStyleVarInfo* var_info = GetStyleVarInfo(idx);
+    if (var_info->Type == ImGuiDataType_Float && var_info->Count == 1)
+    {
+        ImGuiContext& g = *GImGui;
+        float* pvar = (float*)var_info->GetVarPtr(&g.Style);
+        g.StyleVarStack.push_back(ImGuiStyleMod(idx, *pvar));
+        *pvar = val;
+        return;
+    }
+    IM_ASSERT(0 && "Called PushStyleVar() float variant but variable is not a float!");
+}
+
+// Save the current value of a two-float (ImVec2) style variable on the stack, then override it with 'val'.
+// Asserts when 'idx' does not designate an ImVec2.
+void ImGui::PushStyleVar(ImGuiStyleVar idx, const ImVec2& val)
+{
+    const ImGuiStyleVarInfo* var_info = GetStyleVarInfo(idx);
+    if (var_info->Type == ImGuiDataType_Float && var_info->Count == 2)
+    {
+        ImGuiContext& g = *GImGui;
+        ImVec2* pvar = (ImVec2*)var_info->GetVarPtr(&g.Style);
+        g.StyleVarStack.push_back(ImGuiStyleMod(idx, *pvar));
+        *pvar = val;
+        return;
+    }
+    IM_ASSERT(0 && "Called PushStyleVar() ImVec2 variant but variable is not a ImVec2!");
+}
+
+// Restore the last 'count' variables pushed with PushStyleVar(), most recent first.
+void ImGui::PopStyleVar(int count)
+{
+    ImGuiContext& g = *GImGui;
+    while (count > 0)
+    {
+        // We avoid a generic memcpy(data, &backup.Backup.., GDataTypeSize[info->Type] * info->Count), the overhead in Debug is not worth it.
+        ImGuiStyleMod& backup = g.StyleVarStack.back();
+        const ImGuiStyleVarInfo* info = GetStyleVarInfo(backup.VarIdx);
+        void* data = info->GetVarPtr(&g.Style);
+        if (info->Type == ImGuiDataType_Float && info->Count == 1)      { ((float*)data)[0] = backup.BackupFloat[0]; }
+        else if (info->Type == ImGuiDataType_Float && info->Count == 2) { ((float*)data)[0] = backup.BackupFloat[0]; ((float*)data)[1] = backup.BackupFloat[1]; }
+        g.StyleVarStack.pop_back();
+        count--;
+    }
+}
+
+// Name of style color 'idx' (the enum name without the ImGuiCol_ prefix).
+// Asserts and returns "Unknown" for a value with no case below.
+const char* ImGui::GetStyleColorName(ImGuiCol idx)
+{
+    // Create switch-case from enum with regexp: ImGuiCol_{.*}, --> case ImGuiCol_\1: return "\1";
+    switch (idx)
+    {
+    case ImGuiCol_Text: return "Text";
+    case ImGuiCol_TextDisabled: return "TextDisabled";
+    case ImGuiCol_WindowBg: return "WindowBg";
+    case ImGuiCol_ChildBg: return "ChildBg";
+    case ImGuiCol_PopupBg: return "PopupBg";
+    case ImGuiCol_Border: return "Border";
+    case ImGuiCol_BorderShadow: return "BorderShadow";
+    case ImGuiCol_FrameBg: return "FrameBg";
+    case ImGuiCol_FrameBgHovered: return "FrameBgHovered";
+    case ImGuiCol_FrameBgActive: return "FrameBgActive";
+    case ImGuiCol_TitleBg: return "TitleBg";
+    case ImGuiCol_TitleBgActive: return "TitleBgActive";
+    case ImGuiCol_TitleBgCollapsed: return "TitleBgCollapsed";
+    case ImGuiCol_MenuBarBg: return "MenuBarBg";
+    case ImGuiCol_ScrollbarBg: return "ScrollbarBg";
+    case ImGuiCol_ScrollbarGrab: return "ScrollbarGrab";
+    case ImGuiCol_ScrollbarGrabHovered: return "ScrollbarGrabHovered";
+    case ImGuiCol_ScrollbarGrabActive: return "ScrollbarGrabActive";
+    case ImGuiCol_CheckMark: return "CheckMark";
+    case ImGuiCol_SliderGrab: return "SliderGrab";
+    case ImGuiCol_SliderGrabActive: return "SliderGrabActive";
+    case ImGuiCol_Button: return "Button";
+    case ImGuiCol_ButtonHovered: return "ButtonHovered";
+    case ImGuiCol_ButtonActive: return "ButtonActive";
+    case ImGuiCol_Header: return "Header";
+    case ImGuiCol_HeaderHovered: return "HeaderHovered";
+    case ImGuiCol_HeaderActive: return "HeaderActive";
+    case ImGuiCol_Separator: return "Separator";
+    case ImGuiCol_SeparatorHovered: return "SeparatorHovered";
+    case ImGuiCol_SeparatorActive: return "SeparatorActive";
+    case ImGuiCol_ResizeGrip: return "ResizeGrip";
+    case ImGuiCol_ResizeGripHovered: return "ResizeGripHovered";
+    case ImGuiCol_ResizeGripActive: return "ResizeGripActive";
+    case ImGuiCol_Tab: return "Tab";
+    case ImGuiCol_TabHovered: return "TabHovered";
+    case ImGuiCol_TabActive: return "TabActive";
+    case ImGuiCol_TabUnfocused: return "TabUnfocused";
+    case ImGuiCol_TabUnfocusedActive: return "TabUnfocusedActive";
+    case ImGuiCol_PlotLines: return "PlotLines";
+    case ImGuiCol_PlotLinesHovered: return "PlotLinesHovered";
+    case ImGuiCol_PlotHistogram: return "PlotHistogram";
+    case ImGuiCol_PlotHistogramHovered: return "PlotHistogramHovered";
+    case ImGuiCol_TableHeaderBg: return "TableHeaderBg";
+    case ImGuiCol_TableBorderStrong: return "TableBorderStrong";
+    case ImGuiCol_TableBorderLight: return "TableBorderLight";
+    case ImGuiCol_TableRowBg: return "TableRowBg";
+    case ImGuiCol_TableRowBgAlt: return "TableRowBgAlt";
+    case ImGuiCol_TextSelectedBg: return "TextSelectedBg";
+    case ImGuiCol_DragDropTarget: return "DragDropTarget";
+    case ImGuiCol_NavHighlight: return "NavHighlight";
+    case ImGuiCol_NavWindowingHighlight: return "NavWindowingHighlight";
+    case ImGuiCol_NavWindowingDimBg: return "NavWindowingDimBg";
+    case ImGuiCol_ModalWindowDimBg: return "ModalWindowDimBg";
+    }
+    IM_ASSERT(0);
+    return "Unknown";
+}
+
+
+//-----------------------------------------------------------------------------
+// [SECTION] RENDER HELPERS
+// Some of those (internal) functions are currently quite a legacy mess - their signature and behavior will change,
+// we need a nicer separation between low-level functions and high-level functions relying on the ImGui context.
+// Also see imgui_draw.cpp for some more which have been reworked to not rely on ImGui:: context.
+//-----------------------------------------------------------------------------
+
+// Return a pointer to the end of the displayed part of 'text': the first "##" marker, the zero terminator
+// or 'text_end', whichever comes first. A NULL 'text_end' means "scan until the terminator or the marker".
+const char* ImGui::FindRenderedTextEnd(const char* text, const char* text_end)
+{
+    const char* text_display_end = text;
+    if (!text_end)
+        text_end = (const char*)-1;     // Highest possible address, so the bound check below never triggers
+
+    while (text_display_end < text_end && *text_display_end != '\0' && (text_display_end[0] != '#' || text_display_end[1] != '#'))
+        text_display_end++;
+    return text_display_end;
+}
+
+// Internal ImGui functions to render text
+// RenderText***() functions calls ImDrawList::AddText() calls ImBitmapFont::RenderText()
+// Draws into the current window's draw list with the current font and ImGuiCol_Text, and forwards the text
+// to the log when logging is enabled. Nothing is drawn or logged when the displayed text is empty.
+void ImGui::RenderText(ImVec2 pos, const char* text, const char* text_end, bool hide_text_after_hash)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+
+    // Hide anything after a '##' string
+    const char* text_display_end;
+    if (hide_text_after_hash)
+    {
+        text_display_end = FindRenderedTextEnd(text, text_end);
+    }
+    else
+    {
+        if (!text_end)
+            text_end = text + strlen(text); // FIXME-OPT
+        text_display_end = text_end;
+    }
+
+    if (text != text_display_end)
+    {
+        window->DrawList->AddText(g.Font, g.FontSize, pos, GetColorU32(ImGuiCol_Text), text, text_display_end);
+        if (g.LogEnabled)
+            LogRenderedText(&pos, text, text_display_end);
+    }
+}
+
+// Same as RenderText() but passes 'wrap_width' to AddText() and never hides text after "##".
+void ImGui::RenderTextWrapped(ImVec2 pos, const char* text, const char* text_end, float wrap_width)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+
+    if (!text_end)
+        text_end = text + strlen(text); // FIXME-OPT
+
+    if (text != text_end)
+    {
+        window->DrawList->AddText(g.Font, g.FontSize, pos, GetColorU32(ImGuiCol_Text), text, text_end, wrap_width);
+        if (g.LogEnabled)
+            LogRenderedText(&pos, text, text_end);
+    }
+}
+
+// Default clip_rect uses (pos_min,pos_max)
+// Handle clipping on CPU immediately (vs typically let the GPU clip the triangles that are overlapping the clipping rectangle edges)
+// 'align' positions the text block inside (pos_min,pos_max) per axis; the text size is computed with
+// CalcTextSize() unless 'text_size_if_known' is provided.
+void ImGui::RenderTextClippedEx(ImDrawList* draw_list, const ImVec2& pos_min, const ImVec2& pos_max, const char* text, const char* text_display_end, const ImVec2* text_size_if_known, const ImVec2& align, const ImRect* clip_rect)
+{
+    // Perform CPU side clipping for single clipped element to avoid using scissor state
+    ImVec2 pos = pos_min;
+    const ImVec2 text_size = text_size_if_known ? *text_size_if_known : CalcTextSize(text, text_display_end, false, 0.0f);
+
+    const ImVec2* clip_min = clip_rect ? &clip_rect->Min : &pos_min;
+    const ImVec2* clip_max = clip_rect ? &clip_rect->Max : &pos_max;
+    bool need_clipping = (pos.x + text_size.x >= clip_max->x) || (pos.y + text_size.y >= clip_max->y);
+    if (clip_rect) // If we had no explicit clipping rectangle then pos==clip_min
+        need_clipping |= (pos.x < clip_min->x) || (pos.y < clip_min->y);
+
+    // Align whole block. We should defer that to the better rendering function when we'll have support for individual line alignment.
+    if (align.x > 0.0f) pos.x = ImMax(pos.x, pos.x + (pos_max.x - pos.x - text_size.x) * align.x);
+    if (align.y > 0.0f) pos.y = ImMax(pos.y, pos.y + (pos_max.y - pos.y - text_size.y) * align.y);
+
+    // Render
+    if (need_clipping)
+    {
+        ImVec4 fine_clip_rect(clip_min->x, clip_min->y, clip_max->x, clip_max->y);
+        draw_list->AddText(NULL, 0.0f, pos, GetColorU32(ImGuiCol_Text), text, text_display_end, 0.0f, &fine_clip_rect);
+    }
+    else
+    {
+        draw_list->AddText(NULL, 0.0f, pos, GetColorU32(ImGuiCol_Text), text, text_display_end, 0.0f, NULL);
+    }
+}
+
+// Context-aware wrapper around RenderTextClippedEx(): hides text after "##", draws into the current window's
+// draw list and forwards the text to the log when logging is enabled. Does nothing for empty displayed text.
+void ImGui::RenderTextClipped(const ImVec2& pos_min, const ImVec2& pos_max, const char* text, const char* text_end, const ImVec2* text_size_if_known, const ImVec2& align, const ImRect* clip_rect)
+{
+    // Hide anything after a '##' string
+    const char* text_display_end = FindRenderedTextEnd(text, text_end);
+    const int text_len = (int)(text_display_end - text);
+    if (text_len == 0)
+        return;
+
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    RenderTextClippedEx(window->DrawList, pos_min, pos_max, text, text_display_end, text_size_if_known, align, clip_rect);
+    if (g.LogEnabled)
+        LogRenderedText(&pos_min, text, text_display_end);
+}
+
+
+// Another overly complex function until we reorganize everything into a nice all-in-one helper.
+// This is made more complex because we have dissociated the layout rectangle (pos_min..pos_max) which define _where_ the ellipsis is, from actual clipping of text and limit of the ellipsis display.
+// This is because in the context of tabs we selectively hide part of the text when the Close Button appears, but we don't want the ellipsis to move.
+// When the text fits in (pos_min..pos_max) it is rendered as-is, clipped at 'clip_max_x'.
+// Otherwise it is shortened and followed by the font's ellipsis glyph, or by three '.' glyphs when the font has none.
+void ImGui::RenderTextEllipsis(ImDrawList* draw_list, const ImVec2& pos_min, const ImVec2& pos_max, float clip_max_x, float ellipsis_max_x, const char* text, const char* text_end_full, const ImVec2* text_size_if_known)
+{
+    ImGuiContext& g = *GImGui;
+    if (text_end_full == NULL)
+        text_end_full = FindRenderedTextEnd(text);
+    const ImVec2 text_size = text_size_if_known ? *text_size_if_known : CalcTextSize(text, text_end_full, false, 0.0f);
+
+    //draw_list->AddLine(ImVec2(pos_max.x, pos_min.y - 4), ImVec2(pos_max.x, pos_max.y + 4), IM_COL32(0, 0, 255, 255));
+    //draw_list->AddLine(ImVec2(ellipsis_max_x, pos_min.y-2), ImVec2(ellipsis_max_x, pos_max.y+2), IM_COL32(0, 255, 0, 255));
+    //draw_list->AddLine(ImVec2(clip_max_x, pos_min.y), ImVec2(clip_max_x, pos_max.y), IM_COL32(255, 0, 0, 255));
+    // FIXME: We could technically remove (last_glyph->AdvanceX - last_glyph->X1) from text_size.x here and save a few pixels.
+    if (text_size.x > pos_max.x - pos_min.x)
+    {
+        // Hello wo...
+        // |       |   |
+        // min   max   ellipsis_max
+        //          <-> this is generally some padding value
+
+        const ImFont* font = draw_list->_Data->Font;
+        const float font_size = draw_list->_Data->FontSize;
+        const char* text_end_ellipsis = NULL;
+
+        // Fall back to three '.' characters when the font has no dedicated ellipsis character
+        ImWchar ellipsis_char = font->EllipsisChar;
+        int ellipsis_char_count = 1;
+        if (ellipsis_char == (ImWchar)-1)
+        {
+            ellipsis_char = (ImWchar)'.';
+            ellipsis_char_count = 3;
+        }
+        const ImFontGlyph* glyph = font->FindGlyph(ellipsis_char);
+
+        float ellipsis_glyph_width = glyph->X1;                 // Width of the glyph with no padding on either side
+        float ellipsis_total_width = ellipsis_glyph_width;      // Full width of entire ellipsis
+
+        if (ellipsis_char_count > 1)
+        {
+            // Full ellipsis size without free spacing after it.
+            const float spacing_between_dots = 1.0f * (draw_list->_Data->FontSize / font->FontSize);
+            ellipsis_glyph_width = glyph->X1 - glyph->X0 + spacing_between_dots;
+            ellipsis_total_width = ellipsis_glyph_width * (float)ellipsis_char_count - spacing_between_dots;
+        }
+
+        // We can now claim the space between pos_max.x and ellipsis_max.x
+        const float text_avail_width = ImMax((ImMax(pos_max.x, ellipsis_max_x) - ellipsis_total_width) - pos_min.x, 1.0f);
+        float text_size_clipped_x = font->CalcTextSizeA(font_size, text_avail_width, 0.0f, text, text_end_full, &text_end_ellipsis).x;
+        if (text == text_end_ellipsis && text_end_ellipsis < text_end_full)
+        {
+            // Always display at least 1 character if there's no room for character + ellipsis
+            text_end_ellipsis = text + ImTextCountUtf8BytesFromChar(text, text_end_full);
+            text_size_clipped_x = font->CalcTextSizeA(font_size, FLT_MAX, 0.0f, text, text_end_ellipsis).x;
+        }
+        while (text_end_ellipsis > text && ImCharIsBlankA(text_end_ellipsis[-1]))
+        {
+            // Trim trailing space before ellipsis (FIXME: Supporting non-ascii blanks would be nice, for this we need a function to backtrack in UTF-8 text)
+            text_end_ellipsis--;
+            text_size_clipped_x -= font->CalcTextSizeA(font_size, FLT_MAX, 0.0f, text_end_ellipsis, text_end_ellipsis + 1).x; // Ascii blanks are always 1 byte
+        }
+
+        // Render text, render ellipsis
+        RenderTextClippedEx(draw_list, pos_min, ImVec2(clip_max_x, pos_max.y), text, text_end_ellipsis, &text_size, ImVec2(0.0f, 0.0f));
+        float ellipsis_x = pos_min.x + text_size_clipped_x;
+        if (ellipsis_x + ellipsis_total_width <= ellipsis_max_x)
+            for (int i = 0; i < ellipsis_char_count; i++)
+            {
+                font->RenderChar(draw_list, font_size, ImVec2(ellipsis_x, pos_min.y), GetColorU32(ImGuiCol_Text), ellipsis_char);
+                ellipsis_x += ellipsis_glyph_width;
+            }
+    }
+    else
+    {
+        RenderTextClippedEx(draw_list, pos_min, ImVec2(clip_max_x, pos_max.y), text, text_end_full, &text_size, ImVec2(0.0f, 0.0f));
+    }
+
+    // The full, untruncated text is what gets logged
+    if (g.LogEnabled)
+        LogRenderedText(&pos_min, text, text_end_full);
+}
+
+// Render a rectangle shaped with optional rounding and borders
+// The border (a shadow rectangle offset by (1,1), then the border itself) is only drawn when 'border' is set
+// and the style's FrameBorderSize is positive.
+void ImGui::RenderFrame(ImVec2 p_min, ImVec2 p_max, ImU32 fill_col, bool border, float rounding)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    window->DrawList->AddRectFilled(p_min, p_max, fill_col, rounding);
+    const float border_size = g.Style.FrameBorderSize;
+    if (border && border_size > 0.0f)
+    {
+        window->DrawList->AddRect(p_min + ImVec2(1, 1), p_max + ImVec2(1, 1), GetColorU32(ImGuiCol_BorderShadow), rounding, ImDrawCornerFlags_All, border_size);
+        window->DrawList->AddRect(p_min, p_max, GetColorU32(ImGuiCol_Border), rounding, ImDrawCornerFlags_All, border_size);
+    }
+}
+
+// Render only the border part of RenderFrame(): nothing is drawn when the style's FrameBorderSize is not positive.
+void ImGui::RenderFrameBorder(ImVec2 p_min, ImVec2 p_max, float rounding)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    const float border_size = g.Style.FrameBorderSize;
+    if (border_size > 0.0f)
+    {
+        window->DrawList->AddRect(p_min + ImVec2(1, 1), p_max + ImVec2(1, 1), GetColorU32(ImGuiCol_BorderShadow), rounding, ImDrawCornerFlags_All, border_size);
+        window->DrawList->AddRect(p_min, p_max, GetColorU32(ImGuiCol_Border), rounding, ImDrawCornerFlags_All, border_size);
+    }
+}
+
+// Draw the navigation highlight around 'bb' when 'id' is the current NavId.
+// Skipped when the highlight is disabled (unless ImGuiNavHighlightFlags_AlwaysDraw is set) or when the
+// window hides it for this frame.
+void ImGui::RenderNavHighlight(const ImRect& bb, ImGuiID id, ImGuiNavHighlightFlags flags)
+{
+    ImGuiContext& g = *GImGui;
+    if (id != g.NavId)
+        return;
+    if (g.NavDisableHighlight && !(flags & ImGuiNavHighlightFlags_AlwaysDraw))
+        return;
+    ImGuiWindow* window = g.CurrentWindow;
+    if (window->DC.NavHideHighlightOneFrame)
+        return;
+
+    float rounding = (flags & ImGuiNavHighlightFlags_NoRounding) ? 0.0f : g.Style.FrameRounding;
+    ImRect display_rect = bb;
+    display_rect.ClipWith(window->ClipRect);
+    if (flags & ImGuiNavHighlightFlags_TypeDefault)
+    {
+        const float THICKNESS = 2.0f;
+        const float DISTANCE = 3.0f + THICKNESS * 0.5f;
+        display_rect.Expand(ImVec2(DISTANCE, DISTANCE));
+        // The expanded rectangle may stick out of the window clip rect: push a clip rect for it in that case
+        bool fully_visible = window->ClipRect.Contains(display_rect);
+        if (!fully_visible)
+            window->DrawList->PushClipRect(display_rect.Min, display_rect.Max);
+        window->DrawList->AddRect(display_rect.Min + ImVec2(THICKNESS * 0.5f, THICKNESS * 0.5f), display_rect.Max - ImVec2(THICKNESS * 0.5f, THICKNESS * 0.5f), GetColorU32(ImGuiCol_NavHighlight), rounding, ImDrawCornerFlags_All, THICKNESS);
+        if (!fully_visible)
+            window->DrawList->PopClipRect();
+    }
+    if (flags & ImGuiNavHighlightFlags_TypeThin)
+    {
+        window->DrawList->AddRect(display_rect.Min, display_rect.Max, GetColorU32(ImGuiCol_NavHighlight), rounding, ~0, 1.0f);
+    }
+}
+
+//-----------------------------------------------------------------------------
+// [SECTION] MAIN CODE (most of the code! lots of stuff, needs tidying up!)
+//-----------------------------------------------------------------------------
+
+// ImGuiWindow is mostly a dumb struct.
It merely has a constructor and a few helper methods +ImGuiWindow::ImGuiWindow(ImGuiContext* context, const char* name) : DrawListInst(NULL) +{ + memset(this, 0, sizeof(*this)); + Name = ImStrdup(name); + NameBufLen = (int)strlen(name) + 1; + ID = ImHashStr(name); + IDStack.push_back(ID); + MoveId = GetID("#MOVE"); + ScrollTarget = ImVec2(FLT_MAX, FLT_MAX); + ScrollTargetCenterRatio = ImVec2(0.5f, 0.5f); + AutoFitFramesX = AutoFitFramesY = -1; + AutoPosLastDirection = ImGuiDir_None; + SetWindowPosAllowFlags = SetWindowSizeAllowFlags = SetWindowCollapsedAllowFlags = ImGuiCond_Always | ImGuiCond_Once | ImGuiCond_FirstUseEver | ImGuiCond_Appearing; + SetWindowPosVal = SetWindowPosPivot = ImVec2(FLT_MAX, FLT_MAX); + LastFrameActive = -1; + LastTimeActive = -1.0f; + FontWindowScale = 1.0f; + SettingsOffset = -1; + DrawList = &DrawListInst; + DrawList->_Data = &context->DrawListSharedData; + DrawList->_OwnerName = Name; +} + +ImGuiWindow::~ImGuiWindow() +{ + IM_ASSERT(DrawList == &DrawListInst); + IM_DELETE(Name); + for (int i = 0; i != ColumnsStorage.Size; i++) + ColumnsStorage[i].~ImGuiOldColumns(); +} + +ImGuiID ImGuiWindow::GetID(const char* str, const char* str_end) +{ + ImGuiID seed = IDStack.back(); + ImGuiID id = ImHashStr(str, str_end ?
(str_end - str) : 0, seed); + ImGui::KeepAliveID(id); +#ifdef IMGUI_ENABLE_TEST_ENGINE + ImGuiContext& g = *GImGui; + IMGUI_TEST_ENGINE_ID_INFO2(id, ImGuiDataType_String, str, str_end); +#endif + return id; +} + +ImGuiID ImGuiWindow::GetID(const void* ptr) +{ + ImGuiID seed = IDStack.back(); + ImGuiID id = ImHashData(&ptr, sizeof(void*), seed); + ImGui::KeepAliveID(id); +#ifdef IMGUI_ENABLE_TEST_ENGINE + ImGuiContext& g = *GImGui; + IMGUI_TEST_ENGINE_ID_INFO(id, ImGuiDataType_Pointer, ptr); +#endif + return id; +} + +ImGuiID ImGuiWindow::GetID(int n) +{ + ImGuiID seed = IDStack.back(); + ImGuiID id = ImHashData(&n, sizeof(n), seed); + ImGui::KeepAliveID(id); +#ifdef IMGUI_ENABLE_TEST_ENGINE + ImGuiContext& g = *GImGui; + IMGUI_TEST_ENGINE_ID_INFO(id, ImGuiDataType_S32, (intptr_t)n); +#endif + return id; +} + +ImGuiID ImGuiWindow::GetIDNoKeepAlive(const char* str, const char* str_end) +{ + ImGuiID seed = IDStack.back(); + ImGuiID id = ImHashStr(str, str_end ? (str_end - str) : 0, seed); +#ifdef IMGUI_ENABLE_TEST_ENGINE + ImGuiContext& g = *GImGui; + IMGUI_TEST_ENGINE_ID_INFO2(id, ImGuiDataType_String, str, str_end); +#endif + return id; +} + +ImGuiID ImGuiWindow::GetIDNoKeepAlive(const void* ptr) +{ + ImGuiID seed = IDStack.back(); + ImGuiID id = ImHashData(&ptr, sizeof(void*), seed); +#ifdef IMGUI_ENABLE_TEST_ENGINE + ImGuiContext& g = *GImGui; + IMGUI_TEST_ENGINE_ID_INFO(id, ImGuiDataType_Pointer, ptr); +#endif + return id; +} + +ImGuiID ImGuiWindow::GetIDNoKeepAlive(int n) +{ + ImGuiID seed = IDStack.back(); + ImGuiID id = ImHashData(&n, sizeof(n), seed); +#ifdef IMGUI_ENABLE_TEST_ENGINE + ImGuiContext& g = *GImGui; + IMGUI_TEST_ENGINE_ID_INFO(id, ImGuiDataType_S32, (intptr_t)n); +#endif + return id; +} + +// This is only used in rare/specific situations to manufacture an ID out of nowhere. +// The ID hashes the rectangle's integer coordinates relative to the window position (Pos), seeded with the top of the ID stack.
+ImGuiID ImGuiWindow::GetIDFromRectangle(const ImRect& r_abs) +{ + ImGuiID seed = IDStack.back(); + const int r_rel[4] = { (int)(r_abs.Min.x - Pos.x), (int)(r_abs.Min.y - Pos.y), (int)(r_abs.Max.x - Pos.x), (int)(r_abs.Max.y - Pos.y) }; + ImGuiID id = ImHashData(&r_rel, sizeof(r_rel), seed); + ImGui::KeepAliveID(id); + return id; +} + +static void SetCurrentWindow(ImGuiWindow* window) +{ + ImGuiContext& g = *GImGui; + g.CurrentWindow = window; + g.CurrentTable = window && window->DC.CurrentTableIdx != -1 ? g.Tables.GetByIndex(window->DC.CurrentTableIdx) : NULL; + if (window) + g.FontSize = g.DrawListSharedData.FontSize = window->CalcFontSize(); +} + +void ImGui::GcCompactTransientMiscBuffers() +{ + ImGuiContext& g = *GImGui; + g.ItemFlagsStack.clear(); + g.GroupStack.clear(); + TableGcCompactSettings(); +} + +// Free up/compact internal window buffers, we can use this when a window becomes unused. +// Not freed: +// - ImGuiWindow, ImGuiWindowSettings, Name, StateStorage, ColumnsStorage (may hold useful data) +// This should have no noticeable visual effect. When the window reappears, however, expect new allocation/buffer growth/copy cost. +void ImGui::GcCompactTransientWindowBuffers(ImGuiWindow* window) +{ + window->MemoryCompacted = true; + window->MemoryDrawListIdxCapacity = window->DrawList->IdxBuffer.Capacity; + window->MemoryDrawListVtxCapacity = window->DrawList->VtxBuffer.Capacity; + window->IDStack.clear(); + window->DrawList->_ClearFreeMemory(); + window->DC.ChildWindows.clear(); + window->DC.ItemWidthStack.clear(); + window->DC.TextWrapPosStack.clear(); +} + +// Counterpart of GcCompactTransientWindowBuffers(): clears MemoryCompacted and re-reserves the draw list index/vertex buffers to the capacities stored at compaction time. +void ImGui::GcAwakeTransientWindowBuffers(ImGuiWindow* window) +{ + // We stored capacity of the ImDrawList buffer to reduce growth-caused allocation/copy when awakening. + // The other buffers tend to amortize much faster.
+ window->MemoryCompacted = false; + window->DrawList->IdxBuffer.reserve(window->MemoryDrawListIdxCapacity); + window->DrawList->VtxBuffer.reserve(window->MemoryDrawListVtxCapacity); + window->MemoryDrawListIdxCapacity = window->MemoryDrawListVtxCapacity = 0; +} + +void ImGui::SetActiveID(ImGuiID id, ImGuiWindow* window) +{ + ImGuiContext& g = *GImGui; + g.ActiveIdIsJustActivated = (g.ActiveId != id); + if (g.ActiveIdIsJustActivated) + { + g.ActiveIdTimer = 0.0f; + g.ActiveIdHasBeenPressedBefore = false; + g.ActiveIdHasBeenEditedBefore = false; + if (id != 0) + { + g.LastActiveId = id; + g.LastActiveIdTimer = 0.0f; + } + } + g.ActiveId = id; + g.ActiveIdAllowOverlap = false; + g.ActiveIdNoClearOnFocusLoss = false; + g.ActiveIdWindow = window; + g.ActiveIdHasBeenEditedThisFrame = false; + if (id) + { + g.ActiveIdIsAlive = id; + g.ActiveIdSource = (g.NavActivateId == id || g.NavInputId == id || g.NavJustTabbedId == id || g.NavJustMovedToId == id) ? ImGuiInputSource_Nav : ImGuiInputSource_Mouse; + } + + // Clear declaration of inputs claimed by the widget + // (Please note that this is WIP and not all keys/inputs are thoroughly declared by all widgets yet) + g.ActiveIdUsingMouseWheel = false; + g.ActiveIdUsingNavDirMask = 0x00; + g.ActiveIdUsingNavInputMask = 0x00; + g.ActiveIdUsingKeyInputMask = 0x00; +} + +// Clear the active ID by calling SetActiveID(0, NULL) +void ImGui::ClearActiveID() +{ + SetActiveID(0, NULL); // g.ActiveId = 0; +} + +// Set the hovered ID and reset its AllowOverlap/UsingMouseWheel flags; the hover timers restart when a non-zero id differs from the previous frame's hovered ID. +void ImGui::SetHoveredID(ImGuiID id) +{ + ImGuiContext& g = *GImGui; + g.HoveredId = id; + g.HoveredIdAllowOverlap = false; + g.HoveredIdUsingMouseWheel = false; + if (id != 0 && g.HoveredIdPreviousFrame != id) + g.HoveredIdTimer = g.HoveredIdNotActiveTimer = 0.0f; +} + +// Return g.HoveredId when it is non-zero, otherwise the previous frame's hovered ID. +ImGuiID ImGui::GetHoveredID() +{ + ImGuiContext& g = *GImGui; + return g.HoveredId ?
g.HoveredId : g.HoveredIdPreviousFrame; +} + +void ImGui::KeepAliveID(ImGuiID id) +{ + ImGuiContext& g = *GImGui; + if (g.ActiveId == id) + g.ActiveIdIsAlive = id; + if (g.ActiveIdPreviousFrame == id) + g.ActiveIdPreviousFrameIsAlive = true; +} + +void ImGui::MarkItemEdited(ImGuiID id) +{ + // This marking is solely to be able to provide info for IsItemDeactivatedAfterEdit(). + // ActiveId might have been released by the time we call this (as in the typical press/release button behavior) but we still need to fill the data. + ImGuiContext& g = *GImGui; + IM_ASSERT(g.ActiveId == id || g.ActiveId == 0 || g.DragDropActive); + IM_UNUSED(id); // Avoid unused variable warnings when asserts are compiled out. + //IM_ASSERT(g.CurrentWindow->DC.LastItemId == id); + g.ActiveIdHasBeenEditedThisFrame = true; + g.ActiveIdHasBeenEditedBefore = true; + g.CurrentWindow->DC.LastItemStatusFlags |= ImGuiItemStatusFlags_Edited; +} + +static inline bool IsWindowContentHoverable(ImGuiWindow* window, ImGuiHoveredFlags flags) +{ + // An active popup disables hovering on other windows (apart from its own children) + // FIXME-OPT: This could be cached/stored within the window. + ImGuiContext& g = *GImGui; + if (g.NavWindow) + if (ImGuiWindow* focused_root_window = g.NavWindow->RootWindow) + if (focused_root_window->WasActive && focused_root_window != window->RootWindow) + { + // For the purpose of those flags we differentiate "standard popup" from "modal popup" + // NB: The order of those two tests is important because Modal windows are also Popups.
+ if (focused_root_window->Flags & ImGuiWindowFlags_Modal) + return false; + if ((focused_root_window->Flags & ImGuiWindowFlags_Popup) && !(flags & ImGuiHoveredFlags_AllowWhenBlockedByPopup)) + return false; + } + return true; +} + +// This is roughly matching the behavior of internal-facing ItemHoverable() +// - we allow hovering to be true when ActiveId==window->MoveID, so that clicking on non-interactive items such as a Text() item still returns true with IsItemHovered() +// - this should work even for non-interactive items that have no ID, so we cannot use LastItemId +bool ImGui::IsItemHovered(ImGuiHoveredFlags flags) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (g.NavDisableMouseHover && !g.NavDisableHighlight) + return IsItemFocused(); + + // Test for bounding box overlap, as updated by ItemAdd() + if (!(window->DC.LastItemStatusFlags & ImGuiItemStatusFlags_HoveredRect)) + return false; + IM_ASSERT((flags & (ImGuiHoveredFlags_RootWindow | ImGuiHoveredFlags_ChildWindows)) == 0); // Flags not supported by this function + + // Test if we are hovering the right window (our window could be behind another window) + // [2017/10/16] Reverted commit 344d48be3 and testing RootWindow instead. I believe it is correct to NOT test for RootWindow but this leaves us unable to use IsItemHovered() after EndChild() itself. + // Until a solution is found I believe reverting to the test from 2017/09/27 is safe since this was the test that has been running for a long while. + //if (g.HoveredWindow != window) + // return false; + if (g.HoveredRootWindow != window->RootWindow && !(flags & ImGuiHoveredFlags_AllowWhenOverlapped)) + return false; + + // Test if another item is active (e.g.
being dragged) + if (!(flags & ImGuiHoveredFlags_AllowWhenBlockedByActiveItem)) + if (g.ActiveId != 0 && g.ActiveId != window->DC.LastItemId && !g.ActiveIdAllowOverlap && g.ActiveId != window->MoveId) + return false; + + // Test if interactions on this window are blocked by an active popup or modal. + // The ImGuiHoveredFlags_AllowWhenBlockedByPopup flag will be tested here. + if (!IsWindowContentHoverable(window, flags)) + return false; + + // Test if the item is disabled + if ((window->DC.ItemFlags & ImGuiItemFlags_Disabled) && !(flags & ImGuiHoveredFlags_AllowWhenDisabled)) + return false; + + // Special handling for calling after Begin() which represent the title bar or tab. + // When the window is collapsed (SkipItems==true) that last item will never be overwritten so we need to detect the case. + if (window->DC.LastItemId == window->MoveId && window->WriteAccessed) + return false; + return true; +} + +// Internal facing ItemHoverable() used when submitting widgets. Differs slightly from IsItemHovered(). +bool ImGui::ItemHoverable(const ImRect& bb, ImGuiID id) +{ + ImGuiContext& g = *GImGui; + if (g.HoveredId != 0 && g.HoveredId != id && !g.HoveredIdAllowOverlap) + return false; + + ImGuiWindow* window = g.CurrentWindow; + if (g.HoveredWindow != window) + return false; + if (g.ActiveId != 0 && g.ActiveId != id && !g.ActiveIdAllowOverlap) + return false; + if (!IsMouseHoveringRect(bb.Min, bb.Max)) + return false; + if (g.NavDisableMouseHover) + return false; + if (!IsWindowContentHoverable(window, ImGuiHoveredFlags_None) || (window->DC.ItemFlags & ImGuiItemFlags_Disabled)) + { + g.HoveredIdDisabled = true; + return false; + } + + // We exceptionally allow this function to be called with id==0 to allow using it for easy high-level + // hover test in widgets code. We could also decide to split this function in two. + if (id != 0) + { + SetHoveredID(id); + + // [DEBUG] Item Picker tool!
+ // We perform the check here because SetHoveredID() is not frequently called (1~ time a frame), making + // the cost of this tool near-zero. We can get slightly better call-stack and support picking non-hovered + // items if we perform the test in ItemAdd(), but that would incur a small runtime cost. + // #define IMGUI_DEBUG_TOOL_ITEM_PICKER_EX in imconfig.h if you want this check to also be performed in ItemAdd(). + if (g.DebugItemPickerActive && g.HoveredIdPreviousFrame == id) + GetForegroundDrawList()->AddRect(bb.Min, bb.Max, IM_COL32(255, 255, 0, 255)); + if (g.DebugItemPickerBreakId == id) + IM_DEBUG_BREAK(); + } + + return true; +} + +// Return true when 'bb' does not overlap the window ClipRect and the item may be skipped. +// Never returns true for the active id or the nav id; while logging is enabled it only returns true if clip_even_when_logged is set. +bool ImGui::IsClippedEx(const ImRect& bb, ImGuiID id, bool clip_even_when_logged) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (!bb.Overlaps(window->ClipRect)) + if (id == 0 || (id != g.ActiveId && id != g.NavId)) + if (clip_even_when_logged || !g.LogEnabled) + return true; + return false; +} + +// This is also inlined in ItemAdd() +// Note: if ImGuiItemStatusFlags_HasDisplayRect is set, user needs to set window->DC.LastItemDisplayRect! +void ImGui::SetLastItemData(ImGuiWindow* window, ImGuiID item_id, ImGuiItemStatusFlags item_flags, const ImRect& item_rect) +{ + window->DC.LastItemId = item_id; + window->DC.LastItemStatusFlags = item_flags; + window->DC.LastItemRect = item_rect; +} + +// Process TAB/Shift+TAB. Be mindful that this function may _clear_ the ActiveID when tabbing out. +bool ImGui::FocusableItemRegister(ImGuiWindow* window, ImGuiID id) +{ + ImGuiContext& g = *GImGui; + + // Increment counters + const bool is_tab_stop = (window->DC.ItemFlags & (ImGuiItemFlags_NoTabStop | ImGuiItemFlags_Disabled)) == 0; + window->DC.FocusCounterRegular++; + if (is_tab_stop) + window->DC.FocusCounterTabStop++; + + // Process TAB/Shift-TAB to tab *OUT* of the currently focused item.
+ // (Note that we can always TAB out of a widget that doesn't allow tabbing in) + if (g.ActiveId == id && g.FocusTabPressed && !IsActiveIdUsingKey(ImGuiKey_Tab) && g.FocusRequestNextWindow == NULL) + { + g.FocusRequestNextWindow = window; + g.FocusRequestNextCounterTabStop = window->DC.FocusCounterTabStop + (g.IO.KeyShift ? (is_tab_stop ? -1 : 0) : +1); // Modulo on index will be applied at the end of frame once we've got the total counter of items. + } + + // Handle focus requests + if (g.FocusRequestCurrWindow == window) + { + if (window->DC.FocusCounterRegular == g.FocusRequestCurrCounterRegular) + return true; + if (is_tab_stop && window->DC.FocusCounterTabStop == g.FocusRequestCurrCounterTabStop) + { + g.NavJustTabbedId = id; + return true; + } + + // If another item is about to be focused, we clear our own active id + if (g.ActiveId == id) + ClearActiveID(); + } + + return false; +} + +// Decrement both focus counters of the window (FocusCounterRegular and FocusCounterTabStop). +void ImGui::FocusableItemUnregister(ImGuiWindow* window) +{ + window->DC.FocusCounterRegular--; + window->DC.FocusCounterTabStop--; +} + +// Compute the wrap width available starting at 'pos'. +// Returns 0.0f when wrap_pos_x < 0. When wrap_pos_x == 0 the window WorkRect.Max.x is used as wrap position; when > 0 it is converted from window local space. In those two cases the result is at least 1.0f. +float ImGui::CalcWrapWidthForPos(const ImVec2& pos, float wrap_pos_x) +{ + if (wrap_pos_x < 0.0f) + return 0.0f; + + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (wrap_pos_x == 0.0f) + { + // We could decide to setup a default wrapping max point for auto-resizing windows, + // or have auto-wrap (with unspecified wrapping pos) behave as a ContentSize extending function?
+ //if (window->Hidden && (window->Flags & ImGuiWindowFlags_AlwaysAutoResize)) + // wrap_pos_x = ImMax(window->WorkRect.Min.x + g.FontSize * 10.0f, window->WorkRect.Max.x); + //else + wrap_pos_x = window->WorkRect.Max.x; + } + else if (wrap_pos_x > 0.0f) + { + wrap_pos_x += window->Pos.x - window->Scroll.x; // wrap_pos_x is provided in window local space + } + + return ImMax(wrap_pos_x - pos.x, 1.0f); +} + +// IM_ALLOC() == ImGui::MemAlloc() +void* ImGui::MemAlloc(size_t size) +{ + if (ImGuiContext* ctx = GImGui) + ctx->IO.MetricsActiveAllocations++; + return GImAllocatorAllocFunc(size, GImAllocatorUserData); +} + +// IM_FREE() == ImGui::MemFree() +void ImGui::MemFree(void* ptr) +{ + if (ptr) + if (ImGuiContext* ctx = GImGui) + ctx->IO.MetricsActiveAllocations--; + return GImAllocatorFreeFunc(ptr, GImAllocatorUserData); +} + +// Return the clipboard text obtained through io.GetClipboardTextFn, or an empty string when no function is set. +const char* ImGui::GetClipboardText() +{ + ImGuiContext& g = *GImGui; + return g.IO.GetClipboardTextFn ? g.IO.GetClipboardTextFn(g.IO.ClipboardUserData) : ""; +} + +// Forward 'text' to io.SetClipboardTextFn when one is set; otherwise do nothing. +void ImGui::SetClipboardText(const char* text) +{ + ImGuiContext& g = *GImGui; + if (g.IO.SetClipboardTextFn) + g.IO.SetClipboardTextFn(g.IO.ClipboardUserData, text); +} + +const char* ImGui::GetVersion() +{ + return IMGUI_VERSION; +} + +// Internal state access - if you want to share Dear ImGui state between modules (e.g. DLL) or allocate it yourself +// Note that we still point to some static data and members (such as GFontAtlas), so the state instance you end up using will point to the static data within its module +ImGuiContext* ImGui::GetCurrentContext() +{ + return GImGui; +} + +void ImGui::SetCurrentContext(ImGuiContext* ctx) +{ +#ifdef IMGUI_SET_CURRENT_CONTEXT_FUNC + IMGUI_SET_CURRENT_CONTEXT_FUNC(ctx); // For custom thread-based hackery you may want to have control over this.
+#else + GImGui = ctx; +#endif +} + +void ImGui::SetAllocatorFunctions(void* (*alloc_func)(size_t sz, void* user_data), void (*free_func)(void* ptr, void* user_data), void* user_data) +{ + GImAllocatorAllocFunc = alloc_func; + GImAllocatorFreeFunc = free_func; + GImAllocatorUserData = user_data; +} + +ImGuiContext* ImGui::CreateContext(ImFontAtlas* shared_font_atlas) +{ + ImGuiContext* ctx = IM_NEW(ImGuiContext)(shared_font_atlas); + if (GImGui == NULL) + SetCurrentContext(ctx); + Initialize(ctx); + return ctx; +} + +void ImGui::DestroyContext(ImGuiContext* ctx) +{ + if (ctx == NULL) + ctx = GImGui; + Shutdown(ctx); + if (GImGui == ctx) + SetCurrentContext(NULL); + IM_DELETE(ctx); +} + +// No specific ordering/dependency support, will see as needed +// Appends a copy of 'hook' to the context's hook list and returns the newly assigned HookId. +ImGuiID ImGui::AddContextHook(ImGuiContext* ctx, const ImGuiContextHook* hook) +{ + ImGuiContext& g = *ctx; + IM_ASSERT(hook->Callback != NULL && hook->HookId == 0 && hook->Type != ImGuiContextHookType_PendingRemoval_); + g.Hooks.push_back(*hook); + g.Hooks.back().HookId = ++g.HookIdNext; + return g.HookIdNext; +} + +// Deferred removal, avoiding issue with changing vector while iterating it +void ImGui::RemoveContextHook(ImGuiContext* ctx, ImGuiID hook_id) +{ + ImGuiContext& g = *ctx; + IM_ASSERT(hook_id != 0); + for (int n = 0; n < g.Hooks.Size; n++) + if (g.Hooks[n].HookId == hook_id) + g.Hooks[n].Type = ImGuiContextHookType_PendingRemoval_; +} + +// Call context hooks (used by e.g. test engine) +// We assume a small number of hooks so all stored in same array +void ImGui::CallContextHooks(ImGuiContext* ctx, ImGuiContextHookType hook_type) +{ + ImGuiContext& g = *ctx; + for (int n = 0; n < g.Hooks.Size; n++) + if (g.Hooks[n].Type == hook_type) + g.Hooks[n].Callback(&g, &g.Hooks[n]); +} + +// Return the IO structure of the current context; asserts that a current context exists. +ImGuiIO& ImGui::GetIO() +{ + IM_ASSERT(GImGui != NULL && "No current context. Did you call ImGui::CreateContext() and ImGui::SetCurrentContext() ?"); + return GImGui->IO; +} + +// Pass this to your backend rendering function!
Valid after Render() and until the next call to NewFrame() +ImDrawData* ImGui::GetDrawData() +{ + ImGuiContext& g = *GImGui; + return g.DrawData.Valid ? &g.DrawData : NULL; +} + +double ImGui::GetTime() +{ + return GImGui->Time; +} + +int ImGui::GetFrameCount() +{ + return GImGui->FrameCount; +} + +ImDrawList* ImGui::GetBackgroundDrawList() +{ + return &GImGui->BackgroundDrawList; +} + +ImDrawList* ImGui::GetForegroundDrawList() +{ + return &GImGui->ForegroundDrawList; +} + +ImDrawListSharedData* ImGui::GetDrawListSharedData() +{ + return &GImGui->DrawListSharedData; +} + +void ImGui::StartMouseMovingWindow(ImGuiWindow* window) +{ + // Set ActiveId even if the _NoMove flag is set. Without it, dragging away from a window with _NoMove would activate hover on other windows. + // We _also_ call this when clicking in a window empty space when io.ConfigWindowsMoveFromTitleBarOnly is set, but clear g.MovingWindow afterward. + // This is because we want ActiveId to be set even when the window is not permitted to move. + ImGuiContext& g = *GImGui; + FocusWindow(window); + SetActiveID(window->MoveId, window); + g.NavDisableHighlight = true; + g.ActiveIdNoClearOnFocusLoss = true; + g.ActiveIdClickOffset = g.IO.MousePos - window->RootWindow->Pos; + + bool can_move_window = true; + if ((window->Flags & ImGuiWindowFlags_NoMove) || (window->RootWindow->Flags & ImGuiWindowFlags_NoMove)) + can_move_window = false; + if (can_move_window) + g.MovingWindow = window; +} + +// Handle mouse moving window +// Note: moving window with the navigation keys (Square + d-pad / CTRL+TAB + Arrows) are processed in NavUpdateWindowing() +// FIXME: We don't have a strong guarantee that g.MovingWindow stays synced with g.ActiveId == g.MovingWindow->MoveId.
+// This is currently enforced by the fact that BeginDragDropSource() is setting all g.ActiveIdUsingXXXX flags to inhibit navigation inputs, +// but we should more thoroughly test cases where g.ActiveId or g.MovingWindow gets changed and not the other. +void ImGui::UpdateMouseMovingWindowNewFrame() +{ + ImGuiContext& g = *GImGui; + if (g.MovingWindow != NULL) + { + // We actually want to move the root window. g.MovingWindow == window we clicked on (could be a child window). + // We track it to preserve Focus and so that generally ActiveIdWindow == MovingWindow and ActiveId == MovingWindow->MoveId for consistency. + KeepAliveID(g.ActiveId); + IM_ASSERT(g.MovingWindow && g.MovingWindow->RootWindow); + ImGuiWindow* moving_window = g.MovingWindow->RootWindow; + if (g.IO.MouseDown[0] && IsMousePosValid(&g.IO.MousePos)) + { + ImVec2 pos = g.IO.MousePos - g.ActiveIdClickOffset; + if (moving_window->Pos.x != pos.x || moving_window->Pos.y != pos.y) + { + MarkIniSettingsDirty(moving_window); + SetWindowPos(moving_window, pos, ImGuiCond_Always); + } + FocusWindow(g.MovingWindow); + } + else + { + ClearActiveID(); + g.MovingWindow = NULL; + } + } + else + { + // When clicking/dragging from a window that has the _NoMove flag, we still set the ActiveId in order to prevent hovering others. + if (g.ActiveIdWindow && g.ActiveIdWindow->MoveId == g.ActiveId) + { + KeepAliveID(g.ActiveId); + if (!g.IO.MouseDown[0]) + ClearActiveID(); + } + } +} + +// Initiate moving window when clicking on empty space or title bar. +// Handle left-click and right-click focus.
+void ImGui::UpdateMouseMovingWindowEndFrame() +{ + ImGuiContext& g = *GImGui; + if (g.ActiveId != 0 || g.HoveredId != 0) + return; + + // Unless we just made a window/popup appear + if (g.NavWindow && g.NavWindow->Appearing) + return; + + // Click on empty space to focus window and start moving (after we're done with all our widgets) + if (g.IO.MouseClicked[0]) + { + // Handle the edge case of a popup being closed while clicking in its empty space. + // If we try to focus it, FocusWindow() > ClosePopupsOverWindow() will accidentally close any parent popups because they are not linked together any more. + ImGuiWindow* root_window = g.HoveredRootWindow; + const bool is_closed_popup = root_window && (root_window->Flags & ImGuiWindowFlags_Popup) && !IsPopupOpen(root_window->PopupId, ImGuiPopupFlags_AnyPopupLevel); + + if (root_window != NULL && !is_closed_popup) + { + StartMouseMovingWindow(g.HoveredWindow); //-V595 + + // Cancel moving if clicked outside of title bar + if (g.IO.ConfigWindowsMoveFromTitleBarOnly && !(root_window->Flags & ImGuiWindowFlags_NoTitleBar)) + if (!root_window->TitleBarRect().Contains(g.IO.MouseClickedPos[0])) + g.MovingWindow = NULL; + + // Cancel moving if clicked over an item which was disabled or inhibited by popups (note that we know HoveredId == 0 already) + if (g.HoveredIdDisabled) + g.MovingWindow = NULL; + } + else if (root_window == NULL && g.NavWindow != NULL && GetTopMostPopupModal() == NULL) + { + // Clicking on void disables focus + FocusWindow(NULL); + } + } + + // With right mouse button we close popups without changing focus based on where the mouse is aimed + // Instead, focus will be restored to the window under the bottom-most closed popup. + // (The left mouse button path calls FocusWindow on the hovered window, which will lead NewFrame->ClosePopupsOverWindow to trigger) + if (g.IO.MouseClicked[1]) + { + // Find the top-most window between HoveredWindow and the top-most Modal Window.
+ // This is where we can trim the popup stack. + ImGuiWindow* modal = GetTopMostPopupModal(); + bool hovered_window_above_modal = g.HoveredWindow && IsWindowAbove(g.HoveredWindow, modal); + ClosePopupsOverWindow(hovered_window_above_modal ? g.HoveredWindow : modal, true); + } +} + +// Return true when the window is Active and not Hidden. +static bool IsWindowActiveAndVisible(ImGuiWindow* window) +{ + return (window->Active) && (!window->Hidden); +} + +// Per-frame update of the mouse state stored in g.IO: rounded mouse position, MouseDelta, and per-button clicked/released/double-clicked/down-duration/drag-distance data. +static void ImGui::UpdateMouseInputs() +{ + ImGuiContext& g = *GImGui; + + // Round mouse position to avoid spreading non-rounded position (e.g. UpdateManualResize doesn't support them well) + if (IsMousePosValid(&g.IO.MousePos)) + g.IO.MousePos = g.LastValidMousePos = ImFloor(g.IO.MousePos); + + // If mouse just appeared or disappeared (usually denoted by -FLT_MAX components) we cancel out movement in MouseDelta + if (IsMousePosValid(&g.IO.MousePos) && IsMousePosValid(&g.IO.MousePosPrev)) + g.IO.MouseDelta = g.IO.MousePos - g.IO.MousePosPrev; + else + g.IO.MouseDelta = ImVec2(0.0f, 0.0f); + if (g.IO.MouseDelta.x != 0.0f || g.IO.MouseDelta.y != 0.0f) + g.NavDisableMouseHover = false; + + g.IO.MousePosPrev = g.IO.MousePos; + for (int i = 0; i < IM_ARRAYSIZE(g.IO.MouseDown); i++) + { + g.IO.MouseClicked[i] = g.IO.MouseDown[i] && g.IO.MouseDownDuration[i] < 0.0f; + g.IO.MouseReleased[i] = !g.IO.MouseDown[i] && g.IO.MouseDownDuration[i] >= 0.0f; + g.IO.MouseDownDurationPrev[i] = g.IO.MouseDownDuration[i]; + g.IO.MouseDownDuration[i] = g.IO.MouseDown[i] ? (g.IO.MouseDownDuration[i] < 0.0f ? 0.0f : g.IO.MouseDownDuration[i] + g.IO.DeltaTime) : -1.0f; + g.IO.MouseDoubleClicked[i] = false; + if (g.IO.MouseClicked[i]) + { + if ((float)(g.Time - g.IO.MouseClickedTime[i]) < g.IO.MouseDoubleClickTime) + { + ImVec2 delta_from_click_pos = IsMousePosValid(&g.IO.MousePos) ?
(g.IO.MousePos - g.IO.MouseClickedPos[i]) : ImVec2(0.0f, 0.0f); + if (ImLengthSqr(delta_from_click_pos) < g.IO.MouseDoubleClickMaxDist * g.IO.MouseDoubleClickMaxDist) + g.IO.MouseDoubleClicked[i] = true; + g.IO.MouseClickedTime[i] = -g.IO.MouseDoubleClickTime * 2.0f; // Mark as "old enough" so the third click isn't turned into a double-click + } + else + { + g.IO.MouseClickedTime[i] = g.Time; + } + g.IO.MouseClickedPos[i] = g.IO.MousePos; + g.IO.MouseDownWasDoubleClick[i] = g.IO.MouseDoubleClicked[i]; + g.IO.MouseDragMaxDistanceAbs[i] = ImVec2(0.0f, 0.0f); + g.IO.MouseDragMaxDistanceSqr[i] = 0.0f; + } + else if (g.IO.MouseDown[i]) + { + // Maintain the maximum distance we reached from the initial click position, which is used with dragging threshold + ImVec2 delta_from_click_pos = IsMousePosValid(&g.IO.MousePos) ? (g.IO.MousePos - g.IO.MouseClickedPos[i]) : ImVec2(0.0f, 0.0f); + g.IO.MouseDragMaxDistanceSqr[i] = ImMax(g.IO.MouseDragMaxDistanceSqr[i], ImLengthSqr(delta_from_click_pos)); + g.IO.MouseDragMaxDistanceAbs[i].x = ImMax(g.IO.MouseDragMaxDistanceAbs[i].x, delta_from_click_pos.x < 0.0f ? -delta_from_click_pos.x : delta_from_click_pos.x); + g.IO.MouseDragMaxDistanceAbs[i].y = ImMax(g.IO.MouseDragMaxDistanceAbs[i].y, delta_from_click_pos.y < 0.0f ?
-delta_from_click_pos.y : delta_from_click_pos.y); + } + if (!g.IO.MouseDown[i] && !g.IO.MouseReleased[i]) + g.IO.MouseDownWasDoubleClick[i] = false; + if (g.IO.MouseClicked[i]) // Clicking any mouse button reactivates mouse hovering which may have been deactivated by gamepad/keyboard navigation + g.NavDisableMouseHover = false; + } +} + +// Lock mouse wheel input onto 'window': stores the current mouse position as reference and restarts the lock timer. Does nothing when 'window' is already the wheeling window. +static void StartLockWheelingWindow(ImGuiWindow* window) +{ + ImGuiContext& g = *GImGui; + if (g.WheelingWindow == window) + return; + g.WheelingWindow = window; + g.WheelingWindowRefMousePos = g.IO.MousePos; + g.WheelingWindowTimer = WINDOWS_MOUSE_WHEEL_SCROLL_LOCK_TIMER; +} + +// Apply this frame's mouse wheel input: maintain the wheeling-window lock, then either scale the window font (CTRL held and io.FontAllowUserScaling set) or scroll the target window vertically/horizontally. +void ImGui::UpdateMouseWheel() +{ + ImGuiContext& g = *GImGui; + + // Reset the locked window if we move the mouse or after the timer elapses + if (g.WheelingWindow != NULL) + { + g.WheelingWindowTimer -= g.IO.DeltaTime; + if (IsMousePosValid() && ImLengthSqr(g.IO.MousePos - g.WheelingWindowRefMousePos) > g.IO.MouseDragThreshold * g.IO.MouseDragThreshold) + g.WheelingWindowTimer = 0.0f; + if (g.WheelingWindowTimer <= 0.0f) + { + g.WheelingWindow = NULL; + g.WheelingWindowTimer = 0.0f; + } + } + + if (g.IO.MouseWheel == 0.0f && g.IO.MouseWheelH == 0.0f) + return; + + if ((g.ActiveId != 0 && g.ActiveIdUsingMouseWheel) || (g.HoveredIdPreviousFrame != 0 && g.HoveredIdPreviousFrameUsingMouseWheel)) + return; + + ImGuiWindow* window = g.WheelingWindow ? g.WheelingWindow : g.HoveredWindow; + if (!window || window->Collapsed) + return; + + // Zoom / Scale window + // FIXME-OBSOLETE: This is an old feature, it still works but pretty much nobody is using it and may be best redesigned.
+ if (g.IO.MouseWheel != 0.0f && g.IO.KeyCtrl && g.IO.FontAllowUserScaling) + { + StartLockWheelingWindow(window); + const float new_font_scale = ImClamp(window->FontWindowScale + g.IO.MouseWheel * 0.10f, 0.50f, 2.50f); + const float scale = new_font_scale / window->FontWindowScale; + window->FontWindowScale = new_font_scale; + if (!(window->Flags & ImGuiWindowFlags_ChildWindow)) + { + const ImVec2 offset = window->Size * (1.0f - scale) * (g.IO.MousePos - window->Pos) / window->Size; + SetWindowPos(window, window->Pos + offset, 0); + window->Size = ImFloor(window->Size * scale); + window->SizeFull = ImFloor(window->SizeFull * scale); + } + return; + } + + // Mouse wheel scrolling + // If a child window has the ImGuiWindowFlags_NoScrollWithMouse flag, we give a chance to scroll its parent + + // Vertical Mouse Wheel scrolling + const float wheel_y = (g.IO.MouseWheel != 0.0f && !g.IO.KeyShift) ? g.IO.MouseWheel : 0.0f; + if (wheel_y != 0.0f && !g.IO.KeyCtrl) + { + StartLockWheelingWindow(window); + while ((window->Flags & ImGuiWindowFlags_ChildWindow) && ((window->ScrollMax.y == 0.0f) || ((window->Flags & ImGuiWindowFlags_NoScrollWithMouse) && !(window->Flags & ImGuiWindowFlags_NoMouseInputs)))) + window = window->ParentWindow; + if (!(window->Flags & ImGuiWindowFlags_NoScrollWithMouse) && !(window->Flags & ImGuiWindowFlags_NoMouseInputs)) + { + float max_step = window->InnerRect.GetHeight() * 0.67f; + float scroll_step = ImFloor(ImMin(5 * window->CalcFontSize(), max_step)); + SetScrollY(window, window->Scroll.y - wheel_y * scroll_step); + } + } + + // Horizontal Mouse Wheel scrolling, or Vertical Mouse Wheel w/ Shift held + const float wheel_x = (g.IO.MouseWheelH != 0.0f && !g.IO.KeyShift) ? g.IO.MouseWheelH : (g.IO.MouseWheel != 0.0f && g.IO.KeyShift) ?
g.IO.MouseWheel : 0.0f; + if (wheel_x != 0.0f && !g.IO.KeyCtrl) + { + StartLockWheelingWindow(window); + while ((window->Flags & ImGuiWindowFlags_ChildWindow) && ((window->ScrollMax.x == 0.0f) || ((window->Flags & ImGuiWindowFlags_NoScrollWithMouse) && !(window->Flags & ImGuiWindowFlags_NoMouseInputs)))) + window = window->ParentWindow; + if (!(window->Flags & ImGuiWindowFlags_NoScrollWithMouse) && !(window->Flags & ImGuiWindowFlags_NoMouseInputs)) + { + float max_step = window->InnerRect.GetWidth() * 0.67f; + float scroll_step = ImFloor(ImMin(2 * window->CalcFontSize(), max_step)); + SetScrollX(window, window->Scroll.x - wheel_x * scroll_step); + } + } +} + +void ImGui::UpdateTabFocus() +{ + ImGuiContext& g = *GImGui; + + // Pressing TAB activate widget focus + g.FocusTabPressed = (g.NavWindow && g.NavWindow->Active && !(g.NavWindow->Flags & ImGuiWindowFlags_NoNavInputs) && !g.IO.KeyCtrl && IsKeyPressedMap(ImGuiKey_Tab)); + if (g.ActiveId == 0 && g.FocusTabPressed) + { + // Note that SetKeyboardFocusHere() sets the Next fields mid-frame. To be consistent we also + // manipulate the Next fields even, even though they will be turned into Curr fields by the code below. + g.FocusRequestNextWindow = g.NavWindow; + g.FocusRequestNextCounterRegular = INT_MAX; + if (g.NavId != 0 && g.NavIdTabCounter != INT_MAX) + g.FocusRequestNextCounterTabStop = g.NavIdTabCounter + 1 + (g.IO.KeyShift ? -1 : 1); + else + g.FocusRequestNextCounterTabStop = g.IO.KeyShift ?
-1 : 0; + } + + // Turn queued focus request into current one + g.FocusRequestCurrWindow = NULL; + g.FocusRequestCurrCounterRegular = g.FocusRequestCurrCounterTabStop = INT_MAX; + if (g.FocusRequestNextWindow != NULL) + { + ImGuiWindow* window = g.FocusRequestNextWindow; + g.FocusRequestCurrWindow = window; + if (g.FocusRequestNextCounterRegular != INT_MAX && window->DC.FocusCounterRegular != -1) + g.FocusRequestCurrCounterRegular = ImModPositive(g.FocusRequestNextCounterRegular, window->DC.FocusCounterRegular + 1); + if (g.FocusRequestNextCounterTabStop != INT_MAX && window->DC.FocusCounterTabStop != -1) + g.FocusRequestCurrCounterTabStop = ImModPositive(g.FocusRequestNextCounterTabStop, window->DC.FocusCounterTabStop + 1); + g.FocusRequestNextWindow = NULL; + g.FocusRequestNextCounterRegular = g.FocusRequestNextCounterTabStop = INT_MAX; + } + + g.NavIdTabCounter = INT_MAX; +} + +// The reason this is exposed in imgui_internal.h is: on touch-based system that don't have hovering, we want to dispatch inputs to the right target (imgui vs imgui+app) +void ImGui::UpdateHoveredWindowAndCaptureFlags() +{ + ImGuiContext& g = *GImGui; + + // Find the window hovered by mouse: + // - Child windows can extend beyond the limit of their parent so we need to derive HoveredRootWindow from HoveredWindow. + // - When moving a window we can skip the search, which also conveniently bypasses the fact that window->WindowRectClipped is lagging as this point of the frame. + // - We also support the moved window toggling the NoInputs flag after moving has started in order to be able to detect windows below it, which is useful for e.g. docking mechanisms. + bool clear_hovered_windows = false; + FindHoveredWindow(); + + // Modal windows prevents mouse from hovering behind them. + ImGuiWindow* modal_window = GetTopMostPopupModal(); + if (modal_window && g.HoveredRootWindow && !IsWindowChildOf(g.HoveredRootWindow, modal_window)) + clear_hovered_windows = true; + + // Disabled mouse? 
+ if (g.IO.ConfigFlags & ImGuiConfigFlags_NoMouse) + clear_hovered_windows = true; + + // We track click ownership. When clicked outside of a window the click is owned by the application and won't report hovering nor request capture even while dragging over our windows afterward. + int mouse_earliest_button_down = -1; + bool mouse_any_down = false; + for (int i = 0; i < IM_ARRAYSIZE(g.IO.MouseDown); i++) + { + if (g.IO.MouseClicked[i]) + g.IO.MouseDownOwned[i] = (g.HoveredWindow != NULL) || (g.OpenPopupStack.Size > 0); + mouse_any_down |= g.IO.MouseDown[i]; + if (g.IO.MouseDown[i]) + if (mouse_earliest_button_down == -1 || g.IO.MouseClickedTime[i] < g.IO.MouseClickedTime[mouse_earliest_button_down]) + mouse_earliest_button_down = i; + } + const bool mouse_avail_to_imgui = (mouse_earliest_button_down == -1) || g.IO.MouseDownOwned[mouse_earliest_button_down]; + + // If mouse was first clicked outside of ImGui bounds we also cancel out hovering. + // FIXME: For patterns of drag and drop across OS windows, we may need to rework/remove this test (first committed 311c0ca9 on 2015/02) + const bool mouse_dragging_extern_payload = g.DragDropActive && (g.DragDropSourceFlags & ImGuiDragDropFlags_SourceExtern) != 0; + if (!mouse_avail_to_imgui && !mouse_dragging_extern_payload) + clear_hovered_windows = true; + + if (clear_hovered_windows) + g.HoveredWindow = g.HoveredRootWindow = g.HoveredWindowUnderMovingWindow = NULL; + + // Update io.WantCaptureMouse for the user application (true = dispatch mouse info to imgui, false = dispatch mouse info to Dear ImGui + app) + if (g.WantCaptureMouseNextFrame != -1) + g.IO.WantCaptureMouse = (g.WantCaptureMouseNextFrame != 0); + else + g.IO.WantCaptureMouse = (mouse_avail_to_imgui && (g.HoveredWindow != NULL || mouse_any_down)) || (g.OpenPopupStack.Size > 0); + + // Update io.WantCaptureKeyboard for the user application (true = dispatch keyboard info to imgui, false = dispatch keyboard info to Dear ImGui + app) + if 
(g.WantCaptureKeyboardNextFrame != -1) + g.IO.WantCaptureKeyboard = (g.WantCaptureKeyboardNextFrame != 0); + else + g.IO.WantCaptureKeyboard = (g.ActiveId != 0) || (modal_window != NULL); + if (g.IO.NavActive && (g.IO.ConfigFlags & ImGuiConfigFlags_NavEnableKeyboard) && !(g.IO.ConfigFlags & ImGuiConfigFlags_NavNoCaptureKeyboard)) + g.IO.WantCaptureKeyboard = true; + + // Update io.WantTextInput flag, this is to allow systems without a keyboard (e.g. mobile, hand-held) to show a software keyboard if possible + g.IO.WantTextInput = (g.WantTextInputNextFrame != -1) ? (g.WantTextInputNextFrame != 0) : false; +} + +ImGuiKeyModFlags ImGui::GetMergedKeyModFlags() +{ + ImGuiContext& g = *GImGui; + ImGuiKeyModFlags key_mod_flags = ImGuiKeyModFlags_None; + if (g.IO.KeyCtrl) { key_mod_flags |= ImGuiKeyModFlags_Ctrl; } + if (g.IO.KeyShift) { key_mod_flags |= ImGuiKeyModFlags_Shift; } + if (g.IO.KeyAlt) { key_mod_flags |= ImGuiKeyModFlags_Alt; } + if (g.IO.KeySuper) { key_mod_flags |= ImGuiKeyModFlags_Super; } + return key_mod_flags; +} + +void ImGui::NewFrame() +{ + IM_ASSERT(GImGui != NULL && "No current context. Did you call ImGui::CreateContext() and ImGui::SetCurrentContext() ?"); + ImGuiContext& g = *GImGui; + + // Remove pending delete hooks before frame start. 
+ // This deferred removal avoid issues of removal while iterating the hook vector + for (int n = g.Hooks.Size - 1; n >= 0; n--) + if (g.Hooks[n].Type == ImGuiContextHookType_PendingRemoval_) + g.Hooks.erase(&g.Hooks[n]); + + CallContextHooks(&g, ImGuiContextHookType_NewFramePre); + + // Check and assert for various common IO and Configuration mistakes + ErrorCheckNewFrameSanityChecks(); + + // Load settings on first frame, save settings when modified (after a delay) + UpdateSettings(); + + g.Time += g.IO.DeltaTime; + g.WithinFrameScope = true; + g.FrameCount += 1; + g.TooltipOverrideCount = 0; + g.WindowsActiveCount = 0; + g.MenusIdSubmittedThisFrame.resize(0); + + // Calculate frame-rate for the user, as a purely luxurious feature + g.FramerateSecPerFrameAccum += g.IO.DeltaTime - g.FramerateSecPerFrame[g.FramerateSecPerFrameIdx]; + g.FramerateSecPerFrame[g.FramerateSecPerFrameIdx] = g.IO.DeltaTime; + g.FramerateSecPerFrameIdx = (g.FramerateSecPerFrameIdx + 1) % IM_ARRAYSIZE(g.FramerateSecPerFrame); + g.IO.Framerate = (g.FramerateSecPerFrameAccum > 0.0f) ? 
(1.0f / (g.FramerateSecPerFrameAccum / (float)IM_ARRAYSIZE(g.FramerateSecPerFrame))) : FLT_MAX; + + // Setup current font and draw list shared data + g.IO.Fonts->Locked = true; + SetCurrentFont(GetDefaultFont()); + IM_ASSERT(g.Font->IsLoaded()); + g.DrawListSharedData.ClipRectFullscreen = ImVec4(0.0f, 0.0f, g.IO.DisplaySize.x, g.IO.DisplaySize.y); + g.DrawListSharedData.CurveTessellationTol = g.Style.CurveTessellationTol; + g.DrawListSharedData.SetCircleSegmentMaxError(g.Style.CircleSegmentMaxError); + g.DrawListSharedData.InitialFlags = ImDrawListFlags_None; + if (g.Style.AntiAliasedLines) + g.DrawListSharedData.InitialFlags |= ImDrawListFlags_AntiAliasedLines; + if (g.Style.AntiAliasedLinesUseTex && !(g.Font->ContainerAtlas->Flags & ImFontAtlasFlags_NoBakedLines)) + g.DrawListSharedData.InitialFlags |= ImDrawListFlags_AntiAliasedLinesUseTex; + if (g.Style.AntiAliasedFill) + g.DrawListSharedData.InitialFlags |= ImDrawListFlags_AntiAliasedFill; + if (g.IO.BackendFlags & ImGuiBackendFlags_RendererHasVtxOffset) + g.DrawListSharedData.InitialFlags |= ImDrawListFlags_AllowVtxOffset; + + g.BackgroundDrawList._ResetForNewFrame(); + g.BackgroundDrawList.PushTextureID(g.IO.Fonts->TexID); + g.BackgroundDrawList.PushClipRectFullScreen(); + + g.ForegroundDrawList._ResetForNewFrame(); + g.ForegroundDrawList.PushTextureID(g.IO.Fonts->TexID); + g.ForegroundDrawList.PushClipRectFullScreen(); + + // Mark rendering data as invalid to prevent user who may have a handle on it to use it. 
+ g.DrawData.Clear(); + + // Drag and drop keep the source ID alive so even if the source disappear our state is consistent + if (g.DragDropActive && g.DragDropPayload.SourceId == g.ActiveId) + KeepAliveID(g.DragDropPayload.SourceId); + + // Update HoveredId data + if (!g.HoveredIdPreviousFrame) + g.HoveredIdTimer = 0.0f; + if (!g.HoveredIdPreviousFrame || (g.HoveredId && g.ActiveId == g.HoveredId)) + g.HoveredIdNotActiveTimer = 0.0f; + if (g.HoveredId) + g.HoveredIdTimer += g.IO.DeltaTime; + if (g.HoveredId && g.ActiveId != g.HoveredId) + g.HoveredIdNotActiveTimer += g.IO.DeltaTime; + g.HoveredIdPreviousFrame = g.HoveredId; + g.HoveredIdPreviousFrameUsingMouseWheel = g.HoveredIdUsingMouseWheel; + g.HoveredId = 0; + g.HoveredIdAllowOverlap = false; + g.HoveredIdUsingMouseWheel = false; + g.HoveredIdDisabled = false; + + // Update ActiveId data (clear reference to active widget if the widget isn't alive anymore) + if (g.ActiveIdIsAlive != g.ActiveId && g.ActiveIdPreviousFrame == g.ActiveId && g.ActiveId != 0) + ClearActiveID(); + if (g.ActiveId) + g.ActiveIdTimer += g.IO.DeltaTime; + g.LastActiveIdTimer += g.IO.DeltaTime; + g.ActiveIdPreviousFrame = g.ActiveId; + g.ActiveIdPreviousFrameWindow = g.ActiveIdWindow; + g.ActiveIdPreviousFrameHasBeenEditedBefore = g.ActiveIdHasBeenEditedBefore; + g.ActiveIdIsAlive = 0; + g.ActiveIdHasBeenEditedThisFrame = false; + g.ActiveIdPreviousFrameIsAlive = false; + g.ActiveIdIsJustActivated = false; + if (g.TempInputId != 0 && g.ActiveId != g.TempInputId) + g.TempInputId = 0; + if (g.ActiveId == 0) + { + g.ActiveIdUsingNavDirMask = 0x00; + g.ActiveIdUsingNavInputMask = 0x00; + g.ActiveIdUsingKeyInputMask = 0x00; + } + + // Drag and drop + g.DragDropAcceptIdPrev = g.DragDropAcceptIdCurr; + g.DragDropAcceptIdCurr = 0; + g.DragDropAcceptIdCurrRectSurface = FLT_MAX; + g.DragDropWithinSource = false; + g.DragDropWithinTarget = false; + g.DragDropHoldJustPressedId = 0; + + // Update keyboard input state + // Synchronize io.KeyMods with 
individual modifiers io.KeyXXX bools + g.IO.KeyMods = GetMergedKeyModFlags(); + memcpy(g.IO.KeysDownDurationPrev, g.IO.KeysDownDuration, sizeof(g.IO.KeysDownDuration)); + for (int i = 0; i < IM_ARRAYSIZE(g.IO.KeysDown); i++) + g.IO.KeysDownDuration[i] = g.IO.KeysDown[i] ? (g.IO.KeysDownDuration[i] < 0.0f ? 0.0f : g.IO.KeysDownDuration[i] + g.IO.DeltaTime) : -1.0f; + + // Update gamepad/keyboard navigation + NavUpdate(); + + // Update mouse input state + UpdateMouseInputs(); + + // Find hovered window + // (needs to be before UpdateMouseMovingWindowNewFrame so we fill g.HoveredWindowUnderMovingWindow on the mouse release frame) + UpdateHoveredWindowAndCaptureFlags(); + + // Handle user moving window with mouse (at the beginning of the frame to avoid input lag or sheering) + UpdateMouseMovingWindowNewFrame(); + + // Background darkening/whitening + if (GetTopMostPopupModal() != NULL || (g.NavWindowingTarget != NULL && g.NavWindowingHighlightAlpha > 0.0f)) + g.DimBgRatio = ImMin(g.DimBgRatio + g.IO.DeltaTime * 6.0f, 1.0f); + else + g.DimBgRatio = ImMax(g.DimBgRatio - g.IO.DeltaTime * 10.0f, 0.0f); + + g.MouseCursor = ImGuiMouseCursor_Arrow; + g.WantCaptureMouseNextFrame = g.WantCaptureKeyboardNextFrame = g.WantTextInputNextFrame = -1; + g.PlatformImePos = ImVec2(1.0f, 1.0f); // OS Input Method Editor showing on top-left of our window by default + + // Mouse wheel scrolling, scale + UpdateMouseWheel(); + + // Update legacy TAB focus + UpdateTabFocus(); + + // Mark all windows as not visible and compact unused memory. + IM_ASSERT(g.WindowsFocusOrder.Size == g.Windows.Size); + const float memory_compact_start_time = (g.GcCompactAll || g.IO.ConfigMemoryCompactTimer < 0.0f) ? 
FLT_MAX : (float)g.Time - g.IO.ConfigMemoryCompactTimer; + for (int i = 0; i != g.Windows.Size; i++) + { + ImGuiWindow* window = g.Windows[i]; + window->WasActive = window->Active; + window->BeginCount = 0; + window->Active = false; + window->WriteAccessed = false; + + // Garbage collect transient buffers of recently unused windows + if (!window->WasActive && !window->MemoryCompacted && window->LastTimeActive < memory_compact_start_time) + GcCompactTransientWindowBuffers(window); + } + + // Garbage collect transient buffers of recently unused tables + for (int i = 0; i < g.TablesLastTimeActive.Size; i++) + if (g.TablesLastTimeActive[i] >= 0.0f && g.TablesLastTimeActive[i] < memory_compact_start_time) + TableGcCompactTransientBuffers(g.Tables.GetByIndex(i)); + if (g.GcCompactAll) + GcCompactTransientMiscBuffers(); + g.GcCompactAll = false; + + // Closing the focused window restore focus to the first active root window in descending z-order + if (g.NavWindow && !g.NavWindow->WasActive) + FocusTopMostWindowUnderOne(NULL, NULL); + + // No window should be open at the beginning of the frame. + // But in order to allow the user to call NewFrame() multiple times without calling Render(), we are doing an explicit clear. + g.CurrentWindowStack.resize(0); + g.BeginPopupStack.resize(0); + g.ItemFlagsStack.resize(0); + g.ItemFlagsStack.push_back(ImGuiItemFlags_Default_); + g.GroupStack.resize(0); + ClosePopupsOverWindow(g.NavWindow, false); + + // [DEBUG] Item picker tool - start with DebugStartItemPicker() - useful to visually select an item and break into its call-stack. + UpdateDebugToolItemPicker(); + + // Create implicit/fallback window - which we will only render it if the user has added something to it. + // We don't use "Debug" to avoid colliding with user trying to create a "Debug" window with custom flags. + // This fallback is particularly important as it avoid ImGui:: calls from crashing. 
+ g.WithinFrameScopeWithImplicitWindow = true; + SetNextWindowSize(ImVec2(400, 400), ImGuiCond_FirstUseEver); + Begin("Debug##Default"); + IM_ASSERT(g.CurrentWindow->IsFallbackWindow == true); + + CallContextHooks(&g, ImGuiContextHookType_NewFramePost); +} + +// [DEBUG] Item picker tool - start with DebugStartItemPicker() - useful to visually select an item and break into its call-stack. +void ImGui::UpdateDebugToolItemPicker() +{ + ImGuiContext& g = *GImGui; + g.DebugItemPickerBreakId = 0; + if (g.DebugItemPickerActive) + { + const ImGuiID hovered_id = g.HoveredIdPreviousFrame; + ImGui::SetMouseCursor(ImGuiMouseCursor_Hand); + if (ImGui::IsKeyPressedMap(ImGuiKey_Escape)) + g.DebugItemPickerActive = false; + if (ImGui::IsMouseClicked(0) && hovered_id) + { + g.DebugItemPickerBreakId = hovered_id; + g.DebugItemPickerActive = false; + } + ImGui::SetNextWindowBgAlpha(0.60f); + ImGui::BeginTooltip(); + ImGui::Text("HoveredId: 0x%08X", hovered_id); + ImGui::Text("Press ESC to abort picking."); + ImGui::TextColored(GetStyleColorVec4(hovered_id ? 
ImGuiCol_Text : ImGuiCol_TextDisabled), "Click to break in debugger!"); + ImGui::EndTooltip(); + } +} + +void ImGui::Initialize(ImGuiContext* context) +{ + ImGuiContext& g = *context; + IM_ASSERT(!g.Initialized && !g.SettingsLoaded); + + // Add .ini handle for ImGuiWindow type + { + ImGuiSettingsHandler ini_handler; + ini_handler.TypeName = "Window"; + ini_handler.TypeHash = ImHashStr("Window"); + ini_handler.ClearAllFn = WindowSettingsHandler_ClearAll; + ini_handler.ReadOpenFn = WindowSettingsHandler_ReadOpen; + ini_handler.ReadLineFn = WindowSettingsHandler_ReadLine; + ini_handler.ApplyAllFn = WindowSettingsHandler_ApplyAll; + ini_handler.WriteAllFn = WindowSettingsHandler_WriteAll; + g.SettingsHandlers.push_back(ini_handler); + } + +#ifdef IMGUI_HAS_TABLE + // Add .ini handle for ImGuiTable type + TableSettingsInstallHandler(context); +#endif // #ifdef IMGUI_HAS_TABLE + +#ifdef IMGUI_HAS_DOCK +#endif // #ifdef IMGUI_HAS_DOCK + + g.Initialized = true; +} + +// This function is merely here to free heap allocations. +void ImGui::Shutdown(ImGuiContext* context) +{ + // The fonts atlas can be used prior to calling NewFrame(), so we clear it even if g.Initialized is FALSE (which would happen if we never called NewFrame) + ImGuiContext& g = *context; + if (g.IO.Fonts && g.FontAtlasOwnedByContext) + { + g.IO.Fonts->Locked = false; + IM_DELETE(g.IO.Fonts); + } + g.IO.Fonts = NULL; + + // Cleanup of other data are conditional on actually having initialized Dear ImGui. 
+ if (!g.Initialized) + return; + + // Save settings (unless we haven't attempted to load them: CreateContext/DestroyContext without a call to NewFrame shouldn't save an empty file) + if (g.SettingsLoaded && g.IO.IniFilename != NULL) + { + ImGuiContext* backup_context = GImGui; + SetCurrentContext(&g); + SaveIniSettingsToDisk(g.IO.IniFilename); + SetCurrentContext(backup_context); + } + + CallContextHooks(&g, ImGuiContextHookType_Shutdown); + + // Clear everything else + for (int i = 0; i < g.Windows.Size; i++) + IM_DELETE(g.Windows[i]); + g.Windows.clear(); + g.WindowsFocusOrder.clear(); + g.WindowsTempSortBuffer.clear(); + g.CurrentWindow = NULL; + g.CurrentWindowStack.clear(); + g.WindowsById.Clear(); + g.NavWindow = NULL; + g.HoveredWindow = g.HoveredRootWindow = g.HoveredWindowUnderMovingWindow = NULL; + g.ActiveIdWindow = g.ActiveIdPreviousFrameWindow = NULL; + g.MovingWindow = NULL; + g.ColorStack.clear(); + g.StyleVarStack.clear(); + g.FontStack.clear(); + g.OpenPopupStack.clear(); + g.BeginPopupStack.clear(); + g.DrawDataBuilder.ClearFreeMemory(); + g.BackgroundDrawList._ClearFreeMemory(); + g.ForegroundDrawList._ClearFreeMemory(); + + g.TabBars.Clear(); + g.CurrentTabBarStack.clear(); + g.ShrinkWidthBuffer.clear(); + + g.Tables.Clear(); + g.CurrentTableStack.clear(); + g.DrawChannelsTempMergeBuffer.clear(); + + g.ClipboardHandlerData.clear(); + g.MenusIdSubmittedThisFrame.clear(); + g.InputTextState.ClearFreeMemory(); + + g.SettingsWindows.clear(); + g.SettingsHandlers.clear(); + + if (g.LogFile) + { +#ifndef IMGUI_DISABLE_TTY_FUNCTIONS + if (g.LogFile != stdout) +#endif + ImFileClose(g.LogFile); + g.LogFile = NULL; + } + g.LogBuffer.clear(); + + g.Initialized = false; +} + +// FIXME: Add a more explicit sort order in the window structure. 
+static int IMGUI_CDECL ChildWindowComparer(const void* lhs, const void* rhs) +{ + const ImGuiWindow* const a = *(const ImGuiWindow* const *)lhs; + const ImGuiWindow* const b = *(const ImGuiWindow* const *)rhs; + if (int d = (a->Flags & ImGuiWindowFlags_Popup) - (b->Flags & ImGuiWindowFlags_Popup)) + return d; + if (int d = (a->Flags & ImGuiWindowFlags_Tooltip) - (b->Flags & ImGuiWindowFlags_Tooltip)) + return d; + return (a->BeginOrderWithinParent - b->BeginOrderWithinParent); +} + +static void AddWindowToSortBuffer(ImVector* out_sorted_windows, ImGuiWindow* window) +{ + out_sorted_windows->push_back(window); + if (window->Active) + { + int count = window->DC.ChildWindows.Size; + if (count > 1) + ImQsort(window->DC.ChildWindows.Data, (size_t)count, sizeof(ImGuiWindow*), ChildWindowComparer); + for (int i = 0; i < count; i++) + { + ImGuiWindow* child = window->DC.ChildWindows[i]; + if (child->Active) + AddWindowToSortBuffer(out_sorted_windows, child); + } + } +} + +static void AddDrawListToDrawData(ImVector* out_list, ImDrawList* draw_list) +{ + // Remove trailing command if unused. + // Technically we could return directly instead of popping, but this make things looks neat in Metrics/Debugger window as well. + draw_list->_PopUnusedDrawCmd(); + if (draw_list->CmdBuffer.Size == 0) + return; + + // Draw list sanity check. Detect mismatch between PrimReserve() calls and incrementing _VtxCurrentIdx, _VtxWritePtr etc. + // May trigger for you if you are using PrimXXX functions incorrectly. 
+ IM_ASSERT(draw_list->VtxBuffer.Size == 0 || draw_list->_VtxWritePtr == draw_list->VtxBuffer.Data + draw_list->VtxBuffer.Size); + IM_ASSERT(draw_list->IdxBuffer.Size == 0 || draw_list->_IdxWritePtr == draw_list->IdxBuffer.Data + draw_list->IdxBuffer.Size); + if (!(draw_list->Flags & ImDrawListFlags_AllowVtxOffset)) + IM_ASSERT((int)draw_list->_VtxCurrentIdx == draw_list->VtxBuffer.Size); + + // Check that draw_list doesn't use more vertices than indexable (default ImDrawIdx = unsigned short = 2 bytes = 64K vertices per ImDrawList = per window) + // If this assert triggers because you are drawing lots of stuff manually: + // - First, make sure you are coarse clipping yourself and not trying to draw many things outside visible bounds. + // Be mindful that the ImDrawList API doesn't filter vertices. Use the Metrics/Debugger window to inspect draw list contents. + // - If you want large meshes with more than 64K vertices, you can either: + // (A) Handle the ImDrawCmd::VtxOffset value in your renderer backend, and set 'io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset'. + // Most example backends already support this from 1.71. Pre-1.71 backends won't. + // Some graphics API such as GL ES 1/2 don't have a way to offset the starting vertex so it is not supported for them. + // (B) Or handle 32-bit indices in your renderer backend, and uncomment '#define ImDrawIdx unsigned int' line in imconfig.h. + // Most example backends already support this. For example, the OpenGL example code detect index size at compile-time: + // glDrawElements(GL_TRIANGLES, (GLsizei)pcmd->ElemCount, sizeof(ImDrawIdx) == 2 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_INT, idx_buffer_offset); + // Your own engine or render API may use different parameters or function calls to specify index sizes. + // 2 and 4 bytes indices are generally supported by most graphics API. 
+ // - If for some reason neither of those solutions works for you, a workaround is to call BeginChild()/EndChild() before reaching + // the 64K limit to split your draw commands in multiple draw lists. + if (sizeof(ImDrawIdx) == 2) + IM_ASSERT(draw_list->_VtxCurrentIdx < (1 << 16) && "Too many vertices in ImDrawList using 16-bit indices. Read comment above"); + + out_list->push_back(draw_list); +} + +static void AddWindowToDrawData(ImVector* out_render_list, ImGuiWindow* window) +{ + ImGuiContext& g = *GImGui; + g.IO.MetricsRenderWindows++; + AddDrawListToDrawData(out_render_list, window->DrawList); + for (int i = 0; i < window->DC.ChildWindows.Size; i++) + { + ImGuiWindow* child = window->DC.ChildWindows[i]; + if (IsWindowActiveAndVisible(child)) // clipped children may have been marked not active + AddWindowToDrawData(out_render_list, child); + } +} + +// Layer is locked for the root window, however child windows may use a different viewport (e.g. extruding menu) +static void AddRootWindowToDrawData(ImGuiWindow* window) +{ + ImGuiContext& g = *GImGui; + int layer = (window->Flags & ImGuiWindowFlags_Tooltip) ? 1 : 0; + AddWindowToDrawData(&g.DrawDataBuilder.Layers[layer], window); +} + +void ImDrawDataBuilder::FlattenIntoSingleLayer() +{ + int n = Layers[0].Size; + int size = n; + for (int i = 1; i < IM_ARRAYSIZE(Layers); i++) + size += Layers[i].Size; + Layers[0].resize(size); + for (int layer_n = 1; layer_n < IM_ARRAYSIZE(Layers); layer_n++) + { + ImVector& layer = Layers[layer_n]; + if (layer.empty()) + continue; + memcpy(&Layers[0][n], &layer[0], layer.Size * sizeof(ImDrawList*)); + n += layer.Size; + layer.resize(0); + } +} + +static void SetupDrawData(ImVector* draw_lists, ImDrawData* draw_data) +{ + ImGuiIO& io = ImGui::GetIO(); + draw_data->Valid = true; + draw_data->CmdLists = (draw_lists->Size > 0) ? 
draw_lists->Data : NULL; + draw_data->CmdListsCount = draw_lists->Size; + draw_data->TotalVtxCount = draw_data->TotalIdxCount = 0; + draw_data->DisplayPos = ImVec2(0.0f, 0.0f); + draw_data->DisplaySize = io.DisplaySize; + draw_data->FramebufferScale = io.DisplayFramebufferScale; + for (int n = 0; n < draw_lists->Size; n++) + { + draw_data->TotalVtxCount += draw_lists->Data[n]->VtxBuffer.Size; + draw_data->TotalIdxCount += draw_lists->Data[n]->IdxBuffer.Size; + } +} + +// Push a clipping rectangle for both ImGui logic (hit-testing etc.) and low-level ImDrawList rendering. +// - When using this function it is sane to ensure that float are perfectly rounded to integer values, +// so that e.g. (int)(max.x-min.x) in user's render produce correct result. +// - If the code here changes, may need to update code of functions like NextColumn() and PushColumnClipRect(): +// some frequently called functions which to modify both channels and clipping simultaneously tend to use the +// more specialized SetWindowClipRectBeforeSetChannel() to avoid extraneous updates of underlying ImDrawCmds. +void ImGui::PushClipRect(const ImVec2& clip_rect_min, const ImVec2& clip_rect_max, bool intersect_with_current_clip_rect) +{ + ImGuiWindow* window = GetCurrentWindow(); + window->DrawList->PushClipRect(clip_rect_min, clip_rect_max, intersect_with_current_clip_rect); + window->ClipRect = window->DrawList->_ClipRectStack.back(); +} + +void ImGui::PopClipRect() +{ + ImGuiWindow* window = GetCurrentWindow(); + window->DrawList->PopClipRect(); + window->ClipRect = window->DrawList->_ClipRectStack.back(); +} + +// This is normally called by Render(). You may want to call it directly if you want to avoid calling Render() but the gain will be very minimal. +void ImGui::EndFrame() +{ + ImGuiContext& g = *GImGui; + IM_ASSERT(g.Initialized); + + // Don't process EndFrame() multiple times. 
+ if (g.FrameCountEnded == g.FrameCount) + return; + IM_ASSERT(g.WithinFrameScope && "Forgot to call ImGui::NewFrame()?"); + + CallContextHooks(&g, ImGuiContextHookType_EndFramePre); + + ErrorCheckEndFrameSanityChecks(); + + // Notify OS when our Input Method Editor cursor has moved (e.g. CJK inputs using Microsoft IME) + if (g.IO.ImeSetInputScreenPosFn && (g.PlatformImeLastPos.x == FLT_MAX || ImLengthSqr(g.PlatformImeLastPos - g.PlatformImePos) > 0.0001f)) + { + g.IO.ImeSetInputScreenPosFn((int)g.PlatformImePos.x, (int)g.PlatformImePos.y); + g.PlatformImeLastPos = g.PlatformImePos; + } + + // Hide implicit/fallback "Debug" window if it hasn't been used + g.WithinFrameScopeWithImplicitWindow = false; + if (g.CurrentWindow && !g.CurrentWindow->WriteAccessed) + g.CurrentWindow->Active = false; + End(); + + // Update navigation: CTRL+Tab, wrap-around requests + NavEndFrame(); + + // Drag and Drop: Elapse payload (if delivered, or if source stops being submitted) + if (g.DragDropActive) + { + bool is_delivered = g.DragDropPayload.Delivery; + bool is_elapsed = (g.DragDropPayload.DataFrameCount + 1 < g.FrameCount) && ((g.DragDropSourceFlags & ImGuiDragDropFlags_SourceAutoExpirePayload) || !IsMouseDown(g.DragDropMouseButton)); + if (is_delivered || is_elapsed) + ClearDragDrop(); + } + + // Drag and Drop: Fallback for source tooltip. This is not ideal but better than nothing. 
+ if (g.DragDropActive && g.DragDropSourceFrameCount < g.FrameCount && !(g.DragDropSourceFlags & ImGuiDragDropFlags_SourceNoPreviewTooltip)) + { + g.DragDropWithinSource = true; + SetTooltip("..."); + g.DragDropWithinSource = false; + } + + // End frame + g.WithinFrameScope = false; + g.FrameCountEnded = g.FrameCount; + + // Initiate moving window + handle left-click and right-click focus + UpdateMouseMovingWindowEndFrame(); + + // Sort the window list so that all child windows are after their parent + // We cannot do that on FocusWindow() because children may not exist yet + g.WindowsTempSortBuffer.resize(0); + g.WindowsTempSortBuffer.reserve(g.Windows.Size); + for (int i = 0; i != g.Windows.Size; i++) + { + ImGuiWindow* window = g.Windows[i]; + if (window->Active && (window->Flags & ImGuiWindowFlags_ChildWindow)) // if a child is active its parent will add it + continue; + AddWindowToSortBuffer(&g.WindowsTempSortBuffer, window); + } + + // This usually assert if there is a mismatch between the ImGuiWindowFlags_ChildWindow / ParentWindow values and DC.ChildWindows[] in parents, aka we've done something wrong. 
+ IM_ASSERT(g.Windows.Size == g.WindowsTempSortBuffer.Size); + g.Windows.swap(g.WindowsTempSortBuffer); + g.IO.MetricsActiveWindows = g.WindowsActiveCount; + + // Unlock font atlas + g.IO.Fonts->Locked = false; + + // Clear Input data for next frame + g.IO.MouseWheel = g.IO.MouseWheelH = 0.0f; + g.IO.InputQueueCharacters.resize(0); + memset(g.IO.NavInputs, 0, sizeof(g.IO.NavInputs)); + + CallContextHooks(&g, ImGuiContextHookType_EndFramePost); +} + +void ImGui::Render() +{ + ImGuiContext& g = *GImGui; + IM_ASSERT(g.Initialized); + + if (g.FrameCountEnded != g.FrameCount) + EndFrame(); + g.FrameCountRendered = g.FrameCount; + g.IO.MetricsRenderWindows = 0; + g.DrawDataBuilder.Clear(); + + CallContextHooks(&g, ImGuiContextHookType_RenderPre); + + // Add background ImDrawList + if (!g.BackgroundDrawList.VtxBuffer.empty()) + AddDrawListToDrawData(&g.DrawDataBuilder.Layers[0], &g.BackgroundDrawList); + + // Add ImDrawList to render + ImGuiWindow* windows_to_render_top_most[2]; + windows_to_render_top_most[0] = (g.NavWindowingTarget && !(g.NavWindowingTarget->Flags & ImGuiWindowFlags_NoBringToFrontOnFocus)) ? g.NavWindowingTarget->RootWindow : NULL; + windows_to_render_top_most[1] = (g.NavWindowingTarget ? 
g.NavWindowingListWindow : NULL); + for (int n = 0; n != g.Windows.Size; n++) + { + ImGuiWindow* window = g.Windows[n]; + if (IsWindowActiveAndVisible(window) && (window->Flags & ImGuiWindowFlags_ChildWindow) == 0 && window != windows_to_render_top_most[0] && window != windows_to_render_top_most[1]) + AddRootWindowToDrawData(window); + } + for (int n = 0; n < IM_ARRAYSIZE(windows_to_render_top_most); n++) + if (windows_to_render_top_most[n] && IsWindowActiveAndVisible(windows_to_render_top_most[n])) // NavWindowingTarget is always temporarily displayed as the top-most window + AddRootWindowToDrawData(windows_to_render_top_most[n]); + g.DrawDataBuilder.FlattenIntoSingleLayer(); + + // Draw software mouse cursor if requested + if (g.IO.MouseDrawCursor) + RenderMouseCursor(&g.ForegroundDrawList, g.IO.MousePos, g.Style.MouseCursorScale, g.MouseCursor, IM_COL32_WHITE, IM_COL32_BLACK, IM_COL32(0, 0, 0, 48)); + + // Add foreground ImDrawList + if (!g.ForegroundDrawList.VtxBuffer.empty()) + AddDrawListToDrawData(&g.DrawDataBuilder.Layers[0], &g.ForegroundDrawList); + + // Setup ImDrawData structure for end-user + SetupDrawData(&g.DrawDataBuilder.Layers[0], &g.DrawData); + g.IO.MetricsRenderVertices = g.DrawData.TotalVtxCount; + g.IO.MetricsRenderIndices = g.DrawData.TotalIdxCount; + + CallContextHooks(&g, ImGuiContextHookType_RenderPost); +} + +// Calculate text size. Text can be multi-line. Optionally ignore text after a ## marker. 
+// CalcTextSize("") should return ImVec2(0.0f, g.FontSize)
+// Measures the range [text, text_end) with the context's current font (g.Font) at g.FontSize.
+// - hide_text_after_double_hash: when true, the measured range ends where FindRenderedTextEnd(text, text_end) says it does.
+// - wrap_width: forwarded unchanged to ImFont::CalcTextSizeA().
+// Returns the measured size; only the width is rounded, using IM_FLOOR(x + 0.95f).
+ImVec2 ImGui::CalcTextSize(const char* text, const char* text_end, bool hide_text_after_double_hash, float wrap_width)
+{
+    ImGuiContext& g = *GImGui;
+
+    const char* text_display_end;
+    if (hide_text_after_double_hash)
+        text_display_end = FindRenderedTextEnd(text, text_end);      // Hide anything after a '##' string
+    else
+        text_display_end = text_end;
+
+    ImFont* font = g.Font;
+    const float font_size = g.FontSize;
+    // Nothing to measure: zero width, one font size high
+    if (text == text_display_end)
+        return ImVec2(0.0f, font_size);
+    ImVec2 text_size = font->CalcTextSizeA(font_size, FLT_MAX, wrap_width, text, text_display_end, NULL);
+
+    // Round
+    text_size.x = IM_FLOOR(text_size.x + 0.95f);
+
+    return text_size;
+}
+
+// Find window given position, search front-to-back
+// FIXME: Note that we have an inconsequential lag here: OuterRectClipped is updated in Begin(), so windows moved programmatically
+// with SetWindowPos() and not SetNextWindowPos() will have that rectangle lagging by a frame at the time FindHoveredWindow() is
+// called, aka before the next Begin(). Moving window isn't affected.
+// Results are written to g.HoveredWindow, g.HoveredRootWindow and g.HoveredWindowUnderMovingWindow.
+static void FindHoveredWindow()
+{
+    ImGuiContext& g = *GImGui;
+
+    ImGuiWindow* hovered_window = NULL;
+    ImGuiWindow* hovered_window_ignoring_moving_window = NULL;
+    // A window being moved is taken as hovered up front, unless it ignores mouse inputs
+    if (g.MovingWindow && !(g.MovingWindow->Flags & ImGuiWindowFlags_NoMouseInputs))
+        hovered_window = g.MovingWindow;
+
+    ImVec2 padding_regular = g.Style.TouchExtraPadding;
+    ImVec2 padding_for_resize_from_edges = g.IO.ConfigWindowsResizeFromEdges ? ImMax(g.Style.TouchExtraPadding, ImVec2(WINDOWS_RESIZE_FROM_EDGES_HALF_THICKNESS, WINDOWS_RESIZE_FROM_EDGES_HALF_THICKNESS)) : padding_regular;
+    // Walk g.Windows from its last entry down to its first
+    for (int i = g.Windows.Size - 1; i >= 0; i--)
+    {
+        ImGuiWindow* window = g.Windows[i];
+        // Skip inactive or hidden windows, and windows that ignore mouse inputs
+        if (!window->Active || window->Hidden)
+            continue;
+        if (window->Flags & ImGuiWindowFlags_NoMouseInputs)
+            continue;
+
+        // Using the clipped AABB, a child window will typically be clipped by its parent (not always)
+        ImRect bb(window->OuterRectClipped);
+        // Child, non-resizable and auto-resizing windows only get the regular padding; others get the resize-from-edges padding
+        if (window->Flags & (ImGuiWindowFlags_ChildWindow | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_AlwaysAutoResize))
+            bb.Expand(padding_regular);
+        else
+            bb.Expand(padding_for_resize_from_edges);
+        if (!bb.Contains(g.IO.MousePos))
+            continue;
+
+        // Support for one rectangular hole in any given window
+        // FIXME: Consider generalizing hit-testing override (with more generic data, callback, etc.) (#1512)
+        if (window->HitTestHoleSize.x != 0)
+        {
+            ImVec2 hole_pos(window->Pos.x + (float)window->HitTestHoleOffset.x, window->Pos.y + (float)window->HitTestHoleOffset.y);
+            ImVec2 hole_size((float)window->HitTestHoleSize.x, (float)window->HitTestHoleSize.y);
+            if (ImRect(hole_pos, hole_pos + hole_size).Contains(g.IO.MousePos))
+                continue;
+        }
+
+        // First hit wins for each result; the second result skips windows sharing the moving window's root
+        if (hovered_window == NULL)
+            hovered_window = window;
+        if (hovered_window_ignoring_moving_window == NULL && (!g.MovingWindow || window->RootWindow != g.MovingWindow->RootWindow))
+            hovered_window_ignoring_moving_window = window;
+        // Both results are known: the remaining windows cannot change them
+        if (hovered_window && hovered_window_ignoring_moving_window)
+            break;
+    }
+
+    g.HoveredWindow = hovered_window;
+    g.HoveredRootWindow = g.HoveredWindow ? g.HoveredWindow->RootWindow : NULL;
+    g.HoveredWindowUnderMovingWindow = hovered_window_ignoring_moving_window;
+}
+
+// Test if mouse cursor is hovering given rectangle
+// NB- Rectangle is clipped by our current clip setting
+// NB- Expand the rectangle to be generous on imprecise inputs systems (g.Style.TouchExtraPadding)
+// Returns true when g.IO.MousePos lies inside the (optionally clipped, then expanded) rectangle.
+bool ImGui::IsMouseHoveringRect(const ImVec2& r_min, const ImVec2& r_max, bool clip)
+{
+    ImGuiContext& g = *GImGui;
+
+    // Clip
+    ImRect rect_clipped(r_min, r_max);
+    if (clip)
+        rect_clipped.ClipWith(g.CurrentWindow->ClipRect);
+
+    // Expand for touch input
+    const ImRect rect_for_touch(rect_clipped.Min - g.Style.TouchExtraPadding, rect_clipped.Max + g.Style.TouchExtraPadding);
+    if (!rect_for_touch.Contains(g.IO.MousePos))
+        return false;
+    return true;
+}
+
+// Returns the io.KeyMap[] entry for imgui_key. Asserts that imgui_key is within [0, ImGuiKey_COUNT).
+int ImGui::GetKeyIndex(ImGuiKey imgui_key)
+{
+    IM_ASSERT(imgui_key >= 0 && imgui_key < ImGuiKey_COUNT);
+    ImGuiContext& g = *GImGui;
+    return g.IO.KeyMap[imgui_key];
+}
+
+// Note that dear imgui doesn't know the semantic of each entry of io.KeysDown[]!
+// Use your own indices/enums according to how your backend/engine stored them into io.KeysDown[]!
+// Returns false for a negative index; any other index is asserted to be inside io.KeysDown[].
+bool ImGui::IsKeyDown(int user_key_index)
+{
+    if (user_key_index < 0)
+        return false;
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(user_key_index >= 0 && user_key_index < IM_ARRAYSIZE(g.IO.KeysDown));
+    return g.IO.KeysDown[user_key_index];
+}
+
+// t0 = previous time (e.g.: g.Time - g.IO.DeltaTime)
+// t1 = current time (e.g.: g.Time)
+// An event is triggered at:
+//  t = 0.0f     t = repeat_delay,    t = repeat_delay + repeat_rate*N
+// Returns the number of trigger points crossed when going from t0 to t1.
+int ImGui::CalcTypematicRepeatAmount(float t0, float t1, float repeat_delay, float repeat_rate)
+{
+    // t1 == 0.0f is the initial trigger: always exactly one event
+    if (t1 == 0.0f)
+        return 1;
+    // Time did not advance: nothing can have triggered
+    if (t0 >= t1)
+        return 0;
+    // Without a positive repeat rate the only remaining trigger is crossing repeat_delay (bool converted to 0 or 1)
+    if (repeat_rate <= 0.0f)
+        return (t0 < repeat_delay) && (t1 >= repeat_delay);
+    // Index of the last repeat trigger reached at each time, -1 while still before repeat_delay
+    const int count_t0 = (t0 < repeat_delay) ? -1 : (int)((t0 - repeat_delay) / repeat_rate);
+    const int count_t1 = (t1 < repeat_delay) ? -1 : (int)((t1 - repeat_delay) / repeat_rate);
+    const int count = count_t1 - count_t0;
+    return count;
+}
+
+// Number of typematic events for a key, using io.KeysDownDuration[key_index] as the current time
+// and that value minus io.DeltaTime as the previous time. Returns 0 for a negative key_index.
+int ImGui::GetKeyPressedAmount(int key_index, float repeat_delay, float repeat_rate)
+{
+    ImGuiContext& g = *GImGui;
+    if (key_index < 0)
+        return 0;
+    IM_ASSERT(key_index >= 0 && key_index < IM_ARRAYSIZE(g.IO.KeysDown));
+    const float t = g.IO.KeysDownDuration[key_index];
+    return CalcTypematicRepeatAmount(t - g.IO.DeltaTime, t, repeat_delay, repeat_rate);
+}
+
+// True when the key's down duration is exactly 0.0f. With repeat enabled, also true when the duration
+// is past io.KeyRepeatDelay and GetKeyPressedAmount() reports at least one event. False for a negative index.
+bool ImGui::IsKeyPressed(int user_key_index, bool repeat)
+{
+    ImGuiContext& g = *GImGui;
+    if (user_key_index < 0)
+        return false;
+    IM_ASSERT(user_key_index >= 0 && user_key_index < IM_ARRAYSIZE(g.IO.KeysDown));
+    const float t = g.IO.KeysDownDuration[user_key_index];
+    if (t == 0.0f)
+        return true;
+    if (repeat && t > g.IO.KeyRepeatDelay)
+        return GetKeyPressedAmount(user_key_index, g.IO.KeyRepeatDelay, g.IO.KeyRepeatRate) > 0;
+    return false;
+}
+
+// True when io.KeysDownDurationPrev[] for the key is >= 0.0f and the key is not down now. False for a negative index.
+bool ImGui::IsKeyReleased(int user_key_index)
+{
+    ImGuiContext& g = *GImGui;
+    if (user_key_index < 0) return false;
+    IM_ASSERT(user_key_index >= 0 && user_key_index < IM_ARRAYSIZE(g.IO.KeysDown));
+    return g.IO.KeysDownDurationPrev[user_key_index] >= 0.0f && !g.IO.KeysDown[user_key_index];
+}
+
+// Returns io.MouseDown[button]. The button index is asserted to be inside io.MouseDown[].
+bool ImGui::IsMouseDown(ImGuiMouseButton button)
+{
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(button >= 0 && button < IM_ARRAYSIZE(g.IO.MouseDown));
+    return g.IO.MouseDown[button];
+}
+
+// True when the button's down duration is exactly 0.0f. With repeat enabled, also true when the duration
+// is past io.KeyRepeatDelay and at least one typematic event occurred, evaluated at half of io.KeyRepeatRate.
+bool ImGui::IsMouseClicked(ImGuiMouseButton button, bool repeat)
+{
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(button >= 0 && button < IM_ARRAYSIZE(g.IO.MouseDown));
+    const float t = g.IO.MouseDownDuration[button];
+    if (t == 0.0f)
+        return true;
+
+    if (repeat && t > g.IO.KeyRepeatDelay)
+    {
+        // FIXME: 2019/05/03: Our old repeat code was wrong here and led to doubling the repeat rate, which made it an ok rate for repeat on mouse hold.
+        int amount = CalcTypematicRepeatAmount(t - g.IO.DeltaTime, t, g.IO.KeyRepeatDelay, g.IO.KeyRepeatRate * 0.50f);
+        if (amount > 0)
+            return true;
+    }
+    return false;
+}
+
+// Returns io.MouseReleased[button]. The button index is asserted to be inside io.MouseDown[].
+bool ImGui::IsMouseReleased(ImGuiMouseButton button)
+{
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(button >= 0 && button < IM_ARRAYSIZE(g.IO.MouseDown));
+    return g.IO.MouseReleased[button];
+}
+
+// Returns io.MouseDoubleClicked[button]. The button index is asserted to be inside io.MouseDown[].
+bool ImGui::IsMouseDoubleClicked(ImGuiMouseButton button)
+{
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(button >= 0 && button < IM_ARRAYSIZE(g.IO.MouseDown));
+    return g.IO.MouseDoubleClicked[button];
+}
+
+// Return if a mouse click/drag went past the given threshold. Valid to call during the MouseReleased frame.
+// [Internal] This doesn't test if the button is pressed
+// A negative lock_threshold selects io.MouseDragThreshold. The comparison is done on squared distances.
+bool ImGui::IsMouseDragPastThreshold(ImGuiMouseButton button, float lock_threshold)
+{
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(button >= 0 && button < IM_ARRAYSIZE(g.IO.MouseDown));
+    if (lock_threshold < 0.0f)
+        lock_threshold = g.IO.MouseDragThreshold;
+    return g.IO.MouseDragMaxDistanceSqr[button] >= lock_threshold * lock_threshold;
+}
+
+// False while the button is not down; otherwise the result of IsMouseDragPastThreshold() for the same arguments.
+bool ImGui::IsMouseDragging(ImGuiMouseButton button, float lock_threshold)
+{
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(button >= 0 && button < IM_ARRAYSIZE(g.IO.MouseDown));
+    if (!g.IO.MouseDown[button])
+        return false;
+    return IsMouseDragPastThreshold(button, lock_threshold);
+}
+
+// Returns io.MousePos as stored in the current context.
+ImVec2 ImGui::GetMousePos()
+{
+    ImGuiContext& g = *GImGui;
+    return g.IO.MousePos;
+}
+
+// NB: prefer to call right after BeginPopup(). At the time Selectable/MenuItem is activated, the popup is already closed!
+// Returns OpenMousePos of the g.OpenPopupStack entry at index (g.BeginPopupStack.Size - 1),
+// or io.MousePos when g.BeginPopupStack is empty.
+ImVec2 ImGui::GetMousePosOnOpeningCurrentPopup()
+{
+    ImGuiContext& g = *GImGui;
+    if (g.BeginPopupStack.Size > 0)
+        return g.OpenPopupStack[g.BeginPopupStack.Size - 1].OpenMousePos;
+    return g.IO.MousePos;
+}
+
+// We typically use ImVec2(-FLT_MAX,-FLT_MAX) to denote an invalid mouse position.
+bool ImGui::IsMousePosValid(const ImVec2* mouse_pos)
+{
+    // GImGui is not dereferenced on every path through this function, which leads XCode Static Analysis
+    // to assume it may be NULL. The assert only exists to silence that false positive.
+    IM_ASSERT(GImGui != NULL);
+
+    // Coordinates below this bound are treated as the "invalid position" marker.
+    const float lowest_valid_coord = -256000.0f;
+
+    // A NULL argument means: test the context's own io.MousePos.
+    const ImVec2 pos = (mouse_pos == NULL) ? GImGui->IO.MousePos : *mouse_pos;
+
+    // Written as negated >= so that a NaN coordinate is rejected exactly like a too-small one.
+    if (!(pos.x >= lowest_valid_coord))
+        return false;
+    return pos.y >= lowest_valid_coord;
+}
+
+// Returns true as soon as one entry of io.MouseDown[] is set, false when none is.
+bool ImGui::IsAnyMouseDown()
+{
+    ImGuiContext& ctx = *GImGui;
+    int button = 0;
+    while (button < IM_ARRAYSIZE(ctx.IO.MouseDown))
+    {
+        if (ctx.IO.MouseDown[button])
+            return true;
+        button++;
+    }
+    return false;
+}
+
+// Return the delta from the initial clicking position while the mouse button is clicked or was just released.
+// This is locked and return 0.0f until the mouse moves past a distance threshold at least once.
+// NB: This is only valid if IsMousePosValid(). backends in theory should always keep mouse position valid when dragging even outside the client window.
+ImVec2 ImGui::GetMouseDragDelta(ImGuiMouseButton button, float lock_threshold)
+{
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(button >= 0 && button < IM_ARRAYSIZE(g.IO.MouseDown));
+    // A negative threshold selects io.MouseDragThreshold
+    if (lock_threshold < 0.0f)
+        lock_threshold = g.IO.MouseDragThreshold;
+    // A delta is only reported while the button is down or flagged as released, once the recorded max drag
+    // distance reached the threshold, and only when both the current and the clicked positions are valid
+    if (g.IO.MouseDown[button] || g.IO.MouseReleased[button])
+        if (g.IO.MouseDragMaxDistanceSqr[button] >= lock_threshold * lock_threshold)
+            if (IsMousePosValid(&g.IO.MousePos) && IsMousePosValid(&g.IO.MouseClickedPos[button]))
+                return g.IO.MousePos - g.IO.MouseClickedPos[button];
+    return ImVec2(0.0f, 0.0f);
+}
+
+// Makes the current io.MousePos the new clicked position for `button`, which is the origin GetMouseDragDelta() subtracts.
+void ImGui::ResetMouseDragDelta(ImGuiMouseButton button)
+{
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(button >= 0 && button < IM_ARRAYSIZE(g.IO.MouseDown));
+    // NB: We don't need to reset g.IO.MouseDragMaxDistanceSqr
+    g.IO.MouseClickedPos[button] = g.IO.MousePos;
+}
+
+// Returns the MouseCursor value stored in the current context.
+ImGuiMouseCursor ImGui::GetMouseCursor()
+{
+    return GImGui->MouseCursor;
+}
+
+// Stores cursor_type as the current context's MouseCursor.
+void ImGui::SetMouseCursor(ImGuiMouseCursor cursor_type)
+{
+    GImGui->MouseCursor = cursor_type;
+}
+
+// Stores the request as 1 (capture) or 0 (don't capture) in the context's WantCaptureKeyboardNextFrame.
+void ImGui::CaptureKeyboardFromApp(bool capture)
+{
+    GImGui->WantCaptureKeyboardNextFrame = capture ? 1 : 0;
+}
+
+// Stores the request as 1 (capture) or 0 (don't capture) in the context's WantCaptureMouseNextFrame.
+void ImGui::CaptureMouseFromApp(bool capture)
+{
+    GImGui->WantCaptureMouseNextFrame = capture ? 1 : 0;
+}
+
+// True when there is an active id and it equals the current window's DC.LastItemId.
+bool ImGui::IsItemActive()
+{
+    ImGuiContext& g = *GImGui;
+    if (g.ActiveId)
+    {
+        ImGuiWindow* window = g.CurrentWindow;
+        return g.ActiveId == window->DC.LastItemId;
+    }
+    return false;
+}
+
+// True when the last item is the active id now but was not g.ActiveIdPreviousFrame.
+bool ImGui::IsItemActivated()
+{
+    ImGuiContext& g = *GImGui;
+    if (g.ActiveId)
+    {
+        ImGuiWindow* window = g.CurrentWindow;
+        if (g.ActiveId == window->DC.LastItemId && g.ActiveIdPreviousFrame != window->DC.LastItemId)
+            return true;
+    }
+    return false;
+}
+
+// When the last item's status flags carry ImGuiItemStatusFlags_HasDeactivated, the answer is its
+// ImGuiItemStatusFlags_Deactivated bit. Otherwise: true when the last item was g.ActiveIdPreviousFrame
+// (non-zero) and is no longer the active id.
+bool ImGui::IsItemDeactivated()
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    if (window->DC.LastItemStatusFlags & ImGuiItemStatusFlags_HasDeactivated)
+        return (window->DC.LastItemStatusFlags & ImGuiItemStatusFlags_Deactivated) != 0;
+    return (g.ActiveIdPreviousFrame == window->DC.LastItemId && g.ActiveIdPreviousFrame != 0 && g.ActiveId != window->DC.LastItemId);
+}
+
+// IsItemDeactivated() combined with the context's "has been edited before" flags: the previous-frame flag,
+// or the current flag when no id is active anymore.
+bool ImGui::IsItemDeactivatedAfterEdit()
+{
+    ImGuiContext& g = *GImGui;
+    return IsItemDeactivated() && (g.ActiveIdPreviousFrameHasBeenEditedBefore || (g.ActiveId == 0 && g.ActiveIdHasBeenEditedBefore));
+}
+
+// == GetItemID() == GetFocusID()
+// True when g.NavId is non-zero and equals the current window's DC.LastItemId.
+bool ImGui::IsItemFocused()
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+
+    if (g.NavId != window->DC.LastItemId || g.NavId == 0)
+        return false;
+    return true;
+}
+
+// True when IsMouseClicked(mouse_button) and IsItemHovered(ImGuiHoveredFlags_None) both hold.
+bool ImGui::IsItemClicked(ImGuiMouseButton mouse_button)
+{
+    return IsMouseClicked(mouse_button) && IsItemHovered(ImGuiHoveredFlags_None);
+}
+
+// Reports the ImGuiItemStatusFlags_ToggledOpen bit of the current window's last item status flags.
+bool ImGui::IsItemToggledOpen()
+{
+    ImGuiContext& g = *GImGui;
+    return (g.CurrentWindow->DC.LastItemStatusFlags & ImGuiItemStatusFlags_ToggledOpen) ? true : false;
+}
+
+// Reports the ImGuiItemStatusFlags_ToggledSelection bit of the current window's last item status flags.
+bool ImGui::IsItemToggledSelection()
+{
+    ImGuiContext& g = *GImGui;
+    return (g.CurrentWindow->DC.LastItemStatusFlags & ImGuiItemStatusFlags_ToggledSelection) ? true : false;
+}
+
+// True when either g.HoveredId or g.HoveredIdPreviousFrame is non-zero.
+bool ImGui::IsAnyItemHovered()
+{
+    ImGuiContext& g = *GImGui;
+    return g.HoveredId != 0 || g.HoveredIdPreviousFrame != 0;
+}
+
+// True when g.ActiveId is non-zero.
+bool ImGui::IsAnyItemActive()
+{
+    ImGuiContext& g = *GImGui;
+    return g.ActiveId != 0;
+}
+
+// True when g.NavId is non-zero and g.NavDisableHighlight is not set.
+bool ImGui::IsAnyItemFocused()
+{
+    ImGuiContext& g = *GImGui;
+    return g.NavId != 0 && !g.NavDisableHighlight;
+}
+
+// True when the last item's rectangle overlaps the current window's ClipRect.
+bool ImGui::IsItemVisible()
+{
+    ImGuiWindow* window = GetCurrentWindowRead();
+    return window->ClipRect.Overlaps(window->DC.LastItemRect);
+}
+
+// Reports the ImGuiItemStatusFlags_Edited bit of the current window's last item status flags.
+bool ImGui::IsItemEdited()
+{
+    ImGuiWindow* window = GetCurrentWindowRead();
+    return (window->DC.LastItemStatusFlags & ImGuiItemStatusFlags_Edited) != 0;
+}
+
+// Allow last item to be overlapped by a subsequent item. Both may be activated during the same frame before the later one takes priority.
+// FIXME: Although this is exposed, its interaction and ideal idiom with using ImGuiButtonFlags_AllowItemOverlap flag are extremely confusing, need rework.
+void ImGui::SetItemAllowOverlap()
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiID id = g.CurrentWindow->DC.LastItemId;
+    // Only has an effect when the last item is the hovered and/or the active id
+    if (g.HoveredId == id)
+        g.HoveredIdAllowOverlap = true;
+    if (g.ActiveId == id)
+        g.ActiveIdAllowOverlap = true;
+}
+
+// Sets the "using mouse wheel" flag on whichever of g.HoveredId / g.ActiveId currently equals the last item's id.
+void ImGui::SetItemUsingMouseWheel()
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiID id = g.CurrentWindow->DC.LastItemId;
+    if (g.HoveredId == id)
+        g.HoveredIdUsingMouseWheel = true;
+    if (g.ActiveId == id)
+        g.ActiveIdUsingMouseWheel = true;
+}
+
+// Returns the Min corner of the current window's DC.LastItemRect.
+ImVec2 ImGui::GetItemRectMin()
+{
+    ImGuiWindow* window = GetCurrentWindowRead();
+    return window->DC.LastItemRect.Min;
+}
+
+// Returns the Max corner of the current window's DC.LastItemRect.
+ImVec2 ImGui::GetItemRectMax()
+{
+    ImGuiWindow* window = GetCurrentWindowRead();
+    return window->DC.LastItemRect.Max;
+}
+
+// Returns GetSize() of the current window's DC.LastItemRect.
+ImVec2 ImGui::GetItemRectSize()
+{
+    ImGuiWindow* window = GetCurrentWindowRead();
+    return window->DC.LastItemRect.GetSize();
+}
+
+// Rectangle spanning (0, 0) to io.DisplaySize.
+static ImRect GetViewportRect()
+{
+    ImGuiContext& g = *GImGui;
+    return ImRect(0.0f, 0.0f, g.IO.DisplaySize.x, g.IO.DisplaySize.y);
+}
+
+// Begins a child window of the current window and returns what Begin() returned for it.
+// - name: optional; when non-NULL it becomes part of the generated window name.
+// - id: stored in the child's ChildId and embedded in the generated window name.
+// - size_arg: floored; a component <= 0 is resolved as max(available content size + component, 4.0f),
+//   and a component of exactly 0 is additionally recorded in the child's AutoFitChildAxises.
+// - border: when false, style.ChildBorderSize is forced to 0 for the duration of the Begin() call.
+// - flags: NoTitleBar, NoResize, NoSavedSettings and ChildWindow are always added; NoMove is inherited from the parent.
+bool ImGui::BeginChildEx(const char* name, ImGuiID id, const ImVec2& size_arg, bool border, ImGuiWindowFlags flags)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* parent_window = g.CurrentWindow;
+
+    flags |= ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_ChildWindow;
+    flags |= (parent_window->Flags & ImGuiWindowFlags_NoMove);  // Inherit the NoMove flag
+
+    // Size
+    const ImVec2 content_avail = GetContentRegionAvail();
+    ImVec2 size = ImFloor(size_arg);
+    const int auto_fit_axises = ((size.x == 0.0f) ? (1 << ImGuiAxis_X) : 0x00) | ((size.y == 0.0f) ? (1 << ImGuiAxis_Y) : 0x00);
+    if (size.x <= 0.0f)
+        size.x = ImMax(content_avail.x + size.x, 4.0f); // Arbitrary minimum child size (0.0f causing too much issues)
+    if (size.y <= 0.0f)
+        size.y = ImMax(content_avail.y + size.y, 4.0f);
+    SetNextWindowSize(size);
+
+    // Build up name. If you need to append to a same child from multiple location in the ID stack, use BeginChild(ImGuiID id) with a stable value.
+    if (name)
+        ImFormatString(g.TempBuffer, IM_ARRAYSIZE(g.TempBuffer), "%s/%s_%08X", parent_window->Name, name, id);
+    else
+        ImFormatString(g.TempBuffer, IM_ARRAYSIZE(g.TempBuffer), "%s/%08X", parent_window->Name, id);
+
+    // Temporarily override the style's child border size around Begin(), then restore it
+    const float backup_border_size = g.Style.ChildBorderSize;
+    if (!border)
+        g.Style.ChildBorderSize = 0.0f;
+    bool ret = Begin(g.TempBuffer, NULL, flags);
+    g.Style.ChildBorderSize = backup_border_size;
+
+    // g.CurrentWindow is read again after Begin() and used as the child window from here on
+    ImGuiWindow* child_window = g.CurrentWindow;
+    child_window->ChildId = id;
+    child_window->AutoFitChildAxises = (ImS8)auto_fit_axises;
+
+    // Set the cursor to handle case where the user called SetNextWindowPos()+BeginChild() manually.
+    // While this is not really documented/defined, it seems that the expected thing to do.
+    if (child_window->BeginCount == 1)
+        parent_window->DC.CursorPos = child_window->Pos;
+
+    // Process navigation-in immediately so NavInit can run on first frame
+    if (g.NavActivateId == id && !(flags & ImGuiWindowFlags_NavFlattened) && (child_window->DC.NavLayerActiveMask != 0 || child_window->DC.NavHasScroll))
+    {
+        FocusWindow(child_window);
+        NavInitWindow(child_window, false);
+        SetActiveID(id + 1, child_window); // Steal ActiveId with another arbitrary id so that key-press won't activate child item
+        g.ActiveIdSource = ImGuiInputSource_Nav;
+    }
+    return ret;
+}
+
+// String-id overload: the child id is obtained from the current window's GetID(str_id), and str_id is also passed as the name.
+bool ImGui::BeginChild(const char* str_id, const ImVec2& size_arg, bool border, ImGuiWindowFlags extra_flags)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    return BeginChildEx(str_id, window->GetID(str_id), size_arg, border, extra_flags);
+}
+
+// Explicit-id overload: id is asserted to be non-zero and no name is passed to BeginChildEx().
+bool ImGui::BeginChild(ImGuiID id, const ImVec2& size_arg, bool border, ImGuiWindowFlags extra_flags)
+{
+    IM_ASSERT(id != 0);
+    return BeginChildEx(NULL, id, size_arg, border, extra_flags);
+}
+
+// Ends the current child window. When window->BeginCount > 1 only End() is called; otherwise the child is also
+// submitted to the window that is current after End() as an item of the child's size.
+// g.WithinEndChild is set for the duration of the call and asserted to be clear on entry.
+void ImGui::EndChild()
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+
+    IM_ASSERT(g.WithinEndChild == false);
+    IM_ASSERT(window->Flags & ImGuiWindowFlags_ChildWindow);   // Mismatched BeginChild()/EndChild() calls
+
+    g.WithinEndChild = true;
+    if (window->BeginCount > 1)
+    {
+        End();
+    }
+    else
+    {
+        ImVec2 sz = window->Size;
+        if (window->AutoFitChildAxises & (1 << ImGuiAxis_X)) // Arbitrary minimum zero-ish child size of 4.0f causes less trouble than a 0.0f
+            sz.x = ImMax(4.0f, sz.x);
+        if (window->AutoFitChildAxises & (1 << ImGuiAxis_Y))
+            sz.y = ImMax(4.0f, sz.y);
+        End();
+
+        // After End(), g.CurrentWindow is used as the parent; the item rectangle starts at its cursor position
+        ImGuiWindow* parent_window = g.CurrentWindow;
+        ImRect bb(parent_window->DC.CursorPos, parent_window->DC.CursorPos + sz);
+        ItemSize(sz);
+        if ((window->DC.NavLayerActiveMask != 0 || window->DC.NavHasScroll) && !(window->Flags & ImGuiWindowFlags_NavFlattened))
+        {
+            ItemAdd(bb, window->ChildId);
+            RenderNavHighlight(bb, window->ChildId);
+
+            // When browsing a window that has no activable items (scroll only) we keep a highlight on the child
+            if (window->DC.NavLayerActiveMask == 0 && window == g.NavWindow)
+                RenderNavHighlight(ImRect(bb.Min - ImVec2(2, 2), bb.Max + ImVec2(2, 2)), g.NavId, ImGuiNavHighlightFlags_TypeThin);
+        }
+        else
+        {
+            // Not navigable into
+            ItemAdd(bb, 0);
+        }
+    }
+    g.WithinEndChild = false;
+}
+
+// Helper to create a child window / scrolling region that looks like a normal widget frame.
+// Pushes the frame background color, rounding, border size and padding as the child equivalents, calls
+// BeginChild() with a border plus NoMove and AlwaysUseWindowPadding, then pops the three style vars and the color.
+bool ImGui::BeginChildFrame(ImGuiID id, const ImVec2& size, ImGuiWindowFlags extra_flags)
+{
+    ImGuiContext& g = *GImGui;
+    const ImGuiStyle& style = g.Style;
+    PushStyleColor(ImGuiCol_ChildBg, style.Colors[ImGuiCol_FrameBg]);
+    PushStyleVar(ImGuiStyleVar_ChildRounding, style.FrameRounding);
+    PushStyleVar(ImGuiStyleVar_ChildBorderSize, style.FrameBorderSize);
+    PushStyleVar(ImGuiStyleVar_WindowPadding, style.FramePadding);
+    bool ret = BeginChild(id, size, true, ImGuiWindowFlags_NoMove | ImGuiWindowFlags_AlwaysUseWindowPadding | extra_flags);
+    PopStyleVar(3);
+    PopStyleColor();
+    return ret;
+}
+
+// Counterpart of BeginChildFrame(): simply forwards to EndChild().
+void ImGui::EndChildFrame()
+{
+    EndChild();
+}
+
+// Sets (enabled == true) or clears (enabled == false) the given condition bits in the window's
+// Pos, Size and Collapsed "allow" masks.
+static void SetWindowConditionAllowFlags(ImGuiWindow* window, ImGuiCond flags, bool enabled)
+{
+    window->SetWindowPosAllowFlags       = enabled ? (window->SetWindowPosAllowFlags       | flags) : (window->SetWindowPosAllowFlags       & ~flags);
+    window->SetWindowSizeAllowFlags      = enabled ? (window->SetWindowSizeAllowFlags      | flags) : (window->SetWindowSizeAllowFlags      & ~flags);
+    window->SetWindowCollapsedAllowFlags = enabled ? (window->SetWindowCollapsedAllowFlags | flags) : (window->SetWindowCollapsedAllowFlags & ~flags);
+}
+
+// Looks the id up in g.WindowsById and returns the stored pointer cast to ImGuiWindow*.
+// NOTE(review): presumably NULL when the id is unknown - confirm against the storage's GetVoidPtr().
+ImGuiWindow* ImGui::FindWindowByID(ImGuiID id)
+{
+    ImGuiContext& g = *GImGui;
+    return (ImGuiWindow*)g.WindowsById.GetVoidPtr(id);
+}
+
+// Hashes the name with ImHashStr() and forwards to FindWindowByID().
+ImGuiWindow* ImGui::FindWindowByName(const char* name)
+{
+    ImGuiID id = ImHashStr(name);
+    return FindWindowByID(id);
+}
+
+// Copies position, size and collapsed state from a settings entry into the window.
+// Position and size are floored; the size is only applied when both stored components are > 0.
+static void ApplyWindowSettings(ImGuiWindow* window, ImGuiWindowSettings* settings)
+{
+    window->Pos = ImFloor(ImVec2(settings->Pos.x, settings->Pos.y));
+    if (settings->Size.x > 0 && settings->Size.y > 0)
+        window->Size = window->SizeFull = ImFloor(ImVec2(settings->Size.x, settings->Size.y));
+    window->Collapsed = settings->Collapsed;
+}
+
+// Allocates a new ImGuiWindow with IM_NEW, registers it in g.WindowsById, applies saved settings when the flags
+// allow it, initializes the auto-fit frame counters, appends it to g.WindowsFocusOrder and inserts it into
+// g.Windows (at the front with NoBringToFrontOnFocus, at the back otherwise). Returns the new window.
+static ImGuiWindow* CreateNewWindow(const char* name, ImGuiWindowFlags flags)
+{
+    ImGuiContext& g = *GImGui;
+    //IMGUI_DEBUG_LOG("CreateNewWindow '%s', flags = 0x%08X\n", name, flags);
+
+    // Create window the first time
+    ImGuiWindow* window = IM_NEW(ImGuiWindow)(&g, name);
+    window->Flags = flags;
+    g.WindowsById.SetVoidPtr(window->ID, window);
+
+    // Default/arbitrary window position. Use SetNextWindowPos() with the appropriate condition flag to change the initial position of a window.
+    window->Pos = ImVec2(60, 60);
+
+    // User can disable loading and saving of settings. Tooltip and child windows also don't store settings.
+    if (!(flags & ImGuiWindowFlags_NoSavedSettings))
+        if (ImGuiWindowSettings* settings = ImGui::FindWindowSettings(window->ID))
+        {
+            // Retrieve settings from .ini file
+            window->SettingsOffset = g.SettingsWindows.offset_from_ptr(settings);
+            SetWindowConditionAllowFlags(window, ImGuiCond_FirstUseEver, false);
+            ApplyWindowSettings(window, settings);
+        }
+    window->DC.CursorStartPos = window->DC.CursorMaxPos = window->Pos; // So first call to CalcContentSize() doesn't return crazy values
+
+    // Auto-fit runs for 2 frames on both axes with AlwaysAutoResize, otherwise only on axes whose size is still <= 0
+    if ((flags & ImGuiWindowFlags_AlwaysAutoResize) != 0)
+    {
+        window->AutoFitFramesX = window->AutoFitFramesY = 2;
+        window->AutoFitOnlyGrows = false;
+    }
+    else
+    {
+        if (window->Size.x <= 0.0f)
+            window->AutoFitFramesX = 2;
+        if (window->Size.y <= 0.0f)
+            window->AutoFitFramesY = 2;
+        window->AutoFitOnlyGrows = (window->AutoFitFramesX > 0) || (window->AutoFitFramesY > 0);
+    }
+
+    g.WindowsFocusOrder.push_back(window);
+    if (flags & ImGuiWindowFlags_NoBringToFrontOnFocus)
+        g.Windows.push_front(window); // Quite slow but rare and only once
+    else
+        g.Windows.push_back(window);
+    return window;
+}
+
+// Returns new_size after applying the size constraint stored in g.NextWindowData (when its HasSizeConstraint flag is
+// set) and, for windows that are neither child nor always-auto-resize, the style's minimum window size.
+static ImVec2 CalcWindowSizeAfterConstraint(ImGuiWindow* window, ImVec2 new_size)
+{
+    ImGuiContext& g = *GImGui;
+    if (g.NextWindowData.Flags & ImGuiNextWindowDataFlags_HasSizeConstraint)
+    {
+        // Using -1,-1 on either X/Y axis to preserve the current size.
+        ImRect cr = g.NextWindowData.SizeConstraintRect;
+        new_size.x = (cr.Min.x >= 0 && cr.Max.x >= 0) ? ImClamp(new_size.x, cr.Min.x, cr.Max.x) : window->SizeFull.x;
+        new_size.y = (cr.Min.y >= 0 && cr.Max.y >= 0) ? ImClamp(new_size.y, cr.Min.y, cr.Max.y) : window->SizeFull.y;
+        // Optional user callback: it receives the clamped size in DesiredSize and whatever it leaves there is used
+        if (g.NextWindowData.SizeCallback)
+        {
+            ImGuiSizeCallbackData data;
+            data.UserData = g.NextWindowData.SizeCallbackUserData;
+            data.Pos = window->Pos;
+            data.CurrentSize = window->SizeFull;
+            data.DesiredSize = new_size;
+            g.NextWindowData.SizeCallback(&data);
+            new_size = data.DesiredSize;
+        }
+        new_size.x = IM_FLOOR(new_size.x);
+        new_size.y = IM_FLOOR(new_size.y);
+    }
+
+    // Minimum size
+    if (!(window->Flags & (ImGuiWindowFlags_ChildWindow | ImGuiWindowFlags_AlwaysAutoResize)))
+    {
+        ImGuiWindow* window_for_height = window;
+        new_size = ImMax(new_size, g.Style.WindowMinSize);
+        new_size.y = ImMax(new_size.y, window_for_height->TitleBarHeight() + window_for_height->MenuBarHeight() + ImMax(0.0f, g.Style.WindowRounding - 1.0f)); // Reduce artifacts with very small windows
+    }
+    return new_size;
+}
+
+// Computes the window's current and ideal content sizes into the two output pointers.
+// The previously stored ContentSize / ContentSizeIdeal are returned unchanged when the window is collapsed with
+// no auto-fit pending, or hidden with only "can skip items" hidden frames remaining.
+// Otherwise each axis uses ContentSizeExplicit when it is non-zero, else the floored cursor extent.
+static void CalcWindowContentSizes(ImGuiWindow* window, ImVec2* content_size_current, ImVec2* content_size_ideal)
+{
+    bool preserve_old_content_sizes = false;
+    if (window->Collapsed && window->AutoFitFramesX <= 0 && window->AutoFitFramesY <= 0)
+        preserve_old_content_sizes = true;
+    else if (window->Hidden && window->HiddenFramesCannotSkipItems == 0 && window->HiddenFramesCanSkipItems > 0)
+        preserve_old_content_sizes = true;
+    if (preserve_old_content_sizes)
+    {
+        *content_size_current = window->ContentSize;
+        *content_size_ideal = window->ContentSizeIdeal;
+        return;
+    }
+
+    // "current" measures up to DC.CursorMaxPos; "ideal" measures up to the larger of DC.CursorMaxPos and DC.IdealMaxPos
+    content_size_current->x = (window->ContentSizeExplicit.x != 0.0f) ? window->ContentSizeExplicit.x : IM_FLOOR(window->DC.CursorMaxPos.x - window->DC.CursorStartPos.x);
+    content_size_current->y = (window->ContentSizeExplicit.y != 0.0f) ? window->ContentSizeExplicit.y : IM_FLOOR(window->DC.CursorMaxPos.y - window->DC.CursorStartPos.y);
+    content_size_ideal->x = (window->ContentSizeExplicit.x != 0.0f) ? window->ContentSizeExplicit.x : IM_FLOOR(ImMax(window->DC.CursorMaxPos.x, window->DC.IdealMaxPos.x) - window->DC.CursorStartPos.x);
+    content_size_ideal->y = (window->ContentSizeExplicit.y != 0.0f) ? window->ContentSizeExplicit.y : IM_FLOOR(ImMax(window->DC.CursorMaxPos.y, window->DC.IdealMaxPos.y) - window->DC.CursorStartPos.y);
+}
+
+// Returns the window size needed to show size_contents: contents + 2 * WindowPadding + title/menu bar height.
+// Tooltips get that size as is. Other windows are clamped between a minimum size and the display size minus the
+// safe-area padding, then grown by style.ScrollbarSize on the opposite axis for each scrollbar expected to appear.
+static ImVec2 CalcWindowAutoFitSize(ImGuiWindow* window, const ImVec2& size_contents)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiStyle& style = g.Style;
+    ImVec2 size_decorations = ImVec2(0.0f, window->TitleBarHeight() + window->MenuBarHeight());
+    ImVec2 size_pad = window->WindowPadding * 2.0f;
+    ImVec2 size_desired = size_contents + size_pad + size_decorations;
+    if (window->Flags & ImGuiWindowFlags_Tooltip)
+    {
+        // Tooltip always resize
+        return size_desired;
+    }
+    else
+    {
+        // Maximum window size is determined by the viewport size or monitor size
+        const bool is_popup = (window->Flags & ImGuiWindowFlags_Popup) != 0;
+        const bool is_menu = (window->Flags & ImGuiWindowFlags_ChildMenu) != 0;
+        ImVec2 size_min = style.WindowMinSize;
+        if (is_popup || is_menu) // Popups and menus bypass style.WindowMinSize by default, but we give then a non-zero minimum size to facilitate understanding problematic cases (e.g. empty popups)
+            size_min = ImMin(size_min, ImVec2(4.0f, 4.0f));
+        ImVec2 size_auto_fit = ImClamp(size_desired, size_min, ImMax(size_min, g.IO.DisplaySize - style.DisplaySafeAreaPadding * 2.0f));
+
+        // When the window cannot fit all contents (either because of constraints, either because screen is too small),
+        // we are growing the size on the other axis to compensate for expected scrollbar. FIXME: Might turn bigger than ViewportSize-WindowPadding.
+        ImVec2 size_auto_fit_after_constraint = CalcWindowSizeAfterConstraint(window, size_auto_fit);
+        bool will_have_scrollbar_x = (size_auto_fit_after_constraint.x - size_pad.x - size_decorations.x < size_contents.x && !(window->Flags & ImGuiWindowFlags_NoScrollbar) && (window->Flags & ImGuiWindowFlags_HorizontalScrollbar)) || (window->Flags & ImGuiWindowFlags_AlwaysHorizontalScrollbar);
+        bool will_have_scrollbar_y = (size_auto_fit_after_constraint.y - size_pad.y - size_decorations.y < size_contents.y && !(window->Flags & ImGuiWindowFlags_NoScrollbar)) || (window->Flags & ImGuiWindowFlags_AlwaysVerticalScrollbar);
+        if (will_have_scrollbar_x)
+            size_auto_fit.y += style.ScrollbarSize;
+        if (will_have_scrollbar_y)
+            size_auto_fit.x += style.ScrollbarSize;
+        return size_auto_fit;
+    }
+}
+
+// Chains the three helpers above: content sizes -> auto-fit size (from the ideal content size) -> constrained size.
+ImVec2 ImGui::CalcWindowNextAutoFitSize(ImGuiWindow* window)
+{
+    ImVec2 size_contents_current;
+    ImVec2 size_contents_ideal;
+    CalcWindowContentSizes(window, &size_contents_current, &size_contents_ideal);
+    ImVec2 size_auto_fit = CalcWindowAutoFitSize(window, size_contents_ideal);
+    ImVec2 size_final = CalcWindowSizeAfterConstraint(window, size_auto_fit);
+    return size_final;
+}
+
+// Picks the background color index for a window: PopupBg for tooltips and popups (tested first),
+// ChildBg for child windows, WindowBg otherwise.
+static ImGuiCol GetWindowBgColorIdxFromFlags(ImGuiWindowFlags flags)
+{
+    if (flags & (ImGuiWindowFlags_Tooltip | ImGuiWindowFlags_Popup))
+        return ImGuiCol_PopupBg;
+    if (flags & ImGuiWindowFlags_ChildWindow)
+        return ImGuiCol_ChildBg;
+    return ImGuiCol_WindowBg;
+}
+
+// Computes the window position and size that result from moving one corner to corner_target.
+// corner_norm identifies the corner in normalized coordinates (each component is used as an ImLerp factor).
+// The expected size goes through CalcWindowSizeAfterConstraint(); on an axis where corner_norm is 0 the position
+// is shifted by the difference between the constrained and the expected size.
+static void CalcResizePosSizeFromAnyCorner(ImGuiWindow* window, const ImVec2& corner_target, const ImVec2& corner_norm, ImVec2* out_pos, ImVec2* out_size)
+{
+    ImVec2 pos_min = ImLerp(corner_target, window->Pos, corner_norm);                // Expected window upper-left
+    ImVec2 pos_max = ImLerp(window->Pos + window->Size, corner_target, corner_norm); // Expected window lower-right
+    ImVec2 size_expected = pos_max - pos_min;
+    ImVec2 size_constrained = CalcWindowSizeAfterConstraint(window, size_expected);
+    *out_pos = pos_min;
+    if (corner_norm.x == 0.0f)
+        out_pos->x -= (size_constrained.x - size_expected.x);
+    if (corner_norm.y == 0.0f)
+        out_pos->y -= (size_constrained.y - size_expected.y);
+    *out_size = size_constrained;
+}
+
+// Description of one resize grip (one per window corner, see resize_grip_def[]).
+struct ImGuiResizeGripDef
+{
+    ImVec2  CornerPosN;             // Corner in normalized window coordinates: (0,0) upper-left, (1,1) lower-right
+    ImVec2  InnerDir;               // Per-axis sign; in resize_grip_def[] it is always the opposite of the corner's side
+    int     AngleMin12, AngleMax12; // NOTE(review): presumably an arc range in 1/12th of a full turn - confirm at the rendering site
+};
+
+// Grip definitions indexed by grip number.
+static const ImGuiResizeGripDef resize_grip_def[4] =
+{
+    { ImVec2(1, 1), ImVec2(-1, -1), 0, 3 },  // Lower-right
+    { ImVec2(0, 1), ImVec2(+1, -1), 3, 6 },  // Lower-left
+    { ImVec2(0, 0), ImVec2(+1, +1), 6, 9 },  // Upper-left (Unused)
+    { ImVec2(1, 0), ImVec2(-1, +1), 9, 12 }, // Upper-right (Unused)
+};
+
+// Description of one resizable border (see resize_border_def[]).
+struct ImGuiResizeBorderDef
+{
+    ImVec2 InnerDir;                 // Per-axis sign; non-zero only on the axis perpendicular to the border
+    ImVec2 CornerPosN1, CornerPosN2; // The border's two end corners in normalized window coordinates
+    float  OuterAngle;               // Expressed as a multiple of IM_PI in resize_border_def[]
+};
+
+// Border definitions indexed by border number (same order as GetResizeBorderRect(): Top, Right, Bottom, Left).
+static const ImGuiResizeBorderDef resize_border_def[4] =
+{
+    { ImVec2(0, +1), ImVec2(0, 0), ImVec2(1, 0), IM_PI * 1.50f }, // Top
+    { ImVec2(-1, 0), ImVec2(1, 0), ImVec2(1, 1), IM_PI * 0.00f }, // Right
+    { ImVec2(0, -1), ImVec2(1, 1), ImVec2(0, 1), IM_PI * 0.50f }, // Bottom
+    { ImVec2(+1, 0), ImVec2(0, 1), ImVec2(0, 0), IM_PI * 1.00f }  // Left
+};
+
+// Returns the rectangle of border number border_n (0 = Top, 1 = Right, 2 = Bottom, 3 = Left) of the window.
+// The rectangle extends `thickness` on both sides of the window edge and is shortened by perp_padding at both ends.
+// With a thickness of exactly 0 the window's Max corner is first pulled in by (1, 1).
+// Any other border_n asserts and returns a default-constructed ImRect.
+static ImRect GetResizeBorderRect(ImGuiWindow* window, int border_n, float perp_padding, float thickness)
+{
+    ImRect rect = window->Rect();
+    if (thickness == 0.0f) rect.Max -= ImVec2(1, 1);
+    if (border_n == 0) { return ImRect(rect.Min.x + perp_padding, rect.Min.y - thickness,    rect.Max.x - perp_padding, rect.Min.y + thickness);    } // Top
+    if (border_n == 1) { return ImRect(rect.Max.x - thickness,    rect.Min.y + perp_padding, rect.Max.x + thickness,    rect.Max.y - perp_padding); } // Right
+    if (border_n == 2) { return ImRect(rect.Min.x + perp_padding, rect.Max.y - thickness,    rect.Max.x - perp_padding, rect.Max.y + thickness);    } // Bottom
+    if (border_n == 3) { return ImRect(rect.Min.x - thickness,    rect.Min.y + perp_padding, rect.Min.x + thickness,    rect.Max.y - perp_padding); } // Left
+    IM_ASSERT(0);
+    return ImRect();
+}
+
+// 0..3: corners (Lower-right, Lower-left, Unused, Unused)
+// 4..7: borders (Top, Right, Bottom, Left)
+// Return the ID of resize widget 'n' of 'window' (asserts that n is within 0..7).
+// The ID is window->ID hashed with the string "#RESIZE", then hashed with the integer 'n'.
+ImGuiID ImGui::GetWindowResizeID(ImGuiWindow* window, int n)
+{
+    IM_ASSERT(n >= 0 && n <= 7);
+    ImGuiID id = window->ID;
+    id = ImHashStr("#RESIZE", 0, id);
+    id = ImHashData(&n, sizeof(int), id);
+    return id;
+}
+
+// Handle resize for: Resize Grips, Borders, Gamepad
+// Return true when using auto-fit (double click on resize grip)
+// - window:            window being resized. On the non-early-out path window->Size is always set to window->SizeFull;
+//                      window->SizeFull and window->Pos are written only when a new target was computed.
+// - size_auto_fit:     size applied (after constraints) when grip 0 is double-clicked.
+// - border_held:       output. Receives the index (0..3) of the border currently held; left untouched when none is held.
+// - resize_grip_count: number of corner grips to process (indexes into resize_grip_def[]).
+// - resize_grip_col:   output. Entry 0 is always written, other entries only while their grip is hovered or held.
+// - visibility_rect:   rectangle used to clamp the corner/border targets and the nav resize delta.
+// Returns false without touching anything for NoResize/AlwaysAutoResize windows, while auto-fit frames are pending,
+// or when the window was not active.
+static bool ImGui::UpdateWindowManualResize(ImGuiWindow* window, const ImVec2& size_auto_fit, int* border_held, int resize_grip_count, ImU32 resize_grip_col[4], const ImRect& visibility_rect)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindowFlags flags = window->Flags;
+
+    if ((flags & ImGuiWindowFlags_NoResize) || (flags & ImGuiWindowFlags_AlwaysAutoResize) || window->AutoFitFramesX > 0 || window->AutoFitFramesY > 0)
+        return false;
+    if (window->WasActive == false) // Early out to avoid running this code for e.g. a hidden implicit/fallback Debug window.
+        return false;
+
+    bool ret_auto_fit = false;
+    const int resize_border_count = g.IO.ConfigWindowsResizeFromEdges ? 4 : 0;
+    const float grip_draw_size = IM_FLOOR(ImMax(g.FontSize * 1.35f, window->WindowRounding + 1.0f + g.FontSize * 0.2f));
+    const float grip_hover_inner_size = IM_FLOOR(grip_draw_size * 0.75f);
+    const float grip_hover_outer_size = g.IO.ConfigWindowsResizeFromEdges ? WINDOWS_RESIZE_FROM_EDGES_HALF_THICKNESS : 0.0f;
+
+    // FLT_MAX in .x is the "no change requested" sentinel tested when applying the result at the end of the function.
+    ImVec2 pos_target(FLT_MAX, FLT_MAX);
+    ImVec2 size_target(FLT_MAX, FLT_MAX);
+
+    // Resize grips and borders are on layer 1
+    window->DC.NavLayerCurrent = ImGuiNavLayer_Menu;
+
+    // Manual resize grips
+    PushID("#RESIZE");
+    for (int resize_grip_n = 0; resize_grip_n < resize_grip_count; resize_grip_n++)
+    {
+        const ImGuiResizeGripDef& grip = resize_grip_def[resize_grip_n];
+        const ImVec2 corner = ImLerp(window->Pos, window->Pos + window->Size, grip.CornerPosN);
+
+        // Using the FlattenChilds button flag we make the resize button accessible even if we are hovering over a child window
+        // The hit rectangle spans grip_hover_outer_size outside the corner and grip_hover_inner_size inside it.
+        // InnerDir may be negative on either axis, hence the Min/Max swap below.
+        ImRect resize_rect(corner - grip.InnerDir * grip_hover_outer_size, corner + grip.InnerDir * grip_hover_inner_size);
+        if (resize_rect.Min.x > resize_rect.Max.x) ImSwap(resize_rect.Min.x, resize_rect.Max.x);
+        if (resize_rect.Min.y > resize_rect.Max.y) ImSwap(resize_rect.Min.y, resize_rect.Max.y);
+        bool hovered, held;
+        ButtonBehavior(resize_rect, window->GetID(resize_grip_n), &hovered, &held, ImGuiButtonFlags_FlattenChildren | ImGuiButtonFlags_NoNavFocus);
+        //GetForegroundDrawList(window)->AddRect(resize_rect.Min, resize_rect.Max, IM_COL32(255, 255, 0, 255));
+        if (hovered || held)
+            g.MouseCursor = (resize_grip_n & 1) ? ImGuiMouseCursor_ResizeNESW : ImGuiMouseCursor_ResizeNWSE;
+
+        if (held && g.IO.MouseDoubleClicked[0] && resize_grip_n == 0)
+        {
+            // Manual auto-fit when double-clicking
+            size_target = CalcWindowSizeAfterConstraint(window, size_auto_fit);
+            ret_auto_fit = true;
+            ClearActiveID();
+        }
+        else if (held)
+        {
+            // Resize from any of the four corners
+            // We don't use an incremental MouseDelta but rather compute an absolute target size based on mouse position
+            ImVec2 corner_target = g.IO.MousePos - g.ActiveIdClickOffset + ImLerp(grip.InnerDir * grip_hover_outer_size, grip.InnerDir * -grip_hover_inner_size, grip.CornerPosN); // Corner of the window corresponding to our corner grip
+            // Only the sides that this corner actually moves are clamped against visibility_rect; the others are left unbounded.
+            ImVec2 clamp_min = ImVec2(grip.CornerPosN.x == 1.0f ? visibility_rect.Min.x : -FLT_MAX, grip.CornerPosN.y == 1.0f ? visibility_rect.Min.y : -FLT_MAX);
+            ImVec2 clamp_max = ImVec2(grip.CornerPosN.x == 0.0f ? visibility_rect.Max.x : +FLT_MAX, grip.CornerPosN.y == 0.0f ? visibility_rect.Max.y : +FLT_MAX);
+            corner_target = ImClamp(corner_target, clamp_min, clamp_max);
+            CalcResizePosSizeFromAnyCorner(window, corner_target, grip.CornerPosN, &pos_target, &size_target);
+        }
+        if (resize_grip_n == 0 || held || hovered)
+            resize_grip_col[resize_grip_n] = GetColorU32(held ? ImGuiCol_ResizeGripActive : hovered ? ImGuiCol_ResizeGripHovered : ImGuiCol_ResizeGrip);
+    }
+    // Manual resize from borders (the loop runs zero times unless g.IO.ConfigWindowsResizeFromEdges is set). Border IDs use indexes 4..7.
+    for (int border_n = 0; border_n < resize_border_count; border_n++)
+    {
+        bool hovered, held;
+        ImRect border_rect = GetResizeBorderRect(window, border_n, grip_hover_inner_size, WINDOWS_RESIZE_FROM_EDGES_HALF_THICKNESS);
+        ButtonBehavior(border_rect, window->GetID(border_n + 4), &hovered, &held, ImGuiButtonFlags_FlattenChildren);
+        //GetForegroundDrawLists(window)->AddRect(border_rect.Min, border_rect.Max, IM_COL32(255, 255, 0, 255));
+        // The cursor only changes after the border has been hovered for longer than the feedback timer, or immediately while held.
+        if ((hovered && g.HoveredIdTimer > WINDOWS_RESIZE_FROM_EDGES_FEEDBACK_TIMER) || held)
+        {
+            g.MouseCursor = (border_n & 1) ? ImGuiMouseCursor_ResizeEW : ImGuiMouseCursor_ResizeNS;
+            if (held)
+                *border_held = border_n;
+        }
+        if (held)
+        {
+            // A border drag is expressed as a corner drag: border_posn selects the corner handed to CalcResizePosSizeFromAnyCorner(),
+            // and only the axis perpendicular to the border is taken from the mouse position.
+            ImVec2 border_target = window->Pos;
+            ImVec2 border_posn;
+            if (border_n == 0) { border_posn = ImVec2(0, 0); border_target.y = (g.IO.MousePos.y - g.ActiveIdClickOffset.y + WINDOWS_RESIZE_FROM_EDGES_HALF_THICKNESS); } // Top
+            if (border_n == 1) { border_posn = ImVec2(1, 0); border_target.x = (g.IO.MousePos.x - g.ActiveIdClickOffset.x + WINDOWS_RESIZE_FROM_EDGES_HALF_THICKNESS); } // Right
+            if (border_n == 2) { border_posn = ImVec2(0, 1); border_target.y = (g.IO.MousePos.y - g.ActiveIdClickOffset.y + WINDOWS_RESIZE_FROM_EDGES_HALF_THICKNESS); } // Bottom
+            if (border_n == 3) { border_posn = ImVec2(0, 0); border_target.x = (g.IO.MousePos.x - g.ActiveIdClickOffset.x + WINDOWS_RESIZE_FROM_EDGES_HALF_THICKNESS); } // Left
+            ImVec2 clamp_min = ImVec2(border_n == 1 ? visibility_rect.Min.x : -FLT_MAX, border_n == 2 ? visibility_rect.Min.y : -FLT_MAX);
+            ImVec2 clamp_max = ImVec2(border_n == 3 ? visibility_rect.Max.x : +FLT_MAX, border_n == 0 ? visibility_rect.Max.y : +FLT_MAX);
+            border_target = ImClamp(border_target, clamp_min, clamp_max);
+            CalcResizePosSizeFromAnyCorner(window, border_target, border_posn, &pos_target, &size_target);
+        }
+    }
+    PopID();
+
+    // Restore nav layer
+    window->DC.NavLayerCurrent = ImGuiNavLayer_Main;
+
+    // Navigation resize (keyboard/gamepad)
+    if (g.NavWindowingTarget && g.NavWindowingTarget->RootWindow == window)
+    {
+        ImVec2 nav_resize_delta;
+        if (g.NavInputSource == ImGuiInputSource_NavKeyboard && g.IO.KeyShift)
+            nav_resize_delta = GetNavInputAmount2d(ImGuiNavDirSourceFlags_Keyboard, ImGuiInputReadMode_Down);
+        if (g.NavInputSource == ImGuiInputSource_NavGamepad)
+            nav_resize_delta = GetNavInputAmount2d(ImGuiNavDirSourceFlags_PadDPad, ImGuiInputReadMode_Down);
+        if (nav_resize_delta.x != 0.0f || nav_resize_delta.y != 0.0f)
+        {
+            const float NAV_RESIZE_SPEED = 600.0f;
+            nav_resize_delta *= ImFloor(NAV_RESIZE_SPEED * g.IO.DeltaTime * ImMin(g.IO.DisplayFramebufferScale.x, g.IO.DisplayFramebufferScale.y));
+            nav_resize_delta = ImMax(nav_resize_delta, visibility_rect.Min - window->Pos - window->Size);
+            g.NavWindowingToggleLayer = false;
+            g.NavDisableMouseHover = true;
+            resize_grip_col[0] = GetColorU32(ImGuiCol_ResizeGripActive);
+            // FIXME-NAV: Should store and accumulate into a separate size buffer to handle sizing constraints properly, right now a constraint will make us stuck.
+            size_target = CalcWindowSizeAfterConstraint(window, window->SizeFull + nav_resize_delta);
+        }
+    }
+
+    // Apply back modified position/size to window
+    if (size_target.x != FLT_MAX)
+    {
+        window->SizeFull = size_target;
+        MarkIniSettingsDirty(window);
+    }
+    if (pos_target.x != FLT_MAX)
+    {
+        window->Pos = ImFloor(pos_target);
+        MarkIniSettingsDirty(window);
+    }
+
+    window->Size = window->SizeFull;
+    return ret_auto_fit;
+}
+
+// Clamp window->Pos to the range [visibility_rect.Min - size_for_clamping, visibility_rect.Max].
+// size_for_clamping is the window size, except that when g.IO.ConfigWindowsMoveFromTitleBarOnly is set and the window
+// has a title bar, only the title bar height is used on the Y axis.
+static inline void ClampWindowRect(ImGuiWindow* window, const ImRect& visibility_rect)
+{
+    ImGuiContext& g = *GImGui;
+    ImVec2 size_for_clamping = window->Size;
+    if (g.IO.ConfigWindowsMoveFromTitleBarOnly && !(window->Flags & ImGuiWindowFlags_NoTitleBar))
+        size_for_clamping.y = window->TitleBarHeight();
+    window->Pos = ImClamp(window->Pos, visibility_rect.Min - size_for_clamping, visibility_rect.Max);
+}
+
+// Draw the outer border elements of 'window' into window->DrawList:
+// - the border rectangle, when the window border size is > 0 and the window has a background,
+// - a highlight along the border stored in window->ResizeBorderHeld (-1 means none),
+// - a line under the title bar, when g.Style.FrameBorderSize > 0 and the window has a title bar.
+static void ImGui::RenderWindowOuterBorders(ImGuiWindow* window)
+{
+    ImGuiContext& g = *GImGui;
+    float rounding = window->WindowRounding;
+    float border_size = window->WindowBorderSize;
+    if (border_size > 0.0f && !(window->Flags & ImGuiWindowFlags_NoBackground))
+        window->DrawList->AddRect(window->Pos, window->Pos + window->Size, GetColorU32(ImGuiCol_Border), rounding, ImDrawCornerFlags_All, border_size);
+
+    int border_held = window->ResizeBorderHeld;
+    if (border_held != -1)
+    {
+        // The highlight is a single stroked path made of two arcs, one around each end of the held border.
+        const ImGuiResizeBorderDef& def = resize_border_def[border_held];
+        ImRect border_r = GetResizeBorderRect(window, border_held, rounding, 0.0f);
+        window->DrawList->PathArcTo(ImLerp(border_r.Min, border_r.Max, def.CornerPosN1) + ImVec2(0.5f, 0.5f) + def.InnerDir * rounding, rounding, def.OuterAngle - IM_PI * 0.25f, def.OuterAngle);
+        window->DrawList->PathArcTo(ImLerp(border_r.Min, border_r.Max, def.CornerPosN2) + ImVec2(0.5f, 0.5f) + def.InnerDir * rounding, rounding, def.OuterAngle, def.OuterAngle + IM_PI * 0.25f);
+        window->DrawList->PathStroke(GetColorU32(ImGuiCol_SeparatorActive), false, ImMax(2.0f, border_size)); // Thicker than usual
+    }
+    if (g.Style.FrameBorderSize > 0 && !(window->Flags & ImGuiWindowFlags_NoTitleBar))
+    {
+        float y = window->Pos.y + window->TitleBarHeight() - 1;
+        window->DrawList->AddLine(ImVec2(window->Pos.x + border_size, y), ImVec2(window->Pos.x + window->Size.x - border_size, y), GetColorU32(ImGuiCol_Border), g.Style.FrameBorderSize);
+    }
+}
+
+// Draw background and borders
+// Draw and handle scrollbars
+// - title_bar_rect:         rectangle filled with the title bar color.
+// - title_bar_is_highlight: selects ImGuiCol_TitleBgActive instead of the regular/collapsed title bar color.
+// - resize_grip_count, resize_grip_col, resize_grip_draw_size: number, colors and size of the corner grips to draw.
+// A collapsed window only gets its title bar frame; otherwise background, title bar, menu bar, scrollbars,
+// resize grips and outer borders are submitted in that order.
+void ImGui::RenderWindowDecorations(ImGuiWindow* window, const ImRect& title_bar_rect, bool title_bar_is_highlight, int resize_grip_count, const ImU32 resize_grip_col[4], float resize_grip_draw_size)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiStyle& style = g.Style;
+    ImGuiWindowFlags flags = window->Flags;
+
+    // Ensure that ScrollBar doesn't read last frame's SkipItems
+    IM_ASSERT(window->BeginCount == 0);
+    window->SkipItems = false;
+
+    // Draw window + handle manual resize
+    // As we highlight the title bar when want_focus is set, multiple reappearing windows will have their title bar highlighted on their reappearing frame.
+    const float window_rounding = window->WindowRounding;
+    const float window_border_size = window->WindowBorderSize;
+    if (window->Collapsed)
+    {
+        // Title bar only
+        // FrameBorderSize is temporarily replaced by the window border size around the RenderFrame() call, then restored.
+        float backup_border_size = style.FrameBorderSize;
+        g.Style.FrameBorderSize = window->WindowBorderSize;
+        ImU32 title_bar_col = GetColorU32((title_bar_is_highlight && !g.NavDisableHighlight) ? ImGuiCol_TitleBgActive : ImGuiCol_TitleBgCollapsed);
+        RenderFrame(title_bar_rect.Min, title_bar_rect.Max, title_bar_col, true, window_rounding);
+        g.Style.FrameBorderSize = backup_border_size;
+    }
+    else
+    {
+        // Window background
+        if (!(flags & ImGuiWindowFlags_NoBackground))
+        {
+            ImU32 bg_col = GetColorU32(GetWindowBgColorIdxFromFlags(flags));
+            bool override_alpha = false;
+            float alpha = 1.0f;
+            if (g.NextWindowData.Flags & ImGuiNextWindowDataFlags_HasBgAlpha)
+            {
+                alpha = g.NextWindowData.BgAlphaVal;
+                override_alpha = true;
+            }
+            // Replace only the alpha channel of the packed color, keeping the other channels.
+            if (override_alpha)
+                bg_col = (bg_col & ~IM_COL32_A_MASK) | (IM_F32_TO_INT8_SAT(alpha) << IM_COL32_A_SHIFT);
+            window->DrawList->AddRectFilled(window->Pos + ImVec2(0, window->TitleBarHeight()), window->Pos + window->Size, bg_col, window_rounding, (flags & ImGuiWindowFlags_NoTitleBar) ? ImDrawCornerFlags_All : ImDrawCornerFlags_Bot);
+        }
+
+        // Title bar
+        if (!(flags & ImGuiWindowFlags_NoTitleBar))
+        {
+            ImU32 title_bar_col = GetColorU32(title_bar_is_highlight ? ImGuiCol_TitleBgActive : ImGuiCol_TitleBg);
+            window->DrawList->AddRectFilled(title_bar_rect.Min, title_bar_rect.Max, title_bar_col, window_rounding, ImDrawCornerFlags_Top);
+        }
+
+        // Menu bar
+        if (flags & ImGuiWindowFlags_MenuBar)
+        {
+            ImRect menu_bar_rect = window->MenuBarRect();
+            menu_bar_rect.ClipWith(window->Rect()); // Soft clipping, in particular child window don't have minimum size covering the menu bar so this is useful for them.
+            window->DrawList->AddRectFilled(menu_bar_rect.Min + ImVec2(window_border_size, 0), menu_bar_rect.Max - ImVec2(window_border_size, 0), GetColorU32(ImGuiCol_MenuBarBg), (flags & ImGuiWindowFlags_NoTitleBar) ? window_rounding : 0.0f, ImDrawCornerFlags_Top);
+            if (style.FrameBorderSize > 0.0f && menu_bar_rect.Max.y < window->Pos.y + window->Size.y)
+                window->DrawList->AddLine(menu_bar_rect.GetBL(), menu_bar_rect.GetBR(), GetColorU32(ImGuiCol_Border), style.FrameBorderSize);
+        }
+
+        // Scrollbars
+        if (window->ScrollbarX)
+            Scrollbar(ImGuiAxis_X);
+        if (window->ScrollbarY)
+            Scrollbar(ImGuiAxis_Y);
+
+        // Render resize grips (after their input handling so we don't have a frame of latency)
+        if (!(flags & ImGuiWindowFlags_NoResize))
+        {
+            for (int resize_grip_n = 0; resize_grip_n < resize_grip_count; resize_grip_n++)
+            {
+                // Each grip is a filled convex path: two points along the window edges plus an arc around the corner.
+                const ImGuiResizeGripDef& grip = resize_grip_def[resize_grip_n];
+                const ImVec2 corner = ImLerp(window->Pos, window->Pos + window->Size, grip.CornerPosN);
+                window->DrawList->PathLineTo(corner + grip.InnerDir * ((resize_grip_n & 1) ? ImVec2(window_border_size, resize_grip_draw_size) : ImVec2(resize_grip_draw_size, window_border_size)));
+                window->DrawList->PathLineTo(corner + grip.InnerDir * ((resize_grip_n & 1) ? ImVec2(resize_grip_draw_size, window_border_size) : ImVec2(window_border_size, resize_grip_draw_size)));
+                window->DrawList->PathArcToFast(ImVec2(corner.x + grip.InnerDir.x * (window_rounding + window_border_size), corner.y + grip.InnerDir.y * (window_rounding + window_border_size)), window_rounding, grip.AngleMin12, grip.AngleMax12);
+                window->DrawList->PathFillConvex(resize_grip_col[resize_grip_n]);
+            }
+        }
+
+        // Borders
+        RenderWindowOuterBorders(window);
+    }
+}
+
+// Render title text, collapse button, close button
+// - title_bar_rect: rectangle in which buttons and text are laid out.
+// - name:           title text to render.
+// - p_open:         when non-NULL a close button is submitted and *p_open is set to false when it is pressed.
+void ImGui::RenderWindowTitleBarContents(ImGuiWindow* window, const ImRect& title_bar_rect, const char* name, bool* p_open)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiStyle& style = g.Style;
+    ImGuiWindowFlags flags = window->Flags;
+
+    const bool has_close_button = (p_open != NULL);
+    const bool has_collapse_button = !(flags & ImGuiWindowFlags_NoCollapse) && (style.WindowMenuButtonPosition != ImGuiDir_None);
+
+    // Close & Collapse button are on the Menu NavLayer and don't default focus (unless there's nothing else on that layer)
+    const ImGuiItemFlags item_flags_backup = window->DC.ItemFlags;
+    window->DC.ItemFlags |= ImGuiItemFlags_NoNavDefaultFocus;
+    window->DC.NavLayerCurrent = ImGuiNavLayer_Menu;
+
+    // Layout buttons
+    // FIXME: Would be nice to generalize the subtleties expressed here into reusable code.
+    // pad_l / pad_r accumulate the horizontal space reserved on each side of the title bar (frame padding plus buttons).
+    float pad_l = style.FramePadding.x;
+    float pad_r = style.FramePadding.x;
+    float button_sz = g.FontSize;
+    ImVec2 close_button_pos;
+    ImVec2 collapse_button_pos;
+    if (has_close_button)
+    {
+        pad_r += button_sz;
+        close_button_pos = ImVec2(title_bar_rect.Max.x - pad_r - style.FramePadding.x, title_bar_rect.Min.y);
+    }
+    if (has_collapse_button && style.WindowMenuButtonPosition == ImGuiDir_Right)
+    {
+        pad_r += button_sz;
+        collapse_button_pos = ImVec2(title_bar_rect.Max.x - pad_r - style.FramePadding.x, title_bar_rect.Min.y);
+    }
+    if (has_collapse_button && style.WindowMenuButtonPosition == ImGuiDir_Left)
+    {
+        collapse_button_pos = ImVec2(title_bar_rect.Min.x + pad_l - style.FramePadding.x, title_bar_rect.Min.y);
+        pad_l += button_sz;
+    }
+
+    // Collapse button (submitting first so it gets priority when choosing a navigation init fallback)
+    if (has_collapse_button)
+        if (CollapseButton(window->GetID("#COLLAPSE"), collapse_button_pos))
+            window->WantCollapseToggle = true; // Defer actual collapsing to next frame as we are too far in the Begin() function
+
+    // Close button
+    if (has_close_button)
+        if (CloseButton(window->GetID("#CLOSE"), close_button_pos))
+            *p_open = false;
+
+    window->DC.NavLayerCurrent = ImGuiNavLayer_Main;
+    window->DC.ItemFlags = item_flags_backup;
+
+    // Title bar text (with: horizontal alignment, avoiding collapse/close button, optional "unsaved document" marker)
+    // FIXME: Refactor text alignment facilities along with RenderText helpers, this is WAY too much messy code..
+    const char* UNSAVED_DOCUMENT_MARKER = "*";
+    const float marker_size_x = (flags & ImGuiWindowFlags_UnsavedDocument) ? CalcTextSize(UNSAVED_DOCUMENT_MARKER, NULL, false).x : 0.0f;
+    const ImVec2 text_size = CalcTextSize(name, NULL, true) + ImVec2(marker_size_x, 0.0f);
+
+    // As a nice touch we try to ensure that centered title text doesn't get affected by visibility of Close/Collapse button,
+    // while uncentered title text will still reach edges correctly.
+    if (pad_l > style.FramePadding.x)
+        pad_l += g.Style.ItemInnerSpacing.x;
+    if (pad_r > style.FramePadding.x)
+        pad_r += g.Style.ItemInnerSpacing.x;
+    if (style.WindowTitleAlign.x > 0.0f && style.WindowTitleAlign.x < 1.0f)
+    {
+        float centerness = ImSaturate(1.0f - ImFabs(style.WindowTitleAlign.x - 0.5f) * 2.0f); // 0.0f on either edges, 1.0f on center
+        float pad_extend = ImMin(ImMax(pad_l, pad_r), title_bar_rect.GetWidth() - pad_l - pad_r - text_size.x);
+        pad_l = ImMax(pad_l, pad_extend * centerness);
+        pad_r = ImMax(pad_r, pad_extend * centerness);
+    }
+
+    // layout_r is the area the text is aligned in; clip_r extends it to the right by ItemInnerSpacing.x, without exceeding the title bar.
+    ImRect layout_r(title_bar_rect.Min.x + pad_l, title_bar_rect.Min.y, title_bar_rect.Max.x - pad_r, title_bar_rect.Max.y);
+    ImRect clip_r(layout_r.Min.x, layout_r.Min.y, ImMin(layout_r.Max.x + g.Style.ItemInnerSpacing.x, title_bar_rect.Max.x), layout_r.Max.y);
+    //if (g.IO.KeyShift) window->DrawList->AddRect(layout_r.Min, layout_r.Max, IM_COL32(255, 128, 0, 255)); // [DEBUG]
+    //if (g.IO.KeyCtrl) window->DrawList->AddRect(clip_r.Min, clip_r.Max, IM_COL32(255, 128, 0, 255)); // [DEBUG]
+    RenderTextClipped(layout_r.Min, layout_r.Max, name, NULL, &text_size, style.WindowTitleAlign, &clip_r);
+    if (flags & ImGuiWindowFlags_UnsavedDocument)
+    {
+        // The marker is placed right after the aligned title text and raised by a quarter of the font size.
+        ImVec2 marker_pos = ImVec2(ImMax(layout_r.Min.x, layout_r.Min.x + (layout_r.GetWidth() - text_size.x) * style.WindowTitleAlign.x) + text_size.x, layout_r.Min.y) + ImVec2(2 - marker_size_x, 0.0f);
+        ImVec2 off = ImVec2(0.0f, IM_FLOOR(-g.FontSize * 0.25f));
+        RenderTextClipped(marker_pos + off, layout_r.Max + off, UNSAVED_DOCUMENT_MARKER, NULL, NULL, ImVec2(0, style.WindowTitleAlign.y), &clip_r);
+    }
+}
+
+// Set window->ParentWindow and recompute the three root links of 'window' from 'flags' and 'parent_window':
+// - RootWindow:                     the parent's RootWindow for child windows that are not tooltips, otherwise the window itself.
+// - RootWindowForTitleBarHighlight: the parent's value for non-modal child/popup windows, otherwise the window itself.
+// - RootWindowForNav:               walks up ParentWindow while the current candidate has ImGuiWindowFlags_NavFlattened.
+void ImGui::UpdateWindowParentAndRootLinks(ImGuiWindow* window, ImGuiWindowFlags flags, ImGuiWindow* parent_window)
+{
+    window->ParentWindow = parent_window;
+    window->RootWindow = window->RootWindowForTitleBarHighlight = window->RootWindowForNav = window;
+    if (parent_window && (flags & ImGuiWindowFlags_ChildWindow) && !(flags & ImGuiWindowFlags_Tooltip))
+        window->RootWindow = parent_window->RootWindow;
+    if (parent_window && !(flags & ImGuiWindowFlags_Modal) && (flags & (ImGuiWindowFlags_ChildWindow | ImGuiWindowFlags_Popup)))
+        window->RootWindowForTitleBarHighlight = parent_window->RootWindowForTitleBarHighlight;
+    while (window->RootWindowForNav->Flags & ImGuiWindowFlags_NavFlattened)
+    {
+        IM_ASSERT(window->RootWindowForNav->ParentWindow != NULL);
+        window->RootWindowForNav = window->RootWindowForNav->ParentWindow;
+    }
+}
+
+// Push a new Dear ImGui window to add widgets to.
+// - A default window called "Debug" is automatically stacked at the beginning of every frame so you can use widgets without explicitly calling a Begin/End pair.
+// - Begin/End can be called multiple times during the frame with the same window name to append content.
+// - The window name is used as a unique identifier to preserve window information across frames (and save rudimentary information to the .ini file).
+// You can use the "##" or "###" markers to use the same label with different id, or same id with different label. See documentation at the top of this file.
+// - Return false when window is collapsed, so you can early out in your code. You always need to call ImGui::End() even if false is returned.
+// - Passing 'bool* p_open' displays a Close button on the upper-right corner of the window, the pointed value will be set to false when the button is pressed.
+bool ImGui::Begin(const char* name, bool* p_open, ImGuiWindowFlags flags) +{ + ImGuiContext& g = *GImGui; + const ImGuiStyle& style = g.Style; + IM_ASSERT(name != NULL && name[0] != '\0'); // Window name required + IM_ASSERT(g.WithinFrameScope); // Forgot to call ImGui::NewFrame() + IM_ASSERT(g.FrameCountEnded != g.FrameCount); // Called ImGui::Render() or ImGui::EndFrame() and haven't called ImGui::NewFrame() again yet + + // Find or create + ImGuiWindow* window = FindWindowByName(name); + const bool window_just_created = (window == NULL); + if (window_just_created) + window = CreateNewWindow(name, flags); + + // Automatically disable manual moving/resizing when NoInputs is set + if ((flags & ImGuiWindowFlags_NoInputs) == ImGuiWindowFlags_NoInputs) + flags |= ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoResize; + + if (flags & ImGuiWindowFlags_NavFlattened) + IM_ASSERT(flags & ImGuiWindowFlags_ChildWindow); + + const int current_frame = g.FrameCount; + const bool first_begin_of_the_frame = (window->LastFrameActive != current_frame); + window->IsFallbackWindow = (g.CurrentWindowStack.Size == 0 && g.WithinFrameScopeWithImplicitWindow); + + // Update the Appearing flag + bool window_just_activated_by_user = (window->LastFrameActive < current_frame - 1); // Not using !WasActive because the implicit "Debug" window would always toggle off->on + const bool window_just_appearing_after_hidden_for_resize = (window->HiddenFramesCannotSkipItems > 0); + if (flags & ImGuiWindowFlags_Popup) + { + ImGuiPopupData& popup_ref = g.OpenPopupStack[g.BeginPopupStack.Size]; + window_just_activated_by_user |= (window->PopupId != popup_ref.PopupId); // We recycle popups so treat window as activated if popup id changed + window_just_activated_by_user |= (window != popup_ref.Window); + } + window->Appearing = (window_just_activated_by_user || window_just_appearing_after_hidden_for_resize); + if (window->Appearing) + SetWindowConditionAllowFlags(window, ImGuiCond_Appearing, true); + + // 
Update Flags, LastFrameActive, BeginOrderXXX fields + if (first_begin_of_the_frame) + { + window->Flags = (ImGuiWindowFlags)flags; + window->LastFrameActive = current_frame; + window->LastTimeActive = (float)g.Time; + window->BeginOrderWithinParent = 0; + window->BeginOrderWithinContext = (short)(g.WindowsActiveCount++); + } + else + { + flags = window->Flags; + } + + // Parent window is latched only on the first call to Begin() of the frame, so further append-calls can be done from a different window stack + ImGuiWindow* parent_window_in_stack = g.CurrentWindowStack.empty() ? NULL : g.CurrentWindowStack.back(); + ImGuiWindow* parent_window = first_begin_of_the_frame ? ((flags & (ImGuiWindowFlags_ChildWindow | ImGuiWindowFlags_Popup)) ? parent_window_in_stack : NULL) : window->ParentWindow; + IM_ASSERT(parent_window != NULL || !(flags & ImGuiWindowFlags_ChildWindow)); + + // We allow window memory to be compacted so recreate the base stack when needed. + if (window->IDStack.Size == 0) + window->IDStack.push_back(window->ID); + + // Add to stack + // We intentionally set g.CurrentWindow to NULL to prevent usage until when the viewport is set, then will call SetCurrentWindow() + g.CurrentWindowStack.push_back(window); + g.CurrentWindow = window; + window->DC.StackSizesOnBegin.SetToCurrentState(); + g.CurrentWindow = NULL; + + if (flags & ImGuiWindowFlags_Popup) + { + ImGuiPopupData& popup_ref = g.OpenPopupStack[g.BeginPopupStack.Size]; + popup_ref.Window = window; + g.BeginPopupStack.push_back(popup_ref); + window->PopupId = popup_ref.PopupId; + } + + if (window_just_appearing_after_hidden_for_resize && !(flags & ImGuiWindowFlags_ChildWindow)) + window->NavLastIds[0] = 0; + + // Update ->RootWindow and others pointers (before any possible call to FocusWindow) + if (first_begin_of_the_frame) + UpdateWindowParentAndRootLinks(window, flags, parent_window); + + // Process SetNextWindow***() calls + // (FIXME: Consider splitting the HasXXX flags into X/Y components + bool 
window_pos_set_by_api = false; + bool window_size_x_set_by_api = false, window_size_y_set_by_api = false; + if (g.NextWindowData.Flags & ImGuiNextWindowDataFlags_HasPos) + { + window_pos_set_by_api = (window->SetWindowPosAllowFlags & g.NextWindowData.PosCond) != 0; + if (window_pos_set_by_api && ImLengthSqr(g.NextWindowData.PosPivotVal) > 0.00001f) + { + // May be processed on the next frame if this is our first frame and we are measuring size + // FIXME: Look into removing the branch so everything can go through this same code path for consistency. + window->SetWindowPosVal = g.NextWindowData.PosVal; + window->SetWindowPosPivot = g.NextWindowData.PosPivotVal; + window->SetWindowPosAllowFlags &= ~(ImGuiCond_Once | ImGuiCond_FirstUseEver | ImGuiCond_Appearing); + } + else + { + SetWindowPos(window, g.NextWindowData.PosVal, g.NextWindowData.PosCond); + } + } + if (g.NextWindowData.Flags & ImGuiNextWindowDataFlags_HasSize) + { + window_size_x_set_by_api = (window->SetWindowSizeAllowFlags & g.NextWindowData.SizeCond) != 0 && (g.NextWindowData.SizeVal.x > 0.0f); + window_size_y_set_by_api = (window->SetWindowSizeAllowFlags & g.NextWindowData.SizeCond) != 0 && (g.NextWindowData.SizeVal.y > 0.0f); + SetWindowSize(window, g.NextWindowData.SizeVal, g.NextWindowData.SizeCond); + } + if (g.NextWindowData.Flags & ImGuiNextWindowDataFlags_HasScroll) + { + if (g.NextWindowData.ScrollVal.x >= 0.0f) + { + window->ScrollTarget.x = g.NextWindowData.ScrollVal.x; + window->ScrollTargetCenterRatio.x = 0.0f; + } + if (g.NextWindowData.ScrollVal.y >= 0.0f) + { + window->ScrollTarget.y = g.NextWindowData.ScrollVal.y; + window->ScrollTargetCenterRatio.y = 0.0f; + } + } + if (g.NextWindowData.Flags & ImGuiNextWindowDataFlags_HasContentSize) + window->ContentSizeExplicit = g.NextWindowData.ContentSizeVal; + else if (first_begin_of_the_frame) + window->ContentSizeExplicit = ImVec2(0.0f, 0.0f); + if (g.NextWindowData.Flags & ImGuiNextWindowDataFlags_HasCollapsed) + SetWindowCollapsed(window, 
g.NextWindowData.CollapsedVal, g.NextWindowData.CollapsedCond); + if (g.NextWindowData.Flags & ImGuiNextWindowDataFlags_HasFocus) + FocusWindow(window); + if (window->Appearing) + SetWindowConditionAllowFlags(window, ImGuiCond_Appearing, false); + + // When reusing window again multiple times a frame, just append content (don't need to setup again) + if (first_begin_of_the_frame) + { + // Initialize + const bool window_is_child_tooltip = (flags & ImGuiWindowFlags_ChildWindow) && (flags & ImGuiWindowFlags_Tooltip); // FIXME-WIP: Undocumented behavior of Child+Tooltip for pinned tooltip (#1345) + window->Active = true; + window->HasCloseButton = (p_open != NULL); + window->ClipRect = ImVec4(-FLT_MAX, -FLT_MAX, +FLT_MAX, +FLT_MAX); + window->IDStack.resize(1); + window->DrawList->_ResetForNewFrame(); + window->DC.CurrentTableIdx = -1; + + // Restore buffer capacity when woken from a compacted state, to avoid + if (window->MemoryCompacted) + GcAwakeTransientWindowBuffers(window); + + // Update stored window name when it changes (which can _only_ happen with the "###" operator, so the ID would stay unchanged). + // The title bar always display the 'name' parameter, so we only update the string storage if it needs to be visible to the end-user elsewhere. 
+ bool window_title_visible_elsewhere = false; + if (g.NavWindowingListWindow != NULL && (window->Flags & ImGuiWindowFlags_NoNavFocus) == 0) // Window titles visible when using CTRL+TAB + window_title_visible_elsewhere = true; + if (window_title_visible_elsewhere && !window_just_created && strcmp(name, window->Name) != 0) + { + size_t buf_len = (size_t)window->NameBufLen; + window->Name = ImStrdupcpy(window->Name, &buf_len, name); + window->NameBufLen = (int)buf_len; + } + + // UPDATE CONTENTS SIZE, UPDATE HIDDEN STATUS + + // Update contents size from last frame for auto-fitting (or use explicit size) + CalcWindowContentSizes(window, &window->ContentSize, &window->ContentSizeIdeal); + if (window->HiddenFramesCanSkipItems > 0) + window->HiddenFramesCanSkipItems--; + if (window->HiddenFramesCannotSkipItems > 0) + window->HiddenFramesCannotSkipItems--; + if (window->HiddenFramesForRenderOnly > 0) + window->HiddenFramesForRenderOnly--; + + // Hide new windows for one frame until they calculate their size + if (window_just_created && (!window_size_x_set_by_api || !window_size_y_set_by_api)) + window->HiddenFramesCannotSkipItems = 1; + + // Hide popup/tooltip window when re-opening while we measure size (because we recycle the windows) + // We reset Size/ContentSize for reappearing popups/tooltips early in this function, so further code won't be tempted to use the old size. 
+ if (window_just_activated_by_user && (flags & (ImGuiWindowFlags_Popup | ImGuiWindowFlags_Tooltip)) != 0) + { + window->HiddenFramesCannotSkipItems = 1; + if (flags & ImGuiWindowFlags_AlwaysAutoResize) + { + if (!window_size_x_set_by_api) + window->Size.x = window->SizeFull.x = 0.f; + if (!window_size_y_set_by_api) + window->Size.y = window->SizeFull.y = 0.f; + window->ContentSize = window->ContentSizeIdeal = ImVec2(0.f, 0.f); + } + } + + // SELECT VIEWPORT + // FIXME-VIEWPORT: In the docking/viewport branch, this is the point where we select the current viewport (which may affect the style) + SetCurrentWindow(window); + + // LOCK BORDER SIZE AND PADDING FOR THE FRAME (so that altering them doesn't cause inconsistencies) + + if (flags & ImGuiWindowFlags_ChildWindow) + window->WindowBorderSize = style.ChildBorderSize; + else + window->WindowBorderSize = ((flags & (ImGuiWindowFlags_Popup | ImGuiWindowFlags_Tooltip)) && !(flags & ImGuiWindowFlags_Modal)) ? style.PopupBorderSize : style.WindowBorderSize; + window->WindowPadding = style.WindowPadding; + if ((flags & ImGuiWindowFlags_ChildWindow) && !(flags & (ImGuiWindowFlags_AlwaysUseWindowPadding | ImGuiWindowFlags_Popup)) && window->WindowBorderSize == 0.0f) + window->WindowPadding = ImVec2(0.0f, (flags & ImGuiWindowFlags_MenuBar) ? style.WindowPadding.y : 0.0f); + + // Lock menu offset so size calculation can use it as menu-bar windows need a minimum size. 
+ window->DC.MenuBarOffset.x = ImMax(ImMax(window->WindowPadding.x, style.ItemSpacing.x), g.NextWindowData.MenuBarOffsetMinVal.x); + window->DC.MenuBarOffset.y = g.NextWindowData.MenuBarOffsetMinVal.y; + + // Collapse window by double-clicking on title bar + // At this point we don't have a clipping rectangle setup yet, so we can use the title bar area for hit detection and drawing + if (!(flags & ImGuiWindowFlags_NoTitleBar) && !(flags & ImGuiWindowFlags_NoCollapse)) + { + // We don't use a regular button+id to test for double-click on title bar (mostly due to legacy reason, could be fixed), so verify that we don't have items over the title bar. + ImRect title_bar_rect = window->TitleBarRect(); + if (g.HoveredWindow == window && g.HoveredId == 0 && g.HoveredIdPreviousFrame == 0 && IsMouseHoveringRect(title_bar_rect.Min, title_bar_rect.Max) && g.IO.MouseDoubleClicked[0]) + window->WantCollapseToggle = true; + if (window->WantCollapseToggle) + { + window->Collapsed = !window->Collapsed; + MarkIniSettingsDirty(window); + FocusWindow(window); + } + } + else + { + window->Collapsed = false; + } + window->WantCollapseToggle = false; + + // SIZE + + // Calculate auto-fit size, handle automatic resize + const ImVec2 size_auto_fit = CalcWindowAutoFitSize(window, window->ContentSizeIdeal); + bool use_current_size_for_scrollbar_x = window_just_created; + bool use_current_size_for_scrollbar_y = window_just_created; + if ((flags & ImGuiWindowFlags_AlwaysAutoResize) && !window->Collapsed) + { + // Using SetNextWindowSize() overrides ImGuiWindowFlags_AlwaysAutoResize, so it can be used on tooltips/popups, etc. 
+ if (!window_size_x_set_by_api) + { + window->SizeFull.x = size_auto_fit.x; + use_current_size_for_scrollbar_x = true; + } + if (!window_size_y_set_by_api) + { + window->SizeFull.y = size_auto_fit.y; + use_current_size_for_scrollbar_y = true; + } + } + else if (window->AutoFitFramesX > 0 || window->AutoFitFramesY > 0) + { + // Auto-fit may only grow window during the first few frames + // We still process initial auto-fit on collapsed windows to get a window width, but otherwise don't honor ImGuiWindowFlags_AlwaysAutoResize when collapsed. + if (!window_size_x_set_by_api && window->AutoFitFramesX > 0) + { + window->SizeFull.x = window->AutoFitOnlyGrows ? ImMax(window->SizeFull.x, size_auto_fit.x) : size_auto_fit.x; + use_current_size_for_scrollbar_x = true; + } + if (!window_size_y_set_by_api && window->AutoFitFramesY > 0) + { + window->SizeFull.y = window->AutoFitOnlyGrows ? ImMax(window->SizeFull.y, size_auto_fit.y) : size_auto_fit.y; + use_current_size_for_scrollbar_y = true; + } + if (!window->Collapsed) + MarkIniSettingsDirty(window); + } + + // Apply minimum/maximum window size constraints and final size + window->SizeFull = CalcWindowSizeAfterConstraint(window, window->SizeFull); + window->Size = window->Collapsed && !(flags & ImGuiWindowFlags_ChildWindow) ? 
window->TitleBarRect().GetSize() : window->SizeFull; + + // Decoration size + const float decoration_up_height = window->TitleBarHeight() + window->MenuBarHeight(); + + // POSITION + + // Popup latch its initial position, will position itself when it appears next frame + if (window_just_activated_by_user) + { + window->AutoPosLastDirection = ImGuiDir_None; + if ((flags & ImGuiWindowFlags_Popup) != 0 && !(flags & ImGuiWindowFlags_Modal) && !window_pos_set_by_api) // FIXME: BeginPopup() could use SetNextWindowPos() + window->Pos = g.BeginPopupStack.back().OpenPopupPos; + } + + // Position child window + if (flags & ImGuiWindowFlags_ChildWindow) + { + IM_ASSERT(parent_window && parent_window->Active); + window->BeginOrderWithinParent = (short)parent_window->DC.ChildWindows.Size; + parent_window->DC.ChildWindows.push_back(window); + if (!(flags & ImGuiWindowFlags_Popup) && !window_pos_set_by_api && !window_is_child_tooltip) + window->Pos = parent_window->DC.CursorPos; + } + + const bool window_pos_with_pivot = (window->SetWindowPosVal.x != FLT_MAX && window->HiddenFramesCannotSkipItems == 0); + if (window_pos_with_pivot) + SetWindowPos(window, window->SetWindowPosVal - window->Size * window->SetWindowPosPivot, 0); // Position given a pivot (e.g. for centering) + else if ((flags & ImGuiWindowFlags_ChildMenu) != 0) + window->Pos = FindBestWindowPosForPopup(window); + else if ((flags & ImGuiWindowFlags_Popup) != 0 && !window_pos_set_by_api && window_just_appearing_after_hidden_for_resize) + window->Pos = FindBestWindowPosForPopup(window); + else if ((flags & ImGuiWindowFlags_Tooltip) != 0 && !window_pos_set_by_api && !window_is_child_tooltip) + window->Pos = FindBestWindowPosForPopup(window); + + // Calculate the range of allowed position for that window (to be movable and visible past safe area padding) + // When clamping to stay visible, we will enforce that window->Pos stays inside of visibility_rect. 
+ ImRect viewport_rect(GetViewportRect()); + ImVec2 visibility_padding = ImMax(style.DisplayWindowPadding, style.DisplaySafeAreaPadding); + ImRect visibility_rect(viewport_rect.Min + visibility_padding, viewport_rect.Max - visibility_padding); + + // Clamp position/size so window stays visible within its viewport or monitor + // Ignore zero-sized display explicitly to avoid losing positions if a window manager reports zero-sized window when initializing or minimizing. + if (!window_pos_set_by_api && !(flags & ImGuiWindowFlags_ChildWindow) && window->AutoFitFramesX <= 0 && window->AutoFitFramesY <= 0) + if (viewport_rect.GetWidth() > 0.0f && viewport_rect.GetHeight() > 0.0f) + ClampWindowRect(window, visibility_rect); + window->Pos = ImFloor(window->Pos); + + // Lock window rounding for the frame (so that altering them doesn't cause inconsistencies) + // Large values tend to lead to variety of artifacts and are not recommended. + window->WindowRounding = (flags & ImGuiWindowFlags_ChildWindow) ? style.ChildRounding : ((flags & ImGuiWindowFlags_Popup) && !(flags & ImGuiWindowFlags_Modal)) ? style.PopupRounding : style.WindowRounding; + + // For windows with title bar or menu bar, we clamp to FrameHeight(FontSize + FramePadding.y * 2.0f) to completely hide artifacts. 
+ //if ((window->Flags & ImGuiWindowFlags_MenuBar) || !(window->Flags & ImGuiWindowFlags_NoTitleBar)) + // window->WindowRounding = ImMin(window->WindowRounding, g.FontSize + style.FramePadding.y * 2.0f); + + // Apply window focus (new and reactivated windows are moved to front) + bool want_focus = false; + if (window_just_activated_by_user && !(flags & ImGuiWindowFlags_NoFocusOnAppearing)) + { + if (flags & ImGuiWindowFlags_Popup) + want_focus = true; + else if ((flags & (ImGuiWindowFlags_ChildWindow | ImGuiWindowFlags_Tooltip)) == 0) + want_focus = true; + } + + // Handle manual resize: Resize Grips, Borders, Gamepad + int border_held = -1; + ImU32 resize_grip_col[4] = {}; + const int resize_grip_count = g.IO.ConfigWindowsResizeFromEdges ? 2 : 1; // Allow resize from lower-left if we have the mouse cursor feedback for it. + const float resize_grip_draw_size = IM_FLOOR(ImMax(g.FontSize * 1.10f, window->WindowRounding + 1.0f + g.FontSize * 0.2f)); + if (!window->Collapsed) + if (UpdateWindowManualResize(window, size_auto_fit, &border_held, resize_grip_count, &resize_grip_col[0], visibility_rect)) + use_current_size_for_scrollbar_x = use_current_size_for_scrollbar_y = true; + window->ResizeBorderHeld = (signed char)border_held; + + // SCROLLBAR VISIBILITY + + // Update scrollbar visibility (based on the Size that was effective during last frame or the auto-resized Size). + if (!window->Collapsed) + { + // When reading the current size we need to read it after size constraints have been applied. + // When we use InnerRect here we are intentionally reading last frame size, same for ScrollbarSizes values before we set them again. + ImVec2 avail_size_from_current_frame = ImVec2(window->SizeFull.x, window->SizeFull.y - decoration_up_height); + ImVec2 avail_size_from_last_frame = window->InnerRect.GetSize() + window->ScrollbarSizes; + ImVec2 needed_size_from_last_frame = window_just_created ? 
ImVec2(0, 0) : window->ContentSize + window->WindowPadding * 2.0f; + float size_x_for_scrollbars = use_current_size_for_scrollbar_x ? avail_size_from_current_frame.x : avail_size_from_last_frame.x; + float size_y_for_scrollbars = use_current_size_for_scrollbar_y ? avail_size_from_current_frame.y : avail_size_from_last_frame.y; + //bool scrollbar_y_from_last_frame = window->ScrollbarY; // FIXME: May want to use that in the ScrollbarX expression? How many pros vs cons? + window->ScrollbarY = (flags & ImGuiWindowFlags_AlwaysVerticalScrollbar) || ((needed_size_from_last_frame.y > size_y_for_scrollbars) && !(flags & ImGuiWindowFlags_NoScrollbar)); + window->ScrollbarX = (flags & ImGuiWindowFlags_AlwaysHorizontalScrollbar) || ((needed_size_from_last_frame.x > size_x_for_scrollbars - (window->ScrollbarY ? style.ScrollbarSize : 0.0f)) && !(flags & ImGuiWindowFlags_NoScrollbar) && (flags & ImGuiWindowFlags_HorizontalScrollbar)); + if (window->ScrollbarX && !window->ScrollbarY) + window->ScrollbarY = (needed_size_from_last_frame.y > size_y_for_scrollbars) && !(flags & ImGuiWindowFlags_NoScrollbar); + window->ScrollbarSizes = ImVec2(window->ScrollbarY ? style.ScrollbarSize : 0.0f, window->ScrollbarX ? style.ScrollbarSize : 0.0f); + } + + // UPDATE RECTANGLES (1- THOSE NOT AFFECTED BY SCROLLING) + // Update various regions. Variables they depends on should be set above in this function. + // We set this up after processing the resize grip so that our rectangles doesn't lag by a frame. + + // Outer rectangle + // Not affected by window border size. Used by: + // - FindHoveredWindow() (w/ extra padding when border resize is enabled) + // - Begin() initial clipping rect for drawing window background and borders. + // - Begin() clipping whole child + const ImRect host_rect = ((flags & ImGuiWindowFlags_ChildWindow) && !(flags & ImGuiWindowFlags_Popup) && !window_is_child_tooltip) ? 
parent_window->ClipRect : viewport_rect; + const ImRect outer_rect = window->Rect(); + const ImRect title_bar_rect = window->TitleBarRect(); + window->OuterRectClipped = outer_rect; + window->OuterRectClipped.ClipWith(host_rect); + + // Inner rectangle + // Not affected by window border size. Used by: + // - InnerClipRect + // - ScrollToBringRectIntoView() + // - NavUpdatePageUpPageDown() + // - Scrollbar() + window->InnerRect.Min.x = window->Pos.x; + window->InnerRect.Min.y = window->Pos.y + decoration_up_height; + window->InnerRect.Max.x = window->Pos.x + window->Size.x - window->ScrollbarSizes.x; + window->InnerRect.Max.y = window->Pos.y + window->Size.y - window->ScrollbarSizes.y; + + // Inner clipping rectangle. + // Will extend a little bit outside the normal work region. + // This is to allow e.g. Selectable or CollapsingHeader or some separators to cover that space. + // Force round operator last to ensure that e.g. (int)(max.x-min.x) in user's render code produce correct result. + // Note that if our window is collapsed we will end up with an inverted (~null) clipping rectangle which is the correct behavior. + // Affected by window/frame border size. Used by: + // - Begin() initial clip rect + float top_border_size = (((flags & ImGuiWindowFlags_MenuBar) || !(flags & ImGuiWindowFlags_NoTitleBar)) ? style.FrameBorderSize : window->WindowBorderSize); + window->InnerClipRect.Min.x = ImFloor(0.5f + window->InnerRect.Min.x + ImMax(ImFloor(window->WindowPadding.x * 0.5f), window->WindowBorderSize)); + window->InnerClipRect.Min.y = ImFloor(0.5f + window->InnerRect.Min.y + top_border_size); + window->InnerClipRect.Max.x = ImFloor(0.5f + window->InnerRect.Max.x - ImMax(ImFloor(window->WindowPadding.x * 0.5f), window->WindowBorderSize)); + window->InnerClipRect.Max.y = ImFloor(0.5f + window->InnerRect.Max.y - window->WindowBorderSize); + window->InnerClipRect.ClipWithFull(host_rect); + + // Default item width. 
Make it proportional to window size if window manually resizes + if (window->Size.x > 0.0f && !(flags & ImGuiWindowFlags_Tooltip) && !(flags & ImGuiWindowFlags_AlwaysAutoResize)) + window->ItemWidthDefault = ImFloor(window->Size.x * 0.65f); + else + window->ItemWidthDefault = ImFloor(g.FontSize * 16.0f); + + // SCROLLING + + // Lock down maximum scrolling + // The value of ScrollMax are ahead from ScrollbarX/ScrollbarY which is intentionally using InnerRect from previous rect in order to accommodate + // for right/bottom aligned items without creating a scrollbar. + window->ScrollMax.x = ImMax(0.0f, window->ContentSize.x + window->WindowPadding.x * 2.0f - window->InnerRect.GetWidth()); + window->ScrollMax.y = ImMax(0.0f, window->ContentSize.y + window->WindowPadding.y * 2.0f - window->InnerRect.GetHeight()); + + // Apply scrolling + window->Scroll = CalcNextScrollFromScrollTargetAndClamp(window); + window->ScrollTarget = ImVec2(FLT_MAX, FLT_MAX); + + // DRAWING + + // Setup draw list and outer clipping rectangle + IM_ASSERT(window->DrawList->CmdBuffer.Size == 1 && window->DrawList->CmdBuffer[0].ElemCount == 0); + window->DrawList->PushTextureID(g.Font->ContainerAtlas->TexID); + PushClipRect(host_rect.Min, host_rect.Max, false); + + // Draw modal window background (darkens what is behind them, all viewports) + const bool dim_bg_for_modal = (flags & ImGuiWindowFlags_Modal) && window == GetTopMostPopupModal() && window->HiddenFramesCannotSkipItems <= 0; + const bool dim_bg_for_window_list = g.NavWindowingTargetAnim && (window == g.NavWindowingTargetAnim->RootWindow); + if (dim_bg_for_modal || dim_bg_for_window_list) + { + const ImU32 dim_bg_col = GetColorU32(dim_bg_for_modal ? 
ImGuiCol_ModalWindowDimBg : ImGuiCol_NavWindowingDimBg, g.DimBgRatio); + window->DrawList->AddRectFilled(viewport_rect.Min, viewport_rect.Max, dim_bg_col); + } + + // Draw navigation selection/windowing rectangle background + if (dim_bg_for_window_list && window == g.NavWindowingTargetAnim) + { + ImRect bb = window->Rect(); + bb.Expand(g.FontSize); + if (!bb.Contains(viewport_rect)) // Avoid drawing if the window covers all the viewport anyway + window->DrawList->AddRectFilled(bb.Min, bb.Max, GetColorU32(ImGuiCol_NavWindowingHighlight, g.NavWindowingHighlightAlpha * 0.25f), g.Style.WindowRounding); + } + + // Since 1.71, child window can render their decoration (bg color, border, scrollbars, etc.) within their parent to save a draw call. + // When using overlapping child windows, this will break the assumption that child z-order is mapped to submission order. + // We disable this when the parent window has zero vertices, which is a common pattern leading to laying out multiple overlapping child. + // We also disabled this when we have dimming overlay behind this specific one child. + // FIXME: More code may rely on explicit sorting of overlapping child window and would need to disable this somehow. Please get in contact if you are affected. + { + bool render_decorations_in_parent = false; + if ((flags & ImGuiWindowFlags_ChildWindow) && !(flags & ImGuiWindowFlags_Popup) && !window_is_child_tooltip) + if (window->DrawList->CmdBuffer.back().ElemCount == 0 && parent_window->DrawList->VtxBuffer.Size > 0) + render_decorations_in_parent = true; + if (render_decorations_in_parent) + window->DrawList = parent_window->DrawList; + + // Handle title bar, scrollbar, resize grips and resize borders + const ImGuiWindow* window_to_highlight = g.NavWindowingTarget ? 
g.NavWindowingTarget : g.NavWindow; + const bool title_bar_is_highlight = want_focus || (window_to_highlight && window->RootWindowForTitleBarHighlight == window_to_highlight->RootWindowForTitleBarHighlight); + RenderWindowDecorations(window, title_bar_rect, title_bar_is_highlight, resize_grip_count, resize_grip_col, resize_grip_draw_size); + + if (render_decorations_in_parent) + window->DrawList = &window->DrawListInst; + } + + // Draw navigation selection/windowing rectangle border + if (g.NavWindowingTargetAnim == window) + { + float rounding = ImMax(window->WindowRounding, g.Style.WindowRounding); + ImRect bb = window->Rect(); + bb.Expand(g.FontSize); + if (bb.Contains(viewport_rect)) // If a window fits the entire viewport, adjust its highlight inward + { + bb.Expand(-g.FontSize - 1.0f); + rounding = window->WindowRounding; + } + window->DrawList->AddRect(bb.Min, bb.Max, GetColorU32(ImGuiCol_NavWindowingHighlight, g.NavWindowingHighlightAlpha), rounding, ~0, 3.0f); + } + + // UPDATE RECTANGLES (2- THOSE AFFECTED BY SCROLLING) + + // Work rectangle. + // Affected by window padding and border size. Used by: + // - Columns() for right-most edge + // - TreeNode(), CollapsingHeader() for right-most edge + // - BeginTabBar() for right-most edge + const bool allow_scrollbar_x = !(flags & ImGuiWindowFlags_NoScrollbar) && (flags & ImGuiWindowFlags_HorizontalScrollbar); + const bool allow_scrollbar_y = !(flags & ImGuiWindowFlags_NoScrollbar); + const float work_rect_size_x = (window->ContentSizeExplicit.x != 0.0f ? window->ContentSizeExplicit.x : ImMax(allow_scrollbar_x ? window->ContentSize.x : 0.0f, window->Size.x - window->WindowPadding.x * 2.0f - window->ScrollbarSizes.x)); + const float work_rect_size_y = (window->ContentSizeExplicit.y != 0.0f ? window->ContentSizeExplicit.y : ImMax(allow_scrollbar_y ? 
window->ContentSize.y : 0.0f, window->Size.y - window->WindowPadding.y * 2.0f - decoration_up_height - window->ScrollbarSizes.y)); + window->WorkRect.Min.x = ImFloor(window->InnerRect.Min.x - window->Scroll.x + ImMax(window->WindowPadding.x, window->WindowBorderSize)); + window->WorkRect.Min.y = ImFloor(window->InnerRect.Min.y - window->Scroll.y + ImMax(window->WindowPadding.y, window->WindowBorderSize)); + window->WorkRect.Max.x = window->WorkRect.Min.x + work_rect_size_x; + window->WorkRect.Max.y = window->WorkRect.Min.y + work_rect_size_y; + window->ParentWorkRect = window->WorkRect; + + // [LEGACY] Content Region + // FIXME-OBSOLETE: window->ContentRegionRect.Max is currently very misleading / partly faulty, but some BeginChild() patterns relies on it. + // Used by: + // - Mouse wheel scrolling + many other things + window->ContentRegionRect.Min.x = window->Pos.x - window->Scroll.x + window->WindowPadding.x; + window->ContentRegionRect.Min.y = window->Pos.y - window->Scroll.y + window->WindowPadding.y + decoration_up_height; + window->ContentRegionRect.Max.x = window->ContentRegionRect.Min.x + (window->ContentSizeExplicit.x != 0.0f ? window->ContentSizeExplicit.x : (window->Size.x - window->WindowPadding.x * 2.0f - window->ScrollbarSizes.x)); + window->ContentRegionRect.Max.y = window->ContentRegionRect.Min.y + (window->ContentSizeExplicit.y != 0.0f ? window->ContentSizeExplicit.y : (window->Size.y - window->WindowPadding.y * 2.0f - decoration_up_height - window->ScrollbarSizes.y)); + + // Setup drawing context + // (NB: That term "drawing context / DC" lost its meaning a long time ago. Initially was meant to hold transient data only. Nowadays difference between window-> and window->DC-> is dubious.) 
+ window->DC.Indent.x = 0.0f + window->WindowPadding.x - window->Scroll.x; + window->DC.GroupOffset.x = 0.0f; + window->DC.ColumnsOffset.x = 0.0f; + window->DC.CursorStartPos = window->Pos + ImVec2(window->DC.Indent.x + window->DC.ColumnsOffset.x, decoration_up_height + window->WindowPadding.y - window->Scroll.y); + window->DC.CursorPos = window->DC.CursorStartPos; + window->DC.CursorPosPrevLine = window->DC.CursorPos; + window->DC.CursorMaxPos = window->DC.CursorStartPos; + window->DC.IdealMaxPos = window->DC.CursorStartPos; + window->DC.CurrLineSize = window->DC.PrevLineSize = ImVec2(0.0f, 0.0f); + window->DC.CurrLineTextBaseOffset = window->DC.PrevLineTextBaseOffset = 0.0f; + + window->DC.NavLayerCurrent = ImGuiNavLayer_Main; + window->DC.NavLayerActiveMask = window->DC.NavLayerActiveMaskNext; + window->DC.NavLayerActiveMaskNext = 0x00; + window->DC.NavHideHighlightOneFrame = false; + window->DC.NavHasScroll = (window->ScrollMax.y > 0.0f); + + window->DC.MenuBarAppending = false; + window->DC.MenuColumns.Update(3, style.ItemSpacing.x, window_just_activated_by_user); + window->DC.TreeDepth = 0; + window->DC.TreeJumpToParentOnPopMask = 0x00; + window->DC.ChildWindows.resize(0); + window->DC.StateStorage = &window->StateStorage; + window->DC.CurrentColumns = NULL; + window->DC.LayoutType = ImGuiLayoutType_Vertical; + window->DC.ParentLayoutType = parent_window ? 
parent_window->DC.LayoutType : ImGuiLayoutType_Vertical; + window->DC.FocusCounterRegular = window->DC.FocusCounterTabStop = -1; + + window->DC.ItemWidth = window->ItemWidthDefault; + window->DC.TextWrapPos = -1.0f; // disabled + window->DC.ItemWidthStack.resize(0); + window->DC.TextWrapPosStack.resize(0); + + if (window->AutoFitFramesX > 0) + window->AutoFitFramesX--; + if (window->AutoFitFramesY > 0) + window->AutoFitFramesY--; + + // Apply focus (we need to call FocusWindow() AFTER setting DC.CursorStartPos so our initial navigation reference rectangle can start around there) + if (want_focus) + { + FocusWindow(window); + NavInitWindow(window, false); + } + + // Title bar + if (!(flags & ImGuiWindowFlags_NoTitleBar)) + RenderWindowTitleBarContents(window, ImRect(title_bar_rect.Min.x + window->WindowBorderSize, title_bar_rect.Min.y, title_bar_rect.Max.x - window->WindowBorderSize, title_bar_rect.Max.y), name, p_open); + + // Clear hit test shape every frame + window->HitTestHoleSize.x = window->HitTestHoleSize.y = 0; + + // Pressing CTRL+C while holding on a window copy its content to the clipboard + // This works but 1. doesn't handle multiple Begin/End pairs, 2. recursing into another Begin/End pair - so we need to work that out and add better logging scope. + // Maybe we can support CTRL+C on every element? + /* + //if (g.NavWindow == window && g.ActiveId == 0) + if (g.ActiveId == window->MoveId) + if (g.IO.KeyCtrl && IsKeyPressedMap(ImGuiKey_C)) + LogToClipboard(); + */ + + // We fill last item data based on Title Bar/Tab, in order for IsItemHovered() and IsItemActive() to be usable after Begin(). + // This is useful to allow creating context menus on title bar only, etc. + SetLastItemData(window, window->MoveId, IsMouseHoveringRect(title_bar_rect.Min, title_bar_rect.Max, false) ? 
ImGuiItemStatusFlags_HoveredRect : 0, title_bar_rect); + +#ifdef IMGUI_ENABLE_TEST_ENGINE + if (!(window->Flags & ImGuiWindowFlags_NoTitleBar)) + IMGUI_TEST_ENGINE_ITEM_ADD(window->DC.LastItemRect, window->DC.LastItemId); +#endif + } + else + { + // Append + SetCurrentWindow(window); + } + + // Pull/inherit current state + window->DC.ItemFlags = g.ItemFlagsStack.back(); // Inherit from shared stack + window->DC.NavFocusScopeIdCurrent = (flags & ImGuiWindowFlags_ChildWindow) ? parent_window->DC.NavFocusScopeIdCurrent : 0; // Inherit from parent only // -V595 + + PushClipRect(window->InnerClipRect.Min, window->InnerClipRect.Max, true); + + // Clear 'accessed' flag last thing (After PushClipRect which will set the flag. We want the flag to stay false when the default "Debug" window is unused) + window->WriteAccessed = false; + window->BeginCount++; + g.NextWindowData.ClearFlags(); + + // Update visibility + if (first_begin_of_the_frame) + { + if (flags & ImGuiWindowFlags_ChildWindow) + { + // Child window can be out of sight and have "negative" clip windows. + // Mark them as collapsed so commands are skipped earlier (we can't manually collapse them because they have no title bar). + IM_ASSERT((flags & ImGuiWindowFlags_NoTitleBar) != 0); + if (!(flags & ImGuiWindowFlags_AlwaysAutoResize) && window->AutoFitFramesX <= 0 && window->AutoFitFramesY <= 0) // FIXME: Doesn't make sense for ChildWindow?? 
+ if (!g.LogEnabled) + if (window->OuterRectClipped.Min.x >= window->OuterRectClipped.Max.x || window->OuterRectClipped.Min.y >= window->OuterRectClipped.Max.y) + window->HiddenFramesCanSkipItems = 1; + + // Hide along with parent or if parent is collapsed + if (parent_window && (parent_window->Collapsed || parent_window->HiddenFramesCanSkipItems > 0)) + window->HiddenFramesCanSkipItems = 1; + if (parent_window && (parent_window->Collapsed || parent_window->HiddenFramesCannotSkipItems > 0)) + window->HiddenFramesCannotSkipItems = 1; + } + + // Don't render if style alpha is 0.0 at the time of Begin(). This is arbitrary and inconsistent but has been there for a long while (may remove at some point) + if (style.Alpha <= 0.0f) + window->HiddenFramesCanSkipItems = 1; + + // Update the Hidden flag + window->Hidden = (window->HiddenFramesCanSkipItems > 0) || (window->HiddenFramesCannotSkipItems > 0) || (window->HiddenFramesForRenderOnly > 0); + + // Update the SkipItems flag, used to early out of all items functions (no layout required) + bool skip_items = false; + if (window->Collapsed || !window->Active || window->Hidden) + if (window->AutoFitFramesX <= 0 && window->AutoFitFramesY <= 0 && window->HiddenFramesCannotSkipItems <= 0) + skip_items = true; + window->SkipItems = skip_items; + } + + return !window->SkipItems; +}
+
+// Finish appending to the current window.
+// Closes an open columns set if any, pops the inner clip rectangle, stops logging for non-child windows,
+// pops the window stack (and the popup stack for popups), then makes the previous stack entry current.
+// Returns early (after reporting a user error) when only the implicit window is left on the stack.
+void ImGui::End()
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+
+    // Error checking: verify that user hasn't called End() too many times!
+    if (g.CurrentWindowStack.Size <= 1 && g.WithinFrameScopeWithImplicitWindow)
+    {
+        IM_ASSERT_USER_ERROR(g.CurrentWindowStack.Size > 1, "Calling End() too many times!");
+        return;
+    }
+    IM_ASSERT(g.CurrentWindowStack.Size > 0);
+
+    // Error checking: verify that user doesn't directly call End() on a child window.
+    if (window->Flags & ImGuiWindowFlags_ChildWindow)
+        IM_ASSERT_USER_ERROR(g.WithinEndChild, "Must call EndChild() and not End()!");
+
+    // Close anything that is open
+    if (window->DC.CurrentColumns)
+        EndColumns();
+    PopClipRect();   // Inner window clip rectangle
+
+    // Stop logging
+    if (!(window->Flags & ImGuiWindowFlags_ChildWindow))    // FIXME: add more options for scope of logging
+        LogFinish();
+
+    // Pop from window stack
+    g.CurrentWindowStack.pop_back();
+    if (window->Flags & ImGuiWindowFlags_Popup)
+        g.BeginPopupStack.pop_back();
+    window->DC.StackSizesOnBegin.CompareWithCurrentState();
+    // The new current window is whatever is now on top of the stack (NULL when the stack is empty)
+    SetCurrentWindow(g.CurrentWindowStack.empty() ? NULL : g.CurrentWindowStack.back());
+}
+
+// Move 'window' to the last slot of g.WindowsFocusOrder.
+// No-op when it is already last. Entries located after it are shifted down by one slot with memmove().
+// If the window is not present in the list, the list is left unchanged.
+void ImGui::BringWindowToFocusFront(ImGuiWindow* window)
+{
+    ImGuiContext& g = *GImGui;
+    if (g.WindowsFocusOrder.back() == window)
+        return;
+    for (int i = g.WindowsFocusOrder.Size - 2; i >= 0; i--) // We can ignore the top-most window
+        if (g.WindowsFocusOrder[i] == window)
+        {
+            // Close the gap left at index i, then store the window in the last slot
+            memmove(&g.WindowsFocusOrder[i], &g.WindowsFocusOrder[i + 1], (size_t)(g.WindowsFocusOrder.Size - i - 1) * sizeof(ImGuiWindow*));
+            g.WindowsFocusOrder[g.WindowsFocusOrder.Size - 1] = window;
+            break;
+        }
+}
+
+// Move 'window' to the last slot of g.Windows.
+// Early out when the current last window is 'window' itself or has 'window' as its RootWindow.
+// If the window is not present in the list, the list is left unchanged.
+void ImGui::BringWindowToDisplayFront(ImGuiWindow* window)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* current_front_window = g.Windows.back();
+    if (current_front_window == window || current_front_window->RootWindow == window) // Cheap early out (could be better)
+        return;
+    for (int i = g.Windows.Size - 2; i >= 0; i--) // We can ignore the top-most window
+        if (g.Windows[i] == window)
+        {
+            // Close the gap left at index i, then store the window in the last slot
+            memmove(&g.Windows[i], &g.Windows[i + 1], (size_t)(g.Windows.Size - i - 1) * sizeof(ImGuiWindow*));
+            g.Windows[g.Windows.Size - 1] = window;
+            break;
+        }
+}
+
+// Move 'window' to the first slot (index 0) of g.Windows.
+// No-op when it is already first. The i entries that preceded it are shifted up by one slot with memmove().
+void ImGui::BringWindowToDisplayBack(ImGuiWindow* window)
+{
+    ImGuiContext& g = *GImGui;
+    if (g.Windows[0] == window)
+        return;
+    for (int i = 0; i < g.Windows.Size; i++)
+        if (g.Windows[i] == window)
+        {
+            // Shift entries [0..i-1] to [1..i], freeing slot 0 for the window
+            memmove(&g.Windows[1], &g.Windows[0], (size_t)i * sizeof(ImGuiWindow*));
+            g.Windows[0] = window;
+            break;
+        }
+}
+
+// Moving window to front of display and set focus (which happens to be back of our sorted list)
+// - Updates g.NavWindow and resets the related navigation state when the focused window changes.
+// - Calls ClosePopupsOverWindow() for the window.
+// - Clears the active id when it belongs to a window with a different root, unless g.ActiveIdNoClearOnFocusLoss is set.
+// - 'window' may be NULL: the steps above still run, but nothing is brought to front.
+void ImGui::FocusWindow(ImGuiWindow* window)
+{
+    ImGuiContext& g = *GImGui;
+
+    if (g.NavWindow != window)
+    {
+        g.NavWindow = window;
+        if (window && g.NavDisableMouseHover)
+            g.NavMousePosDirty = true;
+        g.NavInitRequest = false;
+        g.NavId = window ? window->NavLastIds[0] : 0; // Restore NavId
+        g.NavFocusScopeId = 0;
+        g.NavIdIsAlive = false;
+        g.NavLayer = ImGuiNavLayer_Main;
+        //IMGUI_DEBUG_LOG("FocusWindow(\"%s\")\n", window ? window->Name : NULL);
+    }
+
+    // Close popups if any
+    ClosePopupsOverWindow(window, false);
+
+    // Move the root window to the top of the pile
+    IM_ASSERT(window == NULL || window->RootWindow != NULL);
+    ImGuiWindow* focus_front_window = window ? window->RootWindow : NULL; // NB: In docking branch this is window->RootWindowDockStop
+    ImGuiWindow* display_front_window = window ? window->RootWindow : NULL;
+
+    // Steal active widgets. Some of the cases it triggers includes:
+    // - Focus a window while an InputText in another window is active, if focus happens before the old InputText can run.
+    // - When using Nav to activate menu items (due to timing of activating on press->new window appears->losing ActiveId)
+    if (g.ActiveId != 0 && g.ActiveIdWindow && g.ActiveIdWindow->RootWindow != focus_front_window)
+        if (!g.ActiveIdNoClearOnFocusLoss)
+            ClearActiveID();
+
+    // Passing NULL allow to disable keyboard focus
+    if (!window)
+        return;
+
+    // Bring to front
+    // (the display order is left untouched when either the window or its root has ImGuiWindowFlags_NoBringToFrontOnFocus)
+    BringWindowToFocusFront(focus_front_window);
+    if (((window->Flags | display_front_window->Flags) & ImGuiWindowFlags_NoBringToFrontOnFocus) == 0)
+        BringWindowToDisplayFront(display_front_window);
+}
+
+// Focus the first eligible window found while walking g.WindowsFocusOrder from the end toward index 0.
+// - When 'under_this_window' is non-NULL and found in the focus order, the walk starts just below its index.
+// - A window is eligible when it is not 'ignore_window', has WasActive set, is not a child window,
+//   and does not have both ImGuiWindowFlags_NoMouseInputs and ImGuiWindowFlags_NoNavInputs set.
+// - The window actually focused is the one returned by NavRestoreLastChildNavWindow() for the eligible window.
+// - When no window is eligible, FocusWindow(NULL) is called.
+void ImGui::FocusTopMostWindowUnderOne(ImGuiWindow* under_this_window, ImGuiWindow* ignore_window)
+{
+    ImGuiContext& g = *GImGui;
+
+    int start_idx = g.WindowsFocusOrder.Size - 1;
+    if (under_this_window != NULL)
+    {
+        int under_this_window_idx = FindWindowFocusIndex(under_this_window);
+        if (under_this_window_idx != -1)
+            start_idx = under_this_window_idx - 1;
+    }
+    for (int i = start_idx; i >= 0; i--)
+    {
+        // We may later decide to test for different NoXXXInputs based on the active navigation input (mouse vs nav) but that may feel more confusing to the user.
+        ImGuiWindow* window = g.WindowsFocusOrder[i];
+        if (window != ignore_window && window->WasActive && !(window->Flags & ImGuiWindowFlags_ChildWindow))
+            if ((window->Flags & (ImGuiWindowFlags_NoMouseInputs | ImGuiWindowFlags_NoNavInputs)) != (ImGuiWindowFlags_NoMouseInputs | ImGuiWindowFlags_NoNavInputs))
+            {
+                ImGuiWindow* focus_window = NavRestoreLastChildNavWindow(window);
+                FocusWindow(focus_window);
+                return;
+            }
+    }
+    FocusWindow(NULL);
+}
+
+// Make 'font' the current font of the context.
+// Recomputes g.FontBaseSize (clamped to at least 1.0f) and g.FontSize (0.0f when there is no current window),
+// then copies the font, its size and the atlas white-pixel/line UVs into g.DrawListSharedData.
+// Asserts that the font is non-NULL, loaded, and has a strictly positive Scale.
+void ImGui::SetCurrentFont(ImFont* font)
+{
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(font && font->IsLoaded());    // Font Atlas not created. Did you call io.Fonts->GetTexDataAsRGBA32 / GetTexDataAsAlpha8 ?
+    IM_ASSERT(font->Scale > 0.0f);
+    g.Font = font;
+    g.FontBaseSize = ImMax(1.0f, g.IO.FontGlobalScale * g.Font->FontSize * g.Font->Scale);
+    g.FontSize = g.CurrentWindow ? g.CurrentWindow->CalcFontSize() : 0.0f;
+
+    ImFontAtlas* atlas = g.Font->ContainerAtlas;
+    g.DrawListSharedData.TexUvWhitePixel = atlas->TexUvWhitePixel;
+    g.DrawListSharedData.TexUvLines = atlas->TexUvLines;
+    g.DrawListSharedData.Font = g.Font;
+    g.DrawListSharedData.FontSize = g.FontSize;
+}
+
+// Push 'font' on the font stack and make it current. A NULL font selects GetDefaultFont().
+// Also pushes the texture id of the font's atlas on the current window draw list.
+void ImGui::PushFont(ImFont* font)
+{
+    ImGuiContext& g = *GImGui;
+    if (!font)
+        font = GetDefaultFont();
+    SetCurrentFont(font);
+    g.FontStack.push_back(font);
+    g.CurrentWindow->DrawList->PushTextureID(font->ContainerAtlas->TexID);
+}
+
+// Undo the last PushFont(): pops the draw list texture id and the font stack,
+// then makes the new stack top current (or GetDefaultFont() when the stack is empty).
+void ImGui::PopFont()
+{
+    ImGuiContext& g = *GImGui;
+    g.CurrentWindow->DrawList->PopTextureID();
+    g.FontStack.pop_back();
+    SetCurrentFont(g.FontStack.empty() ? GetDefaultFont() : g.FontStack.back());
+}
+
+// Push a copy of the current item flags with 'option' set (enabled == true) or cleared (enabled == false).
+// The result is stored both in the current window's DC.ItemFlags and on top of g.ItemFlagsStack.
+void ImGui::PushItemFlag(ImGuiItemFlags option, bool enabled)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    ImGuiItemFlags item_flags = window->DC.ItemFlags;
+    IM_ASSERT(item_flags == g.ItemFlagsStack.back());
+    if (enabled)
+        item_flags |= option;
+    else
+        item_flags &= ~option;
+    window->DC.ItemFlags = item_flags;
+    g.ItemFlagsStack.push_back(item_flags);
+}
+
+// Pop the top of g.ItemFlagsStack and reload the current window's DC.ItemFlags from the new top.
+void ImGui::PopItemFlag()
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    IM_ASSERT(g.ItemFlagsStack.Size > 1); // Too many calls to PopItemFlag() - we always leave a 0 at the bottom of the stack.
+    g.ItemFlagsStack.pop_back();
+    window->DC.ItemFlags = g.ItemFlagsStack.back();
+}
+
+// FIXME: Look into renaming this once we have settled the new Focus/Activation/TabStop system.
+// Pushes ImGuiItemFlags_NoTabStop on the item flag stack; the flag is set when 'allow_keyboard_focus' is false.
+void ImGui::PushAllowKeyboardFocus(bool allow_keyboard_focus)
+{
+    const bool no_tab_stop = !allow_keyboard_focus;
+    PushItemFlag(ImGuiItemFlags_NoTabStop, no_tab_stop);
+}
+
+// Counterpart of PushAllowKeyboardFocus(): pops one entry from the item flag stack.
+void ImGui::PopAllowKeyboardFocus()
+{
+    PopItemFlag();
+}
+
+// Pushes ImGuiItemFlags_ButtonRepeat on the item flag stack, set or cleared according to 'repeat'.
+void ImGui::PushButtonRepeat(bool repeat)
+{
+    PushItemFlag(ImGuiItemFlags_ButtonRepeat, repeat);
+}
+
+// Counterpart of PushButtonRepeat(): pops one entry from the item flag stack.
+void ImGui::PopButtonRepeat()
+{
+    PopItemFlag();
+}
+
+// Saves the current window's text wrap position on its stack, then replaces it with 'wrap_pos_x'.
+void ImGui::PushTextWrapPos(float wrap_pos_x)
+{
+    ImGuiWindow* current = GetCurrentWindow();
+    const float previous_wrap_pos = current->DC.TextWrapPos;
+    current->DC.TextWrapPosStack.push_back(previous_wrap_pos);
+    current->DC.TextWrapPos = wrap_pos_x;
+}
+
+// Restores the text wrap position saved by the matching PushTextWrapPos() and drops it from the stack.
+void ImGui::PopTextWrapPos()
+{
+    ImGuiWindow* current = GetCurrentWindow();
+    const float restored_wrap_pos = current->DC.TextWrapPosStack.back();
+    current->DC.TextWrapPosStack.pop_back();
+    current->DC.TextWrapPos = restored_wrap_pos;
+}
+
+// Returns true when 'potential_parent' is the RootWindow of 'window', 'window' itself,
+// or any window reached by following the ParentWindow links upward from 'window'.
+bool ImGui::IsWindowChildOf(ImGuiWindow* window, ImGuiWindow* potential_parent)
+{
+    // Shortcut: the root is compared first, before walking the chain
+    if (window->RootWindow == potential_parent)
+        return true;
+    for (ImGuiWindow* ancestor = window; ancestor != NULL; ancestor = ancestor->ParentWindow)
+        if (ancestor == potential_parent)
+            return true;
+    return false;
+}
+
+// Scans g.Windows from the last entry to the first and reports which of the two windows is met first:
+// true for 'potential_above', false for 'potential_below' (and false when neither is in the list).
+bool ImGui::IsWindowAbove(ImGuiWindow* potential_above, ImGuiWindow* potential_below)
+{
+    ImGuiContext& g = *GImGui;
+    int idx = g.Windows.Size;
+    while (idx-- > 0)
+    {
+        ImGuiWindow* visited = g.Windows[idx];
+        if (visited == potential_above)
+            return true;
+        if (visited == potential_below)
+            return false;
+    }
+    return false;
+}
+
+// Tells whether the hovered window matches the scope requested by 'flags' relative to the current window
+// (any window, root window, child windows, both, or exactly the current window), is content-hoverable,
+// and is not blocked by an active item (unless ImGuiHoveredFlags_AllowWhenBlockedByActiveItem is passed).
+bool ImGui::IsWindowHovered(ImGuiHoveredFlags flags)
+{
+    IM_ASSERT((flags & ImGuiHoveredFlags_AllowWhenOverlapped) == 0);   // This flag is not handled here
+    ImGuiContext& g = *GImGui;
+
+    // Step 1: does the hovered window fall inside the requested scope?
+    const ImGuiHoveredFlags scope = flags & (ImGuiHoveredFlags_RootWindow | ImGuiHoveredFlags_ChildWindows);
+    bool hovered_in_scope;
+    if (flags & ImGuiHoveredFlags_AnyWindow)
+        hovered_in_scope = (g.HoveredWindow != NULL);
+    else if (scope == (ImGuiHoveredFlags_RootWindow | ImGuiHoveredFlags_ChildWindows))
+        hovered_in_scope = (g.HoveredRootWindow == g.CurrentWindow->RootWindow);
+    else if (scope == ImGuiHoveredFlags_RootWindow)
+        hovered_in_scope = (g.HoveredWindow == g.CurrentWindow->RootWindow);
+    else if (scope == ImGuiHoveredFlags_ChildWindows)
+        hovered_in_scope = (g.HoveredWindow != NULL && IsWindowChildOf(g.HoveredWindow, g.CurrentWindow));
+    else
+        hovered_in_scope = (g.HoveredWindow == g.CurrentWindow);
+    if (!hovered_in_scope)
+        return false;
+
+    // Step 2: the hovered window must accept hovering for these flags
+    if (!IsWindowContentHoverable(g.HoveredWindow, flags))
+        return false;
+
+    // Step 3: an active item other than the hovered window's move id blocks hovering, unless allowed by the caller
+    const bool blocked_by_active_item =
+        !(flags & ImGuiHoveredFlags_AllowWhenBlockedByActiveItem)
+        && g.ActiveId != 0
+        && !g.ActiveIdAllowOverlap
+        && g.ActiveId != g.HoveredWindow->MoveId;
+    return !blocked_by_active_item;
+}
+
+// Tells whether the navigation window (g.NavWindow) matches the scope requested by 'flags' relative to the
+// current window. With ImGuiFocusedFlags_AnyWindow, only checks that some window is the navigation window.
+bool ImGui::IsWindowFocused(ImGuiFocusedFlags flags)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* focused = g.NavWindow;
+
+    if (flags & ImGuiFocusedFlags_AnyWindow)
+        return focused != NULL;
+
+    IM_ASSERT(g.CurrentWindow);     // Must be called between Begin() and End()
+    ImGuiWindow* current = g.CurrentWindow;
+    const ImGuiFocusedFlags scope = flags & (ImGuiFocusedFlags_RootWindow | ImGuiFocusedFlags_ChildWindows);
+    if (scope == (ImGuiFocusedFlags_RootWindow | ImGuiFocusedFlags_ChildWindows))
+        return focused && focused->RootWindow == current->RootWindow;
+    if (scope == ImGuiFocusedFlags_RootWindow)
+        return focused == current->RootWindow;
+    if (scope == ImGuiFocusedFlags_ChildWindows)
+        return focused && IsWindowChildOf(focused, current);
+    return focused == current;
+}
+
+// Can we focus this window with CTRL+TAB (or PadMenu + PadFocusPrev/PadFocusNext)
+// Note that NoNavFocus makes the window not reachable with CTRL+TAB but it can still be focused with mouse or programmatically.
+// If you want a window to never be focused, you may use the e.g. NoInputs flag.
+bool ImGui::IsWindowNavFocusable(ImGuiWindow* window)
+{
+    // All three conditions must hold: was active, is its own root, and NoNavFocus is not set
+    if (!window->WasActive)
+        return false;
+    if (window != window->RootWindow)
+        return false;
+    return (window->Flags & ImGuiWindowFlags_NoNavFocus) == 0;
+}
+
+// Width (Size.x) of the current window.
+float ImGui::GetWindowWidth()
+{
+    return GImGui->CurrentWindow->Size.x;
+}
+
+// Height (Size.y) of the current window.
+float ImGui::GetWindowHeight()
+{
+    return GImGui->CurrentWindow->Size.y;
+}
+
+// Position (Pos) of the current window.
+ImVec2 ImGui::GetWindowPos()
+{
+    return GImGui->CurrentWindow->Pos;
+}
+
+// Moves 'window' to 'pos' (floored), provided 'cond' is 0 or is still allowed by the window's SetWindowPosAllowFlags.
+// The Once/FirstUseEver/Appearing bits are cleared from the allow flags and any pending SetWindowPosVal is reset.
+void ImGui::SetWindowPos(ImGuiWindow* window, const ImVec2& pos, ImGuiCond cond)
+{
+    // A zero condition always applies; otherwise the condition must still be allowed for this window
+    const bool cond_allowed = (cond == 0) || (window->SetWindowPosAllowFlags & cond) != 0;
+    if (!cond_allowed)
+        return;
+
+    IM_ASSERT(cond == 0 || ImIsPowerOfTwo(cond)); // Only a single condition flag may be passed, not a combination.
+    window->SetWindowPosAllowFlags &= ~(ImGuiCond_Once | ImGuiCond_FirstUseEver | ImGuiCond_Appearing);
+    window->SetWindowPosVal = ImVec2(FLT_MAX, FLT_MAX);
+
+    // Apply the new position and measure how far the window actually moved
+    const ImVec2 previous_pos = window->Pos;
+    window->Pos = ImFloor(pos);
+    const ImVec2 delta = window->Pos - previous_pos;
+
+    // Shift the layout cursors by the same amount: CursorStartPos/CursorMaxPos so the ContentSize calculation is not
+    // affected, and CursorPos because the window may be moved while it is being appended to.
+    window->DC.CursorStartPos += delta;
+    window->DC.CursorPos += delta;
+    window->DC.CursorMaxPos += delta;
+}
+
+// Same as above, applied to the current window.
+void ImGui::SetWindowPos(const ImVec2& pos, ImGuiCond cond)
+{
+    SetWindowPos(GetCurrentWindowRead(), pos, cond);
+}
+
+// Same as above, applied to the window found by name; does nothing when no such window exists.
+void ImGui::SetWindowPos(const char* name, const ImVec2& pos, ImGuiCond cond)
+{
+    ImGuiWindow* named_window = FindWindowByName(name);
+    if (named_window != NULL)
+        SetWindowPos(named_window, pos, cond);
+}
+
+// Size of the current window.
+ImVec2 ImGui::GetWindowSize()
+{
+    return GetCurrentWindowRead()->Size;
+}
+
+// Resizes 'window', provided 'cond' is 0 or is still allowed by the window's SetWindowSizeAllowFlags.
+// Per axis: a strictly positive value sets SizeFull (floored) and cancels auto-fit; any other value
+// requests 2 frames of auto-fit with AutoFitOnlyGrows disabled.
+void ImGui::SetWindowSize(ImGuiWindow* window, const ImVec2& size, ImGuiCond cond)
+{
+    // A zero condition always applies; otherwise the condition must still be allowed for this window
+    const bool cond_allowed = (cond == 0) || (window->SetWindowSizeAllowFlags & cond) != 0;
+    if (!cond_allowed)
+        return;
+
+    IM_ASSERT(cond == 0 || ImIsPowerOfTwo(cond)); // Only a single condition flag may be passed, not a combination.
+    window->SetWindowSizeAllowFlags &= ~(ImGuiCond_Once | ImGuiCond_FirstUseEver | ImGuiCond_Appearing);
+
+    // Horizontal axis
+    const bool explicit_width = size.x > 0.0f;
+    if (!explicit_width)
+    {
+        window->AutoFitFramesX = 2;
+        window->AutoFitOnlyGrows = false;
+    }
+    else
+    {
+        window->AutoFitFramesX = 0;
+        window->SizeFull.x = IM_FLOOR(size.x);
+    }
+
+    // Vertical axis
+    const bool explicit_height = size.y > 0.0f;
+    if (!explicit_height)
+    {
+        window->AutoFitFramesY = 2;
+        window->AutoFitOnlyGrows = false;
+    }
+    else
+    {
+        window->AutoFitFramesY = 0;
+        window->SizeFull.y = IM_FLOOR(size.y);
+    }
+}
+
+// Same as above, applied to the current window.
+void ImGui::SetWindowSize(const ImVec2& size, ImGuiCond cond)
+{
+    ImGuiWindow* current = GImGui->CurrentWindow;
+    SetWindowSize(current, size, cond);
+}
+
+// Same as above, applied to the window found by name; does nothing when no such window exists.
+void ImGui::SetWindowSize(const char* name, const ImVec2& size, ImGuiCond cond)
+{
+    ImGuiWindow* named_window = FindWindowByName(name);
+    if (named_window != NULL)
+        SetWindowSize(named_window, size, cond);
+}
+
+// Sets the collapsed state of 'window', provided 'cond' is 0 or is still allowed by the window's
+// SetWindowCollapsedAllowFlags. The Once/FirstUseEver/Appearing bits are cleared from the allow flags.
+void ImGui::SetWindowCollapsed(ImGuiWindow* window, bool collapsed, ImGuiCond cond)
+{
+    const bool cond_allowed = (cond == 0) || (window->SetWindowCollapsedAllowFlags & cond) != 0;
+    if (!cond_allowed)
+        return;
+    window->SetWindowCollapsedAllowFlags &= ~(ImGuiCond_Once | ImGuiCond_FirstUseEver | ImGuiCond_Appearing);
+
+    window->Collapsed = collapsed;
+}
+
+// Records a hit-test hole for 'window': its size, and its offset computed as 'pos' minus the window position.
+// Asserts that no hole is currently set (HitTestHoleSize.x == 0).
+void ImGui::SetWindowHitTestHole(ImGuiWindow* window, const ImVec2& pos, const ImVec2& size)
+{
+    IM_ASSERT(window->HitTestHoleSize.x == 0);     // Only one hole / hit test filter is supported
+    const ImVec2 offset_from_window = pos - window->Pos;
+    window->HitTestHoleOffset = ImVec2ih(offset_from_window);
+    window->HitTestHoleSize = ImVec2ih(size);
+}
+
+// Same as SetWindowCollapsed(window, ...), applied to the current window.
+void ImGui::SetWindowCollapsed(bool collapsed, ImGuiCond cond)
+{
+    ImGuiWindow* current = GImGui->CurrentWindow;
+    SetWindowCollapsed(current, collapsed, cond);
+}
+
+// Collapsed state of the current window.
+bool ImGui::IsWindowCollapsed()
+{
+    return GetCurrentWindowRead()->Collapsed;
+}
+
+// Appearing state of the current window.
+bool ImGui::IsWindowAppearing()
+{
+    return GetCurrentWindowRead()->Appearing;
+}
+
+// Same as SetWindowCollapsed(window, ...), applied to the window found by name; does nothing when no such window exists.
+void ImGui::SetWindowCollapsed(const char* name, bool collapsed, ImGuiCond cond)
+{
+    ImGuiWindow* named_window = FindWindowByName(name);
+    if (named_window != NULL)
+        SetWindowCollapsed(named_window, collapsed, cond);
+}
+
+// Focuses the current window.
+void ImGui::SetWindowFocus()
+{
+    ImGuiWindow* current = GImGui->CurrentWindow;
+    FocusWindow(current);
+}
+
+// Focuses the window found by name (nothing happens when it does not exist).
+// A NULL name calls FocusWindow(NULL).
+void ImGui::SetWindowFocus(const char* name)
+{
+    if (name == NULL)
+    {
+        FocusWindow(NULL);
+        return;
+    }
+    ImGuiWindow* named_window = FindWindowByName(name);
+    if (named_window != NULL)
+        FocusWindow(named_window);
+}
+
+// Stores a position request (value, pivot and condition) in g.NextWindowData and flags it as present.
+// A zero 'cond' is stored as ImGuiCond_Always.
+void ImGui::SetNextWindowPos(const ImVec2& pos, ImGuiCond cond, const ImVec2& pivot)
+{
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(cond == 0 || ImIsPowerOfTwo(cond)); // Only a single condition flag may be passed, not a combination.
+    ImGuiCond effective_cond = ImGuiCond_Always;
+    if (cond)
+        effective_cond = cond;
+    g.NextWindowData.Flags |= ImGuiNextWindowDataFlags_HasPos;
+    g.NextWindowData.PosVal = pos;
+    g.NextWindowData.PosPivotVal = pivot;
+    g.NextWindowData.PosCond = effective_cond;
+}
+
+void ImGui::SetNextWindowSize(const ImVec2& size, ImGuiCond cond) +{ + ImGuiContext& g = *GImGui; + IM_ASSERT(cond == 0 || ImIsPowerOfTwo(cond)); // Make sure the user doesn't attempt to combine multiple condition flags. + g.NextWindowData.Flags |= ImGuiNextWindowDataFlags_HasSize; + g.NextWindowData.SizeVal = size; + g.NextWindowData.SizeCond = cond ? 
cond : ImGuiCond_Always; +} + +void ImGui::SetNextWindowSizeConstraints(const ImVec2& size_min, const ImVec2& size_max, ImGuiSizeCallback custom_callback, void* custom_callback_user_data) +{ + ImGuiContext& g = *GImGui; + g.NextWindowData.Flags |= ImGuiNextWindowDataFlags_HasSizeConstraint; + g.NextWindowData.SizeConstraintRect = ImRect(size_min, size_max); + g.NextWindowData.SizeCallback = custom_callback; + g.NextWindowData.SizeCallbackUserData = custom_callback_user_data; +} + +// Content size = inner scrollable rectangle, padded with WindowPadding. +// SetNextWindowContentSize(ImVec2(100,100) + ImGuiWindowFlags_AlwaysAutoResize will always allow submitting a 100x100 item. +void ImGui::SetNextWindowContentSize(const ImVec2& size) +{ + ImGuiContext& g = *GImGui; + g.NextWindowData.Flags |= ImGuiNextWindowDataFlags_HasContentSize; + g.NextWindowData.ContentSizeVal = ImFloor(size); +} + +void ImGui::SetNextWindowScroll(const ImVec2& scroll) +{ + ImGuiContext& g = *GImGui; + g.NextWindowData.Flags |= ImGuiNextWindowDataFlags_HasScroll; + g.NextWindowData.ScrollVal = scroll; +} + +void ImGui::SetNextWindowCollapsed(bool collapsed, ImGuiCond cond) +{ + ImGuiContext& g = *GImGui; + IM_ASSERT(cond == 0 || ImIsPowerOfTwo(cond)); // Make sure the user doesn't attempt to combine multiple condition flags. + g.NextWindowData.Flags |= ImGuiNextWindowDataFlags_HasCollapsed; + g.NextWindowData.CollapsedVal = collapsed; + g.NextWindowData.CollapsedCond = cond ? 
cond : ImGuiCond_Always; +} + +void ImGui::SetNextWindowFocus() +{ + ImGuiContext& g = *GImGui; + g.NextWindowData.Flags |= ImGuiNextWindowDataFlags_HasFocus; +} + +void ImGui::SetNextWindowBgAlpha(float alpha) +{ + ImGuiContext& g = *GImGui; + g.NextWindowData.Flags |= ImGuiNextWindowDataFlags_HasBgAlpha; + g.NextWindowData.BgAlphaVal = alpha; +} + +ImDrawList* ImGui::GetWindowDrawList() +{ + ImGuiWindow* window = GetCurrentWindow(); + return window->DrawList; +} + +ImFont* ImGui::GetFont() +{ + return GImGui->Font; +} + +float ImGui::GetFontSize() +{ + return GImGui->FontSize; +} + +ImVec2 ImGui::GetFontTexUvWhitePixel() +{ + return GImGui->DrawListSharedData.TexUvWhitePixel; +} + +void ImGui::SetWindowFontScale(float scale) +{ + IM_ASSERT(scale > 0.0f); + ImGuiContext& g = *GImGui; + ImGuiWindow* window = GetCurrentWindow(); + window->FontWindowScale = scale; + g.FontSize = g.DrawListSharedData.FontSize = window->CalcFontSize(); +} + +void ImGui::ActivateItem(ImGuiID id) +{ + ImGuiContext& g = *GImGui; + g.NavNextActivateId = id; +} + +void ImGui::PushFocusScope(ImGuiID id) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + g.FocusScopeStack.push_back(window->DC.NavFocusScopeIdCurrent); + window->DC.NavFocusScopeIdCurrent = id; +} + +void ImGui::PopFocusScope() +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + IM_ASSERT(g.FocusScopeStack.Size > 0); // Too many PopFocusScope() ? 
+ window->DC.NavFocusScopeIdCurrent = g.FocusScopeStack.back(); + g.FocusScopeStack.pop_back(); +} + +void ImGui::SetKeyboardFocusHere(int offset) +{ + IM_ASSERT(offset >= -1); // -1 is allowed but not below + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + g.FocusRequestNextWindow = window; + g.FocusRequestNextCounterRegular = window->DC.FocusCounterRegular + 1 + offset; + g.FocusRequestNextCounterTabStop = INT_MAX; +} + +void ImGui::SetItemDefaultFocus() +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (!window->Appearing) + return; + if (g.NavWindow == window->RootWindowForNav && (g.NavInitRequest || g.NavInitResultId != 0) && g.NavLayer == g.NavWindow->DC.NavLayerCurrent) + { + g.NavInitRequest = false; + g.NavInitResultId = g.NavWindow->DC.LastItemId; + g.NavInitResultRectRel = ImRect(g.NavWindow->DC.LastItemRect.Min - g.NavWindow->Pos, g.NavWindow->DC.LastItemRect.Max - g.NavWindow->Pos); + NavUpdateAnyRequestFlag(); + if (!IsItemVisible()) + SetScrollHereY(); + } +} + +void ImGui::SetStateStorage(ImGuiStorage* tree) +{ + ImGuiWindow* window = GImGui->CurrentWindow; + window->DC.StateStorage = tree ? 
tree : &window->StateStorage; +} + +ImGuiStorage* ImGui::GetStateStorage() +{ + ImGuiWindow* window = GImGui->CurrentWindow; + return window->DC.StateStorage; +} + +void ImGui::PushID(const char* str_id) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + ImGuiID id = window->GetIDNoKeepAlive(str_id); + window->IDStack.push_back(id); +} + +void ImGui::PushID(const char* str_id_begin, const char* str_id_end) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + ImGuiID id = window->GetIDNoKeepAlive(str_id_begin, str_id_end); + window->IDStack.push_back(id); +} + +void ImGui::PushID(const void* ptr_id) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + ImGuiID id = window->GetIDNoKeepAlive(ptr_id); + window->IDStack.push_back(id); +} + +void ImGui::PushID(int int_id) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + ImGuiID id = window->GetIDNoKeepAlive(int_id); + window->IDStack.push_back(id); +} + +// Push a given id value ignoring the ID stack as a seed. +void ImGui::PushOverrideID(ImGuiID id) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + window->IDStack.push_back(id); +} + +// Helper to avoid a common series of PushOverrideID -> GetID() -> PopID() call +// (note that when using this pattern, TestEngine's "Stack Tool" will tend to not display the intermediate stack level. +// for that to work we would need to do PushOverrideID() -> ItemAdd() -> PopID() which would alter widget code a little more) +ImGuiID ImGui::GetIDWithSeed(const char* str, const char* str_end, ImGuiID seed) +{ + ImGuiID id = ImHashStr(str, str_end ? 
(str_end - str) : 0, seed); + ImGui::KeepAliveID(id); +#ifdef IMGUI_ENABLE_TEST_ENGINE + ImGuiContext& g = *GImGui; + IMGUI_TEST_ENGINE_ID_INFO2(id, ImGuiDataType_String, str, str_end); +#endif + return id; +} + +void ImGui::PopID() +{ + ImGuiWindow* window = GImGui->CurrentWindow; + IM_ASSERT(window->IDStack.Size > 1); // Too many PopID(), or could be popping in a wrong/different window? + window->IDStack.pop_back(); +} + +ImGuiID ImGui::GetID(const char* str_id) +{ + ImGuiWindow* window = GImGui->CurrentWindow; + return window->GetID(str_id); +} + +ImGuiID ImGui::GetID(const char* str_id_begin, const char* str_id_end) +{ + ImGuiWindow* window = GImGui->CurrentWindow; + return window->GetID(str_id_begin, str_id_end); +} + +ImGuiID ImGui::GetID(const void* ptr_id) +{ + ImGuiWindow* window = GImGui->CurrentWindow; + return window->GetID(ptr_id); +} + +bool ImGui::IsRectVisible(const ImVec2& size) +{ + ImGuiWindow* window = GImGui->CurrentWindow; + return window->ClipRect.Overlaps(ImRect(window->DC.CursorPos, window->DC.CursorPos + size)); +} + +bool ImGui::IsRectVisible(const ImVec2& rect_min, const ImVec2& rect_max) +{ + ImGuiWindow* window = GImGui->CurrentWindow; + return window->ClipRect.Overlaps(ImRect(rect_min, rect_max)); +} + + +//----------------------------------------------------------------------------- +// [SECTION] ERROR CHECKING +//----------------------------------------------------------------------------- + +// Helper function to verify ABI compatibility between caller code and compiled version of Dear ImGui. +// Verify that the type sizes are matching between the calling file's compilation unit and imgui.cpp's compilation unit +// If the user has inconsistent compilation settings, imgui configuration #define, packing pragma, etc. your user code +// may see different structures than what imgui.cpp sees, which is problematic. 
+// We usually require settings to be in imconfig.h to make sure that they are accessible to all compilation units involved with Dear ImGui. +bool ImGui::DebugCheckVersionAndDataLayout(const char* version, size_t sz_io, size_t sz_style, size_t sz_vec2, size_t sz_vec4, size_t sz_vert, size_t sz_idx) +{ + bool error = false; + if (strcmp(version, IMGUI_VERSION) != 0) { error = true; IM_ASSERT(strcmp(version, IMGUI_VERSION) == 0 && "Mismatched version string!"); } + if (sz_io != sizeof(ImGuiIO)) { error = true; IM_ASSERT(sz_io == sizeof(ImGuiIO) && "Mismatched struct layout!"); } + if (sz_style != sizeof(ImGuiStyle)) { error = true; IM_ASSERT(sz_style == sizeof(ImGuiStyle) && "Mismatched struct layout!"); } + if (sz_vec2 != sizeof(ImVec2)) { error = true; IM_ASSERT(sz_vec2 == sizeof(ImVec2) && "Mismatched struct layout!"); } + if (sz_vec4 != sizeof(ImVec4)) { error = true; IM_ASSERT(sz_vec4 == sizeof(ImVec4) && "Mismatched struct layout!"); } + if (sz_vert != sizeof(ImDrawVert)) { error = true; IM_ASSERT(sz_vert == sizeof(ImDrawVert) && "Mismatched struct layout!"); } + if (sz_idx != sizeof(ImDrawIdx)) { error = true; IM_ASSERT(sz_idx == sizeof(ImDrawIdx) && "Mismatched struct layout!"); } + return !error; +} + +static void ImGui::ErrorCheckNewFrameSanityChecks() +{ + ImGuiContext& g = *GImGui; + + // Check user IM_ASSERT macro + // (IF YOU GET A WARNING OR COMPILE ERROR HERE: it means you assert macro is incorrectly defined! + // If your macro uses multiple statements, it NEEDS to be surrounded by a 'do { ... } while (0)' block. + // This is a common C/C++ idiom to allow multiple statements macros to be used in control flow blocks.) + // #define IM_ASSERT(EXPR) if (SomeCode(EXPR)) SomeMoreCode(); // Wrong! + // #define IM_ASSERT(EXPR) do { if (SomeCode(EXPR)) SomeMoreCode(); } while (0) // Correct! 
+ if (true) IM_ASSERT(1); else IM_ASSERT(0); + + // Check user data + // (We pass an error message in the assert expression to make it visible to programmers who are not using a debugger, as most assert handlers display their argument) + IM_ASSERT(g.Initialized); + IM_ASSERT((g.IO.DeltaTime > 0.0f || g.FrameCount == 0) && "Need a positive DeltaTime!"); + IM_ASSERT((g.FrameCount == 0 || g.FrameCountEnded == g.FrameCount) && "Forgot to call Render() or EndFrame() at the end of the previous frame?"); + IM_ASSERT(g.IO.DisplaySize.x >= 0.0f && g.IO.DisplaySize.y >= 0.0f && "Invalid DisplaySize value!"); + IM_ASSERT(g.IO.Fonts->Fonts.Size > 0 && "Font Atlas not built. Did you call io.Fonts->GetTexDataAsRGBA32() / GetTexDataAsAlpha8()?"); + IM_ASSERT(g.IO.Fonts->Fonts[0]->IsLoaded() && "Font Atlas not built. Did you call io.Fonts->GetTexDataAsRGBA32() / GetTexDataAsAlpha8()?"); + IM_ASSERT(g.Style.CurveTessellationTol > 0.0f && "Invalid style setting!"); + IM_ASSERT(g.Style.CircleSegmentMaxError > 0.0f && "Invalid style setting!"); + IM_ASSERT(g.Style.Alpha >= 0.0f && g.Style.Alpha <= 1.0f && "Invalid style setting!"); // Allows us to avoid a few clamps in color computations + IM_ASSERT(g.Style.WindowMinSize.x >= 1.0f && g.Style.WindowMinSize.y >= 1.0f && "Invalid style setting."); + IM_ASSERT(g.Style.WindowMenuButtonPosition == ImGuiDir_None || g.Style.WindowMenuButtonPosition == ImGuiDir_Left || g.Style.WindowMenuButtonPosition == ImGuiDir_Right); + for (int n = 0; n < ImGuiKey_COUNT; n++) + IM_ASSERT(g.IO.KeyMap[n] >= -1 && g.IO.KeyMap[n] < IM_ARRAYSIZE(g.IO.KeysDown) && "io.KeyMap[] contains an out of bound value (need to be 0..512, or -1 for unmapped key)"); + + // Check: required key mapping (we intentionally do NOT check all keys to not pressure user into setting up everything, but Space is required and was only added in 1.60 WIP) + if (g.IO.ConfigFlags & ImGuiConfigFlags_NavEnableKeyboard) + IM_ASSERT(g.IO.KeyMap[ImGuiKey_Space] != -1 && "ImGuiKey_Space is not 
mapped, required for keyboard navigation."); + + // Check: the io.ConfigWindowsResizeFromEdges option requires backend to honor mouse cursor changes and set the ImGuiBackendFlags_HasMouseCursors flag accordingly. + if (g.IO.ConfigWindowsResizeFromEdges && !(g.IO.BackendFlags & ImGuiBackendFlags_HasMouseCursors)) + g.IO.ConfigWindowsResizeFromEdges = false; +} + +static void ImGui::ErrorCheckEndFrameSanityChecks() +{ + ImGuiContext& g = *GImGui; + + // Verify that io.KeyXXX fields haven't been tampered with. Key mods should not be modified between NewFrame() and EndFrame() + // One possible reason leading to this assert is that your backends update inputs _AFTER_ NewFrame(). + // It is known that when some modal native windows called mid-frame takes focus away, some backends such as GLFW will + // send key release events mid-frame. This would normally trigger this assertion and lead to sheared inputs. + // We silently accommodate for this case by ignoring/ the case where all io.KeyXXX modifiers were released (aka key_mod_flags == 0), + // while still correctly asserting on mid-frame key press events. + const ImGuiKeyModFlags key_mod_flags = GetMergedKeyModFlags(); + IM_ASSERT((key_mod_flags == 0 || g.IO.KeyMods == key_mod_flags) && "Mismatching io.KeyCtrl/io.KeyShift/io.KeyAlt/io.KeySuper vs io.KeyMods"); + IM_UNUSED(key_mod_flags); + + // Recover from errors + //ErrorCheckEndFrameRecover(); + + // Report when there is a mismatch of Begin/BeginChild vs End/EndChild calls. Important: Remember that the Begin/BeginChild API requires you + // to always call End/EndChild even if Begin/BeginChild returns false! (this is unfortunately inconsistent with most other Begin* API). 
+ if (g.CurrentWindowStack.Size != 1) + { + if (g.CurrentWindowStack.Size > 1) + { + IM_ASSERT_USER_ERROR(g.CurrentWindowStack.Size == 1, "Mismatched Begin/BeginChild vs End/EndChild calls: did you forget to call End/EndChild?"); + while (g.CurrentWindowStack.Size > 1) + End(); + } + else + { + IM_ASSERT_USER_ERROR(g.CurrentWindowStack.Size == 1, "Mismatched Begin/BeginChild vs End/EndChild calls: did you call End/EndChild too much?"); + } + } + + IM_ASSERT_USER_ERROR(g.GroupStack.Size == 0, "Missing EndGroup call!"); +} + +// Experimental recovery from incorrect usage of BeginXXX/EndXXX/PushXXX/PopXXX calls. +// Must be called during or before EndFrame(). +// This is generally flawed as we are not necessarily End/Popping things in the right order. +// FIXME: Can't recover from inside BeginTabItem/EndTabItem yet. +// FIXME: Can't recover from interleaved BeginTabBar/Begin +void ImGui::ErrorCheckEndFrameRecover(ImGuiErrorLogCallback log_callback, void* user_data) +{ + // PVS-Studio V1044 is "Loop break conditions do not depend on the number of iterations" + ImGuiContext& g = *GImGui; + while (g.CurrentWindowStack.Size > 0) + { +#ifdef IMGUI_HAS_TABLE + while (g.CurrentTable && (g.CurrentTable->OuterWindow == g.CurrentWindow || g.CurrentTable->InnerWindow == g.CurrentWindow)) + { + if (log_callback) log_callback(user_data, "Recovered from missing EndTable() in '%s'", g.CurrentTable->OuterWindow->Name); + EndTable(); + } +#endif + ImGuiWindow* window = g.CurrentWindow; + IM_ASSERT(window != NULL); + while (g.CurrentTabBar != NULL) //-V1044 + { + if (log_callback) log_callback(user_data, "Recovered from missing EndTabBar() in '%s'", window->Name); + EndTabBar(); + } + while (window->DC.TreeDepth > 0) + { + if (log_callback) log_callback(user_data, "Recovered from missing TreePop() in '%s'", window->Name); + TreePop(); + } + while (g.GroupStack.Size > window->DC.StackSizesOnBegin.SizeOfGroupStack) + { + if (log_callback) log_callback(user_data, "Recovered from missing 
EndGroup() in '%s'", window->Name); + EndGroup(); + } + while (window->IDStack.Size > 1) + { + if (log_callback) log_callback(user_data, "Recovered from missing PopID() in '%s'", window->Name); + PopID(); + } + while (g.ColorStack.Size > window->DC.StackSizesOnBegin.SizeOfColorStack) + { + if (log_callback) log_callback(user_data, "Recovered from missing PopStyleColor() in '%s' for ImGuiCol_%s", window->Name, GetStyleColorName(g.ColorStack.back().Col)); + PopStyleColor(); + } + while (g.StyleVarStack.Size > window->DC.StackSizesOnBegin.SizeOfStyleVarStack) + { + if (log_callback) log_callback(user_data, "Recovered from missing PopStyleVar() in '%s'", window->Name); + PopStyleVar(); + } + while (g.FocusScopeStack.Size > window->DC.StackSizesOnBegin.SizeOfFocusScopeStack) + { + if (log_callback) log_callback(user_data, "Recovered from missing PopFocusScope() in '%s'", window->Name); + PopFocusScope(); + } + if (g.CurrentWindowStack.Size == 1) + { + IM_ASSERT(g.CurrentWindow->IsFallbackWindow); + break; + } + IM_ASSERT(window == g.CurrentWindow); + if (window->Flags & ImGuiWindowFlags_ChildWindow) + { + if (log_callback) log_callback(user_data, "Recovered from missing EndChild() for '%s'", window->Name); + EndChild(); + } + else + { + if (log_callback) log_callback(user_data, "Recovered from missing End() for '%s'", window->Name); + End(); + } + } +} + +// Save current stack sizes for later compare +void ImGuiStackSizes::SetToCurrentState() +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + SizeOfIDStack = (short)window->IDStack.Size; + SizeOfColorStack = (short)g.ColorStack.Size; + SizeOfStyleVarStack = (short)g.StyleVarStack.Size; + SizeOfFontStack = (short)g.FontStack.Size; + SizeOfFocusScopeStack = (short)g.FocusScopeStack.Size; + SizeOfGroupStack = (short)g.GroupStack.Size; + SizeOfBeginPopupStack = (short)g.BeginPopupStack.Size; +} + +// Compare to detect usage errors +void ImGuiStackSizes::CompareWithCurrentState() +{ + ImGuiContext& g = 
*GImGui; + ImGuiWindow* window = g.CurrentWindow; + IM_UNUSED(window); + + // Window stacks + // NOT checking: DC.ItemWidth, DC.TextWrapPos (per window) to allow user to conveniently push once and not pop (they are cleared on Begin) + IM_ASSERT(SizeOfIDStack == window->IDStack.Size && "PushID/PopID or TreeNode/TreePop Mismatch!"); + + // Global stacks + // For color, style and font stacks there is an incentive to use Push/Begin/Pop/.../End patterns, so we relax our checks a little to allow them. + IM_ASSERT(SizeOfGroupStack == g.GroupStack.Size && "BeginGroup/EndGroup Mismatch!"); + IM_ASSERT(SizeOfBeginPopupStack == g.BeginPopupStack.Size && "BeginPopup/EndPopup or BeginMenu/EndMenu Mismatch!"); + IM_ASSERT(SizeOfColorStack >= g.ColorStack.Size && "PushStyleColor/PopStyleColor Mismatch!"); + IM_ASSERT(SizeOfStyleVarStack >= g.StyleVarStack.Size && "PushStyleVar/PopStyleVar Mismatch!"); + IM_ASSERT(SizeOfFontStack >= g.FontStack.Size && "PushFont/PopFont Mismatch!"); + IM_ASSERT(SizeOfFocusScopeStack == g.FocusScopeStack.Size && "PushFocusScope/PopFocusScope Mismatch!"); +} + + +//----------------------------------------------------------------------------- +// [SECTION] LAYOUT +//----------------------------------------------------------------------------- +// - ItemSize() +// - ItemAdd() +// - SameLine() +// - GetCursorScreenPos() +// - SetCursorScreenPos() +// - GetCursorPos(), GetCursorPosX(), GetCursorPosY() +// - SetCursorPos(), SetCursorPosX(), SetCursorPosY() +// - GetCursorStartPos() +// - Indent() +// - Unindent() +// - SetNextItemWidth() +// - PushItemWidth() +// - PushMultiItemsWidths() +// - PopItemWidth() +// - CalcItemWidth() +// - CalcItemSize() +// - GetTextLineHeight() +// - GetTextLineHeightWithSpacing() +// - GetFrameHeight() +// - GetFrameHeightWithSpacing() +// - GetContentRegionMax() +// - GetContentRegionMaxAbs() [Internal] +// - GetContentRegionAvail(), +// - GetWindowContentRegionMin(), GetWindowContentRegionMax() +// - 
GetWindowContentRegionWidth() +// - BeginGroup() +// - EndGroup() +// Also see in imgui_widgets: tab bars, columns. +//----------------------------------------------------------------------------- + +// Advance cursor given item size for layout. +// Register minimum needed size so it can extend the bounding box used for auto-fit calculation. +// See comments in ItemAdd() about how/why the size provided to ItemSize() vs ItemAdd() may often different. +void ImGui::ItemSize(const ImVec2& size, float text_baseline_y) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (window->SkipItems) + return; + + // We increase the height in this function to accommodate for baseline offset. + // In theory we should be offsetting the starting position (window->DC.CursorPos), that will be the topic of a larger refactor, + // but since ItemSize() is not yet an API that moves the cursor (to handle e.g. wrapping) enlarging the height has the same effect. + const float offset_to_match_baseline_y = (text_baseline_y >= 0) ? 
ImMax(0.0f, window->DC.CurrLineTextBaseOffset - text_baseline_y) : 0.0f; + const float line_height = ImMax(window->DC.CurrLineSize.y, size.y + offset_to_match_baseline_y); + + // Always align ourselves on pixel boundaries + //if (g.IO.KeyAlt) window->DrawList->AddRect(window->DC.CursorPos, window->DC.CursorPos + ImVec2(size.x, line_height), IM_COL32(255,0,0,200)); // [DEBUG] + window->DC.CursorPosPrevLine.x = window->DC.CursorPos.x + size.x; + window->DC.CursorPosPrevLine.y = window->DC.CursorPos.y; + window->DC.CursorPos.x = IM_FLOOR(window->Pos.x + window->DC.Indent.x + window->DC.ColumnsOffset.x); // Next line + window->DC.CursorPos.y = IM_FLOOR(window->DC.CursorPos.y + line_height + g.Style.ItemSpacing.y); // Next line + window->DC.CursorMaxPos.x = ImMax(window->DC.CursorMaxPos.x, window->DC.CursorPosPrevLine.x); + window->DC.CursorMaxPos.y = ImMax(window->DC.CursorMaxPos.y, window->DC.CursorPos.y - g.Style.ItemSpacing.y); + //if (g.IO.KeyAlt) window->DrawList->AddCircle(window->DC.CursorMaxPos, 3.0f, IM_COL32(255,0,0,255), 4); // [DEBUG] + + window->DC.PrevLineSize.y = line_height; + window->DC.CurrLineSize.y = 0.0f; + window->DC.PrevLineTextBaseOffset = ImMax(window->DC.CurrLineTextBaseOffset, text_baseline_y); + window->DC.CurrLineTextBaseOffset = 0.0f; + + // Horizontal layout mode + if (window->DC.LayoutType == ImGuiLayoutType_Horizontal) + SameLine(); +} + +void ImGui::ItemSize(const ImRect& bb, float text_baseline_y) +{ + ItemSize(bb.GetSize(), text_baseline_y); +} + +// Declare item bounding box for clipping and interaction. +// Note that the size can be different than the one provided to ItemSize(). Typically, widgets that spread over available surface +// declare their minimum size requirement to ItemSize() and provide a larger region to ItemAdd() which is used drawing/interaction. 
+bool ImGui::ItemAdd(const ImRect& bb, ImGuiID id, const ImRect* nav_bb_arg) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + + if (id != 0) + { + // Navigation processing runs prior to clipping early-out + // (a) So that NavInitRequest can be honored, for newly opened windows to select a default widget + // (b) So that we can scroll up/down past clipped items. This adds a small O(N) cost to regular navigation requests + // unfortunately, but it is still limited to one window. It may not scale very well for windows with ten of + // thousands of item, but at least NavMoveRequest is only set on user interaction, aka maximum once a frame. + // We could early out with "if (is_clipped && !g.NavInitRequest) return false;" but when we wouldn't be able + // to reach unclipped widgets. This would work if user had explicit scrolling control (e.g. mapped on a stick). + // We intentionally don't check if g.NavWindow != NULL because g.NavAnyRequest should only be set when it is non null. + // If we crash on a NULL g.NavWindow we need to fix the bug elsewhere. + window->DC.NavLayerActiveMaskNext |= (1 << window->DC.NavLayerCurrent); + if (g.NavId == id || g.NavAnyRequest) + if (g.NavWindow->RootWindowForNav == window->RootWindowForNav) + if (window == g.NavWindow || ((window->Flags | g.NavWindow->Flags) & ImGuiWindowFlags_NavFlattened)) + NavProcessItem(window, nav_bb_arg ? 
*nav_bb_arg : bb, id); + + // [DEBUG] Item Picker tool, when enabling the "extended" version we perform the check in ItemAdd() +#ifdef IMGUI_DEBUG_TOOL_ITEM_PICKER_EX + if (id == g.DebugItemPickerBreakId) + { + IM_DEBUG_BREAK(); + g.DebugItemPickerBreakId = 0; + } +#endif + } + + // Equivalent to calling SetLastItemData() + window->DC.LastItemId = id; + window->DC.LastItemRect = bb; + window->DC.LastItemStatusFlags = ImGuiItemStatusFlags_None; + g.NextItemData.Flags = ImGuiNextItemDataFlags_None; + +#ifdef IMGUI_ENABLE_TEST_ENGINE + if (id != 0) + IMGUI_TEST_ENGINE_ITEM_ADD(nav_bb_arg ? *nav_bb_arg : bb, id); +#endif + + // Clipping test + const bool is_clipped = IsClippedEx(bb, id, false); + if (is_clipped) + return false; + //if (g.IO.KeyAlt) window->DrawList->AddRect(bb.Min, bb.Max, IM_COL32(255,255,0,120)); // [DEBUG] + + // We need to calculate this now to take account of the current clipping rectangle (as items like Selectable may change them) + if (IsMouseHoveringRect(bb.Min, bb.Max)) + window->DC.LastItemStatusFlags |= ImGuiItemStatusFlags_HoveredRect; + return true; +} + +// Gets back to previous line and continue with horizontal layout +// offset_from_start_x == 0 : follow right after previous item +// offset_from_start_x != 0 : align to specified x position (relative to window/group left) +// spacing_w < 0 : use default spacing if pos_x == 0, no spacing if pos_x != 0 +// spacing_w >= 0 : enforce spacing amount +void ImGui::SameLine(float offset_from_start_x, float spacing_w) +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return; + + ImGuiContext& g = *GImGui; + if (offset_from_start_x != 0.0f) + { + if (spacing_w < 0.0f) spacing_w = 0.0f; + window->DC.CursorPos.x = window->Pos.x - window->Scroll.x + offset_from_start_x + spacing_w + window->DC.GroupOffset.x + window->DC.ColumnsOffset.x; + window->DC.CursorPos.y = window->DC.CursorPosPrevLine.y; + } + else + { + if (spacing_w < 0.0f) spacing_w = g.Style.ItemSpacing.x; + 
window->DC.CursorPos.x = window->DC.CursorPosPrevLine.x + spacing_w;
+        window->DC.CursorPos.y = window->DC.CursorPosPrevLine.y;
+    }
+    window->DC.CurrLineSize = window->DC.PrevLineSize;
+    window->DC.CurrLineTextBaseOffset = window->DC.PrevLineTextBaseOffset;
+}
+
+// Return the layout cursor exactly as stored in DC.CursorPos (absolute screen coordinates, no conversion applied).
+ImVec2 ImGui::GetCursorScreenPos()
+{
+    ImGuiWindow* window = GetCurrentWindowRead();
+    return window->DC.CursorPos;
+}
+
+// Move the layout cursor to 'pos' (same coordinate space as GetCursorScreenPos()).
+// CursorMaxPos is grown so it always covers the new cursor position.
+void ImGui::SetCursorScreenPos(const ImVec2& pos)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    window->DC.CursorPos = pos;
+    window->DC.CursorMaxPos = ImMax(window->DC.CursorMaxPos, window->DC.CursorPos);
+}
+
+// User generally sees positions in window coordinates. Internally we store CursorPos in absolute screen coordinates because it is more convenient.
+// Conversion happens as we pass the value to user, but it makes our naming convention confusing because GetCursorPos() == (DC.CursorPos - window.Pos). May want to rename 'DC.CursorPos'.
+// Note that the code below also adds window->Scroll back, i.e. the value returned is (DC.CursorPos - window.Pos + window.Scroll).
+ImVec2 ImGui::GetCursorPos()
+{
+    ImGuiWindow* window = GetCurrentWindowRead();
+    return window->DC.CursorPos - window->Pos + window->Scroll;
+}
+
+// X component of GetCursorPos().
+float ImGui::GetCursorPosX()
+{
+    ImGuiWindow* window = GetCurrentWindowRead();
+    return window->DC.CursorPos.x - window->Pos.x + window->Scroll.x;
+}
+
+// Y component of GetCursorPos().
+float ImGui::GetCursorPosY()
+{
+    ImGuiWindow* window = GetCurrentWindowRead();
+    return window->DC.CursorPos.y - window->Pos.y + window->Scroll.y;
+}
+
+// Inverse of GetCursorPos(): 'local_pos' is converted back with (window.Pos - window.Scroll + local_pos).
+// CursorMaxPos is grown so it always covers the new cursor position.
+void ImGui::SetCursorPos(const ImVec2& local_pos)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    window->DC.CursorPos = window->Pos - window->Scroll + local_pos;
+    window->DC.CursorMaxPos = ImMax(window->DC.CursorMaxPos, window->DC.CursorPos);
+}
+
+// Same as SetCursorPos() but only touches the X axis.
+void ImGui::SetCursorPosX(float x)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    window->DC.CursorPos.x = window->Pos.x - window->Scroll.x + x;
+    window->DC.CursorMaxPos.x = ImMax(window->DC.CursorMaxPos.x, window->DC.CursorPos.x);
+}
+
+// Same as SetCursorPos() but only touches the Y axis.
+void ImGui::SetCursorPosY(float y)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    window->DC.CursorPos.y = window->Pos.y - window->Scroll.y + y;
+    window->DC.CursorMaxPos.y = ImMax(window->DC.CursorMaxPos.y, window->DC.CursorPos.y);
+}
+
+// Return DC.CursorStartPos relative to window->Pos (unlike GetCursorPos(), no scroll offset is added here).
+ImVec2 ImGui::GetCursorStartPos()
+{
+    ImGuiWindow* window = GetCurrentWindowRead();
+    return window->DC.CursorStartPos - window->Pos;
+}
+
+// Increase the horizontal indentation by 'indent_w', or by g.Style.IndentSpacing when 'indent_w' is 0.0f,
+// then re-seat the cursor X at window.Pos.x + Indent.x + ColumnsOffset.x.
+void ImGui::Indent(float indent_w)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = GetCurrentWindow();
+    window->DC.Indent.x += (indent_w != 0.0f) ? indent_w : g.Style.IndentSpacing;
+    window->DC.CursorPos.x = window->Pos.x + window->DC.Indent.x + window->DC.ColumnsOffset.x;
+}
+
+// Mirror of Indent(): decrease the horizontal indentation by 'indent_w' (or g.Style.IndentSpacing when 0.0f).
+void ImGui::Unindent(float indent_w)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = GetCurrentWindow();
+    window->DC.Indent.x -= (indent_w != 0.0f) ? indent_w : g.Style.IndentSpacing;
+    window->DC.CursorPos.x = window->Pos.x + window->DC.Indent.x + window->DC.ColumnsOffset.x;
+}
+
+// Affect large frame+labels widgets only.
+// Stores 'item_width' in g.NextItemData and raises the HasWidth flag; CalcItemWidth() reads it back while the flag is set.
+void ImGui::SetNextItemWidth(float item_width)
+{
+    ImGuiContext& g = *GImGui;
+    g.NextItemData.Flags |= ImGuiNextItemDataFlags_HasWidth;
+    g.NextItemData.Width = item_width;
+}
+
+// FIXME: Remove the == 0.0f behavior?
+// Push the current item width on the stack and make 'item_width' current (0.0f selects window->ItemWidthDefault).
+// Any pending SetNextItemWidth() request is discarded.
+void ImGui::PushItemWidth(float item_width)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    window->DC.ItemWidthStack.push_back(window->DC.ItemWidth); // Backup current width
+    window->DC.ItemWidth = (item_width == 0.0f ? window->ItemWidthDefault : item_width);
+    g.NextItemData.Flags &= ~ImGuiNextItemDataFlags_HasWidth;
+}
+
+// Split 'w_full' into 'components' item widths separated by style.ItemInnerSpacing.x and queue them so that
+// each PopItemWidth() call moves on to the width of the next component, the final one restoring the previous width.
+// - All components but the last use w_item_one; the last one (w_item_last) absorbs the rounding remainder.
+// - Exactly 'components' entries are pushed on the stack, including when components == 1. The previous code
+//   pushed two entries in that case, leaving the stack unbalanced after a single PopItemWidth().
+// - Any pending SetNextItemWidth() request is discarded.
+void ImGui::PushMultiItemsWidths(int components, float w_full)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    IM_ASSERT(components > 0); // 'components' is used as a divisor below
+    const ImGuiStyle& style = g.Style;
+    const float w_item_one = ImMax(1.0f, IM_FLOOR((w_full - (style.ItemInnerSpacing.x) * (components - 1)) / (float)components));
+    const float w_item_last = ImMax(1.0f, IM_FLOOR(w_full - (w_item_one + style.ItemInnerSpacing.x) * (components - 1)));
+    window->DC.ItemWidthStack.push_back(window->DC.ItemWidth); // Backup current width
+    if (components > 1)
+        window->DC.ItemWidthStack.push_back(w_item_last); // With a single component the last width becomes the current width directly (see below)
+    for (int i = 0; i < components - 2; i++)
+        window->DC.ItemWidthStack.push_back(w_item_one);
+    window->DC.ItemWidth = (components == 1) ? w_item_last : w_item_one;
+    g.NextItemData.Flags &= ~ImGuiNextItemDataFlags_HasWidth;
+}
+
+// Make the width at the top of the item width stack current again and remove it from the stack.
+void ImGui::PopItemWidth()
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    window->DC.ItemWidth = window->DC.ItemWidthStack.back();
+    window->DC.ItemWidthStack.pop_back();
+}
+
+// Calculate default item width given value passed to PushItemWidth() or SetNextItemWidth().
+// The SetNextItemWidth() data is generally cleared/consumed by ItemAdd() or NextItemData.ClearFlags()
+// - A pending SetNextItemWidth() value takes precedence over the window's current ItemWidth.
+// - A negative width is measured from the right edge: (content region max x - cursor x + w), never less than 1.0f.
+// - The result is passed through IM_FLOOR().
+float ImGui::CalcItemWidth()
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    float w;
+    if (g.NextItemData.Flags & ImGuiNextItemDataFlags_HasWidth)
+        w = g.NextItemData.Width;
+    else
+        w = window->DC.ItemWidth;
+    if (w < 0.0f)
+    {
+        float region_max_x = GetContentRegionMaxAbs().x;
+        w = ImMax(1.0f, region_max_x - window->DC.CursorPos.x + w);
+    }
+    w = IM_FLOOR(w);
+    return w;
+}
+
+// [Internal] Calculate full item size given user provided 'size' parameter and default width/height. Default width is often == CalcItemWidth().
+// Those two functions CalcItemWidth vs CalcItemSize are awkwardly named because they are not fully symmetrical.
+// Note that only CalcItemWidth() is publicly exposed.
+// The 4.0f minimum used below may be changed to match CalcItemWidth() and/or BeginChild() (right now we have a mismatch which is harmless but undesirable)
+// Per axis: 0.0f selects the supplied default, a negative value is measured back from the content region max
+// (region max - cursor + size, never less than 4.0f), and a positive value is returned untouched.
+ImVec2 ImGui::CalcItemSize(ImVec2 size, float default_w, float default_h)
+{
+    ImGuiWindow* window = GImGui->CurrentWindow;
+
+    // The content region is only queried when at least one axis is negative.
+    const bool any_axis_negative = (size.x < 0.0f) || (size.y < 0.0f);
+    ImVec2 avail_max;
+    if (any_axis_negative)
+        avail_max = GetContentRegionMaxAbs();
+
+    if (size.x < 0.0f)
+        size.x = ImMax(4.0f, avail_max.x - window->DC.CursorPos.x + size.x);
+    else if (size.x == 0.0f)
+        size.x = default_w;
+
+    if (size.y < 0.0f)
+        size.y = ImMax(4.0f, avail_max.y - window->DC.CursorPos.y + size.y);
+    else if (size.y == 0.0f)
+        size.y = default_h;
+
+    return size;
+}
+
+// Height of one line of text: the current font size.
+float ImGui::GetTextLineHeight()
+{
+    return GImGui->FontSize;
+}
+
+// Text line height plus the vertical item spacing from the style.
+float ImGui::GetTextLineHeightWithSpacing()
+{
+    const ImGuiContext& ctx = *GImGui;
+    return ctx.FontSize + ctx.Style.ItemSpacing.y;
+}
+
+// Height of a framed widget: font size plus the vertical frame padding on both sides.
+float ImGui::GetFrameHeight()
+{
+    const ImGuiContext& ctx = *GImGui;
+    return ctx.FontSize + ctx.Style.FramePadding.y * 2.0f;
+}
+
+// Framed widget height plus the vertical item spacing from the style.
+float ImGui::GetFrameHeightWithSpacing()
+{
+    const ImGuiContext& ctx = *GImGui;
+    const ImGuiStyle& style = ctx.Style;
+    return ctx.FontSize + style.FramePadding.y * 2.0f + style.ItemSpacing.y;
+}
+
+// FIXME: All the Contents Region function are messy or misleading. WE WILL AIM TO OBSOLETE ALL OF THEM WITH A NEW "WORK RECT" API. Thanks for your patience!
+
+// FIXME: This is in window space (not screen space!).
+// Content region max relative to window->Pos. While columns or a table are active the X bound comes from WorkRect instead.
+ImVec2 ImGui::GetContentRegionMax()
+{
+    ImGuiContext& ctx = *GImGui;
+    ImGuiWindow* win = ctx.CurrentWindow;
+    ImVec2 local_max = win->ContentRegionRect.Max - win->Pos;
+    const bool use_work_rect = win->DC.CurrentColumns || ctx.CurrentTable;
+    if (use_work_rect)
+        local_max.x = win->WorkRect.Max.x - win->Pos.x;
+    return local_max;
+}
+
+// [Internal] Absolute coordinate. Saner. This is not exposed until we finishing refactoring work rect features.
+// Returns ContentRegionRect.Max of the current window without subtracting window->Pos.
+// While columns or a table are active the X bound comes from WorkRect instead.
+ImVec2 ImGui::GetContentRegionMaxAbs()
+{
+    ImGuiContext& ctx = *GImGui;
+    ImGuiWindow* win = ctx.CurrentWindow;
+    ImVec2 abs_max = win->ContentRegionRect.Max;
+    const bool use_work_rect = win->DC.CurrentColumns || ctx.CurrentTable;
+    if (use_work_rect)
+        abs_max.x = win->WorkRect.Max.x;
+    return abs_max;
+}
+
+// Space left between the layout cursor and GetContentRegionMaxAbs().
+ImVec2 ImGui::GetContentRegionAvail()
+{
+    const ImVec2& cursor = GImGui->CurrentWindow->DC.CursorPos;
+    return GetContentRegionMaxAbs() - cursor;
+}
+
+// In window space (not screen space!)
+// Upper-left corner of ContentRegionRect relative to window->Pos.
+ImVec2 ImGui::GetWindowContentRegionMin()
+{
+    const ImGuiWindow* win = GImGui->CurrentWindow;
+    return win->ContentRegionRect.Min - win->Pos;
+}
+
+// Lower-right corner of ContentRegionRect relative to window->Pos.
+ImVec2 ImGui::GetWindowContentRegionMax()
+{
+    const ImGuiWindow* win = GImGui->CurrentWindow;
+    return win->ContentRegionRect.Max - win->Pos;
+}
+
+// Width of ContentRegionRect of the current window.
+float ImGui::GetWindowContentRegionWidth()
+{
+    return GImGui->CurrentWindow->ContentRegionRect.GetWidth();
+}
+
+// Lock horizontal starting position + capture group bounding box into one "item" (so you can use IsItemHovered() or layout primitives such as SameLine() on whole group, etc.)
+// Groups are currently a mishmash of functionalities which should perhaps be clarified and separated.
+// Open a group: pushes one ImGuiGroupData entry on g.GroupStack holding a backup of the layout state that
+// EndGroup() restores, then starts a fresh layout context anchored at the current cursor position.
+void ImGui::BeginGroup()
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+
+    // Snapshot everything EndGroup() needs to restore or compare against
+    g.GroupStack.resize(g.GroupStack.Size + 1);
+    ImGuiGroupData& group_data = g.GroupStack.back();
+    group_data.WindowID = window->ID;
+    group_data.BackupCursorPos = window->DC.CursorPos;
+    group_data.BackupCursorMaxPos = window->DC.CursorMaxPos;
+    group_data.BackupIndent = window->DC.Indent;
+    group_data.BackupGroupOffset = window->DC.GroupOffset;
+    group_data.BackupCurrLineSize = window->DC.CurrLineSize;
+    group_data.BackupCurrLineTextBaseOffset = window->DC.CurrLineTextBaseOffset;
+    group_data.BackupActiveIdIsAlive = g.ActiveIdIsAlive;
+    group_data.BackupActiveIdPreviousFrameIsAlive = g.ActiveIdPreviousFrameIsAlive;
+    group_data.EmitItem = true;
+
+    // Items inside the group start at the current cursor X; CursorMaxPos is reset so that
+    // EndGroup() can measure the extent of the group content only.
+    window->DC.GroupOffset.x = window->DC.CursorPos.x - window->Pos.x - window->DC.ColumnsOffset.x;
+    window->DC.Indent = window->DC.GroupOffset;
+    window->DC.CursorMaxPos = window->DC.CursorPos;
+    window->DC.CurrLineSize = ImVec2(0.0f, 0.0f);
+    if (g.LogEnabled)
+        g.LogLinePosY = -FLT_MAX; // To enforce Log carriage return
+}
+
+// Close the group opened by the matching BeginGroup(): restores the backed-up layout state, then (unless
+// EmitItem was cleared) submits the group bounding box as a single item and forwards the active/edited/
+// deactivated state of items inside the group to the "last item" data.
+void ImGui::EndGroup()
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    IM_ASSERT(g.GroupStack.Size > 0); // Mismatched BeginGroup()/EndGroup() calls
+
+    ImGuiGroupData& group_data = g.GroupStack.back();
+    IM_ASSERT(group_data.WindowID == window->ID); // EndGroup() in wrong window?
+
+    // Bounding box spans from the cursor position at BeginGroup() to the furthest position reached inside the group
+    ImRect group_bb(group_data.BackupCursorPos, ImMax(window->DC.CursorMaxPos, group_data.BackupCursorPos));
+
+    // Restore layout state. CursorMaxPos keeps the larger of the pre-group and in-group extents.
+    window->DC.CursorPos = group_data.BackupCursorPos;
+    window->DC.CursorMaxPos = ImMax(group_data.BackupCursorMaxPos, window->DC.CursorMaxPos);
+    window->DC.Indent = group_data.BackupIndent;
+    window->DC.GroupOffset = group_data.BackupGroupOffset;
+    window->DC.CurrLineSize = group_data.BackupCurrLineSize;
+    window->DC.CurrLineTextBaseOffset = group_data.BackupCurrLineTextBaseOffset;
+    if (g.LogEnabled)
+        g.LogLinePosY = -FLT_MAX; // To enforce Log carriage return
+
+    // Groups flagged not to emit an item only restore state
+    if (!group_data.EmitItem)
+    {
+        g.GroupStack.pop_back();
+        return;
+    }
+
+    window->DC.CurrLineTextBaseOffset = ImMax(window->DC.PrevLineTextBaseOffset, group_data.BackupCurrLineTextBaseOffset); // FIXME: Incorrect, we should grab the base offset from the *first line* of the group but it is hard to obtain now.
+    ItemSize(group_bb.GetSize());
+    ItemAdd(group_bb, 0);
+
+    // If the current ActiveId was declared within the boundary of our group, we copy it to LastItemId so IsItemActive(), IsItemDeactivated() etc. will be functional on the entire group.
+    // It would be neater if we replaced window.DC.LastItemId by e.g. 'bool LastItemIsActive', but would put a little more burden on individual widgets.
+    // Also if you grep for LastItemId you'll notice it is only used in that context.
+    // (The two tests are not the same because ActiveIdIsAlive is an ID itself, in order to be able to handle ActiveId being overwritten during the frame.)
+    const bool group_contains_curr_active_id = (group_data.BackupActiveIdIsAlive != g.ActiveId) && (g.ActiveIdIsAlive == g.ActiveId) && g.ActiveId;
+    const bool group_contains_prev_active_id = (group_data.BackupActiveIdPreviousFrameIsAlive == false) && (g.ActiveIdPreviousFrameIsAlive == true);
+    if (group_contains_curr_active_id)
+        window->DC.LastItemId = g.ActiveId;
+    else if (group_contains_prev_active_id)
+        window->DC.LastItemId = g.ActiveIdPreviousFrame;
+    window->DC.LastItemRect = group_bb;
+
+    // Forward Edited flag
+    if (group_contains_curr_active_id && g.ActiveIdHasBeenEditedThisFrame)
+        window->DC.LastItemStatusFlags |= ImGuiItemStatusFlags_Edited;
+
+    // Forward Deactivated flag
+    window->DC.LastItemStatusFlags |= ImGuiItemStatusFlags_HasDeactivated;
+    if (group_contains_prev_active_id && g.ActiveId != g.ActiveIdPreviousFrame)
+        window->DC.LastItemStatusFlags |= ImGuiItemStatusFlags_Deactivated;
+
+    g.GroupStack.pop_back();
+    //window->DrawList->AddRect(group_bb.Min, group_bb.Max, IM_COL32(255,0,255,255)); // [Debug]
+}
+
+
+//-----------------------------------------------------------------------------
+// [SECTION] SCROLLING
+//-----------------------------------------------------------------------------
+
+// Helper to snap on edges when aiming at an item very close to the edge,
+// So the difference between WindowPadding and ItemSpacing will be in the visible area after scrolling.
+// When we refactor the scrolling API this may be configurable with a flag?
+// Note that the effect for this won't be visible on X axis with default Style settings as WindowPadding.x == ItemSpacing.x by default.
+// Returns 'target' unchanged unless it lies within 'snap_threshold' of 'snap_min' or 'snap_max', in which case
+// the result is interpolated between the target and that edge using 'center_ratio' as the ImLerp() factor.
+static float CalcScrollEdgeSnap(float target, float snap_min, float snap_max, float snap_threshold, float center_ratio)
+{
+    if (target <= snap_min + snap_threshold)
+        return ImLerp(snap_min, target, center_ratio);
+    if (target >= snap_max - snap_threshold)
+        return ImLerp(target, snap_max, center_ratio);
+    return target;
+}
+
+// Compute the scroll value the window should use given its pending ScrollTarget, without modifying the window.
+// - An axis whose ScrollTarget component is not < FLT_MAX keeps its current scroll (presumably FLT_MAX marks "no request" - confirm where ScrollTarget is reset).
+// - The result is floored and clamped to >= 0, and to <= ScrollMax unless the window is collapsed or skipping items.
+static ImVec2 CalcNextScrollFromScrollTargetAndClamp(ImGuiWindow* window)
+{
+    ImVec2 scroll = window->Scroll;
+    if (window->ScrollTarget.x < FLT_MAX)
+    {
+        float center_x_ratio = window->ScrollTargetCenterRatio.x;
+        float scroll_target_x = window->ScrollTarget.x;
+        float snap_x_min = 0.0f;
+        float snap_x_max = window->ScrollMax.x + window->Size.x;
+        if (window->ScrollTargetEdgeSnapDist.x > 0.0f)
+            scroll_target_x = CalcScrollEdgeSnap(scroll_target_x, snap_x_min, snap_x_max, window->ScrollTargetEdgeSnapDist.x, center_x_ratio);
+        // Offset by a fraction of the width left after the vertical scrollbar so the target lands at 'center_x_ratio'
+        scroll.x = scroll_target_x - center_x_ratio * (window->SizeFull.x - window->ScrollbarSizes.x);
+    }
+    if (window->ScrollTarget.y < FLT_MAX)
+    {
+        // Title bar and menu bar are excluded from the height used on the Y axis
+        float decoration_up_height = window->TitleBarHeight() + window->MenuBarHeight();
+        float center_y_ratio = window->ScrollTargetCenterRatio.y;
+        float scroll_target_y = window->ScrollTarget.y;
+        float snap_y_min = 0.0f;
+        float snap_y_max = window->ScrollMax.y + window->Size.y - decoration_up_height;
+        if (window->ScrollTargetEdgeSnapDist.y > 0.0f)
+            scroll_target_y = CalcScrollEdgeSnap(scroll_target_y, snap_y_min, snap_y_max, window->ScrollTargetEdgeSnapDist.y, center_y_ratio);
+        scroll.y = scroll_target_y - center_y_ratio * (window->SizeFull.y - window->ScrollbarSizes.y - decoration_up_height);
+    }
+    scroll.x = IM_FLOOR(ImMax(scroll.x, 0.0f));
+    scroll.y = IM_FLOOR(ImMax(scroll.y, 0.0f));
+    if (!window->Collapsed && !window->SkipItems)
+    {
+        scroll.x = ImMin(scroll.x, window->ScrollMax.x);
+        scroll.y = ImMin(scroll.y, window->ScrollMax.y);
+    }
+    return scroll;
+}
+
+// Scroll to keep newly navigated item fully into view
+// Sets scroll targets on 'window' when 'item_rect' is not contained in its inner rect (expanded by 1 pixel on each side),
+// then recurses into the parent for child windows with the rect shifted by the scroll about to be applied.
+// Returns the accumulated (next scroll - current scroll) over the window and its parents.
+ImVec2 ImGui::ScrollToBringRectIntoView(ImGuiWindow* window, const ImRect& item_rect)
+{
+    ImGuiContext& g = *GImGui;
+    ImRect window_rect(window->InnerRect.Min - ImVec2(1, 1), window->InnerRect.Max + ImVec2(1, 1));
+    //GetForegroundDrawList(window)->AddRect(window_rect.Min, window_rect.Max, IM_COL32_WHITE); // [DEBUG]
+
+    ImVec2 delta_scroll;
+    if (!window_rect.Contains(item_rect))
+    {
+        // Horizontal scrolling is only requested when the window has a horizontal scrollbar
+        if (window->ScrollbarX && item_rect.Min.x < window_rect.Min.x)
+            SetScrollFromPosX(window, item_rect.Min.x - window->Pos.x - g.Style.ItemSpacing.x, 0.0f);
+        else if (window->ScrollbarX && item_rect.Max.x >= window_rect.Max.x)
+            SetScrollFromPosX(window, item_rect.Max.x - window->Pos.x + g.Style.ItemSpacing.x, 1.0f);
+        if (item_rect.Min.y < window_rect.Min.y)
+            SetScrollFromPosY(window, item_rect.Min.y - window->Pos.y - g.Style.ItemSpacing.y, 0.0f);
+        else if (item_rect.Max.y >= window_rect.Max.y)
+            SetScrollFromPosY(window, item_rect.Max.y - window->Pos.y + g.Style.ItemSpacing.y, 1.0f);
+
+        ImVec2 next_scroll = CalcNextScrollFromScrollTargetAndClamp(window);
+        delta_scroll = next_scroll - window->Scroll;
+    }
+
+    // Also scroll parent window to keep us into view if necessary
+    if (window->Flags & ImGuiWindowFlags_ChildWindow)
+        delta_scroll += ScrollToBringRectIntoView(window->ParentWindow, ImRect(item_rect.Min - delta_scroll, item_rect.Max - delta_scroll));
+
+    return delta_scroll;
+}
+
+// Current horizontal scroll of the current window.
+float ImGui::GetScrollX()
+{
+    ImGuiWindow* window = GImGui->CurrentWindow;
+    return window->Scroll.x;
+}
+
+// Current vertical scroll of the current window.
+float ImGui::GetScrollY()
+{
+    ImGuiWindow* window = GImGui->CurrentWindow;
+    return window->Scroll.y;
+}
+
+// Maximum horizontal scroll (ScrollMax.x) of the current window.
+float ImGui::GetScrollMaxX()
+{
+    ImGuiWindow* window = GImGui->CurrentWindow;
+    return window->ScrollMax.x;
+}
+
+// Maximum vertical scroll (ScrollMax.y) of the current window.
+float ImGui::GetScrollMaxY()
+{
+    ImGuiWindow* window = GImGui->CurrentWindow;
+    return window->ScrollMax.y;
+}
+
+// Request an absolute horizontal scroll value for 'window': stored as a target with no centering and no edge snapping.
+void ImGui::SetScrollX(ImGuiWindow* window, float scroll_x)
+{
+    window->ScrollTarget.x = scroll_x;
+    window->ScrollTargetCenterRatio.x = 0.0f;
+    window->ScrollTargetEdgeSnapDist.x = 0.0f;
+}
+
+// Request an absolute vertical scroll value for 'window': stored as a target with no centering and no edge snapping.
+void ImGui::SetScrollY(ImGuiWindow* window, float scroll_y)
+{
+    window->ScrollTarget.y = scroll_y;
+    window->ScrollTargetCenterRatio.y = 0.0f;
+    window->ScrollTargetEdgeSnapDist.y = 0.0f;
+}
+
+// Overload operating on the current window.
+void ImGui::SetScrollX(float scroll_x)
+{
+    ImGuiContext& g = *GImGui;
+    SetScrollX(g.CurrentWindow, scroll_x);
+}
+
+// Overload operating on the current window.
+void ImGui::SetScrollY(float scroll_y)
+{
+    ImGuiContext& g = *GImGui;
+    SetScrollY(g.CurrentWindow, scroll_y);
+}
+
+// Note that a local position will vary depending on initial scroll value,
+// This is a little bit confusing so bear with us:
+//  - local_pos = (absolute_pos - window->Pos)
+//  - So local_x/local_y are 0.0f for a position at the upper-left corner of a window,
+//    and generally local_x/local_y are >(padding+decoration) && <(size-padding-decoration) when in the visible area.
+//  - They mostly exist because of legacy API.
+// Following the rules above, when trying to work with scrolling code, consider that:
+//  - SetScrollFromPosY(0.0f) == SetScrollY(0.0f + scroll.y) == has no effect!
+//  - SetScrollFromPosY(-scroll.y) == SetScrollY(-scroll.y + scroll.y) == SetScrollY(0.0f) == reset scroll. Of course writing SetScrollY(0.0f) directly then makes more sense
+// We store a target position so centering and clamping can occur on the next frame when we are guaranteed to have a known window size
+// 'center_x_ratio' must be within [0.0f, 1.0f]; it is stored alongside the target and applied by CalcNextScrollFromScrollTargetAndClamp().
+void ImGui::SetScrollFromPosX(ImGuiWindow* window, float local_x, float center_x_ratio)
+{
+    IM_ASSERT(center_x_ratio >= 0.0f && center_x_ratio <= 1.0f);
+    window->ScrollTarget.x = IM_FLOOR(local_x + window->Scroll.x); // Convert local position to scroll offset
+    window->ScrollTargetCenterRatio.x = center_x_ratio;
+    window->ScrollTargetEdgeSnapDist.x = 0.0f;
+}
+
+// Vertical counterpart of SetScrollFromPosX(). The title bar and menu bar heights are subtracted from 'local_y' first.
+void ImGui::SetScrollFromPosY(ImGuiWindow* window, float local_y, float center_y_ratio)
+{
+    IM_ASSERT(center_y_ratio >= 0.0f && center_y_ratio <= 1.0f);
+    local_y -= window->TitleBarHeight() + window->MenuBarHeight(); // FIXME: Would be nice to have a more standardized access to our scrollable/client rect
+    window->ScrollTarget.y = IM_FLOOR(local_y + window->Scroll.y); // Convert local position to scroll offset
+    window->ScrollTargetCenterRatio.y = center_y_ratio;
+    window->ScrollTargetEdgeSnapDist.y = 0.0f;
+}
+
+// Overload operating on the current window.
+void ImGui::SetScrollFromPosX(float local_x, float center_x_ratio)
+{
+    ImGuiContext& g = *GImGui;
+    SetScrollFromPosX(g.CurrentWindow, local_x, center_x_ratio);
+}
+
+// Overload operating on the current window.
+void ImGui::SetScrollFromPosY(float local_y, float center_y_ratio)
+{
+    ImGuiContext& g = *GImGui;
+    SetScrollFromPosY(g.CurrentWindow, local_y, center_y_ratio);
+}
+
+// center_x_ratio: 0.0f left of last item, 0.5f horizontal center of last item, 1.0f right of last item.
+// Request horizontal scrolling so the last item (DC.LastItemRect, widened by ItemSpacing.x on each side)
+// ends up at 'center_x_ratio' of the view.
+void ImGui::SetScrollHereX(float center_x_ratio)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    float spacing_x = g.Style.ItemSpacing.x;
+    float target_pos_x = ImLerp(window->DC.LastItemRect.Min.x - spacing_x, window->DC.LastItemRect.Max.x + spacing_x, center_x_ratio);
+    SetScrollFromPosX(window, target_pos_x - window->Pos.x, center_x_ratio); // Convert from absolute to local pos
+
+    // Tweak: snap on edges when aiming at an item very close to the edge
+    window->ScrollTargetEdgeSnapDist.x = ImMax(0.0f, window->WindowPadding.x - spacing_x);
+}
+
+// center_y_ratio: 0.0f top of last item, 0.5f vertical center of last item, 1.0f bottom of last item.
+// Unlike SetScrollHereX() the vertical extent is taken from the previous line (DC.CursorPosPrevLine.y and DC.PrevLineSize.y),
+// widened by ItemSpacing.y on each side.
+void ImGui::SetScrollHereY(float center_y_ratio)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    float spacing_y = g.Style.ItemSpacing.y;
+    float target_pos_y = ImLerp(window->DC.CursorPosPrevLine.y - spacing_y, window->DC.CursorPosPrevLine.y + window->DC.PrevLineSize.y + spacing_y, center_y_ratio);
+    SetScrollFromPosY(window, target_pos_y - window->Pos.y, center_y_ratio); // Convert from absolute to local pos
+
+    // Tweak: snap on edges when aiming at an item very close to the edge
+    window->ScrollTargetEdgeSnapDist.y = ImMax(0.0f, window->WindowPadding.y - spacing_y);
+}
+
+//-----------------------------------------------------------------------------
+// [SECTION] TOOLTIPS
+//-----------------------------------------------------------------------------
+
+// Begin a tooltip window with no extra window flags and no tooltip flags. Pair with EndTooltip().
+void ImGui::BeginTooltip()
+{
+    BeginTooltipEx(ImGuiWindowFlags_None, ImGuiTooltipFlags_None);
+}
+
+// Begin the tooltip window named "##Tooltip_NN" where NN is g.TooltipOverrideCount.
+// - 'extra_flags' are OR-ed into the fixed set of tooltip window flags.
+// - With ImGuiTooltipFlags_OverridePreviousTooltip (forced while inside a drag and drop source/target), a tooltip window
+//   of that name which is already active is hidden and a new window name is used instead.
+void ImGui::BeginTooltipEx(ImGuiWindowFlags extra_flags, ImGuiTooltipFlags tooltip_flags)
+{
+    ImGuiContext& g = *GImGui;
+
+    if (g.DragDropWithinSource || g.DragDropWithinTarget)
+    {
+        // The default tooltip position is a little offset to give space to see the context menu (it's also clamped within the current viewport/monitor)
+        // In the context of a dragging tooltip we try to reduce that offset and we enforce following the cursor.
+        // Whatever we do we want to call SetNextWindowPos() to enforce a tooltip position and disable clipping the tooltip without our display area, like regular tooltip do.
+        //ImVec2 tooltip_pos = g.IO.MousePos - g.ActiveIdClickOffset - g.Style.WindowPadding;
+        ImVec2 tooltip_pos = g.IO.MousePos + ImVec2(16 * g.Style.MouseCursorScale, 8 * g.Style.MouseCursorScale);
+        SetNextWindowPos(tooltip_pos);
+        SetNextWindowBgAlpha(g.Style.Colors[ImGuiCol_PopupBg].w * 0.60f);
+        //PushStyleVar(ImGuiStyleVar_Alpha, g.Style.Alpha * 0.60f); // This would be nice but e.g ColorButton with checkboard has issue with transparent colors :(
+        tooltip_flags |= ImGuiTooltipFlags_OverridePreviousTooltip;
+    }
+
+    char window_name[16];
+    ImFormatString(window_name, IM_ARRAYSIZE(window_name), "##Tooltip_%02d", g.TooltipOverrideCount);
+    if (tooltip_flags & ImGuiTooltipFlags_OverridePreviousTooltip)
+        if (ImGuiWindow* window = FindWindowByName(window_name))
+            if (window->Active)
+            {
+                // Hide previous tooltip from being displayed. We can't easily "reset" the content of a window so we create a new one.
+                window->Hidden = true;
+                window->HiddenFramesCanSkipItems = 1; // FIXME: This may not be necessary?
+                ImFormatString(window_name, IM_ARRAYSIZE(window_name), "##Tooltip_%02d", ++g.TooltipOverrideCount);
+            }
+    ImGuiWindowFlags flags = ImGuiWindowFlags_Tooltip | ImGuiWindowFlags_NoInputs | ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_AlwaysAutoResize;
+    Begin(window_name, NULL, flags | extra_flags);
+}
+
+// End the tooltip window begun by BeginTooltip()/BeginTooltipEx(). Asserts that the current window is a tooltip.
+void ImGui::EndTooltip()
+{
+    IM_ASSERT(GetCurrentWindowRead()->Flags & ImGuiWindowFlags_Tooltip); // Mismatched BeginTooltip()/EndTooltip() calls
+    End();
+}
+
+// Submit a text-only tooltip which overrides a tooltip already submitted (va_list version of SetTooltip()).
+void ImGui::SetTooltipV(const char* fmt, va_list args)
+{
+    BeginTooltipEx(0, ImGuiTooltipFlags_OverridePreviousTooltip);
+    TextV(fmt, args);
+    EndTooltip();
+}
+
+// printf-style wrapper over SetTooltipV().
+void ImGui::SetTooltip(const char* fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    SetTooltipV(fmt, args);
+    va_end(args);
+}
+
+//-----------------------------------------------------------------------------
+// [SECTION] POPUPS
+//-----------------------------------------------------------------------------
+
+// Supported flags: ImGuiPopupFlags_AnyPopupId, ImGuiPopupFlags_AnyPopupLevel
+// With ImGuiPopupFlags_AnyPopupId, 'id' must be 0 and any open popup matches; otherwise the popup id must match 'id'.
+bool ImGui::IsPopupOpen(ImGuiID id, ImGuiPopupFlags popup_flags)
+{
+    ImGuiContext& g = *GImGui;
+    if (popup_flags & ImGuiPopupFlags_AnyPopupId)
+    {
+        // Return true if any popup is open at the current BeginPopup() level of the popup stack
+        // This may be used to e.g. test for another popups already opened to handle popups priorities at the same level.
+        IM_ASSERT(id == 0);
+        if (popup_flags & ImGuiPopupFlags_AnyPopupLevel)
+            return g.OpenPopupStack.Size > 0;
+        else
+            return g.OpenPopupStack.Size > g.BeginPopupStack.Size;
+    }
+    else
+    {
+        if (popup_flags & ImGuiPopupFlags_AnyPopupLevel)
+        {
+            // Return true if the popup is open anywhere in the popup stack
+            for (int n = 0; n < g.OpenPopupStack.Size; n++)
+                if (g.OpenPopupStack[n].PopupId == id)
+                    return true;
+            return false;
+        }
+        else
+        {
+            // Return true if the popup is open at the current BeginPopup() level of the popup stack (this is the most-common query)
+            return g.OpenPopupStack.Size > g.BeginPopupStack.Size && g.OpenPopupStack[g.BeginPopupStack.Size].PopupId == id;
+        }
+    }
+}
+
+// String overload: 'str_id' is hashed with the current window's GetID() unless ImGuiPopupFlags_AnyPopupId is set (id 0 is then used).
+// Combining a string id with ImGuiPopupFlags_AnyPopupLevel is rejected by an assert.
+bool ImGui::IsPopupOpen(const char* str_id, ImGuiPopupFlags popup_flags)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiID id = (popup_flags & ImGuiPopupFlags_AnyPopupId) ? 0 : g.CurrentWindow->GetID(str_id);
+    if ((popup_flags & ImGuiPopupFlags_AnyPopupLevel) && id != 0)
+        IM_ASSERT(0 && "Cannot use IsPopupOpen() with a string id and ImGuiPopupFlags_AnyPopupLevel."); // But non-string version is legal and used internally
+    return IsPopupOpen(id, popup_flags);
+}
+
+// Return the window of the highest entry in the open popup stack which has the Modal flag, or NULL if there is none.
+ImGuiWindow* ImGui::GetTopMostPopupModal()
+{
+    ImGuiContext& g = *GImGui;
+    for (int n = g.OpenPopupStack.Size - 1; n >= 0; n--)
+        if (ImGuiWindow* popup = g.OpenPopupStack.Data[n].Window)
+            if (popup->Flags & ImGuiWindowFlags_Modal)
+                return popup;
+    return NULL;
+}
+
+// String overload of OpenPopupEx(): 'str_id' is hashed with the current window's GetID().
+void ImGui::OpenPopup(const char* str_id, ImGuiPopupFlags popup_flags)
+{
+    ImGuiContext& g = *GImGui;
+    OpenPopupEx(g.CurrentWindow->GetID(str_id), popup_flags);
+}
+
+// Mark popup as open (toggle toward open state).
+// Popups are closed when user click outside, or activate a pressable item, or CloseCurrentPopup() is called within a BeginPopup()/EndPopup() block.
+// Popup identifiers are relative to the current ID-stack (so OpenPopup and BeginPopup needs to be at the same level).
+// One open popup per level of the popup hierarchy (NB: when assigning we reset the Window member of ImGuiPopupRef to NULL)
+// With ImGuiPopupFlags_NoOpenOverExistingPopup the call does nothing if a popup is already open at the current level.
+void ImGui::OpenPopupEx(ImGuiID id, ImGuiPopupFlags popup_flags)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* parent_window = g.CurrentWindow;
+    const int current_stack_size = g.BeginPopupStack.Size;
+
+    if (popup_flags & ImGuiPopupFlags_NoOpenOverExistingPopup)
+        if (IsPopupOpen(0u, ImGuiPopupFlags_AnyPopupId))
+            return;
+
+    ImGuiPopupData popup_ref; // Tagged as new ref as Window will be set back to NULL if we write this into OpenPopupStack.
+    popup_ref.PopupId = id;
+    popup_ref.Window = NULL;
+    popup_ref.SourceWindow = g.NavWindow;
+    popup_ref.OpenFrameCount = g.FrameCount;
+    popup_ref.OpenParentId = parent_window->IDStack.back();
+    popup_ref.OpenPopupPos = NavCalcPreferredRefPos();
+    popup_ref.OpenMousePos = IsMousePosValid(&g.IO.MousePos) ? g.IO.MousePos : popup_ref.OpenPopupPos;
+
+    IMGUI_DEBUG_LOG_POPUP("OpenPopupEx(0x%08X)\n", id);
+    if (g.OpenPopupStack.Size < current_stack_size + 1)
+    {
+        // Nothing is open at this level yet
+        g.OpenPopupStack.push_back(popup_ref);
+    }
+    else
+    {
+        // Gently handle the user mistakenly calling OpenPopup() every frame. It is a programming mistake! However, if we were to run the regular code path, the ui
+        // would become completely unusable because the popup will always be in hidden-while-calculating-size state _while_ claiming focus. Which would be a very confusing
+        // situation for the programmer. Instead, we silently allow the popup to proceed, it will keep reappearing and the programming error will be more obvious to understand.
+        if (g.OpenPopupStack[current_stack_size].PopupId == id && g.OpenPopupStack[current_stack_size].OpenFrameCount == g.FrameCount - 1)
+        {
+            g.OpenPopupStack[current_stack_size].OpenFrameCount = popup_ref.OpenFrameCount;
+        }
+        else
+        {
+            // Close child popups if any, then flag popup for open/reopen
+            ClosePopupToLevel(current_stack_size, false);
+            g.OpenPopupStack.push_back(popup_ref);
+        }
+
+        // When reopening a popup we first refocus its parent, otherwise if its parent is itself a popup it would get closed by ClosePopupsOverWindow().
+        // This is equivalent to what ClosePopupToLevel() does.
+        //if (g.OpenPopupStack[current_stack_size].PopupId == id)
+        //    FocusWindow(parent_window);
+    }
+}
+
+// When popups are stacked, clicking on a lower level popups puts focus back to it and close popups above it.
+// This function closes any popups that are over 'ref_window'.
+// 'ref_window' may be NULL, in which case no popup is kept and the whole open popup stack is closed.
+void ImGui::ClosePopupsOverWindow(ImGuiWindow* ref_window, bool restore_focus_to_window_under_popup)
+{
+    ImGuiContext& g = *GImGui;
+    if (g.OpenPopupStack.Size == 0)
+        return;
+
+    // Don't close our own child popup windows.
+    int popup_count_to_keep = 0;
+    if (ref_window)
+    {
+        // Find the highest popup which is a descendant of the reference window (generally reference window = NavWindow)
+        for (; popup_count_to_keep < g.OpenPopupStack.Size; popup_count_to_keep++)
+        {
+            ImGuiPopupData& popup = g.OpenPopupStack[popup_count_to_keep];
+            if (!popup.Window)
+                continue;
+            IM_ASSERT((popup.Window->Flags & ImGuiWindowFlags_Popup) != 0);
+            if (popup.Window->Flags & ImGuiWindowFlags_ChildWindow)
+                continue;
+
+            // Trim the stack unless the popup is a direct parent of the reference window (the reference window is often the NavWindow)
+            // - With this stack of window, clicking/focusing Popup1 will close Popup2 and Popup3:
+            //     Window -> Popup1 -> Popup2 -> Popup3
+            // - Each popups may contain child windows, which is why we compare ->RootWindow!
+            //     Window -> Popup1 -> Popup1_Child -> Popup2 -> Popup2_Child
+            bool ref_window_is_descendent_of_popup = false;
+            for (int n = popup_count_to_keep; n < g.OpenPopupStack.Size; n++)
+                if (ImGuiWindow* popup_window = g.OpenPopupStack[n].Window)
+                    if (popup_window->RootWindow == ref_window->RootWindow)
+                    {
+                        ref_window_is_descendent_of_popup = true;
+                        break;
+                    }
+            if (!ref_window_is_descendent_of_popup)
+                break;
+        }
+    }
+    if (popup_count_to_keep < g.OpenPopupStack.Size) // This test is not required but it allows to set a convenient breakpoint on the statement below
+    {
+        // 'ref_window' is allowed to be NULL on this path, so it must not be dereferenced unconditionally for logging
+        IMGUI_DEBUG_LOG_POPUP("ClosePopupsOverWindow(\"%s\") -> ClosePopupToLevel(%d)\n", ref_window ? ref_window->Name : "<NULL>", popup_count_to_keep);
+        ClosePopupToLevel(popup_count_to_keep, restore_focus_to_window_under_popup);
+    }
+}
+
+// Truncate the open popup stack to 'remaining' entries (0 <= remaining < stack size).
+// When 'restore_focus_to_window_under_popup' is set, focus goes back to the SourceWindow recorded for the first closed popup,
+// or to the top-most window under that popup if the source window was not active.
+void ImGui::ClosePopupToLevel(int remaining, bool restore_focus_to_window_under_popup)
+{
+    ImGuiContext& g = *GImGui;
+    IMGUI_DEBUG_LOG_POPUP("ClosePopupToLevel(%d), restore_focus_to_window_under_popup=%d\n", remaining, restore_focus_to_window_under_popup);
+    IM_ASSERT(remaining >= 0 && remaining < g.OpenPopupStack.Size);
+
+    // Trim open popup stack
+    ImGuiWindow* focus_window = g.OpenPopupStack[remaining].SourceWindow;
+    ImGuiWindow* popup_window = g.OpenPopupStack[remaining].Window;
+    g.OpenPopupStack.resize(remaining);
+
+    if (restore_focus_to_window_under_popup)
+    {
+        if (focus_window && !focus_window->WasActive && popup_window)
+        {
+            // Fallback
+            FocusTopMostWindowUnderOne(popup_window, NULL);
+        }
+        else
+        {
+            if (g.NavLayer == ImGuiNavLayer_Main && focus_window)
+                focus_window = NavRestoreLastChildNavWindow(focus_window);
+            FocusWindow(focus_window);
+        }
+    }
+}
+
+// Close the popup we have begin-ed into.
+// Does nothing when the top of the begin stack does not match the open popup stack at the same level.
+void ImGui::CloseCurrentPopup()
+{
+    ImGuiContext& g = *GImGui;
+    int popup_idx = g.BeginPopupStack.Size - 1;
+    if (popup_idx < 0 || popup_idx >= g.OpenPopupStack.Size || g.BeginPopupStack[popup_idx].PopupId != g.OpenPopupStack[popup_idx].PopupId)
+        return;
+
+    // Closing a menu closes its top-most parent popup (unless a modal)
+    while (popup_idx > 0)
+    {
+        ImGuiWindow* popup_window = g.OpenPopupStack[popup_idx].Window;
+        ImGuiWindow* parent_popup_window = g.OpenPopupStack[popup_idx - 1].Window;
+        bool close_parent = false;
+        if (popup_window && (popup_window->Flags & ImGuiWindowFlags_ChildMenu))
+            if (parent_popup_window == NULL || !(parent_popup_window->Flags & ImGuiWindowFlags_Modal))
+                close_parent = true;
+        if (!close_parent)
+            break;
+        popup_idx--;
+    }
+    IMGUI_DEBUG_LOG_POPUP("CloseCurrentPopup %d -> %d\n", g.BeginPopupStack.Size - 1, popup_idx);
+    ClosePopupToLevel(popup_idx, true);
+
+    // A common pattern is to close a popup when selecting a menu item/selectable that will open another window.
+    // To improve this usage pattern, we avoid nav highlight for a single frame in the parent window.
+    // Similarly, we could avoid mouse hover highlight in this window but it is less visually problematic.
+    if (ImGuiWindow* window = g.NavWindow)
+        window->DC.NavHideHighlightOneFrame = true;
+}
+
+// Attention! BeginPopup() adds default flags which BeginPopupEx() does not add!
+// Begin the window of popup 'id' if that popup is open at the current level of the popup stack.
+// Returns true when the popup window was begun (the caller must then call EndPopup()); on false nothing is left open.
+// Only ImGuiWindowFlags_Popup is added to 'flags' here.
+bool ImGui::BeginPopupEx(ImGuiID id, ImGuiWindowFlags flags)
+{
+    ImGuiContext& ctx = *GImGui;
+    if (IsPopupOpen(id, ImGuiPopupFlags_None))
+    {
+        // Child menus share one window per stack depth; other popups get a window named after their id,
+        // which is what allows closing and reopening during the same frame.
+        char window_name[20];
+        const bool is_child_menu = (flags & ImGuiWindowFlags_ChildMenu) != 0;
+        if (is_child_menu)
+            ImFormatString(window_name, IM_ARRAYSIZE(window_name), "##Menu_%02d", ctx.BeginPopupStack.Size);
+        else
+            ImFormatString(window_name, IM_ARRAYSIZE(window_name), "##Popup_%08x", id);
+
+        if (Begin(window_name, NULL, flags | ImGuiWindowFlags_Popup))
+            return true;
+
+        // Begin() can return false when the popup is completely clipped (e.g. zero size display): close it right away.
+        EndPopup();
+        return false;
+    }
+
+    // Like Begin(), pending SetNextWindowXXX values are consumed even though no window is shown.
+    ctx.NextWindowData.ClearFlags();
+    return false;
+}
+
+// Begin a popup identified by 'str_id' (hashed with the current window's GetID()).
+// AlwaysAutoResize, NoTitleBar and NoSavedSettings are added to 'flags' before forwarding to BeginPopupEx().
+bool ImGui::BeginPopup(const char* str_id, ImGuiWindowFlags flags)
+{
+    ImGuiContext& ctx = *GImGui;
+
+    // Cheap early out: no popup is open at this level, so the id does not even need to be computed.
+    const bool any_popup_at_level = ctx.OpenPopupStack.Size > ctx.BeginPopupStack.Size;
+    if (!any_popup_at_level)
+    {
+        ctx.NextWindowData.ClearFlags(); // Consumed the same way Begin() would
+        return false;
+    }
+
+    const ImGuiWindowFlags popup_window_flags = flags | ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoSavedSettings;
+    const ImGuiID popup_id = ctx.CurrentWindow->GetID(str_id);
+    return BeginPopupEx(popup_id, popup_window_flags);
+}
+
+// If 'p_open' is specified for a modal popup window, the popup will have a regular close button which will close the popup.
+// Note that popup visibility status is owned by Dear ImGui (and manipulated with e.g. OpenPopup) so the actual value of *p_open is meaningless here.
+// - 'name' is hashed with the current window's GetID() to find the popup, and is also used as the window name passed to Begin().
+// - Popup, Modal and NoCollapse are always added to 'flags'.
+// - Returns false when the popup is not open, when Begin() returns false, or when 'p_open' is non-NULL and *p_open is false after Begin().
+//   In the last two cases EndPopup() is called here, so the caller must only call EndPopup() when this returns true.
+bool ImGui::BeginPopupModal(const char* name, bool* p_open, ImGuiWindowFlags flags)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    const ImGuiID id = window->GetID(name);
+    if (!IsPopupOpen(id, ImGuiPopupFlags_None))
+    {
+        g.NextWindowData.ClearFlags(); // We behave like Begin() and need to consume those values
+        return false;
+    }
+
+    // Center modal windows by default for increased visibility
+    // (this won't really last as settings will kick in, and is mostly for backward compatibility. user may do the same themselves)
+    // FIXME: Should test for (PosCond & window->SetWindowPosAllowFlags) with the upcoming window.
+    if ((g.NextWindowData.Flags & ImGuiNextWindowDataFlags_HasPos) == 0)
+        SetNextWindowPos(g.IO.DisplaySize * 0.5f, ImGuiCond_FirstUseEver, ImVec2(0.5f, 0.5f));
+
+    flags |= ImGuiWindowFlags_Popup | ImGuiWindowFlags_Modal | ImGuiWindowFlags_NoCollapse;
+    const bool is_open = Begin(name, p_open, flags);
+    if (!is_open || (p_open && !*p_open)) // NB: is_open can be 'false' when the popup is completely clipped (e.g. zero size display)
+    {
+        EndPopup();
+        // Begin() succeeded but *p_open is now false: close the popup at the current stack level.
+        if (is_open)
+            ClosePopupToLevel(g.BeginPopupStack.Size, true);
+        return false;
+    }
+    return is_open;
+}
+
+// End the current popup window.
+// - Asserts that the current window has ImGuiWindowFlags_Popup and that BeginPopupStack is not empty.
+// - If the popup is the current NavWindow, requests vertical wrap-around navigation (ImGuiNavMoveFlags_LoopY).
+// - g.WithinEndChild is raised around End() when the popup is also a child window, and always reset afterwards.
+void ImGui::EndPopup()
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    IM_ASSERT(window->Flags & ImGuiWindowFlags_Popup); // Mismatched BeginPopup()/EndPopup() calls
+    IM_ASSERT(g.BeginPopupStack.Size > 0);
+
+    // Make all menus and popups wrap around for now, may need to expose that policy.
+    if (g.NavWindow == window)
+        NavMoveRequestTryWrapping(window, ImGuiNavMoveFlags_LoopY);
+
+    // Child-popups don't need to be laid out
+    IM_ASSERT(g.WithinEndChild == false);
+    if (window->Flags & ImGuiWindowFlags_ChildWindow)
+        g.WithinEndChild = true;
+    End();
+    g.WithinEndChild = false;
+}
+
+// Helper to open a popup if mouse button is released over the item
+// - This is essentially the same as BeginPopupContextItem() but without the trailing BeginPopup()
+// - The mouse button is taken from the low bits of 'popup_flags' (ImGuiPopupFlags_MouseButtonMask_).
+void ImGui::OpenPopupOnItemClick(const char* str_id, ImGuiPopupFlags popup_flags)
+{
+    ImGuiWindow* window = GImGui->CurrentWindow;
+    int mouse_button = (popup_flags & ImGuiPopupFlags_MouseButtonMask_);
+    if (IsMouseReleased(mouse_button) && IsItemHovered(ImGuiHoveredFlags_AllowWhenBlockedByPopup))
+    {
+        ImGuiID id = str_id ? window->GetID(str_id) : window->DC.LastItemId; // If user hasn't passed an ID, we can use the LastItemID. Using LastItemID as a Popup ID won't conflict!
+        IM_ASSERT(id != 0); // You cannot pass a NULL str_id if the last item has no identifier (e.g. a Text() item)
+        OpenPopupEx(id, popup_flags);
+    }
+}
+
+// This is a helper to handle the simplest case of associating one named popup to one given widget.
+// - You can pass a NULL str_id to use the identifier of the last item.
+// - You may want to handle this on user side if you have specific needs (e.g. tweaking IsItemHovered() parameters).
+// - This is essentially the same as calling OpenPopupOnItemClick() + BeginPopup() but written to avoid
+//   computing the ID twice because BeginPopupContextXXX functions may be called very frequently.
+// - Returns false immediately when the current window has SkipItems set.
+bool ImGui::BeginPopupContextItem(const char* str_id, ImGuiPopupFlags popup_flags)
+{
+    ImGuiWindow* window = GImGui->CurrentWindow;
+    if (window->SkipItems)
+        return false;
+    ImGuiID id = str_id ? window->GetID(str_id) : window->DC.LastItemId; // If user hasn't passed an ID, we can use the LastItemID. Using LastItemID as a Popup ID won't conflict!
+    IM_ASSERT(id != 0); // You cannot pass a NULL str_id if the last item has no identifier (e.g. a Text() item)
+    int mouse_button = (popup_flags & ImGuiPopupFlags_MouseButtonMask_);
+    if (IsMouseReleased(mouse_button) && IsItemHovered(ImGuiHoveredFlags_AllowWhenBlockedByPopup))
+        OpenPopupEx(id, popup_flags);
+    return BeginPopupEx(id, ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoSavedSettings);
+}
+
+// Open (on mouse release over the current window) and begin a context popup attached to the current window.
+// - A NULL 'str_id' falls back to "window_context".
+// - With ImGuiPopupFlags_NoOpenOverItems the popup is only opened when no item is hovered.
+bool ImGui::BeginPopupContextWindow(const char* str_id, ImGuiPopupFlags popup_flags)
+{
+    ImGuiWindow* window = GImGui->CurrentWindow;
+    if (!str_id)
+        str_id = "window_context";
+    ImGuiID id = window->GetID(str_id);
+    int mouse_button = (popup_flags & ImGuiPopupFlags_MouseButtonMask_);
+    if (IsMouseReleased(mouse_button) && IsWindowHovered(ImGuiHoveredFlags_AllowWhenBlockedByPopup))
+        if (!(popup_flags & ImGuiPopupFlags_NoOpenOverItems) || !IsAnyItemHovered())
+            OpenPopupEx(id, popup_flags);
+    return BeginPopupEx(id, ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoSavedSettings);
+}
+
+// Open (on mouse release while no window at all is hovered) and begin a context popup.
+// - A NULL 'str_id' falls back to "void_context".
+// - The popup is not opened while GetTopMostPopupModal() returns a modal.
+bool ImGui::BeginPopupContextVoid(const char* str_id, ImGuiPopupFlags popup_flags)
+{
+    ImGuiWindow* window = GImGui->CurrentWindow;
+    if (!str_id)
+        str_id = "void_context";
+    ImGuiID id = window->GetID(str_id);
+    int mouse_button = (popup_flags & ImGuiPopupFlags_MouseButtonMask_);
+    if (IsMouseReleased(mouse_button) && !IsWindowHovered(ImGuiHoveredFlags_AnyWindow))
+        if (GetTopMostPopupModal() == NULL)
+            OpenPopupEx(id, popup_flags);
+    return BeginPopupEx(id, ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoSavedSettings);
+}
+
+// r_avoid = the rectangle to avoid (e.g. for tooltip it is a rectangle around the mouse cursor which we want to avoid. for popups it's a small point around the cursor.)
+// r_outer = the visible area rectangle, minus safe area padding. If our popup size won't fit because of safe area padding we ignore it.
+// ref_pos  = the requested position; size = the size of the window being placed.
+// last_dir = in/out: a direction other than ImGuiDir_None is tried first; on return it holds the direction chosen, or ImGuiDir_None when the fallback path was taken.
+// Returns the chosen top-left position.
+ImVec2 ImGui::FindBestWindowPosForPopupEx(const ImVec2& ref_pos, const ImVec2& size, ImGuiDir* last_dir, const ImRect& r_outer, const ImRect& r_avoid, ImGuiPopupPositionPolicy policy)
+{
+    ImVec2 base_pos_clamped = ImClamp(ref_pos, r_outer.Min, r_outer.Max - size);
+    //GetForegroundDrawList()->AddRect(r_avoid.Min, r_avoid.Max, IM_COL32(255,0,0,255));
+    //GetForegroundDrawList()->AddRect(r_outer.Min, r_outer.Max, IM_COL32(0,255,0,255));
+
+    // Combo Box policy (we want a connecting edge)
+    // The first candidate rectangle fully contained in r_outer wins; if none fits we fall through to the fallback at the bottom.
+    if (policy == ImGuiPopupPositionPolicy_ComboBox)
+    {
+        const ImGuiDir dir_prefered_order[ImGuiDir_COUNT] = { ImGuiDir_Down, ImGuiDir_Right, ImGuiDir_Left, ImGuiDir_Up };
+        for (int n = (*last_dir != ImGuiDir_None) ? -1 : 0; n < ImGuiDir_COUNT; n++)
+        {
+            const ImGuiDir dir = (n == -1) ? *last_dir : dir_prefered_order[n];
+            if (n != -1 && dir == *last_dir) // Already tried this direction?
+                continue;
+            ImVec2 pos;
+            if (dir == ImGuiDir_Down)  pos = ImVec2(r_avoid.Min.x, r_avoid.Max.y);          // Below, Toward Right (default)
+            if (dir == ImGuiDir_Right) pos = ImVec2(r_avoid.Min.x, r_avoid.Min.y - size.y); // Above, Toward Right
+            if (dir == ImGuiDir_Left)  pos = ImVec2(r_avoid.Max.x - size.x, r_avoid.Max.y); // Below, Toward Left
+            if (dir == ImGuiDir_Up)    pos = ImVec2(r_avoid.Max.x - size.x, r_avoid.Min.y - size.y); // Above, Toward Left
+            if (!r_outer.Contains(ImRect(pos, pos + size)))
+                continue;
+            *last_dir = dir;
+            return pos;
+        }
+    }
+
+    // Tooltip and Default popup policy
+    // (Always first try the direction we used on the last frame, if any)
+    if (policy == ImGuiPopupPositionPolicy_Tooltip || policy == ImGuiPopupPositionPolicy_Default)
+    {
+        const ImGuiDir dir_prefered_order[ImGuiDir_COUNT] = { ImGuiDir_Right, ImGuiDir_Down, ImGuiDir_Up, ImGuiDir_Left };
+        for (int n = (*last_dir != ImGuiDir_None) ? -1 : 0; n < ImGuiDir_COUNT; n++)
+        {
+            const ImGuiDir dir = (n == -1) ? *last_dir : dir_prefered_order[n];
+            if (n != -1 && dir == *last_dir) // Already tried this direction?
+                continue;
+
+            const float avail_w = (dir == ImGuiDir_Left ? r_avoid.Min.x : r_outer.Max.x) - (dir == ImGuiDir_Right ? r_avoid.Max.x : r_outer.Min.x);
+            const float avail_h = (dir == ImGuiDir_Up ? r_avoid.Min.y : r_outer.Max.y) - (dir == ImGuiDir_Down ? r_avoid.Max.y : r_outer.Min.y);
+
+            // If there is not enough room on one axis, there's no point in positioning on a side on this axis (e.g. when not enough width, use a top/bottom position to maximize available width)
+            if (avail_w < size.x && (dir == ImGuiDir_Left || dir == ImGuiDir_Right))
+                continue;
+            if (avail_h < size.y && (dir == ImGuiDir_Up || dir == ImGuiDir_Down))
+                continue;
+
+            ImVec2 pos;
+            pos.x = (dir == ImGuiDir_Left) ? r_avoid.Min.x - size.x : (dir == ImGuiDir_Right) ? r_avoid.Max.x : base_pos_clamped.x;
+            pos.y = (dir == ImGuiDir_Up) ? r_avoid.Min.y - size.y : (dir == ImGuiDir_Down) ? r_avoid.Max.y : base_pos_clamped.y;
+
+            // Clamp top-left corner of popup
+            pos.x = ImMax(pos.x, r_outer.Min.x);
+            pos.y = ImMax(pos.y, r_outer.Min.y);
+
+            *last_dir = dir;
+            return pos;
+        }
+    }
+
+    // Fallback when not enough room:
+    *last_dir = ImGuiDir_None;
+
+    // For tooltip we prefer avoiding the cursor at all cost even if it means that part of the tooltip won't be visible.
+    if (policy == ImGuiPopupPositionPolicy_Tooltip)
+        return ref_pos + ImVec2(2, 2);
+
+    // Otherwise try to keep within display
+    ImVec2 pos = ref_pos;
+    pos.x = ImMax(ImMin(pos.x + size.x, r_outer.Max.x) - size.x, r_outer.Min.x);
+    pos.y = ImMax(ImMin(pos.y + size.y, r_outer.Max.y) - size.y, r_outer.Min.y);
+    return pos;
+}
+
+// Return the viewport rectangle shrunk by Style.DisplaySafeAreaPadding.
+// The padding is only applied on an axis where the viewport is larger than twice the padding. 'window' is unused.
+ImRect ImGui::GetWindowAllowedExtentRect(ImGuiWindow* window)
+{
+    IM_UNUSED(window);
+    ImVec2 padding = GImGui->Style.DisplaySafeAreaPadding;
+    ImRect r_screen = GetViewportRect();
+    r_screen.Expand(ImVec2((r_screen.GetWidth() > padding.x * 2) ? -padding.x : 0.0f, (r_screen.GetHeight() > padding.y * 2) ? -padding.y : 0.0f));
+    return r_screen;
+}
+
+// Compute the position of a child menu, popup or tooltip window by building the rectangle to avoid
+// and delegating to FindBestWindowPosForPopupEx(). Flags are tested in that order (ChildMenu, Popup, Tooltip);
+// any other kind of window triggers the assert at the bottom and gets window->Pos back.
+ImVec2 ImGui::FindBestWindowPosForPopup(ImGuiWindow* window)
+{
+    ImGuiContext& g = *GImGui;
+
+    ImRect r_outer = GetWindowAllowedExtentRect(window);
+    if (window->Flags & ImGuiWindowFlags_ChildMenu)
+    {
+        // Child menus typically request _any_ position within the parent menu item, and then we move the new menu outside the parent bounds.
+        // This is how we end up with child menus appearing (most-commonly) on the right of the parent menu.
+        IM_ASSERT(g.CurrentWindow == window);
+        ImGuiWindow* parent_window = g.CurrentWindowStack[g.CurrentWindowStack.Size - 2];
+        float horizontal_overlap = g.Style.ItemInnerSpacing.x; // We want some overlap to convey the relative depth of each menu (currently the amount of overlap is hard-coded to style.ItemInnerSpacing.x).
+        ImRect r_avoid;
+        if (parent_window->DC.MenuBarAppending)
+            r_avoid = ImRect(-FLT_MAX, parent_window->ClipRect.Min.y, FLT_MAX, parent_window->ClipRect.Max.y); // Avoid parent menu-bar. If we wanted multi-line menu-bar, we may instead want to have the calling window setup e.g. a NextWindowData.PosConstraintAvoidRect field
+        else
+            r_avoid = ImRect(parent_window->Pos.x + horizontal_overlap, -FLT_MAX, parent_window->Pos.x + parent_window->Size.x - horizontal_overlap - parent_window->ScrollbarSizes.x, FLT_MAX);
+        return FindBestWindowPosForPopupEx(window->Pos, window->Size, &window->AutoPosLastDirection, r_outer, r_avoid, ImGuiPopupPositionPolicy_Default);
+    }
+    if (window->Flags & ImGuiWindowFlags_Popup)
+    {
+        // Avoid a 2x2 rectangle centered on the requested window position.
+        ImRect r_avoid = ImRect(window->Pos.x - 1, window->Pos.y - 1, window->Pos.x + 1, window->Pos.y + 1);
+        return FindBestWindowPosForPopupEx(window->Pos, window->Size, &window->AutoPosLastDirection, r_outer, r_avoid, ImGuiPopupPositionPolicy_Default);
+    }
+    if (window->Flags & ImGuiWindowFlags_Tooltip)
+    {
+        // Position tooltip (always follows mouse)
+        float sc = g.Style.MouseCursorScale;
+        ImVec2 ref_pos = NavCalcPreferredRefPos();
+        ImRect r_avoid;
+        if (!g.NavDisableHighlight && g.NavDisableMouseHover && !(g.IO.ConfigFlags & ImGuiConfigFlags_NavEnableSetMousePos))
+            r_avoid = ImRect(ref_pos.x - 16, ref_pos.y - 8, ref_pos.x + 16, ref_pos.y + 8);
+        else
+            r_avoid = ImRect(ref_pos.x - 16, ref_pos.y - 8, ref_pos.x + 24 * sc, ref_pos.y + 24 * sc); // FIXME: Hard-coded based on mouse cursor shape expectation. Exact dimension not very important.
+        return FindBestWindowPosForPopupEx(ref_pos, window->Size, &window->AutoPosLastDirection, r_outer, r_avoid, ImGuiPopupPositionPolicy_Tooltip);
+    }
+    IM_ASSERT(0);
+    return window->Pos;
+}
+
+//-----------------------------------------------------------------------------
+// [SECTION] KEYBOARD/GAMEPAD NAVIGATION
+//-----------------------------------------------------------------------------
+
+// FIXME-NAV: The existence of SetNavID vs SetNavIDWithRectRel vs SetFocusID is incredibly messy and confusing,
+// and needs some explanation or serious refactoring.
+// Set g.NavId and g.NavFocusScopeId, and record 'id' as the last navigated id of g.NavWindow for 'nav_layer'.
+// Asserts that g.NavWindow is set and that 'nav_layer' is 0 or 1.
+void ImGui::SetNavID(ImGuiID id, int nav_layer, ImGuiID focus_scope_id)
+{
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(g.NavWindow);
+    IM_ASSERT(nav_layer == 0 || nav_layer == 1);
+    g.NavId = id;
+    g.NavFocusScopeId = focus_scope_id;
+    g.NavWindow->NavLastIds[nav_layer] = id;
+}
+
+// Same as SetNavID(), and additionally stores 'rect_rel' as the nav rectangle of g.NavWindow for 'nav_layer',
+// marks the nav mouse position as dirty, enables the nav highlight and disables mouse hover.
+void ImGui::SetNavIDWithRectRel(ImGuiID id, int nav_layer, ImGuiID focus_scope_id, const ImRect& rect_rel)
+{
+    ImGuiContext& g = *GImGui;
+    SetNavID(id, nav_layer, focus_scope_id);
+    g.NavWindow->NavRectRel[nav_layer] = rect_rel;
+    g.NavMousePosDirty = true;
+    g.NavDisableHighlight = false;
+    g.NavDisableMouseHover = true;
+}
+
+// Make 'id' (must be non-zero) inside 'window' the navigation focus: updates g.NavWindow, g.NavId, g.NavLayer and
+// g.NavFocusScopeId from the window's current DC state, and cancels a pending nav init request when the nav window changes.
+void ImGui::SetFocusID(ImGuiID id, ImGuiWindow* window)
+{
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(id != 0);
+
+    // Assume that SetFocusID() is called in the context where its window->DC.NavLayerCurrent and window->DC.NavFocusScopeIdCurrent are valid.
+    // Note that window may be != g.CurrentWindow (e.g. SetFocusID call in InputTextEx for multi-line text)
+    const ImGuiNavLayer nav_layer = window->DC.NavLayerCurrent;
+    if (g.NavWindow != window)
+        g.NavInitRequest = false;
+    g.NavWindow = window;
+    g.NavId = id;
+    g.NavLayer = nav_layer;
+    g.NavFocusScopeId = window->DC.NavFocusScopeIdCurrent;
+    window->NavLastIds[nav_layer] = id;
+    // Only refresh the stored rectangle when 'id' is the last submitted item, since that is the only rectangle available here.
+    if (window->DC.LastItemId == id)
+        window->NavRectRel[nav_layer] = ImRect(window->DC.LastItemRect.Min - window->Pos, window->DC.LastItemRect.Max - window->Pos);
+
+    if (g.ActiveIdSource == ImGuiInputSource_Nav)
+        g.NavDisableMouseHover = true;
+    else
+        g.NavDisableHighlight = true;
+}
+
+// Return the direction of the dominant axis of the delta (dx, dy).
+// The horizontal axis wins only when |dx| > |dy|; ties (including a zero delta) resolve on the vertical axis, where dy <= 0 yields ImGuiDir_Up.
+ImGuiDir ImGetDirQuadrantFromDelta(float dx, float dy)
+{
+    if (ImFabs(dx) > ImFabs(dy))
+        return (dx > 0.0f) ? ImGuiDir_Right : ImGuiDir_Left;
+    return (dy > 0.0f) ? ImGuiDir_Down : ImGuiDir_Up;
+}
+
+// Signed gap between the 1D intervals [a0,a1] and [b0,b1]:
+// negative when 'a' lies entirely before 'b', positive when entirely after, 0.0f when they overlap or touch.
+static float inline NavScoreItemDistInterval(float a0, float a1, float b0, float b1)
+{
+    if (a1 < b0)
+        return a1 - b0;
+    if (b1 < a0)
+        return a0 - b1;
+    return 0.0f;
+}
+
+// Clamp 'r' into 'clip_rect' on the axis perpendicular to 'move_dir': Y for Left/Right moves, X for any other direction.
+static void inline NavClampRectToVisibleAreaForMoveDir(ImGuiDir move_dir, ImRect& r, const ImRect& clip_rect)
+{
+    if (move_dir == ImGuiDir_Left || move_dir == ImGuiDir_Right)
+    {
+        r.Min.y = ImClamp(r.Min.y, clip_rect.Min.y, clip_rect.Max.y);
+        r.Max.y = ImClamp(r.Max.y, clip_rect.Min.y, clip_rect.Max.y);
+    }
+    else
+    {
+        r.Min.x = ImClamp(r.Min.x, clip_rect.Min.x, clip_rect.Max.x);
+        r.Max.x = ImClamp(r.Max.x, clip_rect.Min.x, clip_rect.Max.x);
+    }
+}
+
+// Scoring function for gamepad/keyboard directional navigation. Based on https://gist.github.com/rygorous/6981057
+// Scores the candidate rectangle 'cand' against g.NavScoringRect for the move direction g.NavMoveDir.
+// Returns true when 'cand' should replace the best candidate stored in 'result'; the DistBox/DistCenter/DistAxial
+// fields of 'result' are updated here, the caller is responsible for recording the winning item itself.
+static bool ImGui::NavScoreItem(ImGuiNavMoveResult* result, ImRect cand)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    if (g.NavLayer != window->DC.NavLayerCurrent)
+        return false;
+
+    const ImRect& curr = g.NavScoringRect; // Current modified source rect (NB: we've applied Max.x = Min.x in NavUpdate() to inhibit the effect of having varied item width)
+    g.NavScoringCount++;
+
+    // When entering through a NavFlattened border, we consider child window items as fully clipped for scoring
+    if (window->ParentWindow == g.NavWindow)
+    {
+        IM_ASSERT((window->Flags | g.NavWindow->Flags) & ImGuiWindowFlags_NavFlattened);
+        if (!window->ClipRect.Overlaps(cand))
+            return false;
+        cand.ClipWithFull(window->ClipRect); // This allows the scored item to not overlap other candidates in the parent window
+    }
+
+    // We perform scoring on items bounding box clipped by the current clipping rectangle on the other axis (clipping on our movement axis would give us equal scores for all clipped items)
+    // For example, this ensures that items in one column are not reached when moving vertically from items in another column.
+    NavClampRectToVisibleAreaForMoveDir(g.NavMoveClipDir, cand, window->ClipRect);
+
+    // Compute distance between boxes
+    // FIXME-NAV: Introducing biases for vertical navigation, needs to be removed.
+    float dbx = NavScoreItemDistInterval(cand.Min.x, cand.Max.x, curr.Min.x, curr.Max.x);
+    float dby = NavScoreItemDistInterval(ImLerp(cand.Min.y, cand.Max.y, 0.2f), ImLerp(cand.Min.y, cand.Max.y, 0.8f), ImLerp(curr.Min.y, curr.Max.y, 0.2f), ImLerp(curr.Min.y, curr.Max.y, 0.8f)); // Scale down on Y to keep using box-distance for vertically touching items
+    if (dby != 0.0f && dbx != 0.0f)
+        dbx = (dbx / 1000.0f) + ((dbx > 0.0f) ? +1.0f : -1.0f);
+    float dist_box = ImFabs(dbx) + ImFabs(dby);
+
+    // Compute distance between centers (this is off by a factor of 2, but we only compare center distances with each other so it doesn't matter)
+    float dcx = (cand.Min.x + cand.Max.x) - (curr.Min.x + curr.Max.x);
+    float dcy = (cand.Min.y + cand.Max.y) - (curr.Min.y + curr.Max.y);
+    float dist_center = ImFabs(dcx) + ImFabs(dcy); // L1 metric (need this for our connectedness guarantee)
+
+    // Determine which quadrant of 'curr' our candidate item 'cand' lies in based on distance
+    ImGuiDir quadrant;
+    float dax = 0.0f, day = 0.0f, dist_axial = 0.0f;
+    if (dbx != 0.0f || dby != 0.0f)
+    {
+        // For non-overlapping boxes, use distance between boxes
+        dax = dbx;
+        day = dby;
+        dist_axial = dist_box;
+        quadrant = ImGetDirQuadrantFromDelta(dbx, dby);
+    }
+    else if (dcx != 0.0f || dcy != 0.0f)
+    {
+        // For overlapping boxes with different centers, use distance between centers
+        dax = dcx;
+        day = dcy;
+        dist_axial = dist_center;
+        quadrant = ImGetDirQuadrantFromDelta(dcx, dcy);
+    }
+    else
+    {
+        // Degenerate case: two overlapping buttons with same center, break ties arbitrarily (note that LastItemId here is really the _previous_ item order, but it doesn't matter)
+        quadrant = (window->DC.LastItemId < g.NavId) ? ImGuiDir_Left : ImGuiDir_Right;
+    }
+
+#if IMGUI_DEBUG_NAV_SCORING
+    char buf[128];
+    if (IsMouseHoveringRect(cand.Min, cand.Max))
+    {
+        ImFormatString(buf, IM_ARRAYSIZE(buf), "dbox (%.2f,%.2f->%.4f)\ndcen (%.2f,%.2f->%.4f)\nd (%.2f,%.2f->%.4f)\nnav %c, quadrant %c", dbx, dby, dist_box, dcx, dcy, dist_center, dax, day, dist_axial, "WENS"[g.NavMoveDir], "WENS"[quadrant]);
+        ImDrawList* draw_list = GetForegroundDrawList(window);
+        draw_list->AddRect(curr.Min, curr.Max, IM_COL32(255,200,0,100));
+        draw_list->AddRect(cand.Min, cand.Max, IM_COL32(255,255,0,200));
+        draw_list->AddRectFilled(cand.Max - ImVec2(4, 4), cand.Max + CalcTextSize(buf) + ImVec2(4, 4), IM_COL32(40,0,0,150));
+        draw_list->AddText(g.IO.FontDefault, 13.0f, cand.Max, ~0U, buf);
+    }
+    else if (g.IO.KeyCtrl) // Hold to preview score in matching quadrant. Press C to rotate.
+    {
+        if (IsKeyPressedMap(ImGuiKey_C)) { g.NavMoveDirLast = (ImGuiDir)((g.NavMoveDirLast + 1) & 3); g.IO.KeysDownDuration[g.IO.KeyMap[ImGuiKey_C]] = 0.01f; }
+        if (quadrant == g.NavMoveDir)
+        {
+            ImFormatString(buf, IM_ARRAYSIZE(buf), "%.0f/%.0f", dist_box, dist_center);
+            ImDrawList* draw_list = GetForegroundDrawList(window);
+            draw_list->AddRectFilled(cand.Min, cand.Max, IM_COL32(255, 0, 0, 200));
+            draw_list->AddText(g.IO.FontDefault, 13.0f, cand.Min, IM_COL32(255, 255, 255, 255), buf);
+        }
+    }
+#endif
+
+    // Is it in the quadrant we're interested in moving to?
+    bool new_best = false;
+    if (quadrant == g.NavMoveDir)
+    {
+        // Does it beat the current best candidate?
+        if (dist_box < result->DistBox)
+        {
+            result->DistBox = dist_box;
+            result->DistCenter = dist_center;
+            return true;
+        }
+        if (dist_box == result->DistBox)
+        {
+            // Try using distance between center points to break ties
+            if (dist_center < result->DistCenter)
+            {
+                result->DistCenter = dist_center;
+                new_best = true;
+            }
+            else if (dist_center == result->DistCenter)
+            {
+                // Still tied! we need to be extra-careful to make sure everything gets linked properly. We consistently break ties by symbolically moving "later" items
+                // (with higher index) to the right/downwards by an infinitesimal amount since we already have the current "best" button (so it must have a lower index),
+                // this is fairly easy. This rule ensures that all buttons with dx==dy==0 will end up being linked in order of appearance along the x axis.
+                if (((g.NavMoveDir == ImGuiDir_Up || g.NavMoveDir == ImGuiDir_Down) ? dby : dbx) < 0.0f) // moving bj to the right/down decreases distance
+                    new_best = true;
+            }
+        }
+    }
+
+    // Axial check: if 'curr' has no link at all in some direction and 'cand' lies roughly in that direction, add a tentative link. This will only be kept if no "real" matches
+    // are found, so it only augments the graph produced by the above method using extra links. (important, since it doesn't guarantee strong connectedness)
+    // This is just to avoid buttons having no links in a particular direction when there's a suitable neighbor. you get good graphs without this too.
+    // 2017/09/29: FIXME: This now currently only enabled inside menu bars, ideally we'd disable it everywhere. Menus in particular need to catch failure. For general navigation it feels awkward.
+    // Disabling it may lead to disconnected graphs when nodes are very spaced out on different axis. Perhaps consider offering this as an option?
+    if (result->DistBox == FLT_MAX && dist_axial < result->DistAxial) // Check axial match
+        if (g.NavLayer == ImGuiNavLayer_Menu && !(g.NavWindow->Flags & ImGuiWindowFlags_ChildMenu))
+            if ((g.NavMoveDir == ImGuiDir_Left && dax < 0.0f) || (g.NavMoveDir == ImGuiDir_Right && dax > 0.0f) || (g.NavMoveDir == ImGuiDir_Up && day < 0.0f) || (g.NavMoveDir == ImGuiDir_Down && day > 0.0f))
+            {
+                result->DistAxial = dist_axial;
+                new_best = true;
+            }
+
+    return new_best;
+}
+
+// Record an item (window, id, the window's current focus scope, window-relative rectangle) into 'result'. Distances are left untouched.
+static void ImGui::NavApplyItemToResult(ImGuiNavMoveResult* result, ImGuiWindow* window, ImGuiID id, const ImRect& nav_bb_rel)
+{
+    result->Window = window;
+    result->ID = id;
+    result->FocusScopeId = window->DC.NavFocusScopeIdCurrent;
+    result->RectRel = nav_bb_rel;
+}
+
+// We get there when either NavId == id, or when g.NavAnyRequest is set (which is updated by NavUpdateAnyRequestFlag above)
+// Handles three things for the item 'id' with bounding box 'nav_bb': the pending init request, scoring for a move request,
+// and refreshing nav state when the item is the currently navigated one.
+static void ImGui::NavProcessItem(ImGuiWindow* window, const ImRect& nav_bb, const ImGuiID id)
+{
+    ImGuiContext& g = *GImGui;
+    //if (!g.IO.NavActive)  // [2017/10/06] Removed this possibly redundant test but I am not sure of all the side-effects yet. Some of the features here will need to work regardless of using a _NoNavInputs flag.
+    //    return;
+
+    const ImGuiItemFlags item_flags = window->DC.ItemFlags;
+    const ImRect nav_bb_rel(nav_bb.Min - window->Pos, nav_bb.Max - window->Pos);
+
+    // Process Init Request
+    if (g.NavInitRequest && g.NavLayer == window->DC.NavLayerCurrent)
+    {
+        // Even if 'ImGuiItemFlags_NoNavDefaultFocus' is on (typically collapse/close button) we record the first ResultId so they can be used as a fallback
+        if (!(item_flags & ImGuiItemFlags_NoNavDefaultFocus) || g.NavInitResultId == 0)
+        {
+            g.NavInitResultId = id;
+            g.NavInitResultRectRel = nav_bb_rel;
+        }
+        if (!(item_flags & ImGuiItemFlags_NoNavDefaultFocus))
+        {
+            g.NavInitRequest = false; // Found a match, clear request
+            NavUpdateAnyRequestFlag();
+        }
+    }
+
+    // Process Move Request (scoring for navigation)
+    // FIXME-NAV: Consider policy for double scoring (scoring from NavScoringRectScreen + scoring from a rect wrapped according to current wrapping policy)
+    if ((g.NavId != id || (g.NavMoveRequestFlags & ImGuiNavMoveFlags_AllowCurrentNavId)) && !(item_flags & (ImGuiItemFlags_Disabled | ImGuiItemFlags_NoNav)))
+    {
+        // Items of the nav window and items of other windows are scored into separate results.
+        ImGuiNavMoveResult* result = (window == g.NavWindow) ? &g.NavMoveResultLocal : &g.NavMoveResultOther;
+#if IMGUI_DEBUG_NAV_SCORING
+        // [DEBUG] Score all items in NavWindow at all times
+        if (!g.NavMoveRequest)
+            g.NavMoveDir = g.NavMoveDirLast;
+        bool new_best = NavScoreItem(result, nav_bb) && g.NavMoveRequest;
+#else
+        bool new_best = g.NavMoveRequest && NavScoreItem(result, nav_bb);
+#endif
+        if (new_best)
+            NavApplyItemToResult(result, window, id, nav_bb_rel);
+
+        // Features like PageUp/PageDown need to maintain a separate score for the visible set of items.
+        // An item counts as visible when at least VISIBLE_RATIO of its height lies inside the window's clip rectangle.
+        const float VISIBLE_RATIO = 0.70f;
+        if ((g.NavMoveRequestFlags & ImGuiNavMoveFlags_AlsoScoreVisibleSet) && window->ClipRect.Overlaps(nav_bb))
+            if (ImClamp(nav_bb.Max.y, window->ClipRect.Min.y, window->ClipRect.Max.y) - ImClamp(nav_bb.Min.y, window->ClipRect.Min.y, window->ClipRect.Max.y) >= (nav_bb.Max.y - nav_bb.Min.y) * VISIBLE_RATIO)
+                if (NavScoreItem(&g.NavMoveResultLocalVisibleSet, nav_bb))
+                    NavApplyItemToResult(&g.NavMoveResultLocalVisibleSet, window, id, nav_bb_rel);
+    }
+
+    // Update window-relative bounding box of navigated item
+    if (g.NavId == id)
+    {
+        g.NavWindow = window;                                           // Always refresh g.NavWindow, because some operations such as FocusItem() don't have a window.
+        g.NavLayer = window->DC.NavLayerCurrent;
+        g.NavFocusScopeId = window->DC.NavFocusScopeIdCurrent;
+        g.NavIdIsAlive = true;
+        g.NavIdTabCounter = window->DC.FocusCounterTabStop;
+        window->NavRectRel[window->DC.NavLayerCurrent] = nav_bb_rel;    // Store item bounding box (relative to window position)
+    }
+}
+
+// True while a move request is active and neither the local nor the other-window result has recorded an item yet.
+bool ImGui::NavMoveRequestButNoResultYet()
+{
+    ImGuiContext& g = *GImGui;
+    return g.NavMoveRequest && g.NavMoveResultLocal.ID == 0 && g.NavMoveResultOther.ID == 0;
+}
+
+// Drop the current move request and refresh g.NavAnyRequest accordingly.
+void ImGui::NavMoveRequestCancel()
+{
+    ImGuiContext& g = *GImGui;
+    g.NavMoveRequest = false;
+    NavUpdateAnyRequestFlag();
+}
+
+// Queue a forwarded move request: cancels the current request, stores the direction, clip direction and flags,
+// sets g.NavMoveRequestForward to ImGuiNavForward_ForwardQueued and overrides the nav rectangle of the current layer with 'bb_rel'.
+// Asserts that no forwarded request is already in flight.
+void ImGui::NavMoveRequestForward(ImGuiDir move_dir, ImGuiDir clip_dir, const ImRect& bb_rel, ImGuiNavMoveFlags move_flags)
+{
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(g.NavMoveRequestForward == ImGuiNavForward_None);
+    NavMoveRequestCancel();
+    g.NavMoveDir = move_dir;
+    g.NavMoveClipDir = clip_dir;
+    g.NavMoveRequestForward = ImGuiNavForward_ForwardQueued;
+    g.NavMoveRequestFlags = move_flags;
+    g.NavWindow->NavRectRel[g.NavLayer] = bb_rel;
+}
+
+// Record a wrap-around request for 'window'; this function only stores the window and flags in the context.
+void ImGui::NavMoveRequestTryWrapping(ImGuiWindow* window, ImGuiNavMoveFlags move_flags)
+{
+    ImGuiContext& g = *GImGui;
+
+    // Navigation wrap-around logic is delayed to the end of the frame because this operation is only valid after entire
+    // popup is assembled and in case of appended popups it is not clear which EndPopup() call is final.
+    g.NavWrapRequestWindow = window;
+    g.NavWrapRequestFlags = move_flags;
+}
+
+// FIXME: This could be replaced by updating a frame number in each window when (window == NavWindow) and (NavLayer == 0).
+// This way we could find the last focused window among our children. It would be much less confusing this way?
+// Walks up from 'nav_window' through child windows that are neither popups nor child menus, and stores 'nav_window'
+// in the NavLastChildNavWindow field of the ancestor where the walk stops (nothing is stored when that is 'nav_window' itself).
+static void ImGui::NavSaveLastChildNavWindowIntoParent(ImGuiWindow* nav_window)
+{
+    ImGuiWindow* parent = nav_window;
+    while (parent && (parent->Flags & ImGuiWindowFlags_ChildWindow) != 0 && (parent->Flags & (ImGuiWindowFlags_Popup | ImGuiWindowFlags_ChildMenu)) == 0)
+        parent = parent->ParentWindow;
+    if (parent && parent != nav_window)
+        parent->NavLastChildNavWindow = nav_window;
+}
+
+// Restore the last focused child.
+// Call when we are expected to land on the Main Layer (0) after FocusWindow()
+// Returns window->NavLastChildNavWindow when it is set and was active, otherwise 'window' itself.
+static ImGuiWindow* ImGui::NavRestoreLastChildNavWindow(ImGuiWindow* window)
+{
+    if (window->NavLastChildNavWindow && window->NavLastChildNavWindow->WasActive)
+        return window->NavLastChildNavWindow;
+    return window;
+}
+
+// Switch g.NavLayer to 'layer'. For layer 0 the last focused child window is restored first and, when that window
+// remembers a layer-0 id, the id is re-selected with its stored rectangle; in every other case a forced NavInitWindow() is issued.
+static void NavRestoreLayer(ImGuiNavLayer layer)
+{
+    ImGuiContext& g = *GImGui;
+    g.NavLayer = layer;
+    if (layer == 0)
+        g.NavWindow = ImGui::NavRestoreLastChildNavWindow(g.NavWindow);
+    ImGuiWindow* window = g.NavWindow;
+    if (layer == 0 && window->NavLastIds[0] != 0)
+        ImGui::SetNavIDWithRectRel(window->NavLastIds[0], layer, 0, window->NavRectRel[0]);
+    else
+        ImGui::NavInitWindow(window, true);
+}
+
+// Recompute g.NavAnyRequest from the move and init request flags (plus the debug scoring switch),
+// and assert that a nav window exists whenever a request is pending.
+static inline void ImGui::NavUpdateAnyRequestFlag()
+{
+    ImGuiContext& g = *GImGui;
+    g.NavAnyRequest = g.NavMoveRequest || g.NavInitRequest || (IMGUI_DEBUG_NAV_SCORING && g.NavWindow != NULL);
+    if (g.NavAnyRequest)
+        IM_ASSERT(g.NavWindow != NULL);
+}
+
+// This needs to be called before we submit any widget (aka in or before Begin)
+// 'window' must be g.NavWindow. An init request is raised unless the window has NoNavInputs, or is a non-popup child
+// window that already remembers a layer-0 id and 'force_reinit' is false; in that case the remembered id is restored instead.
+void ImGui::NavInitWindow(ImGuiWindow* window, bool force_reinit)
+{
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(window == g.NavWindow);
+    bool init_for_nav = false;
+    if (!(window->Flags & ImGuiWindowFlags_NoNavInputs))
+        if (!(window->Flags & ImGuiWindowFlags_ChildWindow) || (window->Flags & ImGuiWindowFlags_Popup) || (window->NavLastIds[0] == 0) || force_reinit)
+            init_for_nav = true;
+    IMGUI_DEBUG_LOG_NAV("[nav] NavInitRequest: from NavInitWindow(), init_for_nav=%d, window=\"%s\", layer=%d\n", init_for_nav, window->Name, g.NavLayer);
+    if (init_for_nav)
+    {
+        SetNavID(0, g.NavLayer, 0);
+        g.NavInitRequest = true;
+        g.NavInitRequestFromMove = false;
+        g.NavInitResultId = 0;
+        g.NavInitResultRectRel = ImRect();
+        NavUpdateAnyRequestFlag();
+    }
+    else
+    {
+        g.NavId = window->NavLastIds[0];
+        g.NavFocusScopeId = 0;
+    }
+}
+
+// Reference position used to place tooltips (see FindBestWindowPosForPopup) and to move the mouse cursor (see NavUpdate):
+// the mouse position when the highlight is disabled, mouse hover is enabled or there is no nav window,
+// otherwise a point derived from the navigated item's rectangle.
+static ImVec2 ImGui::NavCalcPreferredRefPos()
+{
+    ImGuiContext& g = *GImGui;
+    if (g.NavDisableHighlight || !g.NavDisableMouseHover || !g.NavWindow)
+    {
+        // Mouse (we need a fallback in case the mouse becomes invalid after being used)
+        if (IsMousePosValid(&g.IO.MousePos))
+            return g.IO.MousePos;
+        return g.LastValidMousePos;
+    }
+    else
+    {
+        // When navigation is active and mouse is disabled, decide on an arbitrary position around the bottom left of the currently navigated item.
+        const ImRect& rect_rel = g.NavWindow->NavRectRel[g.NavLayer];
+        ImVec2 pos = g.NavWindow->Pos + ImVec2(rect_rel.Min.x + ImMin(g.Style.FramePadding.x * 4, rect_rel.GetWidth()), rect_rel.Max.y - ImMin(g.Style.FramePadding.y, rect_rel.GetHeight()));
+        ImRect visible_rect = GetViewportRect();
+        return ImFloor(ImClamp(pos, visible_rect.Min, visible_rect.Max));   // ImFloor() is important because non-integer mouse position application in backend might be lossy and result in undesirable non-zero delta.
+    }
+}
+
+// Read the nav input 'n' according to 'mode'.
+// - _Down returns the raw value of io.NavInputs[n].
+// - _Released / _Pressed return 1.0f or 0.0f based on the down-duration of the input.
+// - The _Repeat variants return the repeat amount computed by CalcTypematicRepeatAmount() with scaled delay/rate.
+// - Any other combination returns 0.0f.
+float ImGui::GetNavInputAmount(ImGuiNavInput n, ImGuiInputReadMode mode)
+{
+    ImGuiContext& g = *GImGui;
+    if (mode == ImGuiInputReadMode_Down)
+        return g.IO.NavInputs[n];                         // Instant, read analog input (0.0f..1.0f, as provided by user)
+
+    const float t = g.IO.NavInputsDownDuration[n];
+    if (t < 0.0f && mode == ImGuiInputReadMode_Released)  // Return 1.0f when just released, no repeat, ignore analog input.
+        return (g.IO.NavInputsDownDurationPrev[n] >= 0.0f ? 1.0f : 0.0f);
+    if (t < 0.0f)
+        return 0.0f;
+    if (mode == ImGuiInputReadMode_Pressed)               // Return 1.0f when just pressed, no repeat, ignore analog input.
+        return (t == 0.0f) ? 1.0f : 0.0f;
+    if (mode == ImGuiInputReadMode_Repeat)
+        return (float)CalcTypematicRepeatAmount(t - g.IO.DeltaTime, t, g.IO.KeyRepeatDelay * 0.72f, g.IO.KeyRepeatRate * 0.80f);
+    if (mode == ImGuiInputReadMode_RepeatSlow)
+        return (float)CalcTypematicRepeatAmount(t - g.IO.DeltaTime, t, g.IO.KeyRepeatDelay * 1.25f, g.IO.KeyRepeatRate * 2.00f);
+    if (mode == ImGuiInputReadMode_RepeatFast)
+        return (float)CalcTypematicRepeatAmount(t - g.IO.DeltaTime, t, g.IO.KeyRepeatDelay * 0.72f, g.IO.KeyRepeatRate * 0.30f);
+    return 0.0f;
+}
+
+// Sum the 2D directional input (x = right - left, y = down - up) of every source selected in 'dir_sources'.
+// The result is multiplied by 'slow_factor' / 'fast_factor' when the factor is non-zero and the matching tweak input is down.
+ImVec2 ImGui::GetNavInputAmount2d(ImGuiNavDirSourceFlags dir_sources, ImGuiInputReadMode mode, float slow_factor, float fast_factor)
+{
+    ImVec2 delta(0.0f, 0.0f);
+    if (dir_sources & ImGuiNavDirSourceFlags_Keyboard)
+        delta += ImVec2(GetNavInputAmount(ImGuiNavInput_KeyRight_, mode) - GetNavInputAmount(ImGuiNavInput_KeyLeft_, mode), GetNavInputAmount(ImGuiNavInput_KeyDown_, mode) - GetNavInputAmount(ImGuiNavInput_KeyUp_, mode));
+    if (dir_sources & ImGuiNavDirSourceFlags_PadDPad)
+        delta += ImVec2(GetNavInputAmount(ImGuiNavInput_DpadRight, mode) - GetNavInputAmount(ImGuiNavInput_DpadLeft, mode), GetNavInputAmount(ImGuiNavInput_DpadDown, mode) - GetNavInputAmount(ImGuiNavInput_DpadUp, mode));
+    if (dir_sources & ImGuiNavDirSourceFlags_PadLStick)
+        delta += ImVec2(GetNavInputAmount(ImGuiNavInput_LStickRight, mode) - GetNavInputAmount(ImGuiNavInput_LStickLeft, mode), GetNavInputAmount(ImGuiNavInput_LStickDown, mode) - GetNavInputAmount(ImGuiNavInput_LStickUp, mode));
+    if (slow_factor != 0.0f && IsNavInputDown(ImGuiNavInput_TweakSlow))
+        delta *= slow_factor;
+    if (fast_factor != 0.0f && IsNavInputDown(ImGuiNavInput_TweakFast))
+        delta *= fast_factor;
+    return delta;
+}
+
+static void ImGui::NavUpdate()
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiIO& io = g.IO;
+
+    io.WantSetMousePos = false;
+    g.NavWrapRequestWindow = NULL;
+    g.NavWrapRequestFlags = ImGuiNavMoveFlags_None;
+#if 0
+    if (g.NavScoringCount > 0) IMGUI_DEBUG_LOG("NavScoringCount %d for '%s' layer %d (Init:%d, Move:%d)\n", g.FrameCount, g.NavScoringCount, g.NavWindow ? g.NavWindow->Name : "NULL", g.NavLayer, g.NavInitRequest || g.NavInitResultId != 0, g.NavMoveRequest);
+#endif
+
+    // Set input source as Gamepad when buttons are pressed (as some features differs when used with Gamepad vs Keyboard)
+    // (do it before we map Keyboard input!)
+ bool nav_keyboard_active = (io.ConfigFlags & ImGuiConfigFlags_NavEnableKeyboard) != 0; + bool nav_gamepad_active = (io.ConfigFlags & ImGuiConfigFlags_NavEnableGamepad) != 0 && (io.BackendFlags & ImGuiBackendFlags_HasGamepad) != 0; + if (nav_gamepad_active && g.NavInputSource != ImGuiInputSource_NavGamepad) + { + if (io.NavInputs[ImGuiNavInput_Activate] > 0.0f || io.NavInputs[ImGuiNavInput_Input] > 0.0f || io.NavInputs[ImGuiNavInput_Cancel] > 0.0f || io.NavInputs[ImGuiNavInput_Menu] > 0.0f + || io.NavInputs[ImGuiNavInput_DpadLeft] > 0.0f || io.NavInputs[ImGuiNavInput_DpadRight] > 0.0f || io.NavInputs[ImGuiNavInput_DpadUp] > 0.0f || io.NavInputs[ImGuiNavInput_DpadDown] > 0.0f) + g.NavInputSource = ImGuiInputSource_NavGamepad; + } + + // Update Keyboard->Nav inputs mapping + if (nav_keyboard_active) + { + #define NAV_MAP_KEY(_KEY, _NAV_INPUT) do { if (IsKeyDown(io.KeyMap[_KEY])) { io.NavInputs[_NAV_INPUT] = 1.0f; g.NavInputSource = ImGuiInputSource_NavKeyboard; } } while (0) + NAV_MAP_KEY(ImGuiKey_Space, ImGuiNavInput_Activate ); + NAV_MAP_KEY(ImGuiKey_Enter, ImGuiNavInput_Input ); + NAV_MAP_KEY(ImGuiKey_Escape, ImGuiNavInput_Cancel ); + NAV_MAP_KEY(ImGuiKey_LeftArrow, ImGuiNavInput_KeyLeft_ ); + NAV_MAP_KEY(ImGuiKey_RightArrow,ImGuiNavInput_KeyRight_); + NAV_MAP_KEY(ImGuiKey_UpArrow, ImGuiNavInput_KeyUp_ ); + NAV_MAP_KEY(ImGuiKey_DownArrow, ImGuiNavInput_KeyDown_ ); + if (io.KeyCtrl) + io.NavInputs[ImGuiNavInput_TweakSlow] = 1.0f; + if (io.KeyShift) + io.NavInputs[ImGuiNavInput_TweakFast] = 1.0f; + if (io.KeyAlt && !io.KeyCtrl) // AltGR is Alt+Ctrl, also even on keyboards without AltGR we don't want Alt+Ctrl to open menu. + io.NavInputs[ImGuiNavInput_KeyMenu_] = 1.0f; + #undef NAV_MAP_KEY + } + memcpy(io.NavInputsDownDurationPrev, io.NavInputsDownDuration, sizeof(io.NavInputsDownDuration)); + for (int i = 0; i < IM_ARRAYSIZE(io.NavInputs); i++) + io.NavInputsDownDuration[i] = (io.NavInputs[i] > 0.0f) ? (io.NavInputsDownDuration[i] < 0.0f ? 
0.0f : io.NavInputsDownDuration[i] + io.DeltaTime) : -1.0f; + + // Process navigation init request (select first/default focus) + if (g.NavInitResultId != 0 && (!g.NavDisableHighlight || g.NavInitRequestFromMove)) + NavUpdateInitResult(); + g.NavInitRequest = false; + g.NavInitRequestFromMove = false; + g.NavInitResultId = 0; + g.NavJustMovedToId = 0; + + // Process navigation move request + if (g.NavMoveRequest) + NavUpdateMoveResult(); + + // When a forwarded move request failed, we restore the highlight that we disabled during the forward frame + if (g.NavMoveRequestForward == ImGuiNavForward_ForwardActive) + { + IM_ASSERT(g.NavMoveRequest); + if (g.NavMoveResultLocal.ID == 0 && g.NavMoveResultOther.ID == 0) + g.NavDisableHighlight = false; + g.NavMoveRequestForward = ImGuiNavForward_None; + } + + // Apply application mouse position movement, after we had a chance to process move request result. + if (g.NavMousePosDirty && g.NavIdIsAlive) + { + // Set mouse position given our knowledge of the navigated item position from last frame + if ((io.ConfigFlags & ImGuiConfigFlags_NavEnableSetMousePos) && (io.BackendFlags & ImGuiBackendFlags_HasSetMousePos)) + { + if (!g.NavDisableHighlight && g.NavDisableMouseHover && g.NavWindow) + { + io.MousePos = io.MousePosPrev = NavCalcPreferredRefPos(); + io.WantSetMousePos = true; + } + } + g.NavMousePosDirty = false; + } + g.NavIdIsAlive = false; + g.NavJustTabbedId = 0; + IM_ASSERT(g.NavLayer == 0 || g.NavLayer == 1); + + // Store our return window (for returning from Layer 1 to Layer 0) and clear it as soon as we step back in our own Layer 0 + if (g.NavWindow) + NavSaveLastChildNavWindowIntoParent(g.NavWindow); + if (g.NavWindow && g.NavWindow->NavLastChildNavWindow != NULL && g.NavLayer == ImGuiNavLayer_Main) + g.NavWindow->NavLastChildNavWindow = NULL; + + // Update CTRL+TAB and Windowing features (hold Square to move/resize/etc.) 
+ NavUpdateWindowing(); + + // Set output flags for user application + io.NavActive = (nav_keyboard_active || nav_gamepad_active) && g.NavWindow && !(g.NavWindow->Flags & ImGuiWindowFlags_NoNavInputs); + io.NavVisible = (io.NavActive && g.NavId != 0 && !g.NavDisableHighlight) || (g.NavWindowingTarget != NULL); + + // Process NavCancel input (to close a popup, get back to parent, clear focus) + if (IsNavInputTest(ImGuiNavInput_Cancel, ImGuiInputReadMode_Pressed)) + { + IMGUI_DEBUG_LOG_NAV("[nav] ImGuiNavInput_Cancel\n"); + if (g.ActiveId != 0) + { + if (!IsActiveIdUsingNavInput(ImGuiNavInput_Cancel)) + ClearActiveID(); + } + else if (g.NavWindow && (g.NavWindow->Flags & ImGuiWindowFlags_ChildWindow) && !(g.NavWindow->Flags & ImGuiWindowFlags_Popup) && g.NavWindow->ParentWindow) + { + // Exit child window + ImGuiWindow* child_window = g.NavWindow; + ImGuiWindow* parent_window = g.NavWindow->ParentWindow; + IM_ASSERT(child_window->ChildId != 0); + FocusWindow(parent_window); + SetNavID(child_window->ChildId, 0, 0); + // Reassigning with same value, we're being explicit here. 
+ g.NavIdIsAlive = false; // -V1048 + if (g.NavDisableMouseHover) + g.NavMousePosDirty = true; + } + else if (g.OpenPopupStack.Size > 0) + { + // Close open popup/menu + if (!(g.OpenPopupStack.back().Window->Flags & ImGuiWindowFlags_Modal)) + ClosePopupToLevel(g.OpenPopupStack.Size - 1, true); + } + else if (g.NavLayer != ImGuiNavLayer_Main) + { + // Leave the "menu" layer + NavRestoreLayer(ImGuiNavLayer_Main); + } + else + { + // Clear NavLastId for popups but keep it for regular child window so we can leave one and come back where we were + if (g.NavWindow && ((g.NavWindow->Flags & ImGuiWindowFlags_Popup) || !(g.NavWindow->Flags & ImGuiWindowFlags_ChildWindow))) + g.NavWindow->NavLastIds[0] = 0; + g.NavId = g.NavFocusScopeId = 0; + } + } + + // Process manual activation request + g.NavActivateId = g.NavActivateDownId = g.NavActivatePressedId = g.NavInputId = 0; + if (g.NavId != 0 && !g.NavDisableHighlight && !g.NavWindowingTarget && g.NavWindow && !(g.NavWindow->Flags & ImGuiWindowFlags_NoNavInputs)) + { + bool activate_down = IsNavInputDown(ImGuiNavInput_Activate); + bool activate_pressed = activate_down && IsNavInputTest(ImGuiNavInput_Activate, ImGuiInputReadMode_Pressed); + if (g.ActiveId == 0 && activate_pressed) + g.NavActivateId = g.NavId; + if ((g.ActiveId == 0 || g.ActiveId == g.NavId) && activate_down) + g.NavActivateDownId = g.NavId; + if ((g.ActiveId == 0 || g.ActiveId == g.NavId) && activate_pressed) + g.NavActivatePressedId = g.NavId; + if ((g.ActiveId == 0 || g.ActiveId == g.NavId) && IsNavInputTest(ImGuiNavInput_Input, ImGuiInputReadMode_Pressed)) + g.NavInputId = g.NavId; + } + if (g.NavWindow && (g.NavWindow->Flags & ImGuiWindowFlags_NoNavInputs)) + g.NavDisableHighlight = true; + if (g.NavActivateId != 0) + IM_ASSERT(g.NavActivateDownId == g.NavActivateId); + g.NavMoveRequest = false; + + // Process programmatic activation request + if (g.NavNextActivateId != 0) + g.NavActivateId = g.NavActivateDownId = g.NavActivatePressedId = g.NavInputId = 
g.NavNextActivateId; + g.NavNextActivateId = 0; + + // Initiate directional inputs request + if (g.NavMoveRequestForward == ImGuiNavForward_None) + { + g.NavMoveDir = ImGuiDir_None; + g.NavMoveRequestFlags = ImGuiNavMoveFlags_None; + if (g.NavWindow && !g.NavWindowingTarget && !(g.NavWindow->Flags & ImGuiWindowFlags_NoNavInputs)) + { + const ImGuiInputReadMode read_mode = ImGuiInputReadMode_Repeat; + if (!IsActiveIdUsingNavDir(ImGuiDir_Left) && (IsNavInputTest(ImGuiNavInput_DpadLeft, read_mode) || IsNavInputTest(ImGuiNavInput_KeyLeft_, read_mode))) { g.NavMoveDir = ImGuiDir_Left; } + if (!IsActiveIdUsingNavDir(ImGuiDir_Right) && (IsNavInputTest(ImGuiNavInput_DpadRight, read_mode) || IsNavInputTest(ImGuiNavInput_KeyRight_, read_mode))) { g.NavMoveDir = ImGuiDir_Right; } + if (!IsActiveIdUsingNavDir(ImGuiDir_Up) && (IsNavInputTest(ImGuiNavInput_DpadUp, read_mode) || IsNavInputTest(ImGuiNavInput_KeyUp_, read_mode))) { g.NavMoveDir = ImGuiDir_Up; } + if (!IsActiveIdUsingNavDir(ImGuiDir_Down) && (IsNavInputTest(ImGuiNavInput_DpadDown, read_mode) || IsNavInputTest(ImGuiNavInput_KeyDown_, read_mode))) { g.NavMoveDir = ImGuiDir_Down; } + } + g.NavMoveClipDir = g.NavMoveDir; + } + else + { + // Forwarding previous request (which has been modified, e.g. wrap around menus rewrite the requests with a starting rectangle at the other side of the window) + // (Preserve g.NavMoveRequestFlags, g.NavMoveClipDir which were set by the NavMoveRequestForward() function) + IM_ASSERT(g.NavMoveDir != ImGuiDir_None && g.NavMoveClipDir != ImGuiDir_None); + IM_ASSERT(g.NavMoveRequestForward == ImGuiNavForward_ForwardQueued); + IMGUI_DEBUG_LOG_NAV("[nav] NavMoveRequestForward %d\n", g.NavMoveDir); + g.NavMoveRequestForward = ImGuiNavForward_ForwardActive; + } + + // Update PageUp/PageDown/Home/End scroll + // FIXME-NAV: Consider enabling those keys even without the master ImGuiConfigFlags_NavEnableKeyboard flag? 
+ float nav_scoring_rect_offset_y = 0.0f; + if (nav_keyboard_active) + nav_scoring_rect_offset_y = NavUpdatePageUpPageDown(); + + // If we initiate a movement request and have no current NavId, we initiate a InitDefautRequest that will be used as a fallback if the direction fails to find a match + if (g.NavMoveDir != ImGuiDir_None) + { + g.NavMoveRequest = true; + g.NavMoveRequestKeyMods = io.KeyMods; + g.NavMoveDirLast = g.NavMoveDir; + } + if (g.NavMoveRequest && g.NavId == 0) + { + IMGUI_DEBUG_LOG_NAV("[nav] NavInitRequest: from move, window \"%s\", layer=%d\n", g.NavWindow->Name, g.NavLayer); + g.NavInitRequest = g.NavInitRequestFromMove = true; + // Reassigning with same value, we're being explicit here. + g.NavInitResultId = 0; // -V1048 + g.NavDisableHighlight = false; + } + NavUpdateAnyRequestFlag(); + + // Scrolling + if (g.NavWindow && !(g.NavWindow->Flags & ImGuiWindowFlags_NoNavInputs) && !g.NavWindowingTarget) + { + // *Fallback* manual-scroll with Nav directional keys when window has no navigable item + ImGuiWindow* window = g.NavWindow; + const float scroll_speed = IM_ROUND(window->CalcFontSize() * 100 * io.DeltaTime); // We need round the scrolling speed because sub-pixel scroll isn't reliably supported. + if (window->DC.NavLayerActiveMask == 0x00 && window->DC.NavHasScroll && g.NavMoveRequest) + { + if (g.NavMoveDir == ImGuiDir_Left || g.NavMoveDir == ImGuiDir_Right) + SetScrollX(window, ImFloor(window->Scroll.x + ((g.NavMoveDir == ImGuiDir_Left) ? -1.0f : +1.0f) * scroll_speed)); + if (g.NavMoveDir == ImGuiDir_Up || g.NavMoveDir == ImGuiDir_Down) + SetScrollY(window, ImFloor(window->Scroll.y + ((g.NavMoveDir == ImGuiDir_Up) ? -1.0f : +1.0f) * scroll_speed)); + } + + // *Normal* Manual scroll with NavScrollXXX keys + // Next movement request will clamp the NavId reference rectangle to the visible area, so navigation will resume within those bounds. 
+ ImVec2 scroll_dir = GetNavInputAmount2d(ImGuiNavDirSourceFlags_PadLStick, ImGuiInputReadMode_Down, 1.0f / 10.0f, 10.0f); + if (scroll_dir.x != 0.0f && window->ScrollbarX) + SetScrollX(window, ImFloor(window->Scroll.x + scroll_dir.x * scroll_speed)); + if (scroll_dir.y != 0.0f) + SetScrollY(window, ImFloor(window->Scroll.y + scroll_dir.y * scroll_speed)); + } + + // Reset search results + g.NavMoveResultLocal.Clear(); + g.NavMoveResultLocalVisibleSet.Clear(); + g.NavMoveResultOther.Clear(); + + // When using gamepad, we project the reference nav bounding box into window visible area. + // This is to allow resuming navigation inside the visible area after doing a large amount of scrolling, since with gamepad every movements are relative + // (can't focus a visible object like we can with the mouse). + if (g.NavMoveRequest && g.NavInputSource == ImGuiInputSource_NavGamepad && g.NavLayer == ImGuiNavLayer_Main) + { + ImGuiWindow* window = g.NavWindow; + ImRect window_rect_rel(window->InnerRect.Min - window->Pos - ImVec2(1, 1), window->InnerRect.Max - window->Pos + ImVec2(1, 1)); + if (!window_rect_rel.Contains(window->NavRectRel[g.NavLayer])) + { + IMGUI_DEBUG_LOG_NAV("[nav] NavMoveRequest: clamp NavRectRel\n"); + float pad = window->CalcFontSize() * 0.5f; + window_rect_rel.Expand(ImVec2(-ImMin(window_rect_rel.GetWidth(), pad), -ImMin(window_rect_rel.GetHeight(), pad))); // Terrible approximation for the intent of starting navigation from first fully visible item + window->NavRectRel[g.NavLayer].ClipWithFull(window_rect_rel); + g.NavId = g.NavFocusScopeId = 0; + } + } + + // For scoring we use a single segment on the left side our current item bounding box (not touching the edge to avoid box overlap with zero-spaced items) + ImRect nav_rect_rel = g.NavWindow ? g.NavWindow->NavRectRel[g.NavLayer] : ImRect(0, 0, 0, 0); + g.NavScoringRect = g.NavWindow ? 
ImRect(g.NavWindow->Pos + nav_rect_rel.Min, g.NavWindow->Pos + nav_rect_rel.Max) : GetViewportRect(); + g.NavScoringRect.TranslateY(nav_scoring_rect_offset_y); + g.NavScoringRect.Min.x = ImMin(g.NavScoringRect.Min.x + 1.0f, g.NavScoringRect.Max.x); + g.NavScoringRect.Max.x = g.NavScoringRect.Min.x; + IM_ASSERT(!g.NavScoringRect.IsInverted()); // Ensure if we have a finite, non-inverted bounding box here will allows us to remove extraneous ImFabs() calls in NavScoreItem(). + //GetForegroundDrawList()->AddRect(g.NavScoringRectScreen.Min, g.NavScoringRectScreen.Max, IM_COL32(255,200,0,255)); // [DEBUG] + g.NavScoringCount = 0; +#if IMGUI_DEBUG_NAV_RECTS + if (g.NavWindow) + { + ImDrawList* draw_list = GetForegroundDrawList(g.NavWindow); + if (1) { for (int layer = 0; layer < 2; layer++) draw_list->AddRect(g.NavWindow->Pos + g.NavWindow->NavRectRel[layer].Min, g.NavWindow->Pos + g.NavWindow->NavRectRel[layer].Max, IM_COL32(255,200,0,255)); } // [DEBUG] + if (1) { ImU32 col = (!g.NavWindow->Hidden) ? IM_COL32(255,0,255,255) : IM_COL32(255,0,0,255); ImVec2 p = NavCalcPreferredRefPos(); char buf[32]; ImFormatString(buf, 32, "%d", g.NavLayer); draw_list->AddCircleFilled(p, 3.0f, col); draw_list->AddText(NULL, 13.0f, p + ImVec2(8,-4), col, buf); } + } +#endif +} + +/* Applies the item recorded by a navigation init request (g.NavInitResultId / g.NavInitResultRectRel) as the current nav item of g.NavWindow. Called from NavUpdate() when g.NavInitResultId != 0; does nothing when g.NavWindow is NULL. */ +static void ImGui::NavUpdateInitResult() +{ + // In very rare cases g.NavWindow may be null (e.g. 
clearing focus after requesting an init request, which does happen when releasing Alt while clicking on void) + ImGuiContext& g = *GImGui; + if (!g.NavWindow) + return; + + // Apply result from previous navigation init request (will typically select the first item, unless SetItemDefaultFocus() has been called) + IMGUI_DEBUG_LOG_NAV("[nav] NavInitRequest: result NavID 0x%08X in Layer %d Window \"%s\"\n", g.NavInitResultId, g.NavLayer, g.NavWindow->Name); + if (g.NavInitRequestFromMove) + SetNavIDWithRectRel(g.NavInitResultId, g.NavLayer, 0, g.NavInitResultRectRel); + else + SetNavID(g.NavInitResultId, g.NavLayer, 0); + g.NavWindow->NavRectRel[g.NavLayer] = g.NavInitResultRectRel; +} + +// Apply result from previous frame navigation directional move request +static void ImGui::NavUpdateMoveResult() +{ + ImGuiContext& g = *GImGui; + if (g.NavMoveResultLocal.ID == 0 && g.NavMoveResultOther.ID == 0) + { + // In a situation when there is no results but NavId != 0, re-enable the Navigation highlight (because g.NavId is not considered as a possible result) + if (g.NavId != 0) + { + g.NavDisableHighlight = false; + g.NavDisableMouseHover = true; + } + return; + } + + // Select which result to use + ImGuiNavMoveResult* result = (g.NavMoveResultLocal.ID != 0) ? &g.NavMoveResultLocal : &g.NavMoveResultOther; + + // PageUp/PageDown behavior first jumps to the bottom/top mostly visible item, _otherwise_ use the result from the previous/next page. + if (g.NavMoveRequestFlags & ImGuiNavMoveFlags_AlsoScoreVisibleSet) + if (g.NavMoveResultLocalVisibleSet.ID != 0 && g.NavMoveResultLocalVisibleSet.ID != g.NavId) + result = &g.NavMoveResultLocalVisibleSet; + + // Maybe entering a flattened child from the outside? In this case solve the tie using the regular scoring rules. 
+ if (result != &g.NavMoveResultOther && g.NavMoveResultOther.ID != 0 && g.NavMoveResultOther.Window->ParentWindow == g.NavWindow) + if ((g.NavMoveResultOther.DistBox < result->DistBox) || (g.NavMoveResultOther.DistBox == result->DistBox && g.NavMoveResultOther.DistCenter < result->DistCenter)) + result = &g.NavMoveResultOther; + IM_ASSERT(g.NavWindow && result->Window); + + // Scroll to keep newly navigated item fully into view. + if (g.NavLayer == ImGuiNavLayer_Main) + { + ImVec2 delta_scroll; + if (g.NavMoveRequestFlags & ImGuiNavMoveFlags_ScrollToEdge) + { + float scroll_target = (g.NavMoveDir == ImGuiDir_Up) ? result->Window->ScrollMax.y : 0.0f; + delta_scroll.y = result->Window->Scroll.y - scroll_target; + SetScrollY(result->Window, scroll_target); + } + else + { + ImRect rect_abs = ImRect(result->RectRel.Min + result->Window->Pos, result->RectRel.Max + result->Window->Pos); + delta_scroll = ScrollToBringRectIntoView(result->Window, rect_abs); + } + + // Offset our result position so mouse position can be applied immediately after in NavUpdate() + result->RectRel.TranslateX(-delta_scroll.x); + result->RectRel.TranslateY(-delta_scroll.y); + } + + ClearActiveID(); + g.NavWindow = result->Window; + if (g.NavId != result->ID) + { + // Don't set NavJustMovedToId if just landed on the same spot (which may happen with ImGuiNavMoveFlags_AllowCurrentNavId) + g.NavJustMovedToId = result->ID; + g.NavJustMovedToFocusScopeId = result->FocusScopeId; + g.NavJustMovedToKeyMods = g.NavMoveRequestKeyMods; + } + IMGUI_DEBUG_LOG_NAV("[nav] NavMoveRequest: result NavID 0x%08X in Layer %d Window \"%s\"\n", result->ID, g.NavLayer, g.NavWindow->Name); + SetNavIDWithRectRel(result->ID, g.NavLayer, result->FocusScopeId, result->RectRel); +} + +// Handle PageUp/PageDown/Home/End keys +/* Returns the vertical offset that NavUpdate() applies to the nav scoring rectangle: -/+ one page for PageUp/PageDown move requests, 0.0f in every other case (Home/End, fallback scrolling, or no request). Bails out with 0.0f when a move direction is already set, when there is no nav window, or when nav inputs/windowing/layer make paging inapplicable. */ +static float ImGui::NavUpdatePageUpPageDown() +{ + ImGuiContext& g = *GImGui; + ImGuiIO& io = g.IO; + + if (g.NavMoveDir != ImGuiDir_None || g.NavWindow == NULL) + return 0.0f; + if ((g.NavWindow->Flags & 
ImGuiWindowFlags_NoNavInputs) || g.NavWindowingTarget != NULL || g.NavLayer != ImGuiNavLayer_Main) + return 0.0f; + + ImGuiWindow* window = g.NavWindow; + const bool page_up_held = IsKeyDown(io.KeyMap[ImGuiKey_PageUp]) && !IsActiveIdUsingKey(ImGuiKey_PageUp); + const bool page_down_held = IsKeyDown(io.KeyMap[ImGuiKey_PageDown]) && !IsActiveIdUsingKey(ImGuiKey_PageDown); + const bool home_pressed = IsKeyPressed(io.KeyMap[ImGuiKey_Home]) && !IsActiveIdUsingKey(ImGuiKey_Home); + const bool end_pressed = IsKeyPressed(io.KeyMap[ImGuiKey_End]) && !IsActiveIdUsingKey(ImGuiKey_End); + if (page_up_held != page_down_held || home_pressed != end_pressed) // If either (not both) are pressed + { + if (window->DC.NavLayerActiveMask == 0x00 && window->DC.NavHasScroll) + { + // Fallback manual-scroll when window has no navigable item + if (IsKeyPressed(io.KeyMap[ImGuiKey_PageUp], true)) + SetScrollY(window, window->Scroll.y - window->InnerRect.GetHeight()); + else if (IsKeyPressed(io.KeyMap[ImGuiKey_PageDown], true)) + SetScrollY(window, window->Scroll.y + window->InnerRect.GetHeight()); + else if (home_pressed) + SetScrollY(window, 0.0f); + else if (end_pressed) + SetScrollY(window, window->ScrollMax.y); + } + else + { + ImRect& nav_rect_rel = window->NavRectRel[g.NavLayer]; + const float page_offset_y = ImMax(0.0f, window->InnerRect.GetHeight() - window->CalcFontSize() * 1.0f + nav_rect_rel.GetHeight()); + float nav_scoring_rect_offset_y = 0.0f; + if (IsKeyPressed(io.KeyMap[ImGuiKey_PageUp], true)) + { + nav_scoring_rect_offset_y = -page_offset_y; + g.NavMoveDir = ImGuiDir_Down; // Because our scoring rect is offset up, we request the down direction (so we can always land on the last item) + g.NavMoveClipDir = ImGuiDir_Up; + g.NavMoveRequestFlags = ImGuiNavMoveFlags_AllowCurrentNavId | ImGuiNavMoveFlags_AlsoScoreVisibleSet; + } + else if (IsKeyPressed(io.KeyMap[ImGuiKey_PageDown], true)) + { + nav_scoring_rect_offset_y = +page_offset_y; + g.NavMoveDir = ImGuiDir_Up; // Because 
our scoring rect is offset down, we request the up direction (so we can always land on the last item) + g.NavMoveClipDir = ImGuiDir_Down; + g.NavMoveRequestFlags = ImGuiNavMoveFlags_AllowCurrentNavId | ImGuiNavMoveFlags_AlsoScoreVisibleSet; + } + else if (home_pressed) + { + // FIXME-NAV: handling of Home/End is assuming that the top/bottom most item will be visible with Scroll.y == 0/ScrollMax.y + // Scrolling will be handled via the ImGuiNavMoveFlags_ScrollToEdge flag, we don't scroll immediately to avoid scrolling happening before nav result. + // Preserve current horizontal position if we have any. + nav_rect_rel.Min.y = nav_rect_rel.Max.y = -window->Scroll.y; + if (nav_rect_rel.IsInverted()) + nav_rect_rel.Min.x = nav_rect_rel.Max.x = 0.0f; + g.NavMoveDir = ImGuiDir_Down; + g.NavMoveRequestFlags = ImGuiNavMoveFlags_AllowCurrentNavId | ImGuiNavMoveFlags_ScrollToEdge; + } + else if (end_pressed) + { + nav_rect_rel.Min.y = nav_rect_rel.Max.y = window->ScrollMax.y + window->SizeFull.y - window->Scroll.y; + if (nav_rect_rel.IsInverted()) + nav_rect_rel.Min.x = nav_rect_rel.Max.x = 0.0f; + g.NavMoveDir = ImGuiDir_Up; + g.NavMoveRequestFlags = ImGuiNavMoveFlags_AllowCurrentNavId | ImGuiNavMoveFlags_ScrollToEdge; + } + return nav_scoring_rect_offset_y; + } + } + return 0.0f; +} + +/* End-of-frame navigation work: submits the CTRL+TAB windowing overlay while g.NavWindowingTarget is set, then, if a wrap/loop was requested for the current nav window and the move request found no result, forwards the request with a start rectangle placed at the opposite edge of that window. */ +static void ImGui::NavEndFrame() +{ + ImGuiContext& g = *GImGui; + + // Show CTRL+TAB list window + if (g.NavWindowingTarget != NULL) + NavUpdateWindowingOverlay(); + + // Perform wrap-around in menus + ImGuiWindow* window = g.NavWrapRequestWindow; + ImGuiNavMoveFlags move_flags = g.NavWrapRequestFlags; + if (window != NULL && g.NavWindow == window && NavMoveRequestButNoResultYet() && g.NavMoveRequestForward == ImGuiNavForward_None && g.NavLayer == ImGuiNavLayer_Main) + { + IM_ASSERT(move_flags != 0); // No points calling this with no wrapping + ImRect bb_rel = window->NavRectRel[0]; + + ImGuiDir clip_dir = g.NavMoveDir; + if (g.NavMoveDir == ImGuiDir_Left && (move_flags & 
(ImGuiNavMoveFlags_WrapX | ImGuiNavMoveFlags_LoopX))) + { + bb_rel.Min.x = bb_rel.Max.x = + ImMax(window->SizeFull.x, window->ContentSize.x + window->WindowPadding.x * 2.0f) - window->Scroll.x; + if (move_flags & ImGuiNavMoveFlags_WrapX) + { + bb_rel.TranslateY(-bb_rel.GetHeight()); + clip_dir = ImGuiDir_Up; + } + NavMoveRequestForward(g.NavMoveDir, clip_dir, bb_rel, move_flags); + } + if (g.NavMoveDir == ImGuiDir_Right && (move_flags & (ImGuiNavMoveFlags_WrapX | ImGuiNavMoveFlags_LoopX))) + { + bb_rel.Min.x = bb_rel.Max.x = -window->Scroll.x; + if (move_flags & ImGuiNavMoveFlags_WrapX) + { + bb_rel.TranslateY(+bb_rel.GetHeight()); + clip_dir = ImGuiDir_Down; + } + NavMoveRequestForward(g.NavMoveDir, clip_dir, bb_rel, move_flags); + } + if (g.NavMoveDir == ImGuiDir_Up && (move_flags & (ImGuiNavMoveFlags_WrapY | ImGuiNavMoveFlags_LoopY))) + { + bb_rel.Min.y = bb_rel.Max.y = + ImMax(window->SizeFull.y, window->ContentSize.y + window->WindowPadding.y * 2.0f) - window->Scroll.y; + if (move_flags & ImGuiNavMoveFlags_WrapY) + { + bb_rel.TranslateX(-bb_rel.GetWidth()); + clip_dir = ImGuiDir_Left; + } + NavMoveRequestForward(g.NavMoveDir, clip_dir, bb_rel, move_flags); + } + if (g.NavMoveDir == ImGuiDir_Down && (move_flags & (ImGuiNavMoveFlags_WrapY | ImGuiNavMoveFlags_LoopY))) + { + bb_rel.Min.y = bb_rel.Max.y = -window->Scroll.y; + if (move_flags & ImGuiNavMoveFlags_WrapY) + { + bb_rel.TranslateX(+bb_rel.GetWidth()); + clip_dir = ImGuiDir_Right; + } + NavMoveRequestForward(g.NavMoveDir, clip_dir, bb_rel, move_flags); + } + } +} + +/* Returns the index of 'window' in g.WindowsFocusOrder (searched from the last entry backwards), or -1 when the window is not in the list. */ +static int ImGui::FindWindowFocusIndex(ImGuiWindow* window) // FIXME-OPT O(N) +{ + ImGuiContext& g = *GImGui; + for (int i = g.WindowsFocusOrder.Size - 1; i >= 0; i--) + if (g.WindowsFocusOrder[i] == window) + return i; + return -1; +} + +/* Walks g.WindowsFocusOrder starting at index i_start in steps of 'dir', stopping when the index leaves the array bounds or reaches i_stop (exclusive). Returns the first window accepted by IsWindowNavFocusable(), or NULL when none is found. */ +static ImGuiWindow* FindWindowNavFocusable(int i_start, int i_stop, int dir) // FIXME-OPT O(N) +{ + ImGuiContext& g = *GImGui; + for (int i = i_start; i >= 0 && i < g.WindowsFocusOrder.Size && i != i_stop; i 
+= dir) + if (ImGui::IsWindowNavFocusable(g.WindowsFocusOrder[i])) + return g.WindowsFocusOrder[i]; + return NULL; +} + +/* Moves g.NavWindowingTarget (and g.NavWindowingTargetAnim) to the next nav-focusable window found in direction 'focus_change_dir' within g.WindowsFocusOrder; when the first search finds nothing, a second search restarts from the opposite end of the list and stops at the current target. Also clears g.NavWindowingToggleLayer. Returns early, changing nothing, when the current target is a modal window. */ +static void NavUpdateWindowingHighlightWindow(int focus_change_dir) +{ + ImGuiContext& g = *GImGui; + IM_ASSERT(g.NavWindowingTarget); + if (g.NavWindowingTarget->Flags & ImGuiWindowFlags_Modal) + return; + + const int i_current = ImGui::FindWindowFocusIndex(g.NavWindowingTarget); + ImGuiWindow* window_target = FindWindowNavFocusable(i_current + focus_change_dir, -INT_MAX, focus_change_dir); + if (!window_target) + window_target = FindWindowNavFocusable((focus_change_dir < 0) ? (g.WindowsFocusOrder.Size - 1) : 0, i_current, focus_change_dir); + if (window_target) // Don't reset windowing target if there's a single window in the list + g.NavWindowingTarget = g.NavWindowingTargetAnim = window_target; + g.NavWindowingToggleLayer = false; +} + +// Windowing management mode +// Keyboard: CTRL+Tab (change focus/move/resize), Alt (toggle menu layer) +// Gamepad: Hold Menu/Square (change focus/move/resize), Tap Menu/Square (toggle menu layer) +static void ImGui::NavUpdateWindowing() +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* apply_focus_window = NULL; + bool apply_toggle_layer = false; + + ImGuiWindow* modal_window = GetTopMostPopupModal(); + bool allow_windowing = (modal_window == NULL); + if (!allow_windowing) + g.NavWindowingTarget = NULL; + + // Fade out + if (g.NavWindowingTargetAnim && g.NavWindowingTarget == NULL) + { + g.NavWindowingHighlightAlpha = ImMax(g.NavWindowingHighlightAlpha - g.IO.DeltaTime * 10.0f, 0.0f); + if (g.DimBgRatio <= 0.0f && g.NavWindowingHighlightAlpha <= 0.0f) + g.NavWindowingTargetAnim = NULL; + } + + // Start CTRL-TAB or Square+L/R window selection + bool start_windowing_with_gamepad = allow_windowing && !g.NavWindowingTarget && IsNavInputTest(ImGuiNavInput_Menu, ImGuiInputReadMode_Pressed); + bool start_windowing_with_keyboard = allow_windowing && !g.NavWindowingTarget && g.IO.KeyCtrl && IsKeyPressedMap(ImGuiKey_Tab) 
&& (g.IO.ConfigFlags & ImGuiConfigFlags_NavEnableKeyboard); + if (start_windowing_with_gamepad || start_windowing_with_keyboard) + if (ImGuiWindow* window = g.NavWindow ? g.NavWindow : FindWindowNavFocusable(g.WindowsFocusOrder.Size - 1, -INT_MAX, -1)) + { + g.NavWindowingTarget = g.NavWindowingTargetAnim = window->RootWindow; // FIXME-DOCK: Will need to use RootWindowDockStop + g.NavWindowingTimer = g.NavWindowingHighlightAlpha = 0.0f; + g.NavWindowingToggleLayer = start_windowing_with_keyboard ? false : true; + g.NavInputSource = start_windowing_with_keyboard ? ImGuiInputSource_NavKeyboard : ImGuiInputSource_NavGamepad; + } + + // Gamepad update + g.NavWindowingTimer += g.IO.DeltaTime; + if (g.NavWindowingTarget && g.NavInputSource == ImGuiInputSource_NavGamepad) + { + // Highlight only appears after a brief time holding the button, so that a fast tap on PadMenu (to toggle NavLayer) doesn't add visual noise + g.NavWindowingHighlightAlpha = ImMax(g.NavWindowingHighlightAlpha, ImSaturate((g.NavWindowingTimer - NAV_WINDOWING_HIGHLIGHT_DELAY) / 0.05f)); + + // Select window to focus + const int focus_change_dir = (int)IsNavInputTest(ImGuiNavInput_FocusPrev, ImGuiInputReadMode_RepeatSlow) - (int)IsNavInputTest(ImGuiNavInput_FocusNext, ImGuiInputReadMode_RepeatSlow); + if (focus_change_dir != 0) + { + NavUpdateWindowingHighlightWindow(focus_change_dir); + g.NavWindowingHighlightAlpha = 1.0f; + } + + // Single press toggles NavLayer, long press with L/R apply actual focus on release (until then the window was merely rendered top-most) + if (!IsNavInputDown(ImGuiNavInput_Menu)) + { + g.NavWindowingToggleLayer &= (g.NavWindowingHighlightAlpha < 1.0f); // Once button was held long enough we don't consider it a tap-to-toggle-layer press anymore. 
+ if (g.NavWindowingToggleLayer && g.NavWindow) + apply_toggle_layer = true; + else if (!g.NavWindowingToggleLayer) + apply_focus_window = g.NavWindowingTarget; + g.NavWindowingTarget = NULL; + } + } + + // Keyboard: Focus + if (g.NavWindowingTarget && g.NavInputSource == ImGuiInputSource_NavKeyboard) + { + // Visuals only appears after a brief time after pressing TAB the first time, so that a fast CTRL+TAB doesn't add visual noise + g.NavWindowingHighlightAlpha = ImMax(g.NavWindowingHighlightAlpha, ImSaturate((g.NavWindowingTimer - NAV_WINDOWING_HIGHLIGHT_DELAY) / 0.05f)); // 1.0f + if (IsKeyPressedMap(ImGuiKey_Tab, true)) + NavUpdateWindowingHighlightWindow(g.IO.KeyShift ? +1 : -1); + if (!g.IO.KeyCtrl) + apply_focus_window = g.NavWindowingTarget; + } + + // Keyboard: Press and Release ALT to toggle menu layer + // FIXME: We lack an explicit IO variable for "is the imgui window focused", so compare mouse validity to detect the common case of backend clearing releases all keys on ALT-TAB + if (IsNavInputTest(ImGuiNavInput_KeyMenu_, ImGuiInputReadMode_Pressed)) + g.NavWindowingToggleLayer = true; + if ((g.ActiveId == 0 || g.ActiveIdAllowOverlap) && g.NavWindowingToggleLayer && IsNavInputTest(ImGuiNavInput_KeyMenu_, ImGuiInputReadMode_Released)) + if (IsMousePosValid(&g.IO.MousePos) == IsMousePosValid(&g.IO.MousePosPrev)) + apply_toggle_layer = true; + + // Move window + if (g.NavWindowingTarget && !(g.NavWindowingTarget->Flags & ImGuiWindowFlags_NoMove)) + { + ImVec2 move_delta; + if (g.NavInputSource == ImGuiInputSource_NavKeyboard && !g.IO.KeyShift) + move_delta = GetNavInputAmount2d(ImGuiNavDirSourceFlags_Keyboard, ImGuiInputReadMode_Down); + if (g.NavInputSource == ImGuiInputSource_NavGamepad) + move_delta = GetNavInputAmount2d(ImGuiNavDirSourceFlags_PadLStick, ImGuiInputReadMode_Down); + if (move_delta.x != 0.0f || move_delta.y != 0.0f) + { + const float NAV_MOVE_SPEED = 800.0f; + const float move_speed = ImFloor(NAV_MOVE_SPEED * g.IO.DeltaTime * 
ImMin(g.IO.DisplayFramebufferScale.x, g.IO.DisplayFramebufferScale.y)); // FIXME: Doesn't handle variable framerate very well + ImGuiWindow* moving_window = g.NavWindowingTarget->RootWindow; + SetWindowPos(moving_window, moving_window->Pos + move_delta * move_speed, ImGuiCond_Always); + MarkIniSettingsDirty(moving_window); + g.NavDisableMouseHover = true; + } + } + + // Apply final focus + if (apply_focus_window && (g.NavWindow == NULL || apply_focus_window != g.NavWindow->RootWindow)) + { + ClearActiveID(); + g.NavDisableHighlight = false; + g.NavDisableMouseHover = true; + apply_focus_window = NavRestoreLastChildNavWindow(apply_focus_window); + ClosePopupsOverWindow(apply_focus_window, false); + FocusWindow(apply_focus_window); + if (apply_focus_window->NavLastIds[0] == 0) + NavInitWindow(apply_focus_window, false); + + // If the window only has a menu layer, select it directly + if (apply_focus_window->DC.NavLayerActiveMask == (1 << ImGuiNavLayer_Menu)) + g.NavLayer = ImGuiNavLayer_Menu; + } + if (apply_focus_window) + g.NavWindowingTarget = NULL; + + // Apply menu/layer toggle + if (apply_toggle_layer && g.NavWindow) + { + // Move to parent menu if necessary + ImGuiWindow* new_nav_window = g.NavWindow; + while (new_nav_window->ParentWindow + && (new_nav_window->DC.NavLayerActiveMask & (1 << ImGuiNavLayer_Menu)) == 0 + && (new_nav_window->Flags & ImGuiWindowFlags_ChildWindow) != 0 + && (new_nav_window->Flags & (ImGuiWindowFlags_Popup | ImGuiWindowFlags_ChildMenu)) == 0) + new_nav_window = new_nav_window->ParentWindow; + if (new_nav_window != g.NavWindow) + { + ImGuiWindow* old_nav_window = g.NavWindow; + FocusWindow(new_nav_window); + new_nav_window->NavLastChildNavWindow = old_nav_window; + } + g.NavDisableHighlight = false; + g.NavDisableMouseHover = true; + + // When entering a regular menu bar with the Alt key, we always reinitialize the navigation ID. + const ImGuiNavLayer new_nav_layer = (g.NavWindow->DC.NavLayerActiveMask & (1 << ImGuiNavLayer_Menu)) ? 
(ImGuiNavLayer)((int)g.NavLayer ^ 1) : ImGuiNavLayer_Main; + NavRestoreLayer(new_nav_layer); + } +} + +// Window has already passed the IsWindowNavFocusable() +/* Returns the placeholder label shown in the windowing list for a window whose name has no visible text: "(Popup)" for popups, "(Main menu bar)" for the menu-bar window named "##MainMenuBar", "(Untitled)" otherwise. */ +static const char* GetFallbackWindowNameForWindowingList(ImGuiWindow* window) +{ + if (window->Flags & ImGuiWindowFlags_Popup) + return "(Popup)"; + if ((window->Flags & ImGuiWindowFlags_MenuBar) && strcmp(window->Name, "##MainMenuBar") == 0) + return "(Main menu bar)"; + return "(Untitled)"; +} + +// Overlay displayed when using CTRL+TAB. Called by NavEndFrame(). +/* Submits nothing until g.NavWindowingTimer reaches NAV_WINDOWING_LIST_APPEAR_DELAY. Then lists every nav-focusable window of g.WindowsFocusOrder (last entry first) as a Selectable inside a centered, auto-resized "###NavWindowingList" window, with the current g.NavWindowingTarget shown as selected. */ +void ImGui::NavUpdateWindowingOverlay() +{ + ImGuiContext& g = *GImGui; + IM_ASSERT(g.NavWindowingTarget != NULL); + + if (g.NavWindowingTimer < NAV_WINDOWING_LIST_APPEAR_DELAY) + return; + + if (g.NavWindowingListWindow == NULL) + g.NavWindowingListWindow = FindWindowByName("###NavWindowingList"); + SetNextWindowSizeConstraints(ImVec2(g.IO.DisplaySize.x * 0.20f, g.IO.DisplaySize.y * 0.20f), ImVec2(FLT_MAX, FLT_MAX)); + SetNextWindowPos(g.IO.DisplaySize * 0.5f, ImGuiCond_Always, ImVec2(0.5f, 0.5f)); + PushStyleVar(ImGuiStyleVar_WindowPadding, g.Style.WindowPadding * 2.0f); + Begin("###NavWindowingList", NULL, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoFocusOnAppearing | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoInputs | ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoSavedSettings); + for (int n = g.WindowsFocusOrder.Size - 1; n >= 0; n--) + { + ImGuiWindow* window = g.WindowsFocusOrder[n]; + if (!IsWindowNavFocusable(window)) + continue; + const char* label = window->Name; + if (label == FindRenderedTextEnd(label)) + label = GetFallbackWindowNameForWindowingList(window); + Selectable(label, g.NavWindowingTarget == window); + } + End(); + PopStyleVar(); +} + + +//----------------------------------------------------------------------------- +// [SECTION] DRAG AND DROP +//----------------------------------------------------------------------------- + +/* Resets the context's drag and drop state: active flag, payload, accept flags/ids/frame counter, and both payload storage buffers (heap and local). */ +void ImGui::ClearDragDrop() +{ + ImGuiContext& g = 
*GImGui; + g.DragDropActive = false; + g.DragDropPayload.Clear(); + g.DragDropAcceptFlags = ImGuiDragDropFlags_None; + g.DragDropAcceptIdCurr = g.DragDropAcceptIdPrev = 0; + g.DragDropAcceptIdCurrRectSurface = FLT_MAX; + g.DragDropAcceptFrameCount = -1; + + g.DragDropPayloadBufHeap.clear(); + memset(&g.DragDropPayloadBufLocal, 0, sizeof(g.DragDropPayloadBufLocal)); +} + +// Call when current ID is active. +// When this returns true you need to: a) call SetDragDropPayload() exactly once, b) you may render the payload visual/description, c) call EndDragDropSource() +bool ImGui::BeginDragDropSource(ImGuiDragDropFlags flags) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + + bool source_drag_active = false; + ImGuiID source_id = 0; + ImGuiID source_parent_id = 0; + ImGuiMouseButton mouse_button = ImGuiMouseButton_Left; + if (!(flags & ImGuiDragDropFlags_SourceExtern)) + { + source_id = window->DC.LastItemId; + if (source_id != 0 && g.ActiveId != source_id) // Early out for most common case + return false; + if (g.IO.MouseDown[mouse_button] == false) + return false; + + if (source_id == 0) + { + // If you want to use BeginDragDropSource() on an item with no unique identifier for interaction, such as Text() or Image(), you need to: + // A) Read the explanation below, B) Use the ImGuiDragDropFlags_SourceAllowNullID flag, C) Swallow your programmer pride. + if (!(flags & ImGuiDragDropFlags_SourceAllowNullID)) + { + IM_ASSERT(0); + return false; + } + + // Early out + if ((window->DC.LastItemStatusFlags & ImGuiItemStatusFlags_HoveredRect) == 0 && (g.ActiveId == 0 || g.ActiveIdWindow != window)) + return false; + + // Magic fallback (=somehow reprehensible) to handle items with no assigned ID, e.g. Text(), Image() + // We build a throwaway ID based on current ID stack + relative AABB of items in window. + // THE IDENTIFIER WON'T SURVIVE ANY REPOSITIONING OF THE WIDGET, so if your widget moves your dragging operation will be canceled. 
+ // We don't need to maintain/call ClearActiveID() as releasing the button will early out this function and trigger !ActiveIdIsAlive. + source_id = window->DC.LastItemId = window->GetIDFromRectangle(window->DC.LastItemRect); + bool is_hovered = ItemHoverable(window->DC.LastItemRect, source_id); + if (is_hovered && g.IO.MouseClicked[mouse_button]) + { + SetActiveID(source_id, window); + FocusWindow(window); + } + if (g.ActiveId == source_id) // Allow the underlying widget to display/return hovered during the mouse release frame, else we would get a flicker. + g.ActiveIdAllowOverlap = is_hovered; + } + else + { + g.ActiveIdAllowOverlap = false; + } + if (g.ActiveId != source_id) + return false; + source_parent_id = window->IDStack.back(); + source_drag_active = IsMouseDragging(mouse_button); + + // Disable navigation and key inputs while dragging + g.ActiveIdUsingNavDirMask = ~(ImU32)0; + g.ActiveIdUsingNavInputMask = ~(ImU32)0; + g.ActiveIdUsingKeyInputMask = ~(ImU64)0; + } + else + { + window = NULL; + source_id = ImHashStr("#SourceExtern"); + source_drag_active = true; + } + + if (source_drag_active) + { + if (!g.DragDropActive) + { + IM_ASSERT(source_id != 0); + ClearDragDrop(); + ImGuiPayload& payload = g.DragDropPayload; + payload.SourceId = source_id; + payload.SourceParentId = source_parent_id; + g.DragDropActive = true; + g.DragDropSourceFlags = flags; + g.DragDropMouseButton = mouse_button; + if (payload.SourceId == g.ActiveId) + g.ActiveIdNoClearOnFocusLoss = true; + } + g.DragDropSourceFrameCount = g.FrameCount; + g.DragDropWithinSource = true; + + if (!(flags & ImGuiDragDropFlags_SourceNoPreviewTooltip)) + { + // Target can request the Source to not display its tooltip (we use a dedicated flag to make this request explicit) + // We unfortunately can't just modify the source flags and skip the call to BeginTooltip, as caller may be emitting contents. 
+ BeginTooltip(); + if (g.DragDropAcceptIdPrev && (g.DragDropAcceptFlags & ImGuiDragDropFlags_AcceptNoPreviewTooltip)) + { + ImGuiWindow* tooltip_window = g.CurrentWindow; + tooltip_window->SkipItems = true; + tooltip_window->HiddenFramesCanSkipItems = 1; + } + } + + if (!(flags & ImGuiDragDropFlags_SourceNoDisableHover) && !(flags & ImGuiDragDropFlags_SourceExtern)) + window->DC.LastItemStatusFlags &= ~ImGuiItemStatusFlags_HoveredRect; + + return true; + } + return false; +} + +void ImGui::EndDragDropSource() +{ + ImGuiContext& g = *GImGui; + IM_ASSERT(g.DragDropActive); + IM_ASSERT(g.DragDropWithinSource && "Not after a BeginDragDropSource()?"); + + if (!(g.DragDropSourceFlags & ImGuiDragDropFlags_SourceNoPreviewTooltip)) + EndTooltip(); + + // Discard the drag if have not called SetDragDropPayload() + if (g.DragDropPayload.DataFrameCount == -1) + ClearDragDrop(); + g.DragDropWithinSource = false; +} + +// Use 'cond' to choose to submit payload on drag start or every frame +bool ImGui::SetDragDropPayload(const char* type, const void* data, size_t data_size, ImGuiCond cond) +{ + ImGuiContext& g = *GImGui; + ImGuiPayload& payload = g.DragDropPayload; + if (cond == 0) + cond = ImGuiCond_Always; + + IM_ASSERT(type != NULL); + IM_ASSERT(strlen(type) < IM_ARRAYSIZE(payload.DataType) && "Payload type can be at most 32 characters long"); + IM_ASSERT((data != NULL && data_size > 0) || (data == NULL && data_size == 0)); + IM_ASSERT(cond == ImGuiCond_Always || cond == ImGuiCond_Once); + IM_ASSERT(payload.SourceId != 0); // Not called between BeginDragDropSource() and EndDragDropSource() + + if (cond == ImGuiCond_Always || payload.DataFrameCount == -1) + { + // Copy payload + ImStrncpy(payload.DataType, type, IM_ARRAYSIZE(payload.DataType)); + g.DragDropPayloadBufHeap.resize(0); + if (data_size > sizeof(g.DragDropPayloadBufLocal)) + { + // Store in heap + g.DragDropPayloadBufHeap.resize((int)data_size); + payload.Data = g.DragDropPayloadBufHeap.Data; + memcpy(payload.Data, 
data, data_size); + } + else if (data_size > 0) + { + // Store locally + memset(&g.DragDropPayloadBufLocal, 0, sizeof(g.DragDropPayloadBufLocal)); + payload.Data = g.DragDropPayloadBufLocal; + memcpy(payload.Data, data, data_size); + } + else + { + payload.Data = NULL; + } + payload.DataSize = (int)data_size; + } + payload.DataFrameCount = g.FrameCount; + + return (g.DragDropAcceptFrameCount == g.FrameCount) || (g.DragDropAcceptFrameCount == g.FrameCount - 1); +} + +bool ImGui::BeginDragDropTargetCustom(const ImRect& bb, ImGuiID id) +{ + ImGuiContext& g = *GImGui; + if (!g.DragDropActive) + return false; + + ImGuiWindow* window = g.CurrentWindow; + ImGuiWindow* hovered_window = g.HoveredWindowUnderMovingWindow; + if (hovered_window == NULL || window->RootWindow != hovered_window->RootWindow) + return false; + IM_ASSERT(id != 0); + if (!IsMouseHoveringRect(bb.Min, bb.Max) || (id == g.DragDropPayload.SourceId)) + return false; + if (window->SkipItems) + return false; + + IM_ASSERT(g.DragDropWithinTarget == false); + g.DragDropTargetRect = bb; + g.DragDropTargetId = id; + g.DragDropWithinTarget = true; + return true; +} + +// We don't use BeginDragDropTargetCustom() and duplicate its code because: +// 1) we use LastItemRectHoveredRect which handles items that pushes a temporarily clip rectangle in their code. Calling BeginDragDropTargetCustom(LastItemRect) would not handle them. +// 2) and it's faster. as this code may be very frequently called, we want to early out as fast as we can. 
+// Also note how the HoveredWindow test is positioned differently in both functions (in both functions we optimize for the cheapest early out case) +bool ImGui::BeginDragDropTarget() +{ + ImGuiContext& g = *GImGui; + if (!g.DragDropActive) + return false; + + ImGuiWindow* window = g.CurrentWindow; + if (!(window->DC.LastItemStatusFlags & ImGuiItemStatusFlags_HoveredRect)) + return false; + ImGuiWindow* hovered_window = g.HoveredWindowUnderMovingWindow; + if (hovered_window == NULL || window->RootWindow != hovered_window->RootWindow) + return false; + + const ImRect& display_rect = (window->DC.LastItemStatusFlags & ImGuiItemStatusFlags_HasDisplayRect) ? window->DC.LastItemDisplayRect : window->DC.LastItemRect; + ImGuiID id = window->DC.LastItemId; + if (id == 0) + id = window->GetIDFromRectangle(display_rect); + if (g.DragDropPayload.SourceId == id) + return false; + + IM_ASSERT(g.DragDropWithinTarget == false); + g.DragDropTargetRect = display_rect; + g.DragDropTargetId = id; + g.DragDropWithinTarget = true; + return true; +} + +bool ImGui::IsDragDropPayloadBeingAccepted() +{ + ImGuiContext& g = *GImGui; + return g.DragDropActive && g.DragDropAcceptIdPrev != 0; +} + +const ImGuiPayload* ImGui::AcceptDragDropPayload(const char* type, ImGuiDragDropFlags flags) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + ImGuiPayload& payload = g.DragDropPayload; + IM_ASSERT(g.DragDropActive); // Not called between BeginDragDropTarget() and EndDragDropTarget() ? + IM_ASSERT(payload.DataFrameCount != -1); // Forgot to call EndDragDropTarget() ? + if (type != NULL && !payload.IsDataType(type)) + return NULL; + + // Accept smallest drag target bounding box, this allows us to nest drag targets conveniently without ordering constraints. + // NB: We currently accept NULL id as target. However, overlapping targets requires a unique ID to function! 
+ const bool was_accepted_previously = (g.DragDropAcceptIdPrev == g.DragDropTargetId); + ImRect r = g.DragDropTargetRect; + float r_surface = r.GetWidth() * r.GetHeight(); + if (r_surface <= g.DragDropAcceptIdCurrRectSurface) + { + g.DragDropAcceptFlags = flags; + g.DragDropAcceptIdCurr = g.DragDropTargetId; + g.DragDropAcceptIdCurrRectSurface = r_surface; + } + + // Render default drop visuals + payload.Preview = was_accepted_previously; + flags |= (g.DragDropSourceFlags & ImGuiDragDropFlags_AcceptNoDrawDefaultRect); // Source can also inhibit the preview (useful for external sources that lives for 1 frame) + if (!(flags & ImGuiDragDropFlags_AcceptNoDrawDefaultRect) && payload.Preview) + { + // FIXME-DRAG: Settle on a proper default visuals for drop target. + r.Expand(3.5f); + bool push_clip_rect = !window->ClipRect.Contains(r); + if (push_clip_rect) window->DrawList->PushClipRect(r.Min - ImVec2(1, 1), r.Max + ImVec2(1, 1)); + window->DrawList->AddRect(r.Min, r.Max, GetColorU32(ImGuiCol_DragDropTarget), 0.0f, ~0, 2.0f); + if (push_clip_rect) window->DrawList->PopClipRect(); + } + + g.DragDropAcceptFrameCount = g.FrameCount; + payload.Delivery = was_accepted_previously && !IsMouseDown(g.DragDropMouseButton); // For extern drag sources affecting os window focus, it's easier to just test !IsMouseDown() instead of IsMouseReleased() + if (!payload.Delivery && !(flags & ImGuiDragDropFlags_AcceptBeforeDelivery)) + return NULL; + + return &payload; +} + +const ImGuiPayload* ImGui::GetDragDropPayload() +{ + ImGuiContext& g = *GImGui; + return g.DragDropActive ? &g.DragDropPayload : NULL; +} + +// We don't really use/need this now, but added it for the sake of consistency and because we might need it later. 
+void ImGui::EndDragDropTarget() +{ + ImGuiContext& g = *GImGui; + IM_ASSERT(g.DragDropActive); + IM_ASSERT(g.DragDropWithinTarget); + g.DragDropWithinTarget = false; +} + +//----------------------------------------------------------------------------- +// [SECTION] LOGGING/CAPTURING +//----------------------------------------------------------------------------- +// All text output from the interface can be captured into tty/file/clipboard. +// By default, tree nodes are automatically opened during logging. +//----------------------------------------------------------------------------- + +// Pass text data straight to log (without being displayed) +void ImGui::LogText(const char* fmt, ...) +{ + ImGuiContext& g = *GImGui; + if (!g.LogEnabled) + return; + + va_list args; + va_start(args, fmt); + if (g.LogFile) + { + g.LogBuffer.Buf.resize(0); + g.LogBuffer.appendfv(fmt, args); + ImFileWrite(g.LogBuffer.c_str(), sizeof(char), (ImU64)g.LogBuffer.size(), g.LogFile); + } + else + { + g.LogBuffer.appendfv(fmt, args); + } + va_end(args); +} + +// Internal version that takes a position to decide on newline placement and pad items according to their depth. +// We split text into individual lines to add current tree level padding +void ImGui::LogRenderedText(const ImVec2* ref_pos, const char* text, const char* text_end) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + + if (!text_end) + text_end = FindRenderedTextEnd(text, text_end); + + const bool log_new_line = ref_pos && (ref_pos->y > g.LogLinePosY + 1); + if (ref_pos) + g.LogLinePosY = ref_pos->y; + if (log_new_line) + g.LogLineFirstItem = true; + + const char* text_remaining = text; + if (g.LogDepthRef > window->DC.TreeDepth) // Re-adjust padding if we have popped out of our starting depth + g.LogDepthRef = window->DC.TreeDepth; + const int tree_depth = (window->DC.TreeDepth - g.LogDepthRef); + for (;;) + { + // Split the string. 
Each new line (after a '\n') is followed by spacing corresponding to the current depth of our log entry. + // We don't add a trailing \n to allow a subsequent item on the same line to be captured. + const char* line_start = text_remaining; + const char* line_end = ImStreolRange(line_start, text_end); + const bool is_first_line = (line_start == text); + const bool is_last_line = (line_end == text_end); + if (!is_last_line || (line_start != line_end)) + { + const int char_count = (int)(line_end - line_start); + if (log_new_line || !is_first_line) + LogText(IM_NEWLINE "%*s%.*s", tree_depth * 4, "", char_count, line_start); + else if (g.LogLineFirstItem) + LogText("%*s%.*s", tree_depth * 4, "", char_count, line_start); + else + LogText(" %.*s", char_count, line_start); + g.LogLineFirstItem = false; + } + else if (log_new_line) + { + // An empty "" string at a different Y position should output a carriage return. + LogText(IM_NEWLINE); + break; + } + + if (is_last_line) + break; + text_remaining = line_end + 1; + } +} + +// Start logging/capturing text output +void ImGui::LogBegin(ImGuiLogType type, int auto_open_depth) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + IM_ASSERT(g.LogEnabled == false); + IM_ASSERT(g.LogFile == NULL); + IM_ASSERT(g.LogBuffer.empty()); + g.LogEnabled = true; + g.LogType = type; + g.LogDepthRef = window->DC.TreeDepth; + g.LogDepthToExpand = ((auto_open_depth >= 0) ? 
auto_open_depth : g.LogDepthToExpandDefault); + g.LogLinePosY = FLT_MAX; + g.LogLineFirstItem = true; +} + +void ImGui::LogToTTY(int auto_open_depth) +{ + ImGuiContext& g = *GImGui; + if (g.LogEnabled) + return; + IM_UNUSED(auto_open_depth); +#ifndef IMGUI_DISABLE_TTY_FUNCTIONS + LogBegin(ImGuiLogType_TTY, auto_open_depth); + g.LogFile = stdout; +#endif +} + +// Start logging/capturing text output to given file +void ImGui::LogToFile(int auto_open_depth, const char* filename) +{ + ImGuiContext& g = *GImGui; + if (g.LogEnabled) + return; + + // FIXME: We could probably open the file in text mode "at", however note that clipboard/buffer logging will still + // be subject to outputting OS-incompatible carriage return if within strings the user doesn't use IM_NEWLINE. + // By opening the file in binary mode "ab" we have consistent output everywhere. + if (!filename) + filename = g.IO.LogFilename; + if (!filename || !filename[0]) + return; + ImFileHandle f = ImFileOpen(filename, "ab"); + if (!f) + { + IM_ASSERT(0); + return; + } + + LogBegin(ImGuiLogType_File, auto_open_depth); + g.LogFile = f; +} + +// Start logging/capturing text output to clipboard +void ImGui::LogToClipboard(int auto_open_depth) +{ + ImGuiContext& g = *GImGui; + if (g.LogEnabled) + return; + LogBegin(ImGuiLogType_Clipboard, auto_open_depth); +} + +void ImGui::LogToBuffer(int auto_open_depth) +{ + ImGuiContext& g = *GImGui; + if (g.LogEnabled) + return; + LogBegin(ImGuiLogType_Buffer, auto_open_depth); +} + +void ImGui::LogFinish() +{ + ImGuiContext& g = *GImGui; + if (!g.LogEnabled) + return; + + LogText(IM_NEWLINE); + switch (g.LogType) + { + case ImGuiLogType_TTY: +#ifndef IMGUI_DISABLE_TTY_FUNCTIONS + fflush(g.LogFile); +#endif + break; + case ImGuiLogType_File: + ImFileClose(g.LogFile); + break; + case ImGuiLogType_Buffer: + break; + case ImGuiLogType_Clipboard: + if (!g.LogBuffer.empty()) + SetClipboardText(g.LogBuffer.begin()); + break; + case ImGuiLogType_None: + IM_ASSERT(0); + break; + } + 
+ g.LogEnabled = false; + g.LogType = ImGuiLogType_None; + g.LogFile = NULL; + g.LogBuffer.clear(); +} + +// Helper to display logging buttons +// FIXME-OBSOLETE: We should probably obsolete this and let the user have their own helper (this is one of the oldest function alive!) +void ImGui::LogButtons() +{ + ImGuiContext& g = *GImGui; + + PushID("LogButtons"); +#ifndef IMGUI_DISABLE_TTY_FUNCTIONS + const bool log_to_tty = Button("Log To TTY"); SameLine(); +#else + const bool log_to_tty = false; +#endif + const bool log_to_file = Button("Log To File"); SameLine(); + const bool log_to_clipboard = Button("Log To Clipboard"); SameLine(); + PushAllowKeyboardFocus(false); + SetNextItemWidth(80.0f); + SliderInt("Default Depth", &g.LogDepthToExpandDefault, 0, 9, NULL); + PopAllowKeyboardFocus(); + PopID(); + + // Start logging at the end of the function so that the buttons don't appear in the log + if (log_to_tty) + LogToTTY(); + if (log_to_file) + LogToFile(); + if (log_to_clipboard) + LogToClipboard(); +} + + +//----------------------------------------------------------------------------- +// [SECTION] SETTINGS +//----------------------------------------------------------------------------- +// - UpdateSettings() [Internal] +// - MarkIniSettingsDirty() [Internal] +// - CreateNewWindowSettings() [Internal] +// - FindWindowSettings() [Internal] +// - FindOrCreateWindowSettings() [Internal] +// - FindSettingsHandler() [Internal] +// - ClearIniSettings() [Internal] +// - LoadIniSettingsFromDisk() +// - LoadIniSettingsFromMemory() +// - SaveIniSettingsToDisk() +// - SaveIniSettingsToMemory() +// - WindowSettingsHandler_***() [Internal] +//----------------------------------------------------------------------------- + +// Called by NewFrame() +void ImGui::UpdateSettings() +{ + // Load settings on first frame (if not explicitly loaded manually before) + ImGuiContext& g = *GImGui; + if (!g.SettingsLoaded) + { + IM_ASSERT(g.SettingsWindows.empty()); + if (g.IO.IniFilename) + 
LoadIniSettingsFromDisk(g.IO.IniFilename); + g.SettingsLoaded = true; + } + + // Save settings (with a delay after the last modification, so we don't spam disk too much) + if (g.SettingsDirtyTimer > 0.0f) + { + g.SettingsDirtyTimer -= g.IO.DeltaTime; + if (g.SettingsDirtyTimer <= 0.0f) + { + if (g.IO.IniFilename != NULL) + SaveIniSettingsToDisk(g.IO.IniFilename); + else + g.IO.WantSaveIniSettings = true; // Let user know they can call SaveIniSettingsToMemory(). user will need to clear io.WantSaveIniSettings themselves. + g.SettingsDirtyTimer = 0.0f; + } + } +} + +void ImGui::MarkIniSettingsDirty() +{ + ImGuiContext& g = *GImGui; + if (g.SettingsDirtyTimer <= 0.0f) + g.SettingsDirtyTimer = g.IO.IniSavingRate; +} + +void ImGui::MarkIniSettingsDirty(ImGuiWindow* window) +{ + ImGuiContext& g = *GImGui; + if (!(window->Flags & ImGuiWindowFlags_NoSavedSettings)) + if (g.SettingsDirtyTimer <= 0.0f) + g.SettingsDirtyTimer = g.IO.IniSavingRate; +} + +ImGuiWindowSettings* ImGui::CreateNewWindowSettings(const char* name) +{ + ImGuiContext& g = *GImGui; + +#if !IMGUI_DEBUG_INI_SETTINGS + // Skip to the "###" marker if any. We don't skip past to match the behavior of GetID() + // Preserve the full string when IMGUI_DEBUG_INI_SETTINGS is set to make .ini inspection easier. 
+ if (const char* p = strstr(name, "###")) + name = p; +#endif + const size_t name_len = strlen(name); + + // Allocate chunk + const size_t chunk_size = sizeof(ImGuiWindowSettings) + name_len + 1; + ImGuiWindowSettings* settings = g.SettingsWindows.alloc_chunk(chunk_size); + IM_PLACEMENT_NEW(settings) ImGuiWindowSettings(); + settings->ID = ImHashStr(name, name_len); + memcpy(settings->GetName(), name, name_len + 1); // Store with zero terminator + + return settings; +} + +ImGuiWindowSettings* ImGui::FindWindowSettings(ImGuiID id) +{ + ImGuiContext& g = *GImGui; + for (ImGuiWindowSettings* settings = g.SettingsWindows.begin(); settings != NULL; settings = g.SettingsWindows.next_chunk(settings)) + if (settings->ID == id) + return settings; + return NULL; +} + +ImGuiWindowSettings* ImGui::FindOrCreateWindowSettings(const char* name) +{ + if (ImGuiWindowSettings* settings = FindWindowSettings(ImHashStr(name))) + return settings; + return CreateNewWindowSettings(name); +} + +ImGuiSettingsHandler* ImGui::FindSettingsHandler(const char* type_name) +{ + ImGuiContext& g = *GImGui; + const ImGuiID type_hash = ImHashStr(type_name); + for (int handler_n = 0; handler_n < g.SettingsHandlers.Size; handler_n++) + if (g.SettingsHandlers[handler_n].TypeHash == type_hash) + return &g.SettingsHandlers[handler_n]; + return NULL; +} + +void ImGui::ClearIniSettings() +{ + ImGuiContext& g = *GImGui; + g.SettingsIniData.clear(); + for (int handler_n = 0; handler_n < g.SettingsHandlers.Size; handler_n++) + if (g.SettingsHandlers[handler_n].ClearAllFn) + g.SettingsHandlers[handler_n].ClearAllFn(&g, &g.SettingsHandlers[handler_n]); +} + +void ImGui::LoadIniSettingsFromDisk(const char* ini_filename) +{ + size_t file_data_size = 0; + char* file_data = (char*)ImFileLoadToMemory(ini_filename, "rb", &file_data_size); + if (!file_data) + return; + LoadIniSettingsFromMemory(file_data, (size_t)file_data_size); + IM_FREE(file_data); +} + +// Zero-tolerance, no error reporting, cheap .ini parsing 
+void ImGui::LoadIniSettingsFromMemory(const char* ini_data, size_t ini_size) +{ + ImGuiContext& g = *GImGui; + IM_ASSERT(g.Initialized); + //IM_ASSERT(!g.WithinFrameScope && "Cannot be called between NewFrame() and EndFrame()"); + //IM_ASSERT(g.SettingsLoaded == false && g.FrameCount == 0); + + // For user convenience, we allow passing a non zero-terminated string (hence the ini_size parameter). + // For our convenience and to make the code simpler, we'll also write zero-terminators within the buffer. So let's create a writable copy.. + if (ini_size == 0) + ini_size = strlen(ini_data); + g.SettingsIniData.Buf.resize((int)ini_size + 1); + char* const buf = g.SettingsIniData.Buf.Data; + char* const buf_end = buf + ini_size; + memcpy(buf, ini_data, ini_size); + buf_end[0] = 0; + + // Call pre-read handlers + // Some types will clear their data (e.g. dock information) some types will allow merge/override (window) + for (int handler_n = 0; handler_n < g.SettingsHandlers.Size; handler_n++) + if (g.SettingsHandlers[handler_n].ReadInitFn) + g.SettingsHandlers[handler_n].ReadInitFn(&g, &g.SettingsHandlers[handler_n]); + + void* entry_data = NULL; + ImGuiSettingsHandler* entry_handler = NULL; + + char* line_end = NULL; + for (char* line = buf; line < buf_end; line = line_end + 1) + { + // Skip new lines markers, then find end of the line + while (*line == '\n' || *line == '\r') + line++; + line_end = line; + while (line_end < buf_end && *line_end != '\n' && *line_end != '\r') + line_end++; + line_end[0] = 0; + if (line[0] == ';') + continue; + if (line[0] == '[' && line_end > line && line_end[-1] == ']') + { + // Parse "[Type][Name]". Note that 'Name' can itself contains [] characters, which is acceptable with the current format and parsing code. + line_end[-1] = 0; + const char* name_end = line_end - 1; + const char* type_start = line + 1; + char* type_end = (char*)(void*)ImStrchrRange(type_start, name_end, ']'); + const char* name_start = type_end ? 
ImStrchrRange(type_end + 1, name_end, '[') : NULL; + if (!type_end || !name_start) + continue; + *type_end = 0; // Overwrite first ']' + name_start++; // Skip second '[' + entry_handler = FindSettingsHandler(type_start); + entry_data = entry_handler ? entry_handler->ReadOpenFn(&g, entry_handler, name_start) : NULL; + } + else if (entry_handler != NULL && entry_data != NULL) + { + // Let type handler parse the line + entry_handler->ReadLineFn(&g, entry_handler, entry_data, line); + } + } + g.SettingsLoaded = true; + + // [DEBUG] Restore untouched copy so it can be browsed in Metrics (not strictly necessary) + memcpy(buf, ini_data, ini_size); + + // Call post-read handlers + for (int handler_n = 0; handler_n < g.SettingsHandlers.Size; handler_n++) + if (g.SettingsHandlers[handler_n].ApplyAllFn) + g.SettingsHandlers[handler_n].ApplyAllFn(&g, &g.SettingsHandlers[handler_n]); +} + +void ImGui::SaveIniSettingsToDisk(const char* ini_filename) +{ + ImGuiContext& g = *GImGui; + g.SettingsDirtyTimer = 0.0f; + if (!ini_filename) + return; + + size_t ini_data_size = 0; + const char* ini_data = SaveIniSettingsToMemory(&ini_data_size); + ImFileHandle f = ImFileOpen(ini_filename, "wt"); + if (!f) + return; + ImFileWrite(ini_data, sizeof(char), ini_data_size, f); + ImFileClose(f); +} + +// Call registered handlers (e.g. 
SettingsHandlerWindow_WriteAll() + custom handlers) to write their stuff into a text buffer +const char* ImGui::SaveIniSettingsToMemory(size_t* out_size) +{ + ImGuiContext& g = *GImGui; + g.SettingsDirtyTimer = 0.0f; + g.SettingsIniData.Buf.resize(0); + g.SettingsIniData.Buf.push_back(0); + for (int handler_n = 0; handler_n < g.SettingsHandlers.Size; handler_n++) + { + ImGuiSettingsHandler* handler = &g.SettingsHandlers[handler_n]; + handler->WriteAllFn(&g, handler, &g.SettingsIniData); + } + if (out_size) + *out_size = (size_t)g.SettingsIniData.size(); + return g.SettingsIniData.c_str(); +} + +static void WindowSettingsHandler_ClearAll(ImGuiContext* ctx, ImGuiSettingsHandler*) +{ + ImGuiContext& g = *ctx; + for (int i = 0; i != g.Windows.Size; i++) + g.Windows[i]->SettingsOffset = -1; + g.SettingsWindows.clear(); +} + +static void* WindowSettingsHandler_ReadOpen(ImGuiContext*, ImGuiSettingsHandler*, const char* name) +{ + ImGuiWindowSettings* settings = ImGui::FindOrCreateWindowSettings(name); + ImGuiID id = settings->ID; + *settings = ImGuiWindowSettings(); // Clear existing if recycling previous entry + settings->ID = id; + settings->WantApply = true; + return (void*)settings; +} + +static void WindowSettingsHandler_ReadLine(ImGuiContext*, ImGuiSettingsHandler*, void* entry, const char* line) +{ + ImGuiWindowSettings* settings = (ImGuiWindowSettings*)entry; + int x, y; + int i; + if (sscanf(line, "Pos=%i,%i", &x, &y) == 2) { settings->Pos = ImVec2ih((short)x, (short)y); } + else if (sscanf(line, "Size=%i,%i", &x, &y) == 2) { settings->Size = ImVec2ih((short)x, (short)y); } + else if (sscanf(line, "Collapsed=%d", &i) == 1) { settings->Collapsed = (i != 0); } +} + +// Apply to existing windows (if any) +static void WindowSettingsHandler_ApplyAll(ImGuiContext* ctx, ImGuiSettingsHandler*) +{ + ImGuiContext& g = *ctx; + for (ImGuiWindowSettings* settings = g.SettingsWindows.begin(); settings != NULL; settings = g.SettingsWindows.next_chunk(settings)) + if 
(settings->WantApply) + { + if (ImGuiWindow* window = ImGui::FindWindowByID(settings->ID)) + ApplyWindowSettings(window, settings); + settings->WantApply = false; + } +} + +static void WindowSettingsHandler_WriteAll(ImGuiContext* ctx, ImGuiSettingsHandler* handler, ImGuiTextBuffer* buf) +{ + // Gather data from windows that were active during this session + // (if a window wasn't opened in this session we preserve its settings) + ImGuiContext& g = *ctx; + for (int i = 0; i != g.Windows.Size; i++) + { + ImGuiWindow* window = g.Windows[i]; + if (window->Flags & ImGuiWindowFlags_NoSavedSettings) + continue; + + ImGuiWindowSettings* settings = (window->SettingsOffset != -1) ? g.SettingsWindows.ptr_from_offset(window->SettingsOffset) : ImGui::FindWindowSettings(window->ID); + if (!settings) + { + settings = ImGui::CreateNewWindowSettings(window->Name); + window->SettingsOffset = g.SettingsWindows.offset_from_ptr(settings); + } + IM_ASSERT(settings->ID == window->ID); + settings->Pos = ImVec2ih((short)window->Pos.x, (short)window->Pos.y); + settings->Size = ImVec2ih((short)window->SizeFull.x, (short)window->SizeFull.y); + settings->Collapsed = window->Collapsed; + } + + // Write to text buffer + buf->reserve(buf->size() + g.SettingsWindows.size() * 6); // ballpark reserve + for (ImGuiWindowSettings* settings = g.SettingsWindows.begin(); settings != NULL; settings = g.SettingsWindows.next_chunk(settings)) + { + const char* settings_name = settings->GetName(); + buf->appendf("[%s][%s]\n", handler->TypeName, settings_name); + buf->appendf("Pos=%d,%d\n", settings->Pos.x, settings->Pos.y); + buf->appendf("Size=%d,%d\n", settings->Size.x, settings->Size.y); + buf->appendf("Collapsed=%d\n", settings->Collapsed); + buf->append("\n"); + } +} + + +//----------------------------------------------------------------------------- +// [SECTION] VIEWPORTS, PLATFORM WINDOWS +//----------------------------------------------------------------------------- + +// (this section is filled in 
the 'docking' branch) + + +//----------------------------------------------------------------------------- +// [SECTION] DOCKING +//----------------------------------------------------------------------------- + +// (this section is filled in the 'docking' branch) + + +//----------------------------------------------------------------------------- +// [SECTION] PLATFORM DEPENDENT HELPERS +//----------------------------------------------------------------------------- + +#if defined(_WIN32) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) && !defined(IMGUI_DISABLE_WIN32_DEFAULT_CLIPBOARD_FUNCTIONS) + +#ifdef _MSC_VER +#pragma comment(lib, "user32") +#pragma comment(lib, "kernel32") +#endif + +// Win32 clipboard implementation +// We use g.ClipboardHandlerData for temporary storage to ensure it is freed on Shutdown() +static const char* GetClipboardTextFn_DefaultImpl(void*) +{ + ImGuiContext& g = *GImGui; + g.ClipboardHandlerData.clear(); + if (!::OpenClipboard(NULL)) + return NULL; + HANDLE wbuf_handle = ::GetClipboardData(CF_UNICODETEXT); + if (wbuf_handle == NULL) + { + ::CloseClipboard(); + return NULL; + } + if (const WCHAR* wbuf_global = (const WCHAR*)::GlobalLock(wbuf_handle)) + { + int buf_len = ::WideCharToMultiByte(CP_UTF8, 0, wbuf_global, -1, NULL, 0, NULL, NULL); + g.ClipboardHandlerData.resize(buf_len); + ::WideCharToMultiByte(CP_UTF8, 0, wbuf_global, -1, g.ClipboardHandlerData.Data, buf_len, NULL, NULL); + } + ::GlobalUnlock(wbuf_handle); + ::CloseClipboard(); + return g.ClipboardHandlerData.Data; +} + +static void SetClipboardTextFn_DefaultImpl(void*, const char* text) +{ + if (!::OpenClipboard(NULL)) + return; + const int wbuf_length = ::MultiByteToWideChar(CP_UTF8, 0, text, -1, NULL, 0); + HGLOBAL wbuf_handle = ::GlobalAlloc(GMEM_MOVEABLE, (SIZE_T)wbuf_length * sizeof(WCHAR)); + if (wbuf_handle == NULL) + { + ::CloseClipboard(); + return; + } + WCHAR* wbuf_global = (WCHAR*)::GlobalLock(wbuf_handle); + ::MultiByteToWideChar(CP_UTF8, 0, text, -1, 
wbuf_global, wbuf_length); + ::GlobalUnlock(wbuf_handle); + ::EmptyClipboard(); + if (::SetClipboardData(CF_UNICODETEXT, wbuf_handle) == NULL) + ::GlobalFree(wbuf_handle); + ::CloseClipboard(); +} + +#elif defined(__APPLE__) && TARGET_OS_OSX && defined(IMGUI_ENABLE_OSX_DEFAULT_CLIPBOARD_FUNCTIONS) + +#include // Use old API to avoid need for separate .mm file +static PasteboardRef main_clipboard = 0; + +// OSX clipboard implementation +// If you enable this you will need to add '-framework ApplicationServices' to your linker command-line! +static void SetClipboardTextFn_DefaultImpl(void*, const char* text) +{ + if (!main_clipboard) + PasteboardCreate(kPasteboardClipboard, &main_clipboard); + PasteboardClear(main_clipboard); + CFDataRef cf_data = CFDataCreate(kCFAllocatorDefault, (const UInt8*)text, strlen(text)); + if (cf_data) + { + PasteboardPutItemFlavor(main_clipboard, (PasteboardItemID)1, CFSTR("public.utf8-plain-text"), cf_data, 0); + CFRelease(cf_data); + } +} + +static const char* GetClipboardTextFn_DefaultImpl(void*) +{ + if (!main_clipboard) + PasteboardCreate(kPasteboardClipboard, &main_clipboard); + PasteboardSynchronize(main_clipboard); + + ItemCount item_count = 0; + PasteboardGetItemCount(main_clipboard, &item_count); + for (ItemCount i = 0; i < item_count; i++) + { + PasteboardItemID item_id = 0; + PasteboardGetItemIdentifier(main_clipboard, i + 1, &item_id); + CFArrayRef flavor_type_array = 0; + PasteboardCopyItemFlavors(main_clipboard, item_id, &flavor_type_array); + for (CFIndex j = 0, nj = CFArrayGetCount(flavor_type_array); j < nj; j++) + { + CFDataRef cf_data; + if (PasteboardCopyItemFlavorData(main_clipboard, item_id, CFSTR("public.utf8-plain-text"), &cf_data) == noErr) + { + ImGuiContext& g = *GImGui; + g.ClipboardHandlerData.clear(); + int length = (int)CFDataGetLength(cf_data); + g.ClipboardHandlerData.resize(length + 1); + CFDataGetBytes(cf_data, CFRangeMake(0, length), (UInt8*)g.ClipboardHandlerData.Data); + 
g.ClipboardHandlerData[length] = 0; + CFRelease(cf_data); + return g.ClipboardHandlerData.Data; + } + } + } + return NULL; +} + +#else + +// Local Dear ImGui-only clipboard implementation, if user hasn't defined better clipboard handlers. +static const char* GetClipboardTextFn_DefaultImpl(void*) +{ + ImGuiContext& g = *GImGui; + return g.ClipboardHandlerData.empty() ? NULL : g.ClipboardHandlerData.begin(); +} + +static void SetClipboardTextFn_DefaultImpl(void*, const char* text) +{ + ImGuiContext& g = *GImGui; + g.ClipboardHandlerData.clear(); + const char* text_end = text + strlen(text); + g.ClipboardHandlerData.resize((int)(text_end - text) + 1); + memcpy(&g.ClipboardHandlerData[0], text, (size_t)(text_end - text)); + g.ClipboardHandlerData[(int)(text_end - text)] = 0; +} + +#endif + +// Win32 API IME support (for Asian languages, etc.) +#if defined(_WIN32) && !defined(__GNUC__) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) && !defined(IMGUI_DISABLE_WIN32_DEFAULT_IME_FUNCTIONS) + +#include +#ifdef _MSC_VER +#pragma comment(lib, "imm32") +#endif + +static void ImeSetInputScreenPosFn_DefaultImpl(int x, int y) +{ + // Notify OS Input Method Editor of text input position + ImGuiIO& io = ImGui::GetIO(); + if (HWND hwnd = (HWND)io.ImeWindowHandle) + if (HIMC himc = ::ImmGetContext(hwnd)) + { + COMPOSITIONFORM cf; + cf.ptCurrentPos.x = x; + cf.ptCurrentPos.y = y; + cf.dwStyle = CFS_FORCE_POSITION; + ::ImmSetCompositionWindow(himc, &cf); + ::ImmReleaseContext(hwnd, himc); + } +} + +#else + +static void ImeSetInputScreenPosFn_DefaultImpl(int, int) {} + +#endif + +//----------------------------------------------------------------------------- +// [SECTION] METRICS/DEBUGGER WINDOW +//----------------------------------------------------------------------------- +// - MetricsHelpMarker() [Internal] +// - ShowMetricsWindow() +// - DebugNodeColumns() [Internal] +// - DebugNodeDrawList() [Internal] +// - DebugNodeDrawCmdShowMeshAndBoundingBox() [Internal] +// - DebugNodeStorage() 
[Internal] +// - DebugNodeTabBar() [Internal] +// - DebugNodeWindow() [Internal] +// - DebugNodeWindowSettings() [Internal] +// - DebugNodeWindowsList() [Internal] +//----------------------------------------------------------------------------- + +#ifndef IMGUI_DISABLE_METRICS_WINDOW + +// Avoid naming collision with imgui_demo.cpp's HelpMarker() for unity builds. +static void MetricsHelpMarker(const char* desc) +{ + ImGui::TextDisabled("(?)"); + if (ImGui::IsItemHovered()) + { + ImGui::BeginTooltip(); + ImGui::PushTextWrapPos(ImGui::GetFontSize() * 35.0f); + ImGui::TextUnformatted(desc); + ImGui::PopTextWrapPos(); + ImGui::EndTooltip(); + } +} + +void ImGui::ShowMetricsWindow(bool* p_open) +{ + if (!Begin("Dear ImGui Metrics/Debugger", p_open)) + { + End(); + return; + } + + ImGuiContext& g = *GImGui; + ImGuiIO& io = g.IO; + ImGuiMetricsConfig* cfg = &g.DebugMetricsConfig; + + // Basic info + Text("Dear ImGui %s", ImGui::GetVersion()); + Text("Application average %.3f ms/frame (%.1f FPS)", 1000.0f / io.Framerate, io.Framerate); + Text("%d vertices, %d indices (%d triangles)", io.MetricsRenderVertices, io.MetricsRenderIndices, io.MetricsRenderIndices / 3); + Text("%d active windows (%d visible)", io.MetricsActiveWindows, io.MetricsRenderWindows); + Text("%d active allocations", io.MetricsActiveAllocations); + //SameLine(); if (SmallButton("GC")) { g.GcCompactAll = true; } + + Separator(); + + // Debugging enums + enum { WRT_OuterRect, WRT_OuterRectClipped, WRT_InnerRect, WRT_InnerClipRect, WRT_WorkRect, WRT_Content, WRT_ContentIdeal, WRT_ContentRegionRect, WRT_Count }; // Windows Rect Type + const char* wrt_rects_names[WRT_Count] = { "OuterRect", "OuterRectClipped", "InnerRect", "InnerClipRect", "WorkRect", "Content", "ContentIdeal", "ContentRegionRect" }; + enum { TRT_OuterRect, TRT_InnerRect, TRT_WorkRect, TRT_HostClipRect, TRT_InnerClipRect, TRT_BackgroundClipRect, TRT_ColumnsRect, TRT_ColumnsWorkRect, TRT_ColumnsClipRect, TRT_ColumnsContentHeadersUsed, 
TRT_ColumnsContentHeadersIdeal, TRT_ColumnsContentFrozen, TRT_ColumnsContentUnfrozen, TRT_Count }; // Tables Rect Type + const char* trt_rects_names[TRT_Count] = { "OuterRect", "InnerRect", "WorkRect", "HostClipRect", "InnerClipRect", "BackgroundClipRect", "ColumnsRect", "ColumnsWorkRect", "ColumnsClipRect", "ColumnsContentHeadersUsed", "ColumnsContentHeadersIdeal", "ColumnsContentFrozen", "ColumnsContentUnfrozen" }; + if (cfg->ShowWindowsRectsType < 0) + cfg->ShowWindowsRectsType = WRT_WorkRect; + if (cfg->ShowTablesRectsType < 0) + cfg->ShowTablesRectsType = TRT_WorkRect; + + struct Funcs + { + static ImRect GetTableRect(ImGuiTable* table, int rect_type, int n) + { + if (rect_type == TRT_OuterRect) { return table->OuterRect; } + else if (rect_type == TRT_InnerRect) { return table->InnerRect; } + else if (rect_type == TRT_WorkRect) { return table->WorkRect; } + else if (rect_type == TRT_HostClipRect) { return table->HostClipRect; } + else if (rect_type == TRT_InnerClipRect) { return table->InnerClipRect; } + else if (rect_type == TRT_BackgroundClipRect) { return table->BgClipRect; } + else if (rect_type == TRT_ColumnsRect) { ImGuiTableColumn* c = &table->Columns[n]; return ImRect(c->MinX, table->InnerClipRect.Min.y, c->MaxX, table->InnerClipRect.Min.y + table->LastOuterHeight); } + else if (rect_type == TRT_ColumnsWorkRect) { ImGuiTableColumn* c = &table->Columns[n]; return ImRect(c->WorkMinX, table->WorkRect.Min.y, c->WorkMaxX, table->WorkRect.Max.y); } + else if (rect_type == TRT_ColumnsClipRect) { ImGuiTableColumn* c = &table->Columns[n]; return c->ClipRect; } + else if (rect_type == TRT_ColumnsContentHeadersUsed){ ImGuiTableColumn* c = &table->Columns[n]; return ImRect(c->WorkMinX, table->InnerClipRect.Min.y, c->ContentMaxXHeadersUsed, table->InnerClipRect.Min.y + table->LastFirstRowHeight); } // Note: y1/y2 not always accurate + else if (rect_type == TRT_ColumnsContentHeadersIdeal){ImGuiTableColumn* c = &table->Columns[n]; return ImRect(c->WorkMinX, 
table->InnerClipRect.Min.y, c->ContentMaxXHeadersIdeal, table->InnerClipRect.Min.y + table->LastFirstRowHeight); } + else if (rect_type == TRT_ColumnsContentFrozen) { ImGuiTableColumn* c = &table->Columns[n]; return ImRect(c->WorkMinX, table->InnerClipRect.Min.y, c->ContentMaxXFrozen, table->InnerClipRect.Min.y + table->LastFirstRowHeight); } + else if (rect_type == TRT_ColumnsContentUnfrozen) { ImGuiTableColumn* c = &table->Columns[n]; return ImRect(c->WorkMinX, table->InnerClipRect.Min.y + table->LastFirstRowHeight, c->ContentMaxXUnfrozen, table->InnerClipRect.Max.y); } + IM_ASSERT(0); + return ImRect(); + } + + static ImRect GetWindowRect(ImGuiWindow* window, int rect_type) + { + if (rect_type == WRT_OuterRect) { return window->Rect(); } + else if (rect_type == WRT_OuterRectClipped) { return window->OuterRectClipped; } + else if (rect_type == WRT_InnerRect) { return window->InnerRect; } + else if (rect_type == WRT_InnerClipRect) { return window->InnerClipRect; } + else if (rect_type == WRT_WorkRect) { return window->WorkRect; } + else if (rect_type == WRT_Content) { ImVec2 min = window->InnerRect.Min - window->Scroll + window->WindowPadding; return ImRect(min, min + window->ContentSize); } + else if (rect_type == WRT_ContentIdeal) { ImVec2 min = window->InnerRect.Min - window->Scroll + window->WindowPadding; return ImRect(min, min + window->ContentSizeIdeal); } + else if (rect_type == WRT_ContentRegionRect) { return window->ContentRegionRect; } + IM_ASSERT(0); + return ImRect(); + } + }; + + // Tools + if (TreeNode("Tools")) + { + // The Item Picker tool is super useful to visually select an item and break into the call-stack of where it was submitted. 
+ if (Button("Item Picker..")) + DebugStartItemPicker(); + SameLine(); + MetricsHelpMarker("Will call the IM_DEBUG_BREAK() macro to break in debugger.\nWarning: If you don't have a debugger attached, this will probably crash."); + + Checkbox("Show windows begin order", &cfg->ShowWindowsBeginOrder); + Checkbox("Show windows rectangles", &cfg->ShowWindowsRects); + SameLine(); + SetNextItemWidth(GetFontSize() * 12); + cfg->ShowWindowsRects |= Combo("##show_windows_rect_type", &cfg->ShowWindowsRectsType, wrt_rects_names, WRT_Count, WRT_Count); + if (cfg->ShowWindowsRects && g.NavWindow != NULL) + { + BulletText("'%s':", g.NavWindow->Name); + Indent(); + for (int rect_n = 0; rect_n < WRT_Count; rect_n++) + { + ImRect r = Funcs::GetWindowRect(g.NavWindow, rect_n); + Text("(%6.1f,%6.1f) (%6.1f,%6.1f) Size (%6.1f,%6.1f) %s", r.Min.x, r.Min.y, r.Max.x, r.Max.y, r.GetWidth(), r.GetHeight(), wrt_rects_names[rect_n]); + } + Unindent(); + } + Checkbox("Show ImDrawCmd mesh when hovering", &cfg->ShowDrawCmdMesh); + Checkbox("Show ImDrawCmd bounding boxes when hovering", &cfg->ShowDrawCmdBoundingBoxes); + + Checkbox("Show tables rectangles", &cfg->ShowTablesRects); + SameLine(); + SetNextItemWidth(GetFontSize() * 12); + cfg->ShowTablesRects |= Combo("##show_table_rects_type", &cfg->ShowTablesRectsType, trt_rects_names, TRT_Count, TRT_Count); + if (cfg->ShowTablesRects && g.NavWindow != NULL) + { + for (int table_n = 0; table_n < g.Tables.GetSize(); table_n++) + { + ImGuiTable* table = g.Tables.GetByIndex(table_n); + if (table->LastFrameActive < g.FrameCount - 1 || (table->OuterWindow != g.NavWindow && table->InnerWindow != g.NavWindow)) + continue; + + BulletText("Table 0x%08X (%d columns, in '%s')", table->ID, table->ColumnsCount, table->OuterWindow->Name); + if (IsItemHovered()) + GetForegroundDrawList()->AddRect(table->OuterRect.Min - ImVec2(1, 1), table->OuterRect.Max + ImVec2(1, 1), IM_COL32(255, 255, 0, 255), 0.0f, ~0, 2.0f); + Indent(); + char buf[128]; + for (int rect_n = 
0; rect_n < TRT_Count; rect_n++) + { + if (rect_n >= TRT_ColumnsRect) + { + if (rect_n != TRT_ColumnsRect && rect_n != TRT_ColumnsClipRect) + continue; + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + { + ImRect r = Funcs::GetTableRect(table, rect_n, column_n); + ImFormatString(buf, IM_ARRAYSIZE(buf), "(%6.1f,%6.1f) (%6.1f,%6.1f) Size (%6.1f,%6.1f) Col %d %s", r.Min.x, r.Min.y, r.Max.x, r.Max.y, r.GetWidth(), r.GetHeight(), column_n, trt_rects_names[rect_n]); + Selectable(buf); + if (IsItemHovered()) + GetForegroundDrawList()->AddRect(r.Min - ImVec2(1, 1), r.Max + ImVec2(1, 1), IM_COL32(255, 255, 0, 255), 0.0f, ~0, 2.0f); + } + } + else + { + ImRect r = Funcs::GetTableRect(table, rect_n, -1); + ImFormatString(buf, IM_ARRAYSIZE(buf), "(%6.1f,%6.1f) (%6.1f,%6.1f) Size (%6.1f,%6.1f) %s", r.Min.x, r.Min.y, r.Max.x, r.Max.y, r.GetWidth(), r.GetHeight(), trt_rects_names[rect_n]); + Selectable(buf); + if (IsItemHovered()) + GetForegroundDrawList()->AddRect(r.Min - ImVec2(1, 1), r.Max + ImVec2(1, 1), IM_COL32(255, 255, 0, 255), 0.0f, ~0, 2.0f); + } + } + Unindent(); + } + } + + TreePop(); + } + + // Contents + DebugNodeWindowsList(&g.Windows, "Windows"); + //DebugNodeWindowList(&g.WindowsFocusOrder, "WindowsFocusOrder"); + if (TreeNode("DrawLists", "Active DrawLists (%d)", g.DrawDataBuilder.Layers[0].Size)) + { + for (int i = 0; i < g.DrawDataBuilder.Layers[0].Size; i++) + DebugNodeDrawList(NULL, g.DrawDataBuilder.Layers[0][i], "DrawList"); + TreePop(); + } + + // Details for Popups + if (TreeNode("Popups", "Popups (%d)", g.OpenPopupStack.Size)) + { + for (int i = 0; i < g.OpenPopupStack.Size; i++) + { + ImGuiWindow* window = g.OpenPopupStack[i].Window; + BulletText("PopupID: %08x, Window: '%s'%s%s", g.OpenPopupStack[i].PopupId, window ? window->Name : "NULL", window && (window->Flags & ImGuiWindowFlags_ChildWindow) ? " ChildWindow" : "", window && (window->Flags & ImGuiWindowFlags_ChildMenu) ? 
" ChildMenu" : ""); + } + TreePop(); + } + + // Details for TabBars + if (TreeNode("TabBars", "Tab Bars (%d)", g.TabBars.GetSize())) + { + for (int n = 0; n < g.TabBars.GetSize(); n++) + DebugNodeTabBar(g.TabBars.GetByIndex(n), "TabBar"); + TreePop(); + } + + // Details for Tables +#ifdef IMGUI_HAS_TABLE + if (TreeNode("Tables", "Tables (%d)", g.Tables.GetSize())) + { + for (int n = 0; n < g.Tables.GetSize(); n++) + DebugNodeTable(g.Tables.GetByIndex(n)); + TreePop(); + } +#endif // #ifdef IMGUI_HAS_TABLE + + // Details for Docking +#ifdef IMGUI_HAS_DOCK + if (TreeNode("Docking")) + { + TreePop(); + } +#endif // #ifdef IMGUI_HAS_DOCK + + // Settings + if (TreeNode("Settings")) + { + if (SmallButton("Clear")) + ClearIniSettings(); + SameLine(); + if (SmallButton("Save to memory")) + SaveIniSettingsToMemory(); + SameLine(); + if (SmallButton("Save to disk")) + SaveIniSettingsToDisk(g.IO.IniFilename); + SameLine(); + if (g.IO.IniFilename) + Text("\"%s\"", g.IO.IniFilename); + else + TextUnformatted(""); + Text("SettingsDirtyTimer %.2f", g.SettingsDirtyTimer); + if (TreeNode("SettingsHandlers", "Settings handlers: (%d)", g.SettingsHandlers.Size)) + { + for (int n = 0; n < g.SettingsHandlers.Size; n++) + BulletText("%s", g.SettingsHandlers[n].TypeName); + TreePop(); + } + if (TreeNode("SettingsWindows", "Settings packed data: Windows: %d bytes", g.SettingsWindows.size())) + { + for (ImGuiWindowSettings* settings = g.SettingsWindows.begin(); settings != NULL; settings = g.SettingsWindows.next_chunk(settings)) + DebugNodeWindowSettings(settings); + TreePop(); + } + +#ifdef IMGUI_HAS_TABLE + if (TreeNode("SettingsTables", "Settings packed data: Tables: %d bytes", g.SettingsTables.size())) + { + for (ImGuiTableSettings* settings = g.SettingsTables.begin(); settings != NULL; settings = g.SettingsTables.next_chunk(settings)) + DebugNodeTableSettings(settings); + TreePop(); + } +#endif // #ifdef IMGUI_HAS_TABLE + +#ifdef IMGUI_HAS_DOCK +#endif // #ifdef IMGUI_HAS_DOCK + + if 
(TreeNode("SettingsIniData", "Settings unpacked data (.ini): %d bytes", g.SettingsIniData.size())) + { + InputTextMultiline("##Ini", (char*)(void*)g.SettingsIniData.c_str(), g.SettingsIniData.Buf.Size, ImVec2(-FLT_MIN, GetTextLineHeight() * 20), ImGuiInputTextFlags_ReadOnly); + TreePop(); + } + TreePop(); + } + + // Misc Details + if (TreeNode("Internal state")) + { + const char* input_source_names[] = { "None", "Mouse", "Nav", "NavKeyboard", "NavGamepad" }; IM_ASSERT(IM_ARRAYSIZE(input_source_names) == ImGuiInputSource_COUNT); + + Text("WINDOWING"); + Indent(); + Text("HoveredWindow: '%s'", g.HoveredWindow ? g.HoveredWindow->Name : "NULL"); + Text("HoveredRootWindow: '%s'", g.HoveredRootWindow ? g.HoveredRootWindow->Name : "NULL"); + Text("HoveredWindowUnderMovingWindow: '%s'", g.HoveredWindowUnderMovingWindow ? g.HoveredWindowUnderMovingWindow->Name : "NULL"); + Text("MovingWindow: '%s'", g.MovingWindow ? g.MovingWindow->Name : "NULL"); + Unindent(); + + Text("ITEMS"); + Indent(); + Text("ActiveId: 0x%08X/0x%08X (%.2f sec), AllowOverlap: %d, Source: %s", g.ActiveId, g.ActiveIdPreviousFrame, g.ActiveIdTimer, g.ActiveIdAllowOverlap, input_source_names[g.ActiveIdSource]); + Text("ActiveIdWindow: '%s'", g.ActiveIdWindow ? g.ActiveIdWindow->Name : "NULL"); + Text("HoveredId: 0x%08X/0x%08X (%.2f sec), AllowOverlap: %d", g.HoveredId, g.HoveredIdPreviousFrame, g.HoveredIdTimer, g.HoveredIdAllowOverlap); // Data is "in-flight" so depending on when the Metrics window is called we may see current frame information or not + Text("DragDrop: %d, SourceId = 0x%08X, Payload \"%s\" (%d bytes)", g.DragDropActive, g.DragDropPayload.SourceId, g.DragDropPayload.DataType, g.DragDropPayload.DataSize); + Unindent(); + + Text("NAV,FOCUS"); + Indent(); + Text("NavWindow: '%s'", g.NavWindow ? 
g.NavWindow->Name : "NULL"); + Text("NavId: 0x%08X, NavLayer: %d", g.NavId, g.NavLayer); + Text("NavInputSource: %s", input_source_names[g.NavInputSource]); + Text("NavActive: %d, NavVisible: %d", g.IO.NavActive, g.IO.NavVisible); + Text("NavActivateId: 0x%08X, NavInputId: 0x%08X", g.NavActivateId, g.NavInputId); + Text("NavDisableHighlight: %d, NavDisableMouseHover: %d", g.NavDisableHighlight, g.NavDisableMouseHover); + Text("NavFocusScopeId = 0x%08X", g.NavFocusScopeId); + Text("NavWindowingTarget: '%s'", g.NavWindowingTarget ? g.NavWindowingTarget->Name : "NULL"); + Unindent(); + + TreePop(); + } + + // Overlay: Display windows Rectangles and Begin Order + if (cfg->ShowWindowsRects || cfg->ShowWindowsBeginOrder) + { + for (int n = 0; n < g.Windows.Size; n++) + { + ImGuiWindow* window = g.Windows[n]; + if (!window->WasActive) + continue; + ImDrawList* draw_list = GetForegroundDrawList(window); + if (cfg->ShowWindowsRects) + { + ImRect r = Funcs::GetWindowRect(window, cfg->ShowWindowsRectsType); + draw_list->AddRect(r.Min, r.Max, IM_COL32(255, 0, 128, 255)); + } + if (cfg->ShowWindowsBeginOrder && !(window->Flags & ImGuiWindowFlags_ChildWindow)) + { + char buf[32]; + ImFormatString(buf, IM_ARRAYSIZE(buf), "%d", window->BeginOrderWithinContext); + float font_size = GetFontSize(); + draw_list->AddRectFilled(window->Pos, window->Pos + ImVec2(font_size, font_size), IM_COL32(200, 100, 100, 255)); + draw_list->AddText(window->Pos, IM_COL32(255, 255, 255, 255), buf); + } + } + } + +#ifdef IMGUI_HAS_TABLE + // Overlay: Display Tables Rectangles + if (cfg->ShowTablesRects) + { + for (int table_n = 0; table_n < g.Tables.GetSize(); table_n++) + { + ImGuiTable* table = g.Tables.GetByIndex(table_n); + if (table->LastFrameActive < g.FrameCount - 1) + continue; + ImDrawList* draw_list = GetForegroundDrawList(table->OuterWindow); + if (cfg->ShowTablesRectsType >= TRT_ColumnsRect) + { + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + { + ImRect r = 
Funcs::GetTableRect(table, cfg->ShowTablesRectsType, column_n); + ImU32 col = (table->HoveredColumnBody == column_n) ? IM_COL32(255, 255, 128, 255) : IM_COL32(255, 0, 128, 255); + float thickness = (table->HoveredColumnBody == column_n) ? 3.0f : 1.0f; + draw_list->AddRect(r.Min, r.Max, col, 0.0f, ~0, thickness); + } + } + else + { + ImRect r = Funcs::GetTableRect(table, cfg->ShowTablesRectsType, -1); + draw_list->AddRect(r.Min, r.Max, IM_COL32(255, 0, 128, 255)); + } + } + } +#endif // #ifdef IMGUI_HAS_TABLE + +#ifdef IMGUI_HAS_DOCK + // Overlay: Display Docking info + if (show_docking_nodes && g.IO.KeyCtrl) + { + } +#endif // #ifdef IMGUI_HAS_DOCK + + End(); +} + +// [DEBUG] Display contents of Columns +void ImGui::DebugNodeColumns(ImGuiOldColumns* columns) +{ + if (!TreeNode((void*)(uintptr_t)columns->ID, "Columns Id: 0x%08X, Count: %d, Flags: 0x%04X", columns->ID, columns->Count, columns->Flags)) + return; + BulletText("Width: %.1f (MinX: %.1f, MaxX: %.1f)", columns->OffMaxX - columns->OffMinX, columns->OffMinX, columns->OffMaxX); + for (int column_n = 0; column_n < columns->Columns.Size; column_n++) + BulletText("Column %02d: OffsetNorm %.3f (= %.1f px)", column_n, columns->Columns[column_n].OffsetNorm, GetColumnOffsetFromNorm(columns, columns->Columns[column_n].OffsetNorm)); + TreePop(); +} + +// [DEBUG] Display contents of ImDrawList +void ImGui::DebugNodeDrawList(ImGuiWindow* window, const ImDrawList* draw_list, const char* label) +{ + ImGuiContext& g = *GImGui; + ImGuiMetricsConfig* cfg = &g.DebugMetricsConfig; + int cmd_count = draw_list->CmdBuffer.Size; + if (cmd_count > 0 && draw_list->CmdBuffer.back().ElemCount == 0 && draw_list->CmdBuffer.back().UserCallback == NULL) + cmd_count--; + bool node_open = TreeNode(draw_list, "%s: '%s' %d vtx, %d indices, %d cmds", label, draw_list->_OwnerName ? 
draw_list->_OwnerName : "", draw_list->VtxBuffer.Size, draw_list->IdxBuffer.Size, cmd_count); + if (draw_list == GetWindowDrawList()) + { + SameLine(); + TextColored(ImVec4(1.0f, 0.4f, 0.4f, 1.0f), "CURRENTLY APPENDING"); // Can't display stats for active draw list! (we don't have the data double-buffered) + if (node_open) + TreePop(); + return; + } + + ImDrawList* fg_draw_list = GetForegroundDrawList(window); // Render additional visuals into the top-most draw list + if (window && IsItemHovered()) + fg_draw_list->AddRect(window->Pos, window->Pos + window->Size, IM_COL32(255, 255, 0, 255)); + if (!node_open) + return; + + if (window && !window->WasActive) + TextDisabled("Warning: owning Window is inactive. This DrawList is not being rendered!"); + + for (const ImDrawCmd* pcmd = draw_list->CmdBuffer.Data; pcmd < draw_list->CmdBuffer.Data + cmd_count; pcmd++) + { + if (pcmd->UserCallback) + { + BulletText("Callback %p, user_data %p", pcmd->UserCallback, pcmd->UserCallbackData); + continue; + } + + char buf[300]; + ImFormatString(buf, IM_ARRAYSIZE(buf), "DrawCmd:%5d tris, Tex 0x%p, ClipRect (%4.0f,%4.0f)-(%4.0f,%4.0f)", + pcmd->ElemCount / 3, (void*)(intptr_t)pcmd->TextureId, + pcmd->ClipRect.x, pcmd->ClipRect.y, pcmd->ClipRect.z, pcmd->ClipRect.w); + bool pcmd_node_open = TreeNode((void*)(pcmd - draw_list->CmdBuffer.begin()), "%s", buf); + if (IsItemHovered() && (cfg->ShowDrawCmdMesh || cfg->ShowDrawCmdBoundingBoxes) && fg_draw_list) + DebugNodeDrawCmdShowMeshAndBoundingBox(window, draw_list, pcmd, cfg->ShowDrawCmdMesh, cfg->ShowDrawCmdBoundingBoxes); + if (!pcmd_node_open) + continue; + + // Calculate approximate coverage area (touched pixel count) + // This will be in pixels squared as long there's no post-scaling happening to the renderer output. + const ImDrawIdx* idx_buffer = (draw_list->IdxBuffer.Size > 0) ? 
draw_list->IdxBuffer.Data : NULL; + const ImDrawVert* vtx_buffer = draw_list->VtxBuffer.Data + pcmd->VtxOffset; + float total_area = 0.0f; + for (unsigned int idx_n = pcmd->IdxOffset; idx_n < pcmd->IdxOffset + pcmd->ElemCount; ) + { + ImVec2 triangle[3]; + for (int n = 0; n < 3; n++, idx_n++) + triangle[n] = vtx_buffer[idx_buffer ? idx_buffer[idx_n] : idx_n].pos; + total_area += ImTriangleArea(triangle[0], triangle[1], triangle[2]); + } + + // Display vertex information summary. Hover to get all triangles drawn in wire-frame + ImFormatString(buf, IM_ARRAYSIZE(buf), "Mesh: ElemCount: %d, VtxOffset: +%d, IdxOffset: +%d, Area: ~%0.f px", pcmd->ElemCount, pcmd->VtxOffset, pcmd->IdxOffset, total_area); + Selectable(buf); + if (IsItemHovered() && fg_draw_list) + DebugNodeDrawCmdShowMeshAndBoundingBox(window, draw_list, pcmd, true, false); + + // Display individual triangles/vertices. Hover on to get the corresponding triangle highlighted. + ImGuiListClipper clipper; + clipper.Begin(pcmd->ElemCount / 3); // Manually coarse clip our print out of individual vertices to save CPU, only items that may be visible. + while (clipper.Step()) + for (int prim = clipper.DisplayStart, idx_i = pcmd->IdxOffset + clipper.DisplayStart * 3; prim < clipper.DisplayEnd; prim++) + { + char* buf_p = buf, * buf_end = buf + IM_ARRAYSIZE(buf); + ImVec2 triangle[3]; + for (int n = 0; n < 3; n++, idx_i++) + { + const ImDrawVert& v = vtx_buffer[idx_buffer ? idx_buffer[idx_i] : idx_i]; + triangle[n] = v.pos; + buf_p += ImFormatString(buf_p, buf_end - buf_p, "%s %04d: pos (%8.2f,%8.2f), uv (%.6f,%.6f), col %08X\n", + (n == 0) ? "Vert:" : " ", idx_i, v.pos.x, v.pos.y, v.uv.x, v.uv.y, v.col); + } + + Selectable(buf, false); + if (fg_draw_list && IsItemHovered()) + { + ImDrawListFlags backup_flags = fg_draw_list->Flags; + fg_draw_list->Flags &= ~ImDrawListFlags_AntiAliasedLines; // Disable AA on triangle outlines is more readable for very large and thin triangles. 
+ fg_draw_list->AddPolyline(triangle, 3, IM_COL32(255, 255, 0, 255), true, 1.0f); + fg_draw_list->Flags = backup_flags; + } + } + TreePop(); + } + TreePop(); +} + +// [DEBUG] Display mesh/aabb of a ImDrawCmd +void ImGui::DebugNodeDrawCmdShowMeshAndBoundingBox(ImGuiWindow* window, const ImDrawList* draw_list, const ImDrawCmd* draw_cmd, bool show_mesh, bool show_aabb) +{ + IM_ASSERT(show_mesh || show_aabb); + ImDrawList* fg_draw_list = GetForegroundDrawList(window); // Render additional visuals into the top-most draw list + ImDrawIdx* idx_buffer = (draw_list->IdxBuffer.Size > 0) ? draw_list->IdxBuffer.Data : NULL; + ImDrawVert* vtx_buffer = draw_list->VtxBuffer.Data + draw_cmd->VtxOffset; + + // Draw wire-frame version of all triangles + ImRect clip_rect = draw_cmd->ClipRect; + ImRect vtxs_rect(FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX); + ImDrawListFlags backup_flags = fg_draw_list->Flags; + fg_draw_list->Flags &= ~ImDrawListFlags_AntiAliasedLines; // Disable AA on triangle outlines is more readable for very large and thin triangles. + for (unsigned int idx_n = draw_cmd->IdxOffset; idx_n < draw_cmd->IdxOffset + draw_cmd->ElemCount; ) + { + ImVec2 triangle[3]; + for (int n = 0; n < 3; n++, idx_n++) + vtxs_rect.Add((triangle[n] = vtx_buffer[idx_buffer ? 
idx_buffer[idx_n] : idx_n].pos)); + if (show_mesh) + fg_draw_list->AddPolyline(triangle, 3, IM_COL32(255, 255, 0, 255), true, 1.0f); // In yellow: mesh triangles + } + // Draw bounding boxes + if (show_aabb) + { + fg_draw_list->AddRect(ImFloor(clip_rect.Min), ImFloor(clip_rect.Max), IM_COL32(255, 0, 255, 255)); // In pink: clipping rectangle submitted to GPU + fg_draw_list->AddRect(ImFloor(vtxs_rect.Min), ImFloor(vtxs_rect.Max), IM_COL32(0, 255, 255, 255)); // In cyan: bounding box of triangles + } + fg_draw_list->Flags = backup_flags; +} + +// [DEBUG] Display contents of ImGuiStorage +void ImGui::DebugNodeStorage(ImGuiStorage* storage, const char* label) +{ + if (!TreeNode(label, "%s: %d entries, %d bytes", label, storage->Data.Size, storage->Data.size_in_bytes())) + return; + for (int n = 0; n < storage->Data.Size; n++) + { + const ImGuiStorage::ImGuiStoragePair& p = storage->Data[n]; + BulletText("Key 0x%08X Value { i: %d }", p.key, p.val_i); // Important: we currently don't store a type, real value may not be integer. + } + TreePop(); +} + +// [DEBUG] Display contents of ImGuiTabBar +void ImGui::DebugNodeTabBar(ImGuiTabBar* tab_bar, const char* label) +{ + // Standalone tab bars (not associated to docking/windows functionality) currently hold no discernible strings. + char buf[256]; + char* p = buf; + const char* buf_end = buf + IM_ARRAYSIZE(buf); + const bool is_active = (tab_bar->PrevFrameVisible >= GetFrameCount() - 2); + p += ImFormatString(p, buf_end - p, "%s 0x%08X (%d tabs)%s", label, tab_bar->ID, tab_bar->Tabs.Size, is_active ? 
"" : " *Inactive*"); + IM_UNUSED(p); + if (!is_active) { PushStyleColor(ImGuiCol_Text, GetStyleColorVec4(ImGuiCol_TextDisabled)); } + bool open = TreeNode(tab_bar, "%s", buf); + if (!is_active) { PopStyleColor(); } + if (is_active && IsItemHovered()) + { + ImDrawList* draw_list = GetForegroundDrawList(); + draw_list->AddRect(tab_bar->BarRect.Min, tab_bar->BarRect.Max, IM_COL32(255, 255, 0, 255)); + draw_list->AddLine(ImVec2(tab_bar->ScrollingRectMinX, tab_bar->BarRect.Min.y), ImVec2(tab_bar->ScrollingRectMinX, tab_bar->BarRect.Max.y), IM_COL32(0, 255, 0, 255)); + draw_list->AddLine(ImVec2(tab_bar->ScrollingRectMaxX, tab_bar->BarRect.Min.y), ImVec2(tab_bar->ScrollingRectMaxX, tab_bar->BarRect.Max.y), IM_COL32(0, 255, 0, 255)); + } + if (open) + { + for (int tab_n = 0; tab_n < tab_bar->Tabs.Size; tab_n++) + { + const ImGuiTabItem* tab = &tab_bar->Tabs[tab_n]; + PushID(tab); + if (SmallButton("<")) { TabBarQueueReorder(tab_bar, tab, -1); } SameLine(0, 2); + if (SmallButton(">")) { TabBarQueueReorder(tab_bar, tab, +1); } SameLine(); + Text("%02d%c Tab 0x%08X '%s' Offset: %.1f, Width: %.1f/%.1f", + tab_n, (tab->ID == tab_bar->SelectedTabId) ? '*' : ' ', tab->ID, (tab->NameOffset != -1) ? tab_bar->GetTabName(tab) : "", tab->Offset, tab->Width, tab->ContentWidth); + PopID(); + } + TreePop(); + } +} + +void ImGui::DebugNodeWindow(ImGuiWindow* window, const char* label) +{ + if (window == NULL) + { + BulletText("%s: NULL", label); + return; + } + + ImGuiContext& g = *GImGui; + const bool is_active = window->WasActive; + ImGuiTreeNodeFlags tree_node_flags = (window == g.NavWindow) ? ImGuiTreeNodeFlags_Selected : ImGuiTreeNodeFlags_None; + if (!is_active) { PushStyleColor(ImGuiCol_Text, GetStyleColorVec4(ImGuiCol_TextDisabled)); } + const bool open = TreeNodeEx(label, tree_node_flags, "%s '%s'%s", label, window->Name, is_active ? 
"" : " *Inactive*"); + if (!is_active) { PopStyleColor(); } + if (IsItemHovered() && is_active) + GetForegroundDrawList(window)->AddRect(window->Pos, window->Pos + window->Size, IM_COL32(255, 255, 0, 255)); + if (!open) + return; + + if (window->MemoryCompacted) + TextDisabled("Note: some memory buffers have been compacted/freed."); + + ImGuiWindowFlags flags = window->Flags; + DebugNodeDrawList(window, window->DrawList, "DrawList"); + BulletText("Pos: (%.1f,%.1f), Size: (%.1f,%.1f), ContentSize (%.1f,%.1f) Ideal (%.1f,%.1f)", window->Pos.x, window->Pos.y, window->Size.x, window->Size.y, window->ContentSize.x, window->ContentSize.y, window->ContentSizeIdeal.x, window->ContentSizeIdeal.y); + BulletText("Flags: 0x%08X (%s%s%s%s%s%s%s%s%s..)", flags, + (flags & ImGuiWindowFlags_ChildWindow) ? "Child " : "", (flags & ImGuiWindowFlags_Tooltip) ? "Tooltip " : "", (flags & ImGuiWindowFlags_Popup) ? "Popup " : "", + (flags & ImGuiWindowFlags_Modal) ? "Modal " : "", (flags & ImGuiWindowFlags_ChildMenu) ? "ChildMenu " : "", (flags & ImGuiWindowFlags_NoSavedSettings) ? "NoSavedSettings " : "", + (flags & ImGuiWindowFlags_NoMouseInputs)? "NoMouseInputs":"", (flags & ImGuiWindowFlags_NoNavInputs) ? "NoNavInputs" : "", (flags & ImGuiWindowFlags_AlwaysAutoResize) ? "AlwaysAutoResize" : ""); + BulletText("Scroll: (%.2f/%.2f,%.2f/%.2f) Scrollbar:%s%s", window->Scroll.x, window->ScrollMax.x, window->Scroll.y, window->ScrollMax.y, window->ScrollbarX ? "X" : "", window->ScrollbarY ? "Y" : ""); + BulletText("Active: %d/%d, WriteAccessed: %d, BeginOrderWithinContext: %d", window->Active, window->WasActive, window->WriteAccessed, (window->Active || window->WasActive) ? 
window->BeginOrderWithinContext : -1); + BulletText("Appearing: %d, Hidden: %d (CanSkip %d Cannot %d), SkipItems: %d", window->Appearing, window->Hidden, window->HiddenFramesCanSkipItems, window->HiddenFramesCannotSkipItems, window->SkipItems); + BulletText("NavLastIds: 0x%08X,0x%08X, NavLayerActiveMask: %X", window->NavLastIds[0], window->NavLastIds[1], window->DC.NavLayerActiveMask); + BulletText("NavLastChildNavWindow: %s", window->NavLastChildNavWindow ? window->NavLastChildNavWindow->Name : "NULL"); + if (!window->NavRectRel[0].IsInverted()) + BulletText("NavRectRel[0]: (%.1f,%.1f)(%.1f,%.1f)", window->NavRectRel[0].Min.x, window->NavRectRel[0].Min.y, window->NavRectRel[0].Max.x, window->NavRectRel[0].Max.y); + else + BulletText("NavRectRel[0]: "); + if (window->RootWindow != window) { DebugNodeWindow(window->RootWindow, "RootWindow"); } + if (window->ParentWindow != NULL) { DebugNodeWindow(window->ParentWindow, "ParentWindow"); } + if (window->DC.ChildWindows.Size > 0) { DebugNodeWindowsList(&window->DC.ChildWindows, "ChildWindows"); } + if (window->ColumnsStorage.Size > 0 && TreeNode("Columns", "Columns sets (%d)", window->ColumnsStorage.Size)) + { + for (int n = 0; n < window->ColumnsStorage.Size; n++) + DebugNodeColumns(&window->ColumnsStorage[n]); + TreePop(); + } + DebugNodeStorage(&window->StateStorage, "Storage"); + TreePop(); +} + +void ImGui::DebugNodeWindowSettings(ImGuiWindowSettings* settings) +{ + Text("0x%08X \"%s\" Pos (%d,%d) Size (%d,%d) Collapsed=%d", + settings->ID, settings->GetName(), settings->Pos.x, settings->Pos.y, settings->Size.x, settings->Size.y, settings->Collapsed); +} + + +void ImGui::DebugNodeWindowsList(ImVector* windows, const char* label) +{ + if (!TreeNode(label, "%s (%d)", label, windows->Size)) + return; + Text("(In front-to-back order:)"); + for (int i = windows->Size - 1; i >= 0; i--) // Iterate front to back + { + PushID((*windows)[i]); + DebugNodeWindow((*windows)[i], "Window"); + PopID(); + } + TreePop(); +} + +#else 
+ +void ImGui::ShowMetricsWindow(bool*) {} +void ImGui::DebugNodeColumns(ImGuiOldColumns*) {} +void ImGui::DebugNodeDrawList(ImGuiWindow*, const ImDrawList*, const char*) {} +void ImGui::DebugNodeDrawCmdShowMeshAndBoundingBox(ImGuiWindow*, const ImDrawList*, const ImDrawCmd*, bool, bool) {} +void ImGui::DebugNodeStorage(ImGuiStorage*, const char*) {} +void ImGui::DebugNodeTabBar(ImGuiTabBar*, const char*) {} +void ImGui::DebugNodeWindow(ImGuiWindow*, const char*) {} +void ImGui::DebugNodeWindowSettings(ImGuiWindowSettings*) {} +void ImGui::DebugNodeWindowsList(ImVector*, const char*) {} + +#endif + +//----------------------------------------------------------------------------- + +// Include imgui_user.inl at the end of imgui.cpp to access private data/functions that aren't exposed. +// Prefer just including imgui_internal.h from your code rather than using this define. If a declaration is missing from imgui_internal.h add it or request it on the github. +#ifdef IMGUI_INCLUDE_IMGUI_USER_INL +#include "imgui_user.inl" +#endif + +//----------------------------------------------------------------------------- + +#endif // #ifndef IMGUI_DISABLE diff --git a/cpp-projects/3d-engine/imgui/imgui.h b/cpp-projects/3d-engine/imgui/imgui.h new file mode 100644 index 0000000..6bfe753 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/imgui.h @@ -0,0 +1,2730 @@ +// dear imgui, v1.81 WIP +// (headers) + +// Help: +// - Read FAQ at http://dearimgui.org/faq +// - Newcomers, read 'Programmer guide' in imgui.cpp for notes on how to setup Dear ImGui in your codebase. +// - Call and read ImGui::ShowDemoWindow() in imgui_demo.cpp. All applications in examples/ are doing that. +// Read imgui.cpp for details, links and comments. 
+ +// Resources: +// - FAQ http://dearimgui.org/faq +// - Homepage & latest https://github.com/ocornut/imgui +// - Releases & changelog https://github.com/ocornut/imgui/releases +// - Gallery https://github.com/ocornut/imgui/issues/3488 (please post your screenshots/video there!) +// - Glossary https://github.com/ocornut/imgui/wiki/Glossary +// - Wiki https://github.com/ocornut/imgui/wiki +// - Issues & support https://github.com/ocornut/imgui/issues + +/* + +Index of this file: +// [SECTION] Header mess +// [SECTION] Forward declarations and basic types +// [SECTION] Dear ImGui end-user API functions +// [SECTION] Flags & Enumerations +// [SECTION] Helpers: Memory allocations macros, ImVector<> +// [SECTION] ImGuiStyle +// [SECTION] ImGuiIO +// [SECTION] Misc data structures (ImGuiInputTextCallbackData, ImGuiSizeCallbackData, ImGuiPayload, ImGuiTableSortSpecs, ImGuiTableColumnSortSpecs) +// [SECTION] Obsolete functions +// [SECTION] Helpers (ImGuiOnceUponAFrame, ImGuiTextFilter, ImGuiTextBuffer, ImGuiStorage, ImGuiListClipper, ImColor) +// [SECTION] Drawing API (ImDrawCallback, ImDrawCmd, ImDrawIdx, ImDrawVert, ImDrawChannel, ImDrawListSplitter, ImDrawListFlags, ImDrawList, ImDrawData) +// [SECTION] Font API (ImFontConfig, ImFontGlyph, ImFontGlyphRangesBuilder, ImFontAtlasFlags, ImFontAtlas, ImFont) + +*/ + +#pragma once + +// Configuration file with compile-time options (edit imconfig.h or '#define IMGUI_USER_CONFIG "myfilename.h" from your build system') +#ifdef IMGUI_USER_CONFIG +#include IMGUI_USER_CONFIG +#endif +#if !defined(IMGUI_DISABLE_INCLUDE_IMCONFIG_H) || defined(IMGUI_INCLUDE_IMCONFIG_H) +#include "imconfig.h" +#endif + +#ifndef IMGUI_DISABLE + +//----------------------------------------------------------------------------- +// [SECTION] Header mess +//----------------------------------------------------------------------------- + +// Includes +#include <float.h> // FLT_MIN, FLT_MAX +#include <stdarg.h> // va_list, va_start, va_end +#include <stddef.h> // ptrdiff_t, NULL +#include <string.h> 
// memset, memmove, memcpy, strlen, strchr, strcpy, strcmp + +// Version +// (Integer encoded as XYYZZ for use in #if preprocessor conditionals. Work in progress versions typically start at XYY99 then bounce up to XYY00, XYY01 etc. when release tagging happens) +#define IMGUI_VERSION "1.81 WIP" +#define IMGUI_VERSION_NUM 18002 +#define IMGUI_CHECKVERSION() ImGui::DebugCheckVersionAndDataLayout(IMGUI_VERSION, sizeof(ImGuiIO), sizeof(ImGuiStyle), sizeof(ImVec2), sizeof(ImVec4), sizeof(ImDrawVert), sizeof(ImDrawIdx)) +#define IMGUI_HAS_TABLE + +// Define attributes of all API symbols declarations (e.g. for DLL under Windows) +// IMGUI_API is used for core imgui functions, IMGUI_IMPL_API is used for the default backends files (imgui_impl_xxx.h) +// Using dear imgui via a shared library is not recommended, because we don't guarantee backward nor forward ABI compatibility (also function call overhead, as dear imgui is a call-heavy API) +#ifndef IMGUI_API +#define IMGUI_API +#endif +#ifndef IMGUI_IMPL_API +#define IMGUI_IMPL_API IMGUI_API +#endif + +// Helper Macros +#ifndef IM_ASSERT +#include <assert.h> +#define IM_ASSERT(_EXPR) assert(_EXPR) // You can override the default assert handler by editing imconfig.h +#endif +#define IM_ARRAYSIZE(_ARR) ((int)(sizeof(_ARR) / sizeof(*(_ARR)))) // Size of a static C-style array. Don't use on pointers! +#define IM_UNUSED(_VAR) ((void)(_VAR)) // Used to silence "unused variable warnings". Often useful as asserts may be stripped out from final builds. +#if (__cplusplus >= 201100) || (defined(_MSVC_LANG) && _MSVC_LANG >= 201100) +#define IM_OFFSETOF(_TYPE,_MEMBER) offsetof(_TYPE, _MEMBER) // Offset of _MEMBER within _TYPE. Standardized as offsetof() in C++11 +#else +#define IM_OFFSETOF(_TYPE,_MEMBER) ((size_t)&(((_TYPE*)0)->_MEMBER)) // Offset of _MEMBER within _TYPE. Old style macro. +#endif + +// Helper Macros - IM_FMTARGS, IM_FMTLIST: Apply printf-style warnings to our formatting functions. 
+#if !defined(IMGUI_USE_STB_SPRINTF) && defined(__clang__) +#define IM_FMTARGS(FMT) __attribute__((format(printf, FMT, FMT+1))) +#define IM_FMTLIST(FMT) __attribute__((format(printf, FMT, 0))) +#elif !defined(IMGUI_USE_STB_SPRINTF) && defined(__GNUC__) && defined(__MINGW32__) +#define IM_FMTARGS(FMT) __attribute__((format(gnu_printf, FMT, FMT+1))) +#define IM_FMTLIST(FMT) __attribute__((format(gnu_printf, FMT, 0))) +#else +#define IM_FMTARGS(FMT) +#define IM_FMTLIST(FMT) +#endif + +// Warnings +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" +#if __has_warning("-Wzero-as-null-pointer-constant") +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#endif +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" // warning: unknown option after '#pragma GCC diagnostic' kind +#pragma GCC diagnostic ignored "-Wclass-memaccess" // [__GNUC__ >= 8] warning: 'memset/memcpy' clearing/writing an object of type 'xxxx' with no trivial copy-assignment; use assignment or value-initialization instead +#endif + +//----------------------------------------------------------------------------- +// [SECTION] Forward declarations and basic types +//----------------------------------------------------------------------------- + +// Forward declarations +struct ImDrawChannel; // Temporary storage to output draw commands out of order, used by ImDrawListSplitter and ImDrawList::ChannelsSplit() +struct ImDrawCmd; // A single draw command within a parent ImDrawList (generally maps to 1 GPU draw call, unless it is a callback) +struct ImDrawData; // All draw command lists required to render the frame + pos/size coordinates to use for the projection matrix. 
+struct ImDrawList; // A single draw command list (generally one per window, conceptually you may see this as a dynamic "mesh" builder) +struct ImDrawListSharedData; // Data shared among multiple draw lists (typically owned by parent ImGui context, but you may create one yourself) +struct ImDrawListSplitter; // Helper to split a draw list into different layers which can be drawn into out of order, then flattened back. +struct ImDrawVert; // A single vertex (pos + uv + col = 20 bytes by default. Override layout with IMGUI_OVERRIDE_DRAWVERT_STRUCT_LAYOUT) +struct ImFont; // Runtime data for a single font within a parent ImFontAtlas +struct ImFontAtlas; // Runtime data for multiple fonts, bake multiple fonts into a single texture, TTF/OTF font loader +struct ImFontBuilderIO; // Opaque interface to a font builder (stb_truetype or FreeType). +struct ImFontConfig; // Configuration data when adding a font or merging fonts +struct ImFontGlyph; // A single font glyph (code point + coordinates within in ImFontAtlas + offset) +struct ImFontGlyphRangesBuilder; // Helper to build glyph ranges from text/string data +struct ImColor; // Helper functions to create a color that can be converted to either u32 or float4 (*OBSOLETE* please avoid using) +struct ImGuiContext; // Dear ImGui context (opaque structure, unless including imgui_internal.h) +struct ImGuiIO; // Main configuration and I/O between your application and ImGui +struct ImGuiInputTextCallbackData; // Shared state of InputText() when using custom ImGuiInputTextCallback (rare/advanced use) +struct ImGuiListClipper; // Helper to manually clip large list of items +struct ImGuiOnceUponAFrame; // Helper for running a block of code not more than once a frame, used by IMGUI_ONCE_UPON_A_FRAME macro +struct ImGuiPayload; // User data payload for drag and drop operations +struct ImGuiSizeCallbackData; // Callback data when using SetNextWindowSizeConstraints() (rare/advanced use) +struct ImGuiStorage; // Helper for key->value 
storage +struct ImGuiStyle; // Runtime data for styling/colors +struct ImGuiTableSortSpecs; // Sorting specifications for a table (often handling sort specs for a single column, occasionally more) +struct ImGuiTableColumnSortSpecs; // Sorting specification for one column of a table +struct ImGuiTextBuffer; // Helper to hold and append into a text buffer (~string builder) +struct ImGuiTextFilter; // Helper to parse and apply text filters (e.g. "aaaaa[,bbbbb][,ccccc]") + +// Enums/Flags (declared as int for compatibility with old C++, to allow using as flags and to not pollute the top of this file) +// - Tip: Use your programming IDE navigation facilities on the names in the _central column_ below to find the actual flags/enum lists! +// In Visual Studio IDE: CTRL+comma ("Edit.NavigateTo") can follow symbols in comments, whereas CTRL+F12 ("Edit.GoToImplementation") cannot. +// With Visual Assist installed: ALT+G ("VAssistX.GoToImplementation") can also follow symbols in comments. +typedef int ImGuiCol; // -> enum ImGuiCol_ // Enum: A color identifier for styling +typedef int ImGuiCond; // -> enum ImGuiCond_ // Enum: A condition for many Set*() functions +typedef int ImGuiDataType; // -> enum ImGuiDataType_ // Enum: A primary data type +typedef int ImGuiDir; // -> enum ImGuiDir_ // Enum: A cardinal direction +typedef int ImGuiKey; // -> enum ImGuiKey_ // Enum: A key identifier (ImGui-side enum) +typedef int ImGuiNavInput; // -> enum ImGuiNavInput_ // Enum: An input identifier for navigation +typedef int ImGuiMouseButton; // -> enum ImGuiMouseButton_ // Enum: A mouse button identifier (0=left, 1=right, 2=middle) +typedef int ImGuiMouseCursor; // -> enum ImGuiMouseCursor_ // Enum: A mouse cursor identifier +typedef int ImGuiSortDirection; // -> enum ImGuiSortDirection_ // Enum: A sorting direction (ascending or descending) +typedef int ImGuiStyleVar; // -> enum ImGuiStyleVar_ // Enum: A variable identifier for styling +typedef int ImGuiTableBgTarget; // -> enum 
ImGuiTableBgTarget_ // Enum: A color target for TableSetBgColor() +typedef int ImDrawCornerFlags; // -> enum ImDrawCornerFlags_ // Flags: for ImDrawList::AddRect(), AddRectFilled() etc. +typedef int ImDrawListFlags; // -> enum ImDrawListFlags_ // Flags: for ImDrawList +typedef int ImFontAtlasFlags; // -> enum ImFontAtlasFlags_ // Flags: for ImFontAtlas build +typedef int ImGuiBackendFlags; // -> enum ImGuiBackendFlags_ // Flags: for io.BackendFlags +typedef int ImGuiButtonFlags; // -> enum ImGuiButtonFlags_ // Flags: for InvisibleButton() +typedef int ImGuiColorEditFlags; // -> enum ImGuiColorEditFlags_ // Flags: for ColorEdit4(), ColorPicker4() etc. +typedef int ImGuiConfigFlags; // -> enum ImGuiConfigFlags_ // Flags: for io.ConfigFlags +typedef int ImGuiComboFlags; // -> enum ImGuiComboFlags_ // Flags: for BeginCombo() +typedef int ImGuiDragDropFlags; // -> enum ImGuiDragDropFlags_ // Flags: for BeginDragDropSource(), AcceptDragDropPayload() +typedef int ImGuiFocusedFlags; // -> enum ImGuiFocusedFlags_ // Flags: for IsWindowFocused() +typedef int ImGuiHoveredFlags; // -> enum ImGuiHoveredFlags_ // Flags: for IsItemHovered(), IsWindowHovered() etc. +typedef int ImGuiInputTextFlags; // -> enum ImGuiInputTextFlags_ // Flags: for InputText(), InputTextMultiline() +typedef int ImGuiKeyModFlags; // -> enum ImGuiKeyModFlags_ // Flags: for io.KeyMods (Ctrl/Shift/Alt/Super) +typedef int ImGuiPopupFlags; // -> enum ImGuiPopupFlags_ // Flags: for OpenPopup*(), BeginPopupContext*(), IsPopupOpen() +typedef int ImGuiSelectableFlags; // -> enum ImGuiSelectableFlags_ // Flags: for Selectable() +typedef int ImGuiSliderFlags; // -> enum ImGuiSliderFlags_ // Flags: for DragFloat(), DragInt(), SliderFloat(), SliderInt() etc. 
+typedef int ImGuiTabBarFlags; // -> enum ImGuiTabBarFlags_ // Flags: for BeginTabBar() +typedef int ImGuiTabItemFlags; // -> enum ImGuiTabItemFlags_ // Flags: for BeginTabItem() +typedef int ImGuiTableFlags; // -> enum ImGuiTableFlags_ // Flags: For BeginTable() +typedef int ImGuiTableColumnFlags; // -> enum ImGuiTableColumnFlags_// Flags: For TableSetupColumn() +typedef int ImGuiTableRowFlags; // -> enum ImGuiTableRowFlags_ // Flags: For TableNextRow() +typedef int ImGuiTreeNodeFlags; // -> enum ImGuiTreeNodeFlags_ // Flags: for TreeNode(), TreeNodeEx(), CollapsingHeader() +typedef int ImGuiWindowFlags; // -> enum ImGuiWindowFlags_ // Flags: for Begin(), BeginChild() + +// Other types +#ifndef ImTextureID // ImTextureID [configurable type: override in imconfig.h with '#define ImTextureID xxx'] +typedef void* ImTextureID; // User data for rendering backend to identify a texture. This is whatever to you want it to be! read the FAQ about ImTextureID for details. +#endif +typedef unsigned int ImGuiID; // A unique ID used by widgets, typically hashed from a stack of string. +typedef int (*ImGuiInputTextCallback)(ImGuiInputTextCallbackData* data); // Callback function for ImGui::InputText() +typedef void (*ImGuiSizeCallback)(ImGuiSizeCallbackData* data); // Callback function for ImGui::SetNextWindowSizeConstraints() + +// Character types +// (we generally use UTF-8 encoded string in the API. This is storage specifically for a decoded character used for keyboard input and display) +typedef unsigned short ImWchar16; // A single decoded U16 character/code point. We encode them as multi bytes UTF-8 when used in strings. +typedef unsigned int ImWchar32; // A single decoded U32 character/code point. We encode them as multi bytes UTF-8 when used in strings. 
+#ifdef IMGUI_USE_WCHAR32 // ImWchar [configurable type: override in imconfig.h with '#define IMGUI_USE_WCHAR32' to support Unicode planes 1-16] +typedef ImWchar32 ImWchar; +#else +typedef ImWchar16 ImWchar; +#endif + +// Basic scalar data types +typedef signed char ImS8; // 8-bit signed integer +typedef unsigned char ImU8; // 8-bit unsigned integer +typedef signed short ImS16; // 16-bit signed integer +typedef unsigned short ImU16; // 16-bit unsigned integer +typedef signed int ImS32; // 32-bit signed integer == int +typedef unsigned int ImU32; // 32-bit unsigned integer (often used to store packed colors) +#if defined(_MSC_VER) && !defined(__clang__) +typedef signed __int64 ImS64; // 64-bit signed integer (pre and post C++11 with Visual Studio) +typedef unsigned __int64 ImU64; // 64-bit unsigned integer (pre and post C++11 with Visual Studio) +#elif (defined(__clang__) || defined(__GNUC__)) && (__cplusplus < 201100) +#include <stdint.h> +typedef int64_t ImS64; // 64-bit signed integer (pre C++11) +typedef uint64_t ImU64; // 64-bit unsigned integer (pre C++11) +#else +typedef signed long long ImS64; // 64-bit signed integer (post C++11) +typedef unsigned long long ImU64; // 64-bit unsigned integer (post C++11) +#endif + +// 2D vector (often used to store positions or sizes) +struct ImVec2 +{ + float x, y; + ImVec2() { x = y = 0.0f; } + ImVec2(float _x, float _y) { x = _x; y = _y; } + float operator[] (size_t idx) const { IM_ASSERT(idx <= 1); return (&x)[idx]; } // We very rarely use this [] operator, the assert overhead is fine. + float& operator[] (size_t idx) { IM_ASSERT(idx <= 1); return (&x)[idx]; } // We very rarely use this [] operator, the assert overhead is fine. +#ifdef IM_VEC2_CLASS_EXTRA + IM_VEC2_CLASS_EXTRA // Define additional constructors and implicit cast operators in imconfig.h to convert back and forth between your math types and ImVec2. 
+#endif +}; + +// 4D vector (often used to store floating-point colors) +struct ImVec4 +{ + float x, y, z, w; + ImVec4() { x = y = z = w = 0.0f; } + ImVec4(float _x, float _y, float _z, float _w) { x = _x; y = _y; z = _z; w = _w; } +#ifdef IM_VEC4_CLASS_EXTRA + IM_VEC4_CLASS_EXTRA // Define additional constructors and implicit cast operators in imconfig.h to convert back and forth between your math types and ImVec4. +#endif +}; + +//----------------------------------------------------------------------------- +// [SECTION] Dear ImGui end-user API functions +// (Note that ImGui:: being a namespace, you can add extra ImGui:: functions in your own separate file. Please don't modify imgui source files!) +//----------------------------------------------------------------------------- + +namespace ImGui +{ + // Context creation and access + // Each context creates its own ImFontAtlas by default. You may instance one yourself and pass it to CreateContext() to share a font atlas between imgui contexts. + // None of those functions is reliant on the current context. + IMGUI_API ImGuiContext* CreateContext(ImFontAtlas* shared_font_atlas = NULL); + IMGUI_API void DestroyContext(ImGuiContext* ctx = NULL); // NULL = destroy current context + IMGUI_API ImGuiContext* GetCurrentContext(); + IMGUI_API void SetCurrentContext(ImGuiContext* ctx); + + // Main + IMGUI_API ImGuiIO& GetIO(); // access the IO structure (mouse/keyboard/gamepad inputs, time, various configuration options/flags) + IMGUI_API ImGuiStyle& GetStyle(); // access the Style structure (colors, sizes). Always use PushStyleColor(), PushStyleVar() to modify style mid-frame! + IMGUI_API void NewFrame(); // start a new Dear ImGui frame, you can submit any command from this point until Render()/EndFrame(). + IMGUI_API void EndFrame(); // ends the Dear ImGui frame. automatically called by Render(). If you don't need to render data (skipping rendering) you may call EndFrame() without Render()... 
but you'll have wasted CPU already! If you don't need to render, better to not create any windows and not call NewFrame() at all! + IMGUI_API void Render(); // ends the Dear ImGui frame, finalize the draw data. You can then call GetDrawData(). + IMGUI_API ImDrawData* GetDrawData(); // valid after Render() and until the next call to NewFrame(). this is what you have to render. + + // Demo, Debug, Information + IMGUI_API void ShowDemoWindow(bool* p_open = NULL); // create Demo window. demonstrate most ImGui features. call this to learn about the library! try to make it always available in your application! + IMGUI_API void ShowMetricsWindow(bool* p_open = NULL); // create Metrics/Debugger window. display Dear ImGui internals: windows, draw commands, various internal state, etc. + IMGUI_API void ShowAboutWindow(bool* p_open = NULL); // create About window. display Dear ImGui version, credits and build/system information. + IMGUI_API void ShowStyleEditor(ImGuiStyle* ref = NULL); // add style editor block (not a window). you can pass in a reference ImGuiStyle structure to compare to, revert to and save to (else it uses the default style) + IMGUI_API bool ShowStyleSelector(const char* label); // add style selector block (not a window), essentially a combo listing the default styles. + IMGUI_API void ShowFontSelector(const char* label); // add font selector block (not a window), essentially a combo listing the loaded fonts. + IMGUI_API void ShowUserGuide(); // add basic help/info block (not a window): how to manipulate ImGui as an end-user (mouse/keyboard controls). + IMGUI_API const char* GetVersion(); // get the compiled version string e.g. 
"1.80 WIP" (essentially the value for IMGUI_VERSION from the compiled version of imgui.cpp) + + // Styles + IMGUI_API void StyleColorsDark(ImGuiStyle* dst = NULL); // new, recommended style (default) + IMGUI_API void StyleColorsLight(ImGuiStyle* dst = NULL); // best used with borders and a custom, thicker font + IMGUI_API void StyleColorsClassic(ImGuiStyle* dst = NULL); // classic imgui style + + // Windows + // - Begin() = push window to the stack and start appending to it. End() = pop window from the stack. + // - Passing 'bool* p_open != NULL' shows a window-closing widget in the upper-right corner of the window, + // which clicking will set the boolean to false when clicked. + // - You may append multiple times to the same window during the same frame by calling Begin()/End() pairs multiple times. + // Some information such as 'flags' or 'p_open' will only be considered by the first call to Begin(). + // - Begin() return false to indicate the window is collapsed or fully clipped, so you may early out and omit submitting + // anything to the window. Always call a matching End() for each Begin() call, regardless of its return value! + // [Important: due to legacy reason, this is inconsistent with most other functions such as BeginMenu/EndMenu, + // BeginPopup/EndPopup, etc. where the EndXXX call should only be called if the corresponding BeginXXX function + // returned true. Begin and BeginChild are the only odd ones out. Will be fixed in a future update.] + // - Note that the bottom of window stack always contains a window called "Debug". + IMGUI_API bool Begin(const char* name, bool* p_open = NULL, ImGuiWindowFlags flags = 0); + IMGUI_API void End(); + + // Child Windows + // - Use child windows to begin into a self-contained independent scrolling/clipping regions within a host window. Child windows can embed their own child. 
+ // - For each independent axis of 'size': ==0.0f: use remaining host window size / >0.0f: fixed size / <0.0f: use remaining window size minus abs(size) / Each axis can use a different mode, e.g. ImVec2(0,400). + // - BeginChild() returns false to indicate the window is collapsed or fully clipped, so you may early out and omit submitting anything to the window. + // Always call a matching EndChild() for each BeginChild() call, regardless of its return value. + // [Important: due to legacy reason, this is inconsistent with most other functions such as BeginMenu/EndMenu, + // BeginPopup/EndPopup, etc. where the EndXXX call should only be called if the corresponding BeginXXX function + // returned true. Begin and BeginChild are the only odd ones out. Will be fixed in a future update.] + IMGUI_API bool BeginChild(const char* str_id, const ImVec2& size = ImVec2(0, 0), bool border = false, ImGuiWindowFlags flags = 0); + IMGUI_API bool BeginChild(ImGuiID id, const ImVec2& size = ImVec2(0, 0), bool border = false, ImGuiWindowFlags flags = 0); + IMGUI_API void EndChild(); + + // Windows Utilities + // - 'current window' = the window we are appending into while inside a Begin()/End() block. 'next window' = next window we will Begin() into. + IMGUI_API bool IsWindowAppearing(); + IMGUI_API bool IsWindowCollapsed(); + IMGUI_API bool IsWindowFocused(ImGuiFocusedFlags flags=0); // is current window focused? or its root/child, depending on flags. see flags for options. + IMGUI_API bool IsWindowHovered(ImGuiHoveredFlags flags=0); // is current window hovered (and typically: not blocked by a popup/modal)? see flags for options. NB: If you are trying to check whether your mouse should be dispatched to imgui or to your app, you should use the 'io.WantCaptureMouse' boolean for that! Please read the FAQ! 
+ IMGUI_API ImDrawList* GetWindowDrawList(); // get draw list associated to the current window, to append your own drawing primitives + IMGUI_API ImVec2 GetWindowPos(); // get current window position in screen space (useful if you want to do your own drawing via the DrawList API) + IMGUI_API ImVec2 GetWindowSize(); // get current window size + IMGUI_API float GetWindowWidth(); // get current window width (shortcut for GetWindowSize().x) + IMGUI_API float GetWindowHeight(); // get current window height (shortcut for GetWindowSize().y) + + // Prefer using SetNextXXX functions (before Begin) rather than SetXXX functions (after Begin). + IMGUI_API void SetNextWindowPos(const ImVec2& pos, ImGuiCond cond = 0, const ImVec2& pivot = ImVec2(0, 0)); // set next window position. call before Begin(). use pivot=(0.5f,0.5f) to center on given point, etc. + IMGUI_API void SetNextWindowSize(const ImVec2& size, ImGuiCond cond = 0); // set next window size. set axis to 0.0f to force an auto-fit on this axis. call before Begin() + IMGUI_API void SetNextWindowSizeConstraints(const ImVec2& size_min, const ImVec2& size_max, ImGuiSizeCallback custom_callback = NULL, void* custom_callback_data = NULL); // set next window size limits. use -1,-1 on either X/Y axis to preserve the current size. Sizes will be rounded down. Use callback to apply non-trivial programmatic constraints. + IMGUI_API void SetNextWindowContentSize(const ImVec2& size); // set next window content size (~ scrollable client area, which enforce the range of scrollbars). Not including window decorations (title bar, menu bar, etc.) nor WindowPadding. set an axis to 0.0f to leave it automatic. call before Begin() + IMGUI_API void SetNextWindowCollapsed(bool collapsed, ImGuiCond cond = 0); // set next window collapsed state. call before Begin() + IMGUI_API void SetNextWindowFocus(); // set next window to be focused / top-most. 
call before Begin() + IMGUI_API void SetNextWindowBgAlpha(float alpha); // set next window background color alpha. helper to easily override the Alpha component of ImGuiCol_WindowBg/ChildBg/PopupBg. you may also use ImGuiWindowFlags_NoBackground. + IMGUI_API void SetWindowPos(const ImVec2& pos, ImGuiCond cond = 0); // (not recommended) set current window position - call within Begin()/End(). prefer using SetNextWindowPos(), as this may incur tearing and side-effects. + IMGUI_API void SetWindowSize(const ImVec2& size, ImGuiCond cond = 0); // (not recommended) set current window size - call within Begin()/End(). set to ImVec2(0, 0) to force an auto-fit. prefer using SetNextWindowSize(), as this may incur tearing and minor side-effects. + IMGUI_API void SetWindowCollapsed(bool collapsed, ImGuiCond cond = 0); // (not recommended) set current window collapsed state. prefer using SetNextWindowCollapsed(). + IMGUI_API void SetWindowFocus(); // (not recommended) set current window to be focused / top-most. prefer using SetNextWindowFocus(). + IMGUI_API void SetWindowFontScale(float scale); // set font scale. Adjust IO.FontGlobalScale if you want to scale all windows. This is an old API! For correct scaling, prefer to reload font + rebuild ImFontAtlas + call style.ScaleAllSizes(). + IMGUI_API void SetWindowPos(const char* name, const ImVec2& pos, ImGuiCond cond = 0); // set named window position. + IMGUI_API void SetWindowSize(const char* name, const ImVec2& size, ImGuiCond cond = 0); // set named window size. set axis to 0.0f to force an auto-fit on this axis. + IMGUI_API void SetWindowCollapsed(const char* name, bool collapsed, ImGuiCond cond = 0); // set named window collapsed state + IMGUI_API void SetWindowFocus(const char* name); // set named window to be focused / top-most. use NULL to remove focus. + + // Content region + // - Retrieve available space from a given point. GetContentRegionAvail() is frequently useful. 
+ // - Those functions are bound to be redesigned (they are confusing, incomplete and the Min/Max return values are in local window coordinates which increases confusion) + IMGUI_API ImVec2 GetContentRegionAvail(); // == GetContentRegionMax() - GetCursorPos() + IMGUI_API ImVec2 GetContentRegionMax(); // current content boundaries (typically window boundaries including scrolling, or current column boundaries), in windows coordinates + IMGUI_API ImVec2 GetWindowContentRegionMin(); // content boundaries min (roughly (0,0)-Scroll), in window coordinates + IMGUI_API ImVec2 GetWindowContentRegionMax(); // content boundaries max (roughly (0,0)+Size-Scroll) where Size can be override with SetNextWindowContentSize(), in window coordinates + IMGUI_API float GetWindowContentRegionWidth(); // + + // Windows Scrolling + IMGUI_API float GetScrollX(); // get scrolling amount [0 .. GetScrollMaxX()] + IMGUI_API float GetScrollY(); // get scrolling amount [0 .. GetScrollMaxY()] + IMGUI_API void SetScrollX(float scroll_x); // set scrolling amount [0 .. GetScrollMaxX()] + IMGUI_API void SetScrollY(float scroll_y); // set scrolling amount [0 .. GetScrollMaxY()] + IMGUI_API float GetScrollMaxX(); // get maximum scrolling amount ~~ ContentSize.x - WindowSize.x - DecorationsSize.x + IMGUI_API float GetScrollMaxY(); // get maximum scrolling amount ~~ ContentSize.y - WindowSize.y - DecorationsSize.y + IMGUI_API void SetScrollHereX(float center_x_ratio = 0.5f); // adjust scrolling amount to make current cursor position visible. center_x_ratio=0.0: left, 0.5: center, 1.0: right. When using to make a "default/current item" visible, consider using SetItemDefaultFocus() instead. + IMGUI_API void SetScrollHereY(float center_y_ratio = 0.5f); // adjust scrolling amount to make current cursor position visible. center_y_ratio=0.0: top, 0.5: center, 1.0: bottom. When using to make a "default/current item" visible, consider using SetItemDefaultFocus() instead. 
+ IMGUI_API void SetScrollFromPosX(float local_x, float center_x_ratio = 0.5f); // adjust scrolling amount to make given position visible. Generally GetCursorStartPos() + offset to compute a valid position. + IMGUI_API void SetScrollFromPosY(float local_y, float center_y_ratio = 0.5f); // adjust scrolling amount to make given position visible. Generally GetCursorStartPos() + offset to compute a valid position. + + // Parameters stacks (shared) + IMGUI_API void PushFont(ImFont* font); // use NULL as a shortcut to push default font + IMGUI_API void PopFont(); + IMGUI_API void PushStyleColor(ImGuiCol idx, ImU32 col); // modify a style color. always use this if you modify the style after NewFrame(). + IMGUI_API void PushStyleColor(ImGuiCol idx, const ImVec4& col); + IMGUI_API void PopStyleColor(int count = 1); + IMGUI_API void PushStyleVar(ImGuiStyleVar idx, float val); // modify a style float variable. always use this if you modify the style after NewFrame(). + IMGUI_API void PushStyleVar(ImGuiStyleVar idx, const ImVec2& val); // modify a style ImVec2 variable. always use this if you modify the style after NewFrame(). + IMGUI_API void PopStyleVar(int count = 1); + IMGUI_API void PushAllowKeyboardFocus(bool allow_keyboard_focus); // allow focusing using TAB/Shift-TAB, enabled by default but you can disable it for certain widgets + IMGUI_API void PopAllowKeyboardFocus(); + IMGUI_API void PushButtonRepeat(bool repeat); // in 'repeat' mode, Button*() functions return repeated true in a typematic manner (using io.KeyRepeatDelay/io.KeyRepeatRate setting). Note that you can call IsItemActive() after any Button() to tell if the button is held in the current frame. + IMGUI_API void PopButtonRepeat(); + + // Parameters stacks (current window) + IMGUI_API void PushItemWidth(float item_width); // push width of items for common large "item+label" widgets. >0.0f: width in pixels, <0.0f align xx pixels to the right of window (so -FLT_MIN always align width to the right side). 
+ IMGUI_API void PopItemWidth(); + IMGUI_API void SetNextItemWidth(float item_width); // set width of the _next_ common large "item+label" widget. >0.0f: width in pixels, <0.0f align xx pixels to the right of window (so -FLT_MIN always align width to the right side) + IMGUI_API float CalcItemWidth(); // width of item given pushed settings and current cursor position. NOT necessarily the width of last item unlike most 'Item' functions. + IMGUI_API void PushTextWrapPos(float wrap_local_pos_x = 0.0f); // push word-wrapping position for Text*() commands. < 0.0f: no wrapping; 0.0f: wrap to end of window (or column); > 0.0f: wrap at 'wrap_pos_x' position in window local space + IMGUI_API void PopTextWrapPos(); + + // Style read access + IMGUI_API ImFont* GetFont(); // get current font + IMGUI_API float GetFontSize(); // get current font size (= height in pixels) of current font with current scale applied + IMGUI_API ImVec2 GetFontTexUvWhitePixel(); // get UV coordinate for a white pixel, useful to draw custom shapes via the ImDrawList API + IMGUI_API ImU32 GetColorU32(ImGuiCol idx, float alpha_mul = 1.0f); // retrieve given style color with style alpha applied and optional extra alpha multiplier, packed as a 32-bit value suitable for ImDrawList + IMGUI_API ImU32 GetColorU32(const ImVec4& col); // retrieve given color with style alpha applied, packed as a 32-bit value suitable for ImDrawList + IMGUI_API ImU32 GetColorU32(ImU32 col); // retrieve given color with style alpha applied, packed as a 32-bit value suitable for ImDrawList + IMGUI_API const ImVec4& GetStyleColorVec4(ImGuiCol idx); // retrieve style color as stored in ImGuiStyle structure. use to feed back into PushStyleColor(), otherwise use GetColorU32() to get style color with style alpha baked in. + + // Cursor / Layout + // - By "cursor" we mean the current output position. + // - The typical widget behavior is to output themselves at the current cursor position, then move the cursor one line down. 
+ // - You can call SameLine() between widgets to undo the last carriage return and output at the right of the preceding widget. + // - Attention! We currently have inconsistencies between window-local and absolute positions we will aim to fix with future API: + // Window-local coordinates: SameLine(), GetCursorPos(), SetCursorPos(), GetCursorStartPos(), GetContentRegionMax(), GetWindowContentRegion*(), PushTextWrapPos() + // Absolute coordinate: GetCursorScreenPos(), SetCursorScreenPos(), all ImDrawList:: functions. + IMGUI_API void Separator(); // separator, generally horizontal. inside a menu bar or in horizontal layout mode, this becomes a vertical separator. + IMGUI_API void SameLine(float offset_from_start_x=0.0f, float spacing=-1.0f); // call between widgets or groups to layout them horizontally. X position given in window coordinates. + IMGUI_API void NewLine(); // undo a SameLine() or force a new line when in an horizontal-layout context. + IMGUI_API void Spacing(); // add vertical spacing. + IMGUI_API void Dummy(const ImVec2& size); // add a dummy item of given size. unlike InvisibleButton(), Dummy() won't take the mouse click or be navigable into. + IMGUI_API void Indent(float indent_w = 0.0f); // move content position toward the right, by indent_w, or style.IndentSpacing if indent_w <= 0 + IMGUI_API void Unindent(float indent_w = 0.0f); // move content position back to the left, by indent_w, or style.IndentSpacing if indent_w <= 0 + IMGUI_API void BeginGroup(); // lock horizontal starting position + IMGUI_API void EndGroup(); // unlock horizontal starting position + capture the whole group bounding box into one "item" (so you can use IsItemHovered() or layout primitives such as SameLine() on whole group, etc.) 
+ IMGUI_API ImVec2 GetCursorPos(); // cursor position in window coordinates (relative to window position) + IMGUI_API float GetCursorPosX(); // (some functions are using window-relative coordinates, such as: GetCursorPos, GetCursorStartPos, GetContentRegionMax, GetWindowContentRegion* etc. + IMGUI_API float GetCursorPosY(); // other functions such as GetCursorScreenPos or everything in ImDrawList:: + IMGUI_API void SetCursorPos(const ImVec2& local_pos); // are using the main, absolute coordinate system. + IMGUI_API void SetCursorPosX(float local_x); // GetWindowPos() + GetCursorPos() == GetCursorScreenPos() etc.) + IMGUI_API void SetCursorPosY(float local_y); // + IMGUI_API ImVec2 GetCursorStartPos(); // initial cursor position in window coordinates + IMGUI_API ImVec2 GetCursorScreenPos(); // cursor position in absolute screen coordinates [0..io.DisplaySize] (useful to work with ImDrawList API) + IMGUI_API void SetCursorScreenPos(const ImVec2& pos); // cursor position in absolute screen coordinates [0..io.DisplaySize] + IMGUI_API void AlignTextToFramePadding(); // vertically align upcoming text baseline to FramePadding.y so that it will align properly to regularly framed items (call if you have text on a line before a framed item) + IMGUI_API float GetTextLineHeight(); // ~ FontSize + IMGUI_API float GetTextLineHeightWithSpacing(); // ~ FontSize + style.ItemSpacing.y (distance in pixels between 2 consecutive lines of text) + IMGUI_API float GetFrameHeight(); // ~ FontSize + style.FramePadding.y * 2 + IMGUI_API float GetFrameHeightWithSpacing(); // ~ FontSize + style.FramePadding.y * 2 + style.ItemSpacing.y (distance in pixels between 2 consecutive lines of framed widgets) + + // ID stack/scopes + // - Read the FAQ for more details about how ID are handled in dear imgui. If you are creating widgets in a loop you most + // likely want to push a unique identifier (e.g. object pointer, loop index) to uniquely differentiate them. 
+ // - The resulting ID are hashes of the entire stack. + // - You can also use the "Label##foobar" syntax within widget label to distinguish them from each others. + // - In this header file we use the "label"/"name" terminology to denote a string that will be displayed and used as an ID, + // whereas "str_id" denote a string that is only used as an ID and not normally displayed. + IMGUI_API void PushID(const char* str_id); // push string into the ID stack (will hash string). + IMGUI_API void PushID(const char* str_id_begin, const char* str_id_end); // push string into the ID stack (will hash string). + IMGUI_API void PushID(const void* ptr_id); // push pointer into the ID stack (will hash pointer). + IMGUI_API void PushID(int int_id); // push integer into the ID stack (will hash integer). + IMGUI_API void PopID(); // pop from the ID stack. + IMGUI_API ImGuiID GetID(const char* str_id); // calculate unique ID (hash of whole ID stack + given parameter). e.g. if you want to query into ImGuiStorage yourself + IMGUI_API ImGuiID GetID(const char* str_id_begin, const char* str_id_end); + IMGUI_API ImGuiID GetID(const void* ptr_id); + + // Widgets: Text + IMGUI_API void TextUnformatted(const char* text, const char* text_end = NULL); // raw text without formatting. Roughly equivalent to Text("%s", text) but: A) doesn't require null terminated string if 'text_end' is specified, B) it's faster, no memory copy is done, no buffer size limits, recommended for long chunks of text. + IMGUI_API void Text(const char* fmt, ...) IM_FMTARGS(1); // formatted text + IMGUI_API void TextV(const char* fmt, va_list args) IM_FMTLIST(1); + IMGUI_API void TextColored(const ImVec4& col, const char* fmt, ...) IM_FMTARGS(2); // shortcut for PushStyleColor(ImGuiCol_Text, col); Text(fmt, ...); PopStyleColor(); + IMGUI_API void TextColoredV(const ImVec4& col, const char* fmt, va_list args) IM_FMTLIST(2); + IMGUI_API void TextDisabled(const char* fmt, ...) 
IM_FMTARGS(1); // shortcut for PushStyleColor(ImGuiCol_Text, style.Colors[ImGuiCol_TextDisabled]); Text(fmt, ...); PopStyleColor(); + IMGUI_API void TextDisabledV(const char* fmt, va_list args) IM_FMTLIST(1); + IMGUI_API void TextWrapped(const char* fmt, ...) IM_FMTARGS(1); // shortcut for PushTextWrapPos(0.0f); Text(fmt, ...); PopTextWrapPos();. Note that this won't work on an auto-resizing window if there are no other widgets to extend the window width, you may need to set a size using SetNextWindowSize(). + IMGUI_API void TextWrappedV(const char* fmt, va_list args) IM_FMTLIST(1); + IMGUI_API void LabelText(const char* label, const char* fmt, ...) IM_FMTARGS(2); // display text+label aligned the same way as value+label widgets + IMGUI_API void LabelTextV(const char* label, const char* fmt, va_list args) IM_FMTLIST(2); + IMGUI_API void BulletText(const char* fmt, ...) IM_FMTARGS(1); // shortcut for Bullet()+Text() + IMGUI_API void BulletTextV(const char* fmt, va_list args) IM_FMTLIST(1); + + // Widgets: Main + // - Most widgets return true when the value has been changed or when pressed/selected + // - You may also use one of the many IsItemXXX functions (e.g. IsItemActive, IsItemHovered, etc.) to query widget state. + IMGUI_API bool Button(const char* label, const ImVec2& size = ImVec2(0, 0)); // button + IMGUI_API bool SmallButton(const char* label); // button with FramePadding=(0,0) to easily embed within text + IMGUI_API bool InvisibleButton(const char* str_id, const ImVec2& size, ImGuiButtonFlags flags = 0); // flexible button behavior without the visuals, frequently useful to build custom behaviors using the public api (along with IsItemActive, IsItemHovered, etc.) 
+ IMGUI_API bool ArrowButton(const char* str_id, ImGuiDir dir); // square button with an arrow shape + IMGUI_API void Image(ImTextureID user_texture_id, const ImVec2& size, const ImVec2& uv0 = ImVec2(0, 0), const ImVec2& uv1 = ImVec2(1,1), const ImVec4& tint_col = ImVec4(1,1,1,1), const ImVec4& border_col = ImVec4(0,0,0,0)); + IMGUI_API bool ImageButton(ImTextureID user_texture_id, const ImVec2& size, const ImVec2& uv0 = ImVec2(0, 0), const ImVec2& uv1 = ImVec2(1,1), int frame_padding = -1, const ImVec4& bg_col = ImVec4(0,0,0,0), const ImVec4& tint_col = ImVec4(1,1,1,1)); // <0 frame_padding uses default frame padding settings. 0 for no padding + IMGUI_API bool Checkbox(const char* label, bool* v); + IMGUI_API bool CheckboxFlags(const char* label, int* flags, int flags_value); + IMGUI_API bool CheckboxFlags(const char* label, unsigned int* flags, unsigned int flags_value); + IMGUI_API bool RadioButton(const char* label, bool active); // use with e.g. if (RadioButton("one", my_value==1)) { my_value = 1; } + IMGUI_API bool RadioButton(const char* label, int* v, int v_button); // shortcut to handle the above pattern when value is an integer + IMGUI_API void ProgressBar(float fraction, const ImVec2& size_arg = ImVec2(-FLT_MIN, 0), const char* overlay = NULL); + IMGUI_API void Bullet(); // draw a small circle + keep the cursor on the same line. advance cursor x position by GetTreeNodeToLabelSpacing(), same distance that TreeNode() uses + + // Widgets: Combo Box + // - The BeginCombo()/EndCombo() api allows you to manage your contents and selection state however you want it, by creating e.g. Selectable() items. + // - The old Combo() api are helpers over BeginCombo()/EndCombo() which are kept available for convenience purpose. + IMGUI_API bool BeginCombo(const char* label, const char* preview_value, ImGuiComboFlags flags = 0); + IMGUI_API void EndCombo(); // only call EndCombo() if BeginCombo() returns true! 
+ IMGUI_API bool Combo(const char* label, int* current_item, const char* const items[], int items_count, int popup_max_height_in_items = -1); + IMGUI_API bool Combo(const char* label, int* current_item, const char* items_separated_by_zeros, int popup_max_height_in_items = -1); // Separate items with \0 within a string, end item-list with \0\0. e.g. "One\0Two\0Three\0" + IMGUI_API bool Combo(const char* label, int* current_item, bool(*items_getter)(void* data, int idx, const char** out_text), void* data, int items_count, int popup_max_height_in_items = -1); + + // Widgets: Drag Sliders + // - CTRL+Click on any drag box to turn them into an input box. Manually input values aren't clamped and can go off-bounds. + // - For all the Float2/Float3/Float4/Int2/Int3/Int4 versions of every functions, note that a 'float v[X]' function argument is the same as 'float* v', the array syntax is just a way to document the number of elements that are expected to be accessible. You can pass address of your first element out of a contiguous set, e.g. &myvector.x + // - Adjust format string to decorate the value with a prefix, a suffix, or adapt the editing and display precision e.g. "%.3f" -> 1.234; "%5.2f secs" -> 01.23 secs; "Biscuit: %.0f" -> Biscuit: 1; etc. + // - Format string may also be set to NULL or use the default format ("%f" or "%d"). + // - Speed are per-pixel of mouse movement (v_speed=0.2f: mouse needs to move by 5 pixels to increase value by 1). For gamepad/keyboard navigation, minimum speed is Max(v_speed, minimum_step_at_given_precision). + // - Use v_min < v_max to clamp edits to given limits. Note that CTRL+Click manual input can override those limits. + // - Use v_max = FLT_MAX / INT_MAX etc to avoid clamping to a maximum, same with v_min = -FLT_MAX / INT_MIN to avoid clamping to a minimum. + // - We use the same sets of flags for DragXXX() and SliderXXX() functions as the features are the same and it makes it easier to swap them. 
+ // - Legacy: Pre-1.78 there are DragXXX() function signatures that takes a final `float power=1.0f' argument instead of the `ImGuiSliderFlags flags=0' argument. + // If you get a warning converting a float to ImGuiSliderFlags, read https://github.com/ocornut/imgui/issues/3361 + IMGUI_API bool DragFloat(const char* label, float* v, float v_speed = 1.0f, float v_min = 0.0f, float v_max = 0.0f, const char* format = "%.3f", ImGuiSliderFlags flags = 0); // If v_min >= v_max we have no bound + IMGUI_API bool DragFloat2(const char* label, float v[2], float v_speed = 1.0f, float v_min = 0.0f, float v_max = 0.0f, const char* format = "%.3f", ImGuiSliderFlags flags = 0); + IMGUI_API bool DragFloat3(const char* label, float v[3], float v_speed = 1.0f, float v_min = 0.0f, float v_max = 0.0f, const char* format = "%.3f", ImGuiSliderFlags flags = 0); + IMGUI_API bool DragFloat4(const char* label, float v[4], float v_speed = 1.0f, float v_min = 0.0f, float v_max = 0.0f, const char* format = "%.3f", ImGuiSliderFlags flags = 0); + IMGUI_API bool DragFloatRange2(const char* label, float* v_current_min, float* v_current_max, float v_speed = 1.0f, float v_min = 0.0f, float v_max = 0.0f, const char* format = "%.3f", const char* format_max = NULL, ImGuiSliderFlags flags = 0); + IMGUI_API bool DragInt(const char* label, int* v, float v_speed = 1.0f, int v_min = 0, int v_max = 0, const char* format = "%d", ImGuiSliderFlags flags = 0); // If v_min >= v_max we have no bound + IMGUI_API bool DragInt2(const char* label, int v[2], float v_speed = 1.0f, int v_min = 0, int v_max = 0, const char* format = "%d", ImGuiSliderFlags flags = 0); + IMGUI_API bool DragInt3(const char* label, int v[3], float v_speed = 1.0f, int v_min = 0, int v_max = 0, const char* format = "%d", ImGuiSliderFlags flags = 0); + IMGUI_API bool DragInt4(const char* label, int v[4], float v_speed = 1.0f, int v_min = 0, int v_max = 0, const char* format = "%d", ImGuiSliderFlags flags = 0); + IMGUI_API bool 
DragIntRange2(const char* label, int* v_current_min, int* v_current_max, float v_speed = 1.0f, int v_min = 0, int v_max = 0, const char* format = "%d", const char* format_max = NULL, ImGuiSliderFlags flags = 0); + IMGUI_API bool DragScalar(const char* label, ImGuiDataType data_type, void* p_data, float v_speed, const void* p_min = NULL, const void* p_max = NULL, const char* format = NULL, ImGuiSliderFlags flags = 0); + IMGUI_API bool DragScalarN(const char* label, ImGuiDataType data_type, void* p_data, int components, float v_speed, const void* p_min = NULL, const void* p_max = NULL, const char* format = NULL, ImGuiSliderFlags flags = 0); + + // Widgets: Regular Sliders + // - CTRL+Click on any slider to turn them into an input box. Manually input values aren't clamped and can go off-bounds. + // - Adjust format string to decorate the value with a prefix, a suffix, or adapt the editing and display precision e.g. "%.3f" -> 1.234; "%5.2f secs" -> 01.23 secs; "Biscuit: %.0f" -> Biscuit: 1; etc. + // - Format string may also be set to NULL or use the default format ("%f" or "%d"). + // - Legacy: Pre-1.78 there are SliderXXX() function signatures that takes a final `float power=1.0f' argument instead of the `ImGuiSliderFlags flags=0' argument. + // If you get a warning converting a float to ImGuiSliderFlags, read https://github.com/ocornut/imgui/issues/3361 + IMGUI_API bool SliderFloat(const char* label, float* v, float v_min, float v_max, const char* format = "%.3f", ImGuiSliderFlags flags = 0); // adjust format to decorate the value with a prefix or a suffix for in-slider labels or unit display. 
+ IMGUI_API bool SliderFloat2(const char* label, float v[2], float v_min, float v_max, const char* format = "%.3f", ImGuiSliderFlags flags = 0); + IMGUI_API bool SliderFloat3(const char* label, float v[3], float v_min, float v_max, const char* format = "%.3f", ImGuiSliderFlags flags = 0); + IMGUI_API bool SliderFloat4(const char* label, float v[4], float v_min, float v_max, const char* format = "%.3f", ImGuiSliderFlags flags = 0); + IMGUI_API bool SliderAngle(const char* label, float* v_rad, float v_degrees_min = -360.0f, float v_degrees_max = +360.0f, const char* format = "%.0f deg", ImGuiSliderFlags flags = 0); + IMGUI_API bool SliderInt(const char* label, int* v, int v_min, int v_max, const char* format = "%d", ImGuiSliderFlags flags = 0); + IMGUI_API bool SliderInt2(const char* label, int v[2], int v_min, int v_max, const char* format = "%d", ImGuiSliderFlags flags = 0); + IMGUI_API bool SliderInt3(const char* label, int v[3], int v_min, int v_max, const char* format = "%d", ImGuiSliderFlags flags = 0); + IMGUI_API bool SliderInt4(const char* label, int v[4], int v_min, int v_max, const char* format = "%d", ImGuiSliderFlags flags = 0); + IMGUI_API bool SliderScalar(const char* label, ImGuiDataType data_type, void* p_data, const void* p_min, const void* p_max, const char* format = NULL, ImGuiSliderFlags flags = 0); + IMGUI_API bool SliderScalarN(const char* label, ImGuiDataType data_type, void* p_data, int components, const void* p_min, const void* p_max, const char* format = NULL, ImGuiSliderFlags flags = 0); + IMGUI_API bool VSliderFloat(const char* label, const ImVec2& size, float* v, float v_min, float v_max, const char* format = "%.3f", ImGuiSliderFlags flags = 0); + IMGUI_API bool VSliderInt(const char* label, const ImVec2& size, int* v, int v_min, int v_max, const char* format = "%d", ImGuiSliderFlags flags = 0); + IMGUI_API bool VSliderScalar(const char* label, const ImVec2& size, ImGuiDataType data_type, void* p_data, const void* p_min, const void* 
p_max, const char* format = NULL, ImGuiSliderFlags flags = 0); + + // Widgets: Input with Keyboard + // - If you want to use InputText() with std::string or any custom dynamic string type, see misc/cpp/imgui_stdlib.h and comments in imgui_demo.cpp. + // - Most of the ImGuiInputTextFlags flags are only useful for InputText() and not for InputFloatX, InputIntX, InputDouble etc. + IMGUI_API bool InputText(const char* label, char* buf, size_t buf_size, ImGuiInputTextFlags flags = 0, ImGuiInputTextCallback callback = NULL, void* user_data = NULL); + IMGUI_API bool InputTextMultiline(const char* label, char* buf, size_t buf_size, const ImVec2& size = ImVec2(0, 0), ImGuiInputTextFlags flags = 0, ImGuiInputTextCallback callback = NULL, void* user_data = NULL); + IMGUI_API bool InputTextWithHint(const char* label, const char* hint, char* buf, size_t buf_size, ImGuiInputTextFlags flags = 0, ImGuiInputTextCallback callback = NULL, void* user_data = NULL); + IMGUI_API bool InputFloat(const char* label, float* v, float step = 0.0f, float step_fast = 0.0f, const char* format = "%.3f", ImGuiInputTextFlags flags = 0); + IMGUI_API bool InputFloat2(const char* label, float v[2], const char* format = "%.3f", ImGuiInputTextFlags flags = 0); + IMGUI_API bool InputFloat3(const char* label, float v[3], const char* format = "%.3f", ImGuiInputTextFlags flags = 0); + IMGUI_API bool InputFloat4(const char* label, float v[4], const char* format = "%.3f", ImGuiInputTextFlags flags = 0); + IMGUI_API bool InputInt(const char* label, int* v, int step = 1, int step_fast = 100, ImGuiInputTextFlags flags = 0); + IMGUI_API bool InputInt2(const char* label, int v[2], ImGuiInputTextFlags flags = 0); + IMGUI_API bool InputInt3(const char* label, int v[3], ImGuiInputTextFlags flags = 0); + IMGUI_API bool InputInt4(const char* label, int v[4], ImGuiInputTextFlags flags = 0); + IMGUI_API bool InputDouble(const char* label, double* v, double step = 0.0, double step_fast = 0.0, const char* format = "%.6f", 
ImGuiInputTextFlags flags = 0); + IMGUI_API bool InputScalar(const char* label, ImGuiDataType data_type, void* p_data, const void* p_step = NULL, const void* p_step_fast = NULL, const char* format = NULL, ImGuiInputTextFlags flags = 0); + IMGUI_API bool InputScalarN(const char* label, ImGuiDataType data_type, void* p_data, int components, const void* p_step = NULL, const void* p_step_fast = NULL, const char* format = NULL, ImGuiInputTextFlags flags = 0); + + // Widgets: Color Editor/Picker (tip: the ColorEdit* functions have a little color square that can be left-clicked to open a picker, and right-clicked to open an option menu.) + // - Note that in C++ a 'float v[X]' function argument is the _same_ as 'float* v', the array syntax is just a way to document the number of elements that are expected to be accessible. + // - You can pass the address of a first float element out of a contiguous structure, e.g. &myvector.x + IMGUI_API bool ColorEdit3(const char* label, float col[3], ImGuiColorEditFlags flags = 0); + IMGUI_API bool ColorEdit4(const char* label, float col[4], ImGuiColorEditFlags flags = 0); + IMGUI_API bool ColorPicker3(const char* label, float col[3], ImGuiColorEditFlags flags = 0); + IMGUI_API bool ColorPicker4(const char* label, float col[4], ImGuiColorEditFlags flags = 0, const float* ref_col = NULL); + IMGUI_API bool ColorButton(const char* desc_id, const ImVec4& col, ImGuiColorEditFlags flags = 0, ImVec2 size = ImVec2(0, 0)); // display a color square/button, hover for details, return true when pressed. + IMGUI_API void SetColorEditOptions(ImGuiColorEditFlags flags); // initialize current options (generally on application startup) if you want to select a default format, picker type, etc. User will be able to change many settings, unless you pass the _NoOptions flag to your calls. 
+ + // Widgets: Trees + // - TreeNode functions return true when the node is open, in which case you need to also call TreePop() when you are finished displaying the tree node contents. + IMGUI_API bool TreeNode(const char* label); + IMGUI_API bool TreeNode(const char* str_id, const char* fmt, ...) IM_FMTARGS(2); // helper variation to easily decorrelate the id from the displayed string. Read the FAQ about why and how to use ID. to align arbitrary text at the same level as a TreeNode() you can use Bullet(). + IMGUI_API bool TreeNode(const void* ptr_id, const char* fmt, ...) IM_FMTARGS(2); // " + IMGUI_API bool TreeNodeV(const char* str_id, const char* fmt, va_list args) IM_FMTLIST(2); + IMGUI_API bool TreeNodeV(const void* ptr_id, const char* fmt, va_list args) IM_FMTLIST(2); + IMGUI_API bool TreeNodeEx(const char* label, ImGuiTreeNodeFlags flags = 0); + IMGUI_API bool TreeNodeEx(const char* str_id, ImGuiTreeNodeFlags flags, const char* fmt, ...) IM_FMTARGS(3); + IMGUI_API bool TreeNodeEx(const void* ptr_id, ImGuiTreeNodeFlags flags, const char* fmt, ...) IM_FMTARGS(3); + IMGUI_API bool TreeNodeExV(const char* str_id, ImGuiTreeNodeFlags flags, const char* fmt, va_list args) IM_FMTLIST(3); + IMGUI_API bool TreeNodeExV(const void* ptr_id, ImGuiTreeNodeFlags flags, const char* fmt, va_list args) IM_FMTLIST(3); + IMGUI_API void TreePush(const char* str_id); // ~ Indent()+PushID(). Already called by TreeNode() when returning true, but you can call TreePush/TreePop yourself if desired. + IMGUI_API void TreePush(const void* ptr_id = NULL); // " + IMGUI_API void TreePop(); // ~ Unindent()+PopID() + IMGUI_API float GetTreeNodeToLabelSpacing(); // horizontal distance preceding label when using TreeNode*() or Bullet() == (g.FontSize + style.FramePadding.x*2) for a regular unframed TreeNode + IMGUI_API bool CollapsingHeader(const char* label, ImGuiTreeNodeFlags flags = 0); // if returning 'true' the header is open. doesn't indent nor push on ID stack. 
user doesn't have to call TreePop(). + IMGUI_API bool CollapsingHeader(const char* label, bool* p_visible, ImGuiTreeNodeFlags flags = 0); // when 'p_visible != NULL': if '*p_visible==true' display an additional small close button on upper right of the header which will set the bool to false when clicked, if '*p_visible==false' don't display the header. + IMGUI_API void SetNextItemOpen(bool is_open, ImGuiCond cond = 0); // set next TreeNode/CollapsingHeader open state. + + // Widgets: Selectables + // - A selectable highlights when hovered, and can display another color when selected. + // - Neighboring selectables extend their highlight bounds in order to leave no gap between them. This is so a series of selected Selectable appear contiguous. + IMGUI_API bool Selectable(const char* label, bool selected = false, ImGuiSelectableFlags flags = 0, const ImVec2& size = ImVec2(0, 0)); // "bool selected" carry the selection state (read-only). When Selectable() is clicked it returns true so you can modify your selection state. size.x==0.0: use remaining width, size.x>0.0: specify width. size.y==0.0: use label height, size.y>0.0: specify height + IMGUI_API bool Selectable(const char* label, bool* p_selected, ImGuiSelectableFlags flags = 0, const ImVec2& size = ImVec2(0, 0)); // "bool* p_selected" point to the selection state (read-write), as a convenient helper. + + // Widgets: List Boxes + // - FIXME: To be consistent with all the newer API, ListBoxHeader/ListBoxFooter should in reality be called BeginListBox/EndListBox. Will rename them. 
+ IMGUI_API bool ListBox(const char* label, int* current_item, const char* const items[], int items_count, int height_in_items = -1); + IMGUI_API bool ListBox(const char* label, int* current_item, bool (*items_getter)(void* data, int idx, const char** out_text), void* data, int items_count, int height_in_items = -1); + IMGUI_API bool ListBoxHeader(const char* label, const ImVec2& size = ImVec2(0, 0)); // use if you want to reimplement ListBox() with custom data or interactions. if the function returns true, you can output elements then call ListBoxFooter() afterwards. + IMGUI_API bool ListBoxHeader(const char* label, int items_count, int height_in_items = -1); // " + IMGUI_API void ListBoxFooter(); // terminate the scrolling region. only call ListBoxFooter() if ListBoxHeader() returned true! + + // Widgets: Data Plotting + IMGUI_API void PlotLines(const char* label, const float* values, int values_count, int values_offset = 0, const char* overlay_text = NULL, float scale_min = FLT_MAX, float scale_max = FLT_MAX, ImVec2 graph_size = ImVec2(0, 0), int stride = sizeof(float)); + IMGUI_API void PlotLines(const char* label, float(*values_getter)(void* data, int idx), void* data, int values_count, int values_offset = 0, const char* overlay_text = NULL, float scale_min = FLT_MAX, float scale_max = FLT_MAX, ImVec2 graph_size = ImVec2(0, 0)); + IMGUI_API void PlotHistogram(const char* label, const float* values, int values_count, int values_offset = 0, const char* overlay_text = NULL, float scale_min = FLT_MAX, float scale_max = FLT_MAX, ImVec2 graph_size = ImVec2(0, 0), int stride = sizeof(float)); + IMGUI_API void PlotHistogram(const char* label, float(*values_getter)(void* data, int idx), void* data, int values_count, int values_offset = 0, const char* overlay_text = NULL, float scale_min = FLT_MAX, float scale_max = FLT_MAX, ImVec2 graph_size = ImVec2(0, 0)); + + // Widgets: Value() Helpers. + // - Those are merely shortcuts to calling Text() with a format string. 
Output single value in "name: value" format (tip: freely declare more in your code to handle your types. you can add functions to the ImGui namespace) + IMGUI_API void Value(const char* prefix, bool b); + IMGUI_API void Value(const char* prefix, int v); + IMGUI_API void Value(const char* prefix, unsigned int v); + IMGUI_API void Value(const char* prefix, float v, const char* float_format = NULL); + + // Widgets: Menus + // - Use BeginMenuBar() on a window with the ImGuiWindowFlags_MenuBar flag to append to its menu bar. + // - Use BeginMainMenuBar() to create a menu bar at the top of the screen and append to it. + // - Use BeginMenu() to create a menu. You can call BeginMenu() multiple times with the same identifier to append more items to it. + IMGUI_API bool BeginMenuBar(); // append to menu-bar of current window (requires ImGuiWindowFlags_MenuBar flag set on parent window). + IMGUI_API void EndMenuBar(); // only call EndMenuBar() if BeginMenuBar() returns true! + IMGUI_API bool BeginMainMenuBar(); // create and append to a full screen menu-bar. + IMGUI_API void EndMainMenuBar(); // only call EndMainMenuBar() if BeginMainMenuBar() returns true! + IMGUI_API bool BeginMenu(const char* label, bool enabled = true); // create a sub-menu entry. only call EndMenu() if this returns true! + IMGUI_API void EndMenu(); // only call EndMenu() if BeginMenu() returns true! + IMGUI_API bool MenuItem(const char* label, const char* shortcut = NULL, bool selected = false, bool enabled = true); // return true when activated. shortcuts are displayed for convenience but not processed by ImGui at the moment + IMGUI_API bool MenuItem(const char* label, const char* shortcut, bool* p_selected, bool enabled = true); // return true when activated + toggle (*p_selected) if p_selected != NULL + + // Tooltips + // - Tooltips are windows following the mouse. They do not take focus away. + IMGUI_API void BeginTooltip(); // begin/append a tooltip window. to create full-featured tooltip (with any kind of items). 
+ IMGUI_API void EndTooltip(); + IMGUI_API void SetTooltip(const char* fmt, ...) IM_FMTARGS(1); // set a text-only tooltip, typically use with ImGui::IsItemHovered(). override any previous call to SetTooltip(). + IMGUI_API void SetTooltipV(const char* fmt, va_list args) IM_FMTLIST(1); + + // Popups, Modals + // - They block normal mouse hovering detection (and therefore most mouse interactions) behind them. + // - If not modal: they can be closed by clicking anywhere outside them, or by pressing ESCAPE. + // - Their visibility state (~bool) is held internally instead of being held by the programmer as we are used to with regular Begin*() calls. + // - The 3 properties above are related: we need to retain popup visibility state in the library because popups may be closed at any time. + // - You can bypass the hovering restriction by using ImGuiHoveredFlags_AllowWhenBlockedByPopup when calling IsItemHovered() or IsWindowHovered(). + // - IMPORTANT: Popup identifiers are relative to the current ID stack, so OpenPopup and BeginPopup generally need to be at the same level of the stack. + // This is sometimes leading to confusing mistakes. May rework this in the future. + // Popups: begin/end functions + // - BeginPopup(): query popup state, if open start appending into the window. Call EndPopup() afterwards. ImGuiWindowFlags are forwarded to the window. + // - BeginPopupModal(): block every interaction behind the window, cannot be closed by user, add a dimming background, has a title bar. + IMGUI_API bool BeginPopup(const char* str_id, ImGuiWindowFlags flags = 0); // return true if the popup is open, and you can start outputting to it. + IMGUI_API bool BeginPopupModal(const char* name, bool* p_open = NULL, ImGuiWindowFlags flags = 0); // return true if the modal is open, and you can start outputting to it. + IMGUI_API void EndPopup(); // only call EndPopup() if BeginPopupXXX() returns true! + // Popups: open/close functions + // - OpenPopup(): set popup state to open. 
ImGuiPopupFlags are available for opening options. + // - If not modal: they can be closed by clicking anywhere outside them, or by pressing ESCAPE. + // - CloseCurrentPopup(): use inside the BeginPopup()/EndPopup() scope to close manually. + // - CloseCurrentPopup() is called by default by Selectable()/MenuItem() when activated (FIXME: need some options). + // - Use ImGuiPopupFlags_NoOpenOverExistingPopup to avoid opening a popup if there's already one at the same level. This is equivalent to e.g. testing for !IsAnyPopupOpen() prior to OpenPopup(). + IMGUI_API void OpenPopup(const char* str_id, ImGuiPopupFlags popup_flags = 0); // call to mark popup as open (don't call every frame!). + IMGUI_API void OpenPopupOnItemClick(const char* str_id = NULL, ImGuiPopupFlags popup_flags = 1); // helper to open popup when clicked on last item. Default popup_flags of 1 == ImGuiPopupFlags_MouseButtonRight. (note: actually triggers on the mouse _released_ event to be consistent with popup behaviors) + IMGUI_API void CloseCurrentPopup(); // manually close the popup we have begin-ed into. + // Popups: open+begin combined functions helpers + // - Helpers to do OpenPopup+BeginPopup where the Open action is triggered by e.g. hovering an item and right-clicking. + // - They are convenient to easily create context menus, hence the name. + // - IMPORTANT: Notice that BeginPopupContextXXX takes ImGuiPopupFlags just like OpenPopup() and unlike BeginPopup(). For full consistency, we may add ImGuiWindowFlags to the BeginPopupContextXXX functions in the future. + // - IMPORTANT: we exceptionally default their flags to 1 (== ImGuiPopupFlags_MouseButtonRight) for backward compatibility with older API taking 'int mouse_button = 1' parameter, so if you add other flags remember to re-add the ImGuiPopupFlags_MouseButtonRight. + IMGUI_API bool BeginPopupContextItem(const char* str_id = NULL, ImGuiPopupFlags popup_flags = 1); // open+begin popup when clicked on last item. 
if you can pass a NULL str_id only if the previous item had an id. If you want to use that on a non-interactive item such as Text() you need to pass in an explicit ID here. read comments in .cpp! + IMGUI_API bool BeginPopupContextWindow(const char* str_id = NULL, ImGuiPopupFlags popup_flags = 1);// open+begin popup when clicked on current window. + IMGUI_API bool BeginPopupContextVoid(const char* str_id = NULL, ImGuiPopupFlags popup_flags = 1); // open+begin popup when clicked in void (where there are no windows). + // Popups: test function + // - IsPopupOpen(): return true if the popup is open at the current BeginPopup() level of the popup stack. + // - IsPopupOpen() with ImGuiPopupFlags_AnyPopupId: return true if any popup is open at the current BeginPopup() level of the popup stack. + // - IsPopupOpen() with ImGuiPopupFlags_AnyPopupId + ImGuiPopupFlags_AnyPopupLevel: return true if any popup is open. + IMGUI_API bool IsPopupOpen(const char* str_id, ImGuiPopupFlags flags = 0); // return true if the popup is open. + + // Tables + // [BETA API] API may evolve slightly! If you use this, please update to the next version when it comes out! + // - Full-featured replacement for old Columns API. + // - See Demo->Tables for demo code. + // - See top of imgui_tables.cpp for general commentary. + // - See ImGuiTableFlags_ and ImGuiTableColumnFlags_ enums for a description of available flags. + // The typical call flow is: + // - 1. Call BeginTable(). + // - 2. Optionally call TableSetupColumn() to submit column name/flags/defaults. + // - 3. Optionally call TableSetupScrollFreeze() to request scroll freezing of columns/rows. + // - 4. Optionally call TableHeadersRow() to submit a header row. Names are pulled from TableSetupColumn() data. + // - 5. Populate contents: + // - In most situations you can use TableNextRow() + TableSetColumnIndex(N) to start appending into a column. 
+ // - If you are using tables as a sort of grid, where every column is holding the same type of contents, + // you may prefer using TableNextColumn() instead of TableNextRow() + TableSetColumnIndex(). + // TableNextColumn() will automatically wrap-around into the next row if needed. + // - IMPORTANT: Comparatively to the old Columns() API, we need to call TableNextColumn() for the first column! + // - Summary of possible call flow: + // -------------------------------------------------------------------------------------------------------- + // TableNextRow() -> TableSetColumnIndex(0) -> Text("Hello 0") -> TableSetColumnIndex(1) -> Text("Hello 1") // OK + // TableNextRow() -> TableNextColumn() -> Text("Hello 0") -> TableNextColumn() -> Text("Hello 1") // OK + // TableNextColumn() -> Text("Hello 0") -> TableNextColumn() -> Text("Hello 1") // OK: TableNextColumn() automatically gets to next row! + // TableNextRow() -> Text("Hello 0") // Not OK! Missing TableSetColumnIndex() or TableNextColumn()! Text will not appear! + // -------------------------------------------------------------------------------------------------------- + // - 6. Call EndTable() + IMGUI_API bool BeginTable(const char* str_id, int column, ImGuiTableFlags flags = 0, const ImVec2& outer_size = ImVec2(0.0f, 0.0f), float inner_width = 0.0f); + IMGUI_API void EndTable(); // only call EndTable() if BeginTable() returns true! + IMGUI_API void TableNextRow(ImGuiTableRowFlags row_flags = 0, float min_row_height = 0.0f); // append into the first cell of a new row. + IMGUI_API bool TableNextColumn(); // append into the next column (or first column of next row if currently in last column). Return true when column is visible. + IMGUI_API bool TableSetColumnIndex(int column_n); // append into the specified column. Return true when column is visible. + // Tables: Headers & Columns declaration + // - Use TableSetupColumn() to specify label, resizing policy, default width/weight, id, various other flags etc. 
+ // - Use TableHeadersRow() to create a header row and automatically submit a TableHeader() for each column. + // Headers are required to perform: reordering, sorting, and opening the context menu. + // The context menu can also be made available in columns body using ImGuiTableFlags_ContextMenuInBody. + // - You may manually submit headers using TableNextRow() + TableHeader() calls, but this is only useful in + // some advanced use cases (e.g. adding custom widgets in header row). + // - Use TableSetupScrollFreeze() to lock columns/rows so they stay visible when scrolled. + IMGUI_API void TableSetupColumn(const char* label, ImGuiTableColumnFlags flags = 0, float init_width_or_weight = 0.0f, ImU32 user_id = 0); + IMGUI_API void TableSetupScrollFreeze(int cols, int rows); // lock columns/rows so they stay visible when scrolled. + IMGUI_API void TableHeadersRow(); // submit all header cells based on data provided to TableSetupColumn() + submit context menu + IMGUI_API void TableHeader(const char* label); // submit one header cell manually (rarely used) + // Tables: Sorting + // - Call TableGetSortSpecs() to retrieve latest sort specs for the table. NULL when not sorting. + // - When 'SpecsDirty == true' you should sort your data. It will be true when sorting specs have changed + // since last call, or the first time. Make sure to set 'SpecsDirty = false' after sorting, else you may + // wastefully sort your data every frame! + // - Lifetime: don't hold on to this pointer over multiple frames or past any subsequent call to BeginTable(). + IMGUI_API ImGuiTableSortSpecs* TableGetSortSpecs(); // get latest sort specs for the table (NULL if not sorting). + // Tables: Miscellaneous functions + // - Function args 'int column_n' treat the default value of -1 as the same as passing the current column index. + IMGUI_API int TableGetColumnCount(); // return number of columns (value passed to BeginTable) + IMGUI_API int TableGetColumnIndex(); // return current column index.
+ IMGUI_API int TableGetRowIndex(); // return current row index. + IMGUI_API const char* TableGetColumnName(int column_n = -1); // return "" if column didn't have a name declared by TableSetupColumn(). Pass -1 to use current column. + IMGUI_API ImGuiTableColumnFlags TableGetColumnFlags(int column_n = -1); // return column flags so you can query their Enabled/Visible/Sorted/Hovered status flags. Pass -1 to use current column. + IMGUI_API void TableSetBgColor(ImGuiTableBgTarget target, ImU32 color, int column_n = -1); // change the color of a cell, row, or column. See ImGuiTableBgTarget_ flags for details. + + // Legacy Columns API (2020: prefer using Tables!) + // - You can also use SameLine(pos_x) to mimic simplified columns. + IMGUI_API void Columns(int count = 1, const char* id = NULL, bool border = true); + IMGUI_API void NextColumn(); // next column, defaults to current row or next row if the current row is finished + IMGUI_API int GetColumnIndex(); // get current column index + IMGUI_API float GetColumnWidth(int column_index = -1); // get column width (in pixels). pass -1 to use current column + IMGUI_API void SetColumnWidth(int column_index, float width); // set column width (in pixels). pass -1 to use current column + IMGUI_API float GetColumnOffset(int column_index = -1); // get position of column line (in pixels, from the left side of the contents region). pass -1 to use current column, otherwise 0..GetColumnsCount() inclusive. column 0 is typically 0.0f + IMGUI_API void SetColumnOffset(int column_index, float offset_x); // set position of column line (in pixels, from the left side of the contents region). pass -1 to use current column + IMGUI_API int GetColumnsCount(); + + // Tab Bars, Tabs + IMGUI_API bool BeginTabBar(const char* str_id, ImGuiTabBarFlags flags = 0); // create and append into a TabBar + IMGUI_API void EndTabBar(); // only call EndTabBar() if BeginTabBar() returns true! 
+ IMGUI_API bool BeginTabItem(const char* label, bool* p_open = NULL, ImGuiTabItemFlags flags = 0); // create a Tab. Returns true if the Tab is selected. + IMGUI_API void EndTabItem(); // only call EndTabItem() if BeginTabItem() returns true! + IMGUI_API bool TabItemButton(const char* label, ImGuiTabItemFlags flags = 0); // create a Tab behaving like a button. return true when clicked. cannot be selected in the tab bar. + IMGUI_API void SetTabItemClosed(const char* tab_or_docked_window_label); // notify TabBar or Docking system of a closed tab/window ahead (useful to reduce visual flicker on reorderable tab bars). For tab-bar: call after BeginTabBar() and before Tab submissions. Otherwise call with a window name. + + // Logging/Capture + // - All text output from the interface can be captured into tty/file/clipboard. By default, tree nodes are automatically opened during logging. + IMGUI_API void LogToTTY(int auto_open_depth = -1); // start logging to tty (stdout) + IMGUI_API void LogToFile(int auto_open_depth = -1, const char* filename = NULL); // start logging to file + IMGUI_API void LogToClipboard(int auto_open_depth = -1); // start logging to OS clipboard + IMGUI_API void LogFinish(); // stop logging (close file, etc.) + IMGUI_API void LogButtons(); // helper to display buttons for logging to tty/file/clipboard + IMGUI_API void LogText(const char* fmt, ...) IM_FMTARGS(1); // pass text data straight to log (without being displayed) + + // Drag and Drop + // - If you stop calling BeginDragDropSource() the payload is preserved however it won't have a preview tooltip (we currently display a fallback "..." tooltip as replacement) + IMGUI_API bool BeginDragDropSource(ImGuiDragDropFlags flags = 0); // call when the current item is active. 
If this return true, you can call SetDragDropPayload() + EndDragDropSource() + IMGUI_API bool SetDragDropPayload(const char* type, const void* data, size_t sz, ImGuiCond cond = 0); // type is a user defined string of maximum 32 characters. Strings starting with '_' are reserved for dear imgui internal types. Data is copied and held by imgui. + IMGUI_API void EndDragDropSource(); // only call EndDragDropSource() if BeginDragDropSource() returns true! + IMGUI_API bool BeginDragDropTarget(); // call after submitting an item that may receive a payload. If this returns true, you can call AcceptDragDropPayload() + EndDragDropTarget() + IMGUI_API const ImGuiPayload* AcceptDragDropPayload(const char* type, ImGuiDragDropFlags flags = 0); // accept contents of a given type. If ImGuiDragDropFlags_AcceptBeforeDelivery is set you can peek into the payload before the mouse button is released. + IMGUI_API void EndDragDropTarget(); // only call EndDragDropTarget() if BeginDragDropTarget() returns true! + IMGUI_API const ImGuiPayload* GetDragDropPayload(); // peek directly into the current payload from anywhere. may return NULL. use ImGuiPayload::IsDataType() to test for the payload type. + + // Clipping + // - Mouse hovering is affected by ImGui::PushClipRect() calls, unlike direct calls to ImDrawList::PushClipRect() which are render only. + IMGUI_API void PushClipRect(const ImVec2& clip_rect_min, const ImVec2& clip_rect_max, bool intersect_with_current_clip_rect); + IMGUI_API void PopClipRect(); + + // Focus, Activation + // - Prefer using "SetItemDefaultFocus()" over "if (IsWindowAppearing()) SetScrollHereY()" when applicable to signify "this is the default item" + IMGUI_API void SetItemDefaultFocus(); // make last item the default focused item of a window. + IMGUI_API void SetKeyboardFocusHere(int offset = 0); // focus keyboard on the next widget. Use positive 'offset' to access sub components of a multiple component widget. Use -1 to access previous widget. 
+ + // Item/Widgets Utilities + // - Most of the functions are referring to the last/previous item we submitted. + // - See Demo Window under "Widgets->Querying Status" for an interactive visualization of most of those functions. + IMGUI_API bool IsItemHovered(ImGuiHoveredFlags flags = 0); // is the last item hovered? (and usable, aka not blocked by a popup, etc.). See ImGuiHoveredFlags for more options. + IMGUI_API bool IsItemActive(); // is the last item active? (e.g. button being held, text field being edited. This will continuously return true while holding mouse button on an item. Items that don't interact will always return false) + IMGUI_API bool IsItemFocused(); // is the last item focused for keyboard/gamepad navigation? + IMGUI_API bool IsItemClicked(ImGuiMouseButton mouse_button = 0); // is the last item clicked? (e.g. button/node just clicked on) == IsMouseClicked(mouse_button) && IsItemHovered() + IMGUI_API bool IsItemVisible(); // is the last item visible? (items may be out of sight because of clipping/scrolling) + IMGUI_API bool IsItemEdited(); // did the last item modify its underlying value this frame? or was pressed? This is generally the same as the "bool" return value of many widgets. + IMGUI_API bool IsItemActivated(); // was the last item just made active (item was previously inactive). + IMGUI_API bool IsItemDeactivated(); // was the last item just made inactive (item was previously active). Useful for Undo/Redo patterns with widgets that require continuous editing. + IMGUI_API bool IsItemDeactivatedAfterEdit(); // was the last item just made inactive and made a value change when it was active? (e.g. Slider/Drag moved). Useful for Undo/Redo patterns with widgets that require continuous editing. Note that you may get false positives (some widgets such as Combo()/ListBox()/Selectable() will return true even when clicking an already selected item). + IMGUI_API bool IsItemToggledOpen(); // was the last item open state toggled?
set by TreeNode(). + IMGUI_API bool IsAnyItemHovered(); // is any item hovered? + IMGUI_API bool IsAnyItemActive(); // is any item active? + IMGUI_API bool IsAnyItemFocused(); // is any item focused? + IMGUI_API ImVec2 GetItemRectMin(); // get upper-left bounding rectangle of the last item (screen space) + IMGUI_API ImVec2 GetItemRectMax(); // get lower-right bounding rectangle of the last item (screen space) + IMGUI_API ImVec2 GetItemRectSize(); // get size of last item + IMGUI_API void SetItemAllowOverlap(); // allow last item to be overlapped by a subsequent item. sometimes useful with invisible buttons, selectables, etc. to catch unused area. + + // Miscellaneous Utilities + IMGUI_API bool IsRectVisible(const ImVec2& size); // test if rectangle (of given size, starting from cursor position) is visible / not clipped. + IMGUI_API bool IsRectVisible(const ImVec2& rect_min, const ImVec2& rect_max); // test if rectangle (in screen space) is visible / not clipped. to perform coarse clipping on user's side. + IMGUI_API double GetTime(); // get global imgui time. incremented by io.DeltaTime every frame. + IMGUI_API int GetFrameCount(); // get global imgui frame count. incremented by 1 every frame. + IMGUI_API ImDrawList* GetBackgroundDrawList(); // this draw list will be the first rendering one. Useful to quickly draw shapes/text behind dear imgui contents. + IMGUI_API ImDrawList* GetForegroundDrawList(); // this draw list will be the last rendered one. Useful to quickly draw shapes/text over dear imgui contents. + IMGUI_API ImDrawListSharedData* GetDrawListSharedData(); // you may use this when creating your own ImDrawList instances. + IMGUI_API const char* GetStyleColorName(ImGuiCol idx); // get a string corresponding to the enum value (for display, saving, etc.). 
+ IMGUI_API void SetStateStorage(ImGuiStorage* storage); // replace current window storage with our own (if you want to manipulate it yourself, typically clear subsection of it) + IMGUI_API ImGuiStorage* GetStateStorage(); + IMGUI_API void CalcListClipping(int items_count, float items_height, int* out_items_display_start, int* out_items_display_end); // calculate coarse clipping for large list of evenly sized items. Prefer using the ImGuiListClipper higher-level helper if you can. + IMGUI_API bool BeginChildFrame(ImGuiID id, const ImVec2& size, ImGuiWindowFlags flags = 0); // helper to create a child window / scrolling region that looks like a normal widget frame + IMGUI_API void EndChildFrame(); // always call EndChildFrame() regardless of BeginChildFrame() return values (which indicates a collapsed/clipped window) + + // Text Utilities + IMGUI_API ImVec2 CalcTextSize(const char* text, const char* text_end = NULL, bool hide_text_after_double_hash = false, float wrap_width = -1.0f); + + // Color Utilities + IMGUI_API ImVec4 ColorConvertU32ToFloat4(ImU32 in); + IMGUI_API ImU32 ColorConvertFloat4ToU32(const ImVec4& in); + IMGUI_API void ColorConvertRGBtoHSV(float r, float g, float b, float& out_h, float& out_s, float& out_v); + IMGUI_API void ColorConvertHSVtoRGB(float h, float s, float v, float& out_r, float& out_g, float& out_b); + + // Inputs Utilities: Keyboard + // - For 'int user_key_index' you can use your own indices/enums according to how your backend/engine stored them in io.KeysDown[]. + // - We don't know the meaning of those value. You can use GetKeyIndex() to map a ImGuiKey_ value into the user index. + IMGUI_API int GetKeyIndex(ImGuiKey imgui_key); // map ImGuiKey_* values into user's key index. == io.KeyMap[key] + IMGUI_API bool IsKeyDown(int user_key_index); // is key being held. == io.KeysDown[user_key_index]. + IMGUI_API bool IsKeyPressed(int user_key_index, bool repeat = true); // was key pressed (went from !Down to Down)? 
if repeat=true, uses io.KeyRepeatDelay / KeyRepeatRate + IMGUI_API bool IsKeyReleased(int user_key_index); // was key released (went from Down to !Down)? + IMGUI_API int GetKeyPressedAmount(int key_index, float repeat_delay, float rate); // uses provided repeat rate/delay. return a count, most often 0 or 1 but might be >1 if RepeatRate is small enough that DeltaTime > RepeatRate + IMGUI_API void CaptureKeyboardFromApp(bool want_capture_keyboard_value = true); // attention: misleading name! manually override io.WantCaptureKeyboard flag next frame (said flag is entirely left for your application to handle). e.g. force capture keyboard when your widget is being hovered. This is equivalent to setting "io.WantCaptureKeyboard = want_capture_keyboard_value"; after the next NewFrame() call. + + // Inputs Utilities: Mouse + // - To refer to a mouse button, you may use named enums in your code e.g. ImGuiMouseButton_Left, ImGuiMouseButton_Right. + // - You can also use regular integer: it is forever guaranteed that 0=Left, 1=Right, 2=Middle. + // - Dragging operations are only reported after mouse has moved a certain distance away from the initial clicking position (see 'lock_threshold' and 'io.MouseDraggingThreshold') + IMGUI_API bool IsMouseDown(ImGuiMouseButton button); // is mouse button held? + IMGUI_API bool IsMouseClicked(ImGuiMouseButton button, bool repeat = false); // did mouse button clicked? (went from !Down to Down) + IMGUI_API bool IsMouseReleased(ImGuiMouseButton button); // did mouse button released? (went from Down to !Down) + IMGUI_API bool IsMouseDoubleClicked(ImGuiMouseButton button); // did mouse button double-clicked? (note that a double-click will also report IsMouseClicked() == true) + IMGUI_API bool IsMouseHoveringRect(const ImVec2& r_min, const ImVec2& r_max, bool clip = true);// is mouse hovering given bounding rect (in screen space). clipped by current clipping settings, but disregarding of other consideration of focus/window ordering/popup-block. 
+ IMGUI_API bool IsMousePosValid(const ImVec2* mouse_pos = NULL); // by convention we use (-FLT_MAX,-FLT_MAX) to denote that there is no mouse available + IMGUI_API bool IsAnyMouseDown(); // is any mouse button held? + IMGUI_API ImVec2 GetMousePos(); // shortcut to ImGui::GetIO().MousePos provided by user, to be consistent with other calls + IMGUI_API ImVec2 GetMousePosOnOpeningCurrentPopup(); // retrieve mouse position at the time of opening popup we have BeginPopup() into (helper to avoid user backing that value themselves) + IMGUI_API bool IsMouseDragging(ImGuiMouseButton button, float lock_threshold = -1.0f); // is mouse dragging? (if lock_threshold < 0.0f, uses io.MouseDraggingThreshold) + IMGUI_API ImVec2 GetMouseDragDelta(ImGuiMouseButton button = 0, float lock_threshold = -1.0f); // return the delta from the initial clicking position while the mouse button is pressed or was just released. This is locked and returns 0.0f until the mouse moves past a distance threshold at least once (if lock_threshold < 0.0f, uses io.MouseDraggingThreshold) + IMGUI_API void ResetMouseDragDelta(ImGuiMouseButton button = 0); // + IMGUI_API ImGuiMouseCursor GetMouseCursor(); // get desired cursor type, reset in ImGui::NewFrame(), this is updated during the frame. valid before Render(). If you use software rendering by setting io.MouseDrawCursor ImGui will render those for you + IMGUI_API void SetMouseCursor(ImGuiMouseCursor cursor_type); // set desired cursor type + IMGUI_API void CaptureMouseFromApp(bool want_capture_mouse_value = true); // attention: misleading name! manually override io.WantCaptureMouse flag next frame (said flag is entirely left for your application to handle). This is equivalent to setting "io.WantCaptureMouse = want_capture_mouse_value;" after the next NewFrame() call. + + // Clipboard Utilities + // - Also see the LogToClipboard() function to capture GUI into clipboard, or easily output text data to the clipboard.
+ IMGUI_API const char* GetClipboardText(); + IMGUI_API void SetClipboardText(const char* text); + + // Settings/.Ini Utilities + // - The disk functions are automatically called if io.IniFilename != NULL (default is "imgui.ini"). + // - Set io.IniFilename to NULL to load/save manually. Read io.WantSaveIniSettings description about handling .ini saving manually. + IMGUI_API void LoadIniSettingsFromDisk(const char* ini_filename); // call after CreateContext() and before the first call to NewFrame(). NewFrame() automatically calls LoadIniSettingsFromDisk(io.IniFilename). + IMGUI_API void LoadIniSettingsFromMemory(const char* ini_data, size_t ini_size=0); // call after CreateContext() and before the first call to NewFrame() to provide .ini data from your own data source. + IMGUI_API void SaveIniSettingsToDisk(const char* ini_filename); // this is automatically called (if io.IniFilename is not empty) a few seconds after any modification that should be reflected in the .ini file (and also by DestroyContext). + IMGUI_API const char* SaveIniSettingsToMemory(size_t* out_ini_size = NULL); // return a zero-terminated string with the .ini data which you can save by your own means. call when io.WantSaveIniSettings is set, then save data by your own means and clear io.WantSaveIniSettings. + + // Debug Utilities + IMGUI_API bool DebugCheckVersionAndDataLayout(const char* version_str, size_t sz_io, size_t sz_style, size_t sz_vec2, size_t sz_vec4, size_t sz_drawvert, size_t sz_drawidx); // This is called by IMGUI_CHECKVERSION() macro. + + // Memory Allocators + // - All those functions are not reliant on the current context. + // - If you reload the contents of imgui.cpp at runtime, you may need to call SetCurrentContext() + SetAllocatorFunctions() again because we use global storage for those.
+ IMGUI_API void SetAllocatorFunctions(void* (*alloc_func)(size_t sz, void* user_data), void (*free_func)(void* ptr, void* user_data), void* user_data = NULL); + IMGUI_API void* MemAlloc(size_t size); + IMGUI_API void MemFree(void* ptr); + +} // namespace ImGui + +//----------------------------------------------------------------------------- +// [SECTION] Flags & Enumerations +//----------------------------------------------------------------------------- + +// Flags for ImGui::Begin() +enum ImGuiWindowFlags_ +{ + ImGuiWindowFlags_None = 0, + ImGuiWindowFlags_NoTitleBar = 1 << 0, // Disable title-bar + ImGuiWindowFlags_NoResize = 1 << 1, // Disable user resizing with the lower-right grip + ImGuiWindowFlags_NoMove = 1 << 2, // Disable user moving the window + ImGuiWindowFlags_NoScrollbar = 1 << 3, // Disable scrollbars (window can still scroll with mouse or programmatically) + ImGuiWindowFlags_NoScrollWithMouse = 1 << 4, // Disable user vertically scrolling with mouse wheel. On child window, mouse wheel will be forwarded to the parent unless NoScrollbar is also set. + ImGuiWindowFlags_NoCollapse = 1 << 5, // Disable user collapsing window by double-clicking on it + ImGuiWindowFlags_AlwaysAutoResize = 1 << 6, // Resize every window to its content every frame + ImGuiWindowFlags_NoBackground = 1 << 7, // Disable drawing background color (WindowBg, etc.) and outside border. Similar as using SetNextWindowBgAlpha(0.0f). + ImGuiWindowFlags_NoSavedSettings = 1 << 8, // Never load/save settings in .ini file + ImGuiWindowFlags_NoMouseInputs = 1 << 9, // Disable catching mouse, hovering test with pass through. + ImGuiWindowFlags_MenuBar = 1 << 10, // Has a menu-bar + ImGuiWindowFlags_HorizontalScrollbar = 1 << 11, // Allow horizontal scrollbar to appear (off by default). You may use SetNextWindowContentSize(ImVec2(width,0.0f)); prior to calling Begin() to specify width. Read code in imgui_demo in the "Horizontal Scrolling" section. 
+ ImGuiWindowFlags_NoFocusOnAppearing = 1 << 12, // Disable taking focus when transitioning from hidden to visible state + ImGuiWindowFlags_NoBringToFrontOnFocus = 1 << 13, // Disable bringing window to front when taking focus (e.g. clicking on it or programmatically giving it focus) + ImGuiWindowFlags_AlwaysVerticalScrollbar= 1 << 14, // Always show vertical scrollbar (even if ContentSize.y < Size.y) + ImGuiWindowFlags_AlwaysHorizontalScrollbar=1<< 15, // Always show horizontal scrollbar (even if ContentSize.x < Size.x) + ImGuiWindowFlags_AlwaysUseWindowPadding = 1 << 16, // Ensure child windows without border uses style.WindowPadding (ignored by default for non-bordered child windows, because more convenient) + ImGuiWindowFlags_NoNavInputs = 1 << 18, // No gamepad/keyboard navigation within the window + ImGuiWindowFlags_NoNavFocus = 1 << 19, // No focusing toward this window with gamepad/keyboard navigation (e.g. skipped by CTRL+TAB) + ImGuiWindowFlags_UnsavedDocument = 1 << 20, // Append '*' to title without affecting the ID, as a convenience to avoid using the ### operator. When used in a tab/docking context, tab is selected on closure and closure is deferred by one frame to allow code to cancel the closure (with a confirmation popup, etc.) without flicker. + ImGuiWindowFlags_NoNav = ImGuiWindowFlags_NoNavInputs | ImGuiWindowFlags_NoNavFocus, + ImGuiWindowFlags_NoDecoration = ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoCollapse, + ImGuiWindowFlags_NoInputs = ImGuiWindowFlags_NoMouseInputs | ImGuiWindowFlags_NoNavInputs | ImGuiWindowFlags_NoNavFocus, + + // [Internal] + ImGuiWindowFlags_NavFlattened = 1 << 23, // [BETA] Allow gamepad/keyboard navigation to cross over parent border to this child (only use on child that have no scrolling!) + ImGuiWindowFlags_ChildWindow = 1 << 24, // Don't use! For internal use by BeginChild() + ImGuiWindowFlags_Tooltip = 1 << 25, // Don't use! 
For internal use by BeginTooltip() + ImGuiWindowFlags_Popup = 1 << 26, // Don't use! For internal use by BeginPopup() + ImGuiWindowFlags_Modal = 1 << 27, // Don't use! For internal use by BeginPopupModal() + ImGuiWindowFlags_ChildMenu = 1 << 28 // Don't use! For internal use by BeginMenu() + + // [Obsolete] + //ImGuiWindowFlags_ResizeFromAnySide = 1 << 17, // --> Set io.ConfigWindowsResizeFromEdges=true and make sure mouse cursors are supported by backend (io.BackendFlags & ImGuiBackendFlags_HasMouseCursors) +}; + +// Flags for ImGui::InputText() +enum ImGuiInputTextFlags_ +{ + ImGuiInputTextFlags_None = 0, + ImGuiInputTextFlags_CharsDecimal = 1 << 0, // Allow 0123456789.+-*/ + ImGuiInputTextFlags_CharsHexadecimal = 1 << 1, // Allow 0123456789ABCDEFabcdef + ImGuiInputTextFlags_CharsUppercase = 1 << 2, // Turn a..z into A..Z + ImGuiInputTextFlags_CharsNoBlank = 1 << 3, // Filter out spaces, tabs + ImGuiInputTextFlags_AutoSelectAll = 1 << 4, // Select entire text when first taking mouse focus + ImGuiInputTextFlags_EnterReturnsTrue = 1 << 5, // Return 'true' when Enter is pressed (as opposed to every time the value was modified). Consider looking at the IsItemDeactivatedAfterEdit() function. + ImGuiInputTextFlags_CallbackCompletion = 1 << 6, // Callback on pressing TAB (for completion handling) + ImGuiInputTextFlags_CallbackHistory = 1 << 7, // Callback on pressing Up/Down arrows (for history handling) + ImGuiInputTextFlags_CallbackAlways = 1 << 8, // Callback on each iteration. User code may query cursor position, modify text buffer. + ImGuiInputTextFlags_CallbackCharFilter = 1 << 9, // Callback on character inputs to replace or discard them. Modify 'EventChar' to replace or discard, or return 1 in callback to discard. 
+ ImGuiInputTextFlags_AllowTabInput = 1 << 10, // Pressing TAB input a '\t' character into the text field + ImGuiInputTextFlags_CtrlEnterForNewLine = 1 << 11, // In multi-line mode, unfocus with Enter, add new line with Ctrl+Enter (default is opposite: unfocus with Ctrl+Enter, add line with Enter). + ImGuiInputTextFlags_NoHorizontalScroll = 1 << 12, // Disable following the cursor horizontally + ImGuiInputTextFlags_AlwaysInsertMode = 1 << 13, // Insert mode + ImGuiInputTextFlags_ReadOnly = 1 << 14, // Read-only mode + ImGuiInputTextFlags_Password = 1 << 15, // Password mode, display all characters as '*' + ImGuiInputTextFlags_NoUndoRedo = 1 << 16, // Disable undo/redo. Note that input text owns the text data while active, if you want to provide your own undo/redo stack you need e.g. to call ClearActiveID(). + ImGuiInputTextFlags_CharsScientific = 1 << 17, // Allow 0123456789.+-*/eE (Scientific notation input) + ImGuiInputTextFlags_CallbackResize = 1 << 18, // Callback on buffer capacity changes request (beyond 'buf_size' parameter value), allowing the string to grow. Notify when the string wants to be resized (for string types which hold a cache of their Size). You will be provided a new BufSize in the callback and NEED to honor it. 
(see misc/cpp/imgui_stdlib.h for an example of using this) + ImGuiInputTextFlags_CallbackEdit = 1 << 19, // Callback on any edit (note that InputText() already returns true on edit, the callback is useful mainly to manipulate the underlying buffer while focus is active) + // [Internal] + ImGuiInputTextFlags_Multiline = 1 << 20, // For internal use by InputTextMultiline() + ImGuiInputTextFlags_NoMarkEdited = 1 << 21 // For internal use by functions using InputText() before reformatting data +}; + +// Flags for ImGui::TreeNodeEx(), ImGui::CollapsingHeader*() +enum ImGuiTreeNodeFlags_ +{ + ImGuiTreeNodeFlags_None = 0, + ImGuiTreeNodeFlags_Selected = 1 << 0, // Draw as selected + ImGuiTreeNodeFlags_Framed = 1 << 1, // Draw frame with background (e.g. for CollapsingHeader) + ImGuiTreeNodeFlags_AllowItemOverlap = 1 << 2, // Hit testing to allow subsequent widgets to overlap this one + ImGuiTreeNodeFlags_NoTreePushOnOpen = 1 << 3, // Don't do a TreePush() when open (e.g. for CollapsingHeader) = no extra indent nor pushing on ID stack + ImGuiTreeNodeFlags_NoAutoOpenOnLog = 1 << 4, // Don't automatically and temporarily open node when Logging is active (by default logging will automatically open tree nodes) + ImGuiTreeNodeFlags_DefaultOpen = 1 << 5, // Default node to be open + ImGuiTreeNodeFlags_OpenOnDoubleClick = 1 << 6, // Need double-click to open node + ImGuiTreeNodeFlags_OpenOnArrow = 1 << 7, // Only open when clicking on the arrow part. If ImGuiTreeNodeFlags_OpenOnDoubleClick is also set, single-click arrow or double-click all box to open. + ImGuiTreeNodeFlags_Leaf = 1 << 8, // No collapsing, no arrow (use as a convenience for leaf nodes). + ImGuiTreeNodeFlags_Bullet = 1 << 9, // Display a bullet instead of arrow + ImGuiTreeNodeFlags_FramePadding = 1 << 10, // Use FramePadding (even for an unframed text node) to vertically align text baseline to regular widget height. Equivalent to calling AlignTextToFramePadding(). 
+ ImGuiTreeNodeFlags_SpanAvailWidth = 1 << 11, // Extend hit box to the right-most edge, even if not framed. This is not the default in order to allow adding other items on the same line. In the future we may refactor the hit system to be front-to-back, allowing natural overlaps and then this can become the default. + ImGuiTreeNodeFlags_SpanFullWidth = 1 << 12, // Extend hit box to the left-most and right-most edges (bypass the indented area). + ImGuiTreeNodeFlags_NavLeftJumpsBackHere = 1 << 13, // (WIP) Nav: left direction may move to this TreeNode() from any of its children (items submitted between TreeNode and TreePop) + //ImGuiTreeNodeFlags_NoScrollOnOpen = 1 << 14, // FIXME: TODO: Disable automatic scroll on TreePop() if node got just open and contents is not visible + ImGuiTreeNodeFlags_CollapsingHeader = ImGuiTreeNodeFlags_Framed | ImGuiTreeNodeFlags_NoTreePushOnOpen | ImGuiTreeNodeFlags_NoAutoOpenOnLog +}; + +// Flags for OpenPopup*(), BeginPopupContext*(), IsPopupOpen() functions. +// - To be backward compatible with older API which took an 'int mouse_button = 1' argument, we need to treat +// small flags values as a mouse button index, so we encode the mouse button in the first few bits of the flags. +// It is therefore guaranteed to be legal to pass a mouse button index in ImGuiPopupFlags. +// - For the same reason, we exceptionally default the ImGuiPopupFlags argument of BeginPopupContextXXX functions to 1 instead of 0. +// IMPORTANT: because the default parameter is 1 (==ImGuiPopupFlags_MouseButtonRight), if you rely on the default parameter +// and want to use another flag, you need to pass in the ImGuiPopupFlags_MouseButtonRight flag explicitly. +// - Multiple buttons currently cannot be combined/or-ed in those functions (we could allow it later). +enum ImGuiPopupFlags_ +{ + ImGuiPopupFlags_None = 0, + ImGuiPopupFlags_MouseButtonLeft = 0, // For BeginPopupContext*(): open on Left Mouse release.
Guaranteed to always be == 0 (same as ImGuiMouseButton_Left) + ImGuiPopupFlags_MouseButtonRight = 1, // For BeginPopupContext*(): open on Right Mouse release. Guaranteed to always be == 1 (same as ImGuiMouseButton_Right) + ImGuiPopupFlags_MouseButtonMiddle = 2, // For BeginPopupContext*(): open on Middle Mouse release. Guaranteed to always be == 2 (same as ImGuiMouseButton_Middle) + ImGuiPopupFlags_MouseButtonMask_ = 0x1F, + ImGuiPopupFlags_MouseButtonDefault_ = 1, + ImGuiPopupFlags_NoOpenOverExistingPopup = 1 << 5, // For OpenPopup*(), BeginPopupContext*(): don't open if there's already a popup at the same level of the popup stack + ImGuiPopupFlags_NoOpenOverItems = 1 << 6, // For BeginPopupContextWindow(): don't return true when hovering items, only when hovering empty space + ImGuiPopupFlags_AnyPopupId = 1 << 7, // For IsPopupOpen(): ignore the ImGuiID parameter and test for any popup. + ImGuiPopupFlags_AnyPopupLevel = 1 << 8, // For IsPopupOpen(): search/test at any level of the popup stack (default test in the current level) + ImGuiPopupFlags_AnyPopup = ImGuiPopupFlags_AnyPopupId | ImGuiPopupFlags_AnyPopupLevel +}; + +// Flags for ImGui::Selectable() +enum ImGuiSelectableFlags_ +{ + ImGuiSelectableFlags_None = 0, + ImGuiSelectableFlags_DontClosePopups = 1 << 0, // Clicking this don't close parent popup window + ImGuiSelectableFlags_SpanAllColumns = 1 << 1, // Selectable frame can span all columns (text will still fit in current column) + ImGuiSelectableFlags_AllowDoubleClick = 1 << 2, // Generate press events on double clicks too + ImGuiSelectableFlags_Disabled = 1 << 3, // Cannot be selected, display grayed out text + ImGuiSelectableFlags_AllowItemOverlap = 1 << 4 // (WIP) Hit testing to allow subsequent widgets to overlap this one +}; + +// Flags for ImGui::BeginCombo() +enum ImGuiComboFlags_ +{ + ImGuiComboFlags_None = 0, + ImGuiComboFlags_PopupAlignLeft = 1 << 0, // Align the popup toward the left by default + ImGuiComboFlags_HeightSmall = 1 << 1, // Max 
~4 items visible. Tip: If you want your combo popup to be a specific size you can use SetNextWindowSizeConstraints() prior to calling BeginCombo() + ImGuiComboFlags_HeightRegular = 1 << 2, // Max ~8 items visible (default) + ImGuiComboFlags_HeightLarge = 1 << 3, // Max ~20 items visible + ImGuiComboFlags_HeightLargest = 1 << 4, // As many fitting items as possible + ImGuiComboFlags_NoArrowButton = 1 << 5, // Display on the preview box without the square arrow button + ImGuiComboFlags_NoPreview = 1 << 6, // Display only a square arrow button + ImGuiComboFlags_HeightMask_ = ImGuiComboFlags_HeightSmall | ImGuiComboFlags_HeightRegular | ImGuiComboFlags_HeightLarge | ImGuiComboFlags_HeightLargest +}; + +// Flags for ImGui::BeginTabBar() +enum ImGuiTabBarFlags_ +{ + ImGuiTabBarFlags_None = 0, + ImGuiTabBarFlags_Reorderable = 1 << 0, // Allow manually dragging tabs to re-order them + New tabs are appended at the end of list + ImGuiTabBarFlags_AutoSelectNewTabs = 1 << 1, // Automatically select new tabs when they appear + ImGuiTabBarFlags_TabListPopupButton = 1 << 2, // Disable buttons to open the tab list popup + ImGuiTabBarFlags_NoCloseWithMiddleMouseButton = 1 << 3, // Disable behavior of closing tabs (that are submitted with p_open != NULL) with middle mouse button. You can still repro this behavior on user's side with if (IsItemHovered() && IsMouseClicked(2)) *p_open = false. 
+ ImGuiTabBarFlags_NoTabListScrollingButtons = 1 << 4, // Disable scrolling buttons (apply when fitting policy is ImGuiTabBarFlags_FittingPolicyScroll) + ImGuiTabBarFlags_NoTooltip = 1 << 5, // Disable tooltips when hovering a tab + ImGuiTabBarFlags_FittingPolicyResizeDown = 1 << 6, // Resize tabs when they don't fit + ImGuiTabBarFlags_FittingPolicyScroll = 1 << 7, // Add scroll buttons when tabs don't fit + ImGuiTabBarFlags_FittingPolicyMask_ = ImGuiTabBarFlags_FittingPolicyResizeDown | ImGuiTabBarFlags_FittingPolicyScroll, + ImGuiTabBarFlags_FittingPolicyDefault_ = ImGuiTabBarFlags_FittingPolicyResizeDown +}; + +// Flags for ImGui::BeginTabItem() +enum ImGuiTabItemFlags_ +{ + ImGuiTabItemFlags_None = 0, + ImGuiTabItemFlags_UnsavedDocument = 1 << 0, // Append '*' to title without affecting the ID, as a convenience to avoid using the ### operator. Also: tab is selected on closure and closure is deferred by one frame to allow code to undo it without flicker. + ImGuiTabItemFlags_SetSelected = 1 << 1, // Trigger flag to programmatically make the tab selected when calling BeginTabItem() + ImGuiTabItemFlags_NoCloseWithMiddleMouseButton = 1 << 2, // Disable behavior of closing tabs (that are submitted with p_open != NULL) with middle mouse button. You can still repro this behavior on user's side with if (IsItemHovered() && IsMouseClicked(2)) *p_open = false. 
+ ImGuiTabItemFlags_NoPushId = 1 << 3, // Don't call PushID(tab->ID)/PopID() on BeginTabItem()/EndTabItem() + ImGuiTabItemFlags_NoTooltip = 1 << 4, // Disable tooltip for the given tab + ImGuiTabItemFlags_NoReorder = 1 << 5, // Disable reordering this tab or having another tab cross over this tab + ImGuiTabItemFlags_Leading = 1 << 6, // Enforce the tab position to the left of the tab bar (after the tab list popup button) + ImGuiTabItemFlags_Trailing = 1 << 7 // Enforce the tab position to the right of the tab bar (before the scrolling buttons) +}; + +// Flags for ImGui::BeginTable() +// [BETA API] API may evolve slightly! If you use this, please update to the next version when it comes out! +// - Important! Sizing policies have complex and subtle side effects, more so than you would expect. +// Read comments/demos carefully + experiment with live demos to get acquainted with them. +// - The DEFAULT sizing policies are: +// - Default to ImGuiTableFlags_SizingFixedFit if ScrollX is on, or if host window has ImGuiWindowFlags_AlwaysAutoResize. +// - Default to ImGuiTableFlags_SizingStretchSame if ScrollX is off. +// - When ScrollX is off: +// - Table defaults to ImGuiTableFlags_SizingStretchSame -> all Columns defaults to ImGuiTableColumnFlags_WidthStretch with same weight. +// - Columns sizing policy allowed: Stretch (default), Fixed/Auto. +// - Fixed Columns will generally obtain their requested width (unless the table cannot fit them all). +// - Stretch Columns will share the remaining width. +// - Mixed Fixed/Stretch columns is possible but has various side-effects on resizing behaviors. +// The typical use of mixing sizing policies is: any number of LEADING Fixed columns, followed by one or two TRAILING Stretch columns. +// (this is because the visible order of columns have subtle but necessary effects on how they react to manual resizing). 
+// - When ScrollX is on: +// - Table defaults to ImGuiTableFlags_SizingFixedFit -> all Columns defaults to ImGuiTableColumnFlags_WidthFixed +// - Columns sizing policy allowed: Fixed/Auto mostly. +// - Fixed Columns can be enlarged as needed. Table will show an horizontal scrollbar if needed. +// - When using auto-resizing (non-resizable) fixed columns, querying the content width to use item right-alignment e.g. SetNextItemWidth(-FLT_MIN) doesn't make sense, would create a feedback loop. +// - Using Stretch columns OFTEN DOES NOT MAKE SENSE if ScrollX is on, UNLESS you have specified a value for 'inner_width' in BeginTable(). +// If you specify a value for 'inner_width' then effectively the scrolling space is known and Stretch or mixed Fixed/Stretch columns become meaningful again. +// - Read on documentation at the top of imgui_tables.cpp for details. +enum ImGuiTableFlags_ +{ + // Features + ImGuiTableFlags_None = 0, + ImGuiTableFlags_Resizable = 1 << 0, // Enable resizing columns. + ImGuiTableFlags_Reorderable = 1 << 1, // Enable reordering columns in header row (need calling TableSetupColumn() + TableHeadersRow() to display headers) + ImGuiTableFlags_Hideable = 1 << 2, // Enable hiding/disabling columns in context menu. + ImGuiTableFlags_Sortable = 1 << 3, // Enable sorting. Call TableGetSortSpecs() to obtain sort specs. Also see ImGuiTableFlags_SortMulti and ImGuiTableFlags_SortTristate. + ImGuiTableFlags_NoSavedSettings = 1 << 4, // Disable persisting columns order, width and sort settings in the .ini file. + ImGuiTableFlags_ContextMenuInBody = 1 << 5, // Right-click on columns body/contents will display table context menu. By default it is available in TableHeadersRow(). 
+ // Decorations + ImGuiTableFlags_RowBg = 1 << 6, // Set each RowBg color with ImGuiCol_TableRowBg or ImGuiCol_TableRowBgAlt (equivalent of calling TableSetBgColor with ImGuiTableBgFlags_RowBg0 on each row manually) + ImGuiTableFlags_BordersInnerH = 1 << 7, // Draw horizontal borders between rows. + ImGuiTableFlags_BordersOuterH = 1 << 8, // Draw horizontal borders at the top and bottom. + ImGuiTableFlags_BordersInnerV = 1 << 9, // Draw vertical borders between columns. + ImGuiTableFlags_BordersOuterV = 1 << 10, // Draw vertical borders on the left and right sides. + ImGuiTableFlags_BordersH = ImGuiTableFlags_BordersInnerH | ImGuiTableFlags_BordersOuterH, // Draw horizontal borders. + ImGuiTableFlags_BordersV = ImGuiTableFlags_BordersInnerV | ImGuiTableFlags_BordersOuterV, // Draw vertical borders. + ImGuiTableFlags_BordersInner = ImGuiTableFlags_BordersInnerV | ImGuiTableFlags_BordersInnerH, // Draw inner borders. + ImGuiTableFlags_BordersOuter = ImGuiTableFlags_BordersOuterV | ImGuiTableFlags_BordersOuterH, // Draw outer borders. + ImGuiTableFlags_Borders = ImGuiTableFlags_BordersInner | ImGuiTableFlags_BordersOuter, // Draw all borders. + ImGuiTableFlags_NoBordersInBody = 1 << 11, // [ALPHA] Disable vertical borders in columns Body (borders will always appears in Headers). -> May move to style + ImGuiTableFlags_NoBordersInBodyUntilResize = 1 << 12, // [ALPHA] Disable vertical borders in columns Body until hovered for resize (borders will always appears in Headers). -> May move to style + // Sizing Policy (read above for defaults) + ImGuiTableFlags_SizingFixedFit = 1 << 13, // Columns default to _WidthFixed or _WidthAuto (if resizable or not resizable), matching contents width. + ImGuiTableFlags_SizingFixedSame = 2 << 13, // Columns default to _WidthFixed or _WidthAuto (if resizable or not resizable), matching the maximum contents width of all columns. Implicitly enable ImGuiTableFlags_NoKeepColumnsVisible. 
+ ImGuiTableFlags_SizingStretchProp = 3 << 13, // Columns default to _WidthStretch with default weights proportional to each columns contents widths. + ImGuiTableFlags_SizingStretchSame = 4 << 13, // Columns default to _WidthStretch with default weights all equal, unless overridden by TableSetupColumn(). + // Sizing Extra Options + ImGuiTableFlags_NoHostExtendX = 1 << 16, // Make outer width auto-fit to columns, overriding outer_size.x value. Only available when ScrollX/ScrollY are disabled and Stretch columns are not used. + ImGuiTableFlags_NoHostExtendY = 1 << 17, // Make outer height stop exactly at outer_size.y (prevent auto-extending table past the limit). Only available when ScrollX/ScrollY are disabled. Data below the limit will be clipped and not visible. + ImGuiTableFlags_NoKeepColumnsVisible = 1 << 18, // Disable keeping column always minimally visible when ScrollX is off and table gets too small. Not recommended if columns are resizable. + ImGuiTableFlags_PreciseWidths = 1 << 19, // Disable distributing remainder width to stretched columns (width allocation on a 100-wide table with 3 columns: Without this flag: 33,33,34. With this flag: 33,33,33). With larger number of columns, resizing will appear to be less smooth. + // Clipping + ImGuiTableFlags_NoClip = 1 << 20, // Disable clipping rectangle for every individual column (reduce draw command count, items will be able to overflow into other columns). Generally incompatible with TableSetupScrollFreeze(). + // Padding + ImGuiTableFlags_PadOuterX = 1 << 21, // Default if BordersOuterV is on. Enable outer-most padding. Generally desirable if you have headers. + ImGuiTableFlags_NoPadOuterX = 1 << 22, // Default if BordersOuterV is off. Disable outer-most padding. + ImGuiTableFlags_NoPadInnerX = 1 << 23, // Disable inner padding between columns (double inner padding if BordersOuterV is on, single inner padding if BordersOuterV is off). 
+ // Scrolling + ImGuiTableFlags_ScrollX = 1 << 24, // Enable horizontal scrolling. Require 'outer_size' parameter of BeginTable() to specify the container size. Changes default sizing policy. Because this create a child window, ScrollY is currently generally recommended when using ScrollX. + ImGuiTableFlags_ScrollY = 1 << 25, // Enable vertical scrolling. Require 'outer_size' parameter of BeginTable() to specify the container size. + // Sorting + ImGuiTableFlags_SortMulti = 1 << 26, // Hold shift when clicking headers to sort on multiple column. TableGetSortSpecs() may return specs where (SpecsCount > 1). + ImGuiTableFlags_SortTristate = 1 << 27, // Allow no sorting, disable default sorting. TableGetSortSpecs() may return specs where (SpecsCount == 0). + + // [Internal] Combinations and masks + ImGuiTableFlags_SizingMask_ = ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_SizingFixedSame | ImGuiTableFlags_SizingStretchProp | ImGuiTableFlags_SizingStretchSame + + // Obsolete names (will be removed soon) +#ifndef IMGUI_DISABLE_OBSOLETE_FUNCTIONS + //, ImGuiTableFlags_ColumnsWidthFixed = ImGuiTableFlags_SizingFixedFit, ImGuiTableFlags_ColumnsWidthStretch = ImGuiTableFlags_SizingStretchSame // WIP Tables 2020/12 + //, ImGuiTableFlags_SizingPolicyFixed = ImGuiTableFlags_SizingFixedFit, ImGuiTableFlags_SizingPolicyStretch = ImGuiTableFlags_SizingStretchSame // WIP Tables 2021/01 +#endif +}; + +// Flags for ImGui::TableSetupColumn() +enum ImGuiTableColumnFlags_ +{ + // Input configuration flags + ImGuiTableColumnFlags_None = 0, + ImGuiTableColumnFlags_DefaultHide = 1 << 0, // Default as a hidden/disabled column. + ImGuiTableColumnFlags_DefaultSort = 1 << 1, // Default as a sorting column. + ImGuiTableColumnFlags_WidthStretch = 1 << 2, // Column will stretch. Preferable with horizontal scrolling disabled (default if table sizing policy is _SizingStretchSame or _SizingStretchProp). + ImGuiTableColumnFlags_WidthFixed = 1 << 3, // Column will not stretch. 
Preferable with horizontal scrolling enabled (default if table sizing policy is _SizingFixedFit and table is resizable). + ImGuiTableColumnFlags_NoResize = 1 << 4, // Disable manual resizing. + ImGuiTableColumnFlags_NoReorder = 1 << 5, // Disable manual reordering this column, this will also prevent other columns from crossing over this column. + ImGuiTableColumnFlags_NoHide = 1 << 6, // Disable ability to hide/disable this column. + ImGuiTableColumnFlags_NoClip = 1 << 7, // Disable clipping for this column (all NoClip columns will render in a same draw command). + ImGuiTableColumnFlags_NoSort = 1 << 8, // Disable ability to sort on this field (even if ImGuiTableFlags_Sortable is set on the table). + ImGuiTableColumnFlags_NoSortAscending = 1 << 9, // Disable ability to sort in the ascending direction. + ImGuiTableColumnFlags_NoSortDescending = 1 << 10, // Disable ability to sort in the descending direction. + ImGuiTableColumnFlags_NoHeaderWidth = 1 << 11, // Disable header text width contribution to automatic column width. + ImGuiTableColumnFlags_PreferSortAscending = 1 << 12, // Make the initial sort direction Ascending when first sorting on this column (default). + ImGuiTableColumnFlags_PreferSortDescending = 1 << 13, // Make the initial sort direction Descending when first sorting on this column. + ImGuiTableColumnFlags_IndentEnable = 1 << 14, // Use current Indent value when entering cell (default for column 0). + ImGuiTableColumnFlags_IndentDisable = 1 << 15, // Ignore current Indent value when entering cell (default for columns > 0). Indentation changes _within_ the cell will still be honored. + + // Output status flags, read-only via TableGetColumnFlags() + ImGuiTableColumnFlags_IsEnabled = 1 << 20, // Status: is enabled == not hidden by user/api (referred to as "Hide" in _DefaultHide and _NoHide) flags. + ImGuiTableColumnFlags_IsVisible = 1 << 21, // Status: is visible == is enabled AND not clipped by scrolling. 
+ ImGuiTableColumnFlags_IsSorted = 1 << 22, // Status: is currently part of the sort specs + ImGuiTableColumnFlags_IsHovered = 1 << 23, // Status: is hovered by mouse + + // [Internal] Combinations and masks + ImGuiTableColumnFlags_WidthMask_ = ImGuiTableColumnFlags_WidthStretch | ImGuiTableColumnFlags_WidthFixed, + ImGuiTableColumnFlags_IndentMask_ = ImGuiTableColumnFlags_IndentEnable | ImGuiTableColumnFlags_IndentDisable, + ImGuiTableColumnFlags_StatusMask_ = ImGuiTableColumnFlags_IsEnabled | ImGuiTableColumnFlags_IsVisible | ImGuiTableColumnFlags_IsSorted | ImGuiTableColumnFlags_IsHovered, + ImGuiTableColumnFlags_NoDirectResize_ = 1 << 30 // [Internal] Disable user resizing this column directly (it may however be resized indirectly from its left edge) + + // Obsolete names (will be removed soon) +#ifndef IMGUI_DISABLE_OBSOLETE_FUNCTIONS + //ImGuiTableColumnFlags_WidthAuto = ImGuiTableColumnFlags_WidthFixed | ImGuiTableColumnFlags_NoResize, // Column will not stretch and keep resizing based on submitted contents. +#endif +}; + +// Flags for ImGui::TableNextRow() +enum ImGuiTableRowFlags_ +{ + ImGuiTableRowFlags_None = 0, + ImGuiTableRowFlags_Headers = 1 << 0 // Identify header row (set default background color + width of its contents accounted differently for auto column width) +}; + +// Enum for ImGui::TableSetBgColor() +// Background colors are rendered in 3 layers: +// - Layer 0: draw with RowBg0 color if set, otherwise draw with ColumnBg0 if set. +// - Layer 1: draw with RowBg1 color if set, otherwise draw with ColumnBg1 if set. +// - Layer 2: draw with CellBg color if set. +// The purpose of the two row/columns layers is to let you decide if a background color change should override or blend with the existing color. +// When using ImGuiTableFlags_RowBg on the table, each row has the RowBg0 color automatically set for odd/even rows. +// If you set the color of RowBg0 target, your color will override the existing RowBg0 color. 
+// If you set the color of RowBg1 or ColumnBg1 target, your color will blend over the RowBg0 color. +enum ImGuiTableBgTarget_ +{ + ImGuiTableBgTarget_None = 0, + ImGuiTableBgTarget_RowBg0 = 1, // Set row background color 0 (generally used for background, automatically set when ImGuiTableFlags_RowBg is used) + ImGuiTableBgTarget_RowBg1 = 2, // Set row background color 1 (generally used for selection marking) + ImGuiTableBgTarget_CellBg = 3 // Set cell background color (top-most color) +}; + +// Flags for ImGui::IsWindowFocused() +enum ImGuiFocusedFlags_ +{ + ImGuiFocusedFlags_None = 0, + ImGuiFocusedFlags_ChildWindows = 1 << 0, // IsWindowFocused(): Return true if any children of the window is focused + ImGuiFocusedFlags_RootWindow = 1 << 1, // IsWindowFocused(): Test from root window (top most parent of the current hierarchy) + ImGuiFocusedFlags_AnyWindow = 1 << 2, // IsWindowFocused(): Return true if any window is focused. Important: If you are trying to tell how to dispatch your low-level inputs, do NOT use this. Use 'io.WantCaptureMouse' instead! Please read the FAQ! + ImGuiFocusedFlags_RootAndChildWindows = ImGuiFocusedFlags_RootWindow | ImGuiFocusedFlags_ChildWindows +}; + +// Flags for ImGui::IsItemHovered(), ImGui::IsWindowHovered() +// Note: if you are trying to check whether your mouse should be dispatched to Dear ImGui or to your app, you should use 'io.WantCaptureMouse' instead! Please read the FAQ! +// Note: windows with the ImGuiWindowFlags_NoInputs flag are ignored by IsWindowHovered() calls. +enum ImGuiHoveredFlags_ +{ + ImGuiHoveredFlags_None = 0, // Return true if directly over the item/window, not obstructed by another window, not obstructed by an active popup or modal blocking inputs under them. 
+ ImGuiHoveredFlags_ChildWindows = 1 << 0, // IsWindowHovered() only: Return true if any children of the window is hovered + ImGuiHoveredFlags_RootWindow = 1 << 1, // IsWindowHovered() only: Test from root window (top most parent of the current hierarchy) + ImGuiHoveredFlags_AnyWindow = 1 << 2, // IsWindowHovered() only: Return true if any window is hovered + ImGuiHoveredFlags_AllowWhenBlockedByPopup = 1 << 3, // Return true even if a popup window is normally blocking access to this item/window + //ImGuiHoveredFlags_AllowWhenBlockedByModal = 1 << 4, // Return true even if a modal popup window is normally blocking access to this item/window. FIXME-TODO: Unavailable yet. + ImGuiHoveredFlags_AllowWhenBlockedByActiveItem = 1 << 5, // Return true even if an active item is blocking access to this item/window. Useful for Drag and Drop patterns. + ImGuiHoveredFlags_AllowWhenOverlapped = 1 << 6, // Return true even if the position is obstructed or overlapped by another window + ImGuiHoveredFlags_AllowWhenDisabled = 1 << 7, // Return true even if the item is disabled + ImGuiHoveredFlags_RectOnly = ImGuiHoveredFlags_AllowWhenBlockedByPopup | ImGuiHoveredFlags_AllowWhenBlockedByActiveItem | ImGuiHoveredFlags_AllowWhenOverlapped, + ImGuiHoveredFlags_RootAndChildWindows = ImGuiHoveredFlags_RootWindow | ImGuiHoveredFlags_ChildWindows +}; + +// Flags for ImGui::BeginDragDropSource(), ImGui::AcceptDragDropPayload() +enum ImGuiDragDropFlags_ +{ + ImGuiDragDropFlags_None = 0, + // BeginDragDropSource() flags + ImGuiDragDropFlags_SourceNoPreviewTooltip = 1 << 0, // By default, a successful call to BeginDragDropSource opens a tooltip so you can display a preview or description of the source contents. This flag disable this behavior. + ImGuiDragDropFlags_SourceNoDisableHover = 1 << 1, // By default, when dragging we clear data so that IsItemHovered() will return false, to avoid subsequent user code submitting tooltips. 
This flag disables this behavior so you can still call IsItemHovered() on the source item. + ImGuiDragDropFlags_SourceNoHoldToOpenOthers = 1 << 2, // Disable the behavior that allows to open tree nodes and collapsing header by holding over them while dragging a source item. + ImGuiDragDropFlags_SourceAllowNullID = 1 << 3, // Allow items such as Text(), Image() that have no unique identifier to be used as drag source, by manufacturing a temporary identifier based on their window-relative position. This is extremely unusual within the dear imgui ecosystem and so we made it explicit. + ImGuiDragDropFlags_SourceExtern = 1 << 4, // External source (from outside of dear imgui), won't attempt to read current item/window info. Will always return true. Only one Extern source can be active simultaneously. + ImGuiDragDropFlags_SourceAutoExpirePayload = 1 << 5, // Automatically expire the payload if the source ceases to be submitted (otherwise payloads are persisting while being dragged) + // AcceptDragDropPayload() flags + ImGuiDragDropFlags_AcceptBeforeDelivery = 1 << 10, // AcceptDragDropPayload() will return true even before the mouse button is released. You can then call IsDelivery() to test if the payload needs to be delivered. + ImGuiDragDropFlags_AcceptNoDrawDefaultRect = 1 << 11, // Do not draw the default highlight rectangle when hovering over target. + ImGuiDragDropFlags_AcceptNoPreviewTooltip = 1 << 12, // Request hiding the BeginDragDropSource tooltip from the BeginDragDropTarget site. + ImGuiDragDropFlags_AcceptPeekOnly = ImGuiDragDropFlags_AcceptBeforeDelivery | ImGuiDragDropFlags_AcceptNoDrawDefaultRect // For peeking ahead and inspecting the payload before delivery. +}; + +// Standard Drag and Drop payload types. You can define your own payload types using short strings. Types starting with '_' are defined by Dear ImGui. +#define IMGUI_PAYLOAD_TYPE_COLOR_3F "_COL3F" // float[3]: Standard type for colors, without alpha. User code may use this type. 
+#define IMGUI_PAYLOAD_TYPE_COLOR_4F "_COL4F" // float[4]: Standard type for colors. User code may use this type. + +// A primary data type +enum ImGuiDataType_ +{ + ImGuiDataType_S8, // signed char / char (with sensible compilers) + ImGuiDataType_U8, // unsigned char + ImGuiDataType_S16, // short + ImGuiDataType_U16, // unsigned short + ImGuiDataType_S32, // int + ImGuiDataType_U32, // unsigned int + ImGuiDataType_S64, // long long / __int64 + ImGuiDataType_U64, // unsigned long long / unsigned __int64 + ImGuiDataType_Float, // float + ImGuiDataType_Double, // double + ImGuiDataType_COUNT +}; + +// A cardinal direction +enum ImGuiDir_ +{ + ImGuiDir_None = -1, + ImGuiDir_Left = 0, + ImGuiDir_Right = 1, + ImGuiDir_Up = 2, + ImGuiDir_Down = 3, + ImGuiDir_COUNT +}; + +// A sorting direction +enum ImGuiSortDirection_ +{ + ImGuiSortDirection_None = 0, + ImGuiSortDirection_Ascending = 1, // Ascending = 0->9, A->Z etc. + ImGuiSortDirection_Descending = 2 // Descending = 9->0, Z->A etc. +}; + +// User fill ImGuiIO.KeyMap[] array with indices into the ImGuiIO.KeysDown[512] array +enum ImGuiKey_ +{ + ImGuiKey_Tab, + ImGuiKey_LeftArrow, + ImGuiKey_RightArrow, + ImGuiKey_UpArrow, + ImGuiKey_DownArrow, + ImGuiKey_PageUp, + ImGuiKey_PageDown, + ImGuiKey_Home, + ImGuiKey_End, + ImGuiKey_Insert, + ImGuiKey_Delete, + ImGuiKey_Backspace, + ImGuiKey_Space, + ImGuiKey_Enter, + ImGuiKey_Escape, + ImGuiKey_KeyPadEnter, + ImGuiKey_A, // for text edit CTRL+A: select all + ImGuiKey_C, // for text edit CTRL+C: copy + ImGuiKey_V, // for text edit CTRL+V: paste + ImGuiKey_X, // for text edit CTRL+X: cut + ImGuiKey_Y, // for text edit CTRL+Y: redo + ImGuiKey_Z, // for text edit CTRL+Z: undo + ImGuiKey_COUNT +}; + +// To test io.KeyMods (which is a combination of individual fields io.KeyCtrl, io.KeyShift, io.KeyAlt set by user/backend) +enum ImGuiKeyModFlags_ +{ + ImGuiKeyModFlags_None = 0, + ImGuiKeyModFlags_Ctrl = 1 << 0, + ImGuiKeyModFlags_Shift = 1 << 1, + ImGuiKeyModFlags_Alt = 1 << 2, + 
ImGuiKeyModFlags_Super = 1 << 3 +}; + +// Gamepad/Keyboard navigation +// Keyboard: Set io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard to enable. NewFrame() will automatically fill io.NavInputs[] based on your io.KeysDown[] + io.KeyMap[] arrays. +// Gamepad: Set io.ConfigFlags |= ImGuiConfigFlags_NavEnableGamepad to enable. Backend: set ImGuiBackendFlags_HasGamepad and fill the io.NavInputs[] fields before calling NewFrame(). Note that io.NavInputs[] is cleared by EndFrame(). +// Read instructions in imgui.cpp for more details. Download PNG/PSD at http://dearimgui.org/controls_sheets. +enum ImGuiNavInput_ +{ + // Gamepad Mapping + ImGuiNavInput_Activate, // activate / open / toggle / tweak value // e.g. Cross (PS4), A (Xbox), A (Switch), Space (Keyboard) + ImGuiNavInput_Cancel, // cancel / close / exit // e.g. Circle (PS4), B (Xbox), B (Switch), Escape (Keyboard) + ImGuiNavInput_Input, // text input / on-screen keyboard // e.g. Triang.(PS4), Y (Xbox), X (Switch), Return (Keyboard) + ImGuiNavInput_Menu, // tap: toggle menu / hold: focus, move, resize // e.g. Square (PS4), X (Xbox), Y (Switch), Alt (Keyboard) + ImGuiNavInput_DpadLeft, // move / tweak / resize window (w/ PadMenu) // e.g. D-pad Left/Right/Up/Down (Gamepads), Arrow keys (Keyboard) + ImGuiNavInput_DpadRight, // + ImGuiNavInput_DpadUp, // + ImGuiNavInput_DpadDown, // + ImGuiNavInput_LStickLeft, // scroll / move window (w/ PadMenu) // e.g. Left Analog Stick Left/Right/Up/Down + ImGuiNavInput_LStickRight, // + ImGuiNavInput_LStickUp, // + ImGuiNavInput_LStickDown, // + ImGuiNavInput_FocusPrev, // prev window (w/ PadMenu) // e.g. L1 or L2 (PS4), LB or LT (Xbox), L or ZL (Switch) + ImGuiNavInput_FocusNext, // next window (w/ PadMenu) // e.g. R1 or R2 (PS4), RB or RT (Xbox), R or ZL (Switch) + ImGuiNavInput_TweakSlow, // slower tweaks // e.g. L1 or L2 (PS4), LB or LT (Xbox), L or ZL (Switch) + ImGuiNavInput_TweakFast, // faster tweaks // e.g. 
R1 or R2 (PS4), RB or RT (Xbox), R or ZL (Switch) + + // [Internal] Don't use directly! This is used internally to differentiate keyboard from gamepad inputs for behaviors that require to differentiate them. + // Keyboard behavior that have no corresponding gamepad mapping (e.g. CTRL+TAB) will be directly reading from io.KeysDown[] instead of io.NavInputs[]. + ImGuiNavInput_KeyMenu_, // toggle menu // = io.KeyAlt + ImGuiNavInput_KeyLeft_, // move left // = Arrow keys + ImGuiNavInput_KeyRight_, // move right + ImGuiNavInput_KeyUp_, // move up + ImGuiNavInput_KeyDown_, // move down + ImGuiNavInput_COUNT, + ImGuiNavInput_InternalStart_ = ImGuiNavInput_KeyMenu_ +}; + +// Configuration flags stored in io.ConfigFlags. Set by user/application. +enum ImGuiConfigFlags_ +{ + ImGuiConfigFlags_None = 0, + ImGuiConfigFlags_NavEnableKeyboard = 1 << 0, // Master keyboard navigation enable flag. NewFrame() will automatically fill io.NavInputs[] based on io.KeysDown[]. + ImGuiConfigFlags_NavEnableGamepad = 1 << 1, // Master gamepad navigation enable flag. This is mostly to instruct your imgui backend to fill io.NavInputs[]. Backend also needs to set ImGuiBackendFlags_HasGamepad. + ImGuiConfigFlags_NavEnableSetMousePos = 1 << 2, // Instruct navigation to move the mouse cursor. May be useful on TV/console systems where moving a virtual mouse is awkward. Will update io.MousePos and set io.WantSetMousePos=true. If enabled you MUST honor io.WantSetMousePos requests in your backend, otherwise ImGui will react as if the mouse is jumping around back and forth. + ImGuiConfigFlags_NavNoCaptureKeyboard = 1 << 3, // Instruct navigation to not set the io.WantCaptureKeyboard flag when io.NavActive is set. + ImGuiConfigFlags_NoMouse = 1 << 4, // Instruct imgui to clear mouse position/buttons in NewFrame(). This allows ignoring the mouse information set by the backend. + ImGuiConfigFlags_NoMouseCursorChange = 1 << 5, // Instruct backend to not alter mouse cursor shape and visibility. 
Use if the backend cursor changes are interfering with yours and you don't want to use SetMouseCursor() to change mouse cursor. You may want to honor requests from imgui by reading GetMouseCursor() yourself instead. + + // User storage (to allow your backend/engine to communicate to code that may be shared between multiple projects. Those flags are not used by core Dear ImGui) + ImGuiConfigFlags_IsSRGB = 1 << 20, // Application is SRGB-aware. + ImGuiConfigFlags_IsTouchScreen = 1 << 21 // Application is using a touch screen instead of a mouse. +}; + +// Backend capabilities flags stored in io.BackendFlags. Set by imgui_impl_xxx or custom backend. +enum ImGuiBackendFlags_ +{ + ImGuiBackendFlags_None = 0, + ImGuiBackendFlags_HasGamepad = 1 << 0, // Backend Platform supports gamepad and currently has one connected. + ImGuiBackendFlags_HasMouseCursors = 1 << 1, // Backend Platform supports honoring GetMouseCursor() value to change the OS cursor shape. + ImGuiBackendFlags_HasSetMousePos = 1 << 2, // Backend Platform supports io.WantSetMousePos requests to reposition the OS mouse position (only used if ImGuiConfigFlags_NavEnableSetMousePos is set). + ImGuiBackendFlags_RendererHasVtxOffset = 1 << 3 // Backend Renderer supports ImDrawCmd::VtxOffset. This enables output of large meshes (64K+ vertices) while still using 16-bit indices. 
+}; + +// Enumeration for PushStyleColor() / PopStyleColor() +enum ImGuiCol_ +{ + ImGuiCol_Text, + ImGuiCol_TextDisabled, + ImGuiCol_WindowBg, // Background of normal windows + ImGuiCol_ChildBg, // Background of child windows + ImGuiCol_PopupBg, // Background of popups, menus, tooltips windows + ImGuiCol_Border, + ImGuiCol_BorderShadow, + ImGuiCol_FrameBg, // Background of checkbox, radio button, plot, slider, text input + ImGuiCol_FrameBgHovered, + ImGuiCol_FrameBgActive, + ImGuiCol_TitleBg, + ImGuiCol_TitleBgActive, + ImGuiCol_TitleBgCollapsed, + ImGuiCol_MenuBarBg, + ImGuiCol_ScrollbarBg, + ImGuiCol_ScrollbarGrab, + ImGuiCol_ScrollbarGrabHovered, + ImGuiCol_ScrollbarGrabActive, + ImGuiCol_CheckMark, + ImGuiCol_SliderGrab, + ImGuiCol_SliderGrabActive, + ImGuiCol_Button, + ImGuiCol_ButtonHovered, + ImGuiCol_ButtonActive, + ImGuiCol_Header, // Header* colors are used for CollapsingHeader, TreeNode, Selectable, MenuItem + ImGuiCol_HeaderHovered, + ImGuiCol_HeaderActive, + ImGuiCol_Separator, + ImGuiCol_SeparatorHovered, + ImGuiCol_SeparatorActive, + ImGuiCol_ResizeGrip, + ImGuiCol_ResizeGripHovered, + ImGuiCol_ResizeGripActive, + ImGuiCol_Tab, + ImGuiCol_TabHovered, + ImGuiCol_TabActive, + ImGuiCol_TabUnfocused, + ImGuiCol_TabUnfocusedActive, + ImGuiCol_PlotLines, + ImGuiCol_PlotLinesHovered, + ImGuiCol_PlotHistogram, + ImGuiCol_PlotHistogramHovered, + ImGuiCol_TableHeaderBg, // Table header background + ImGuiCol_TableBorderStrong, // Table outer and header borders (prefer using Alpha=1.0 here) + ImGuiCol_TableBorderLight, // Table inner borders (prefer using Alpha=1.0 here) + ImGuiCol_TableRowBg, // Table row background (even rows) + ImGuiCol_TableRowBgAlt, // Table row background (odd rows) + ImGuiCol_TextSelectedBg, + ImGuiCol_DragDropTarget, + ImGuiCol_NavHighlight, // Gamepad/keyboard: current highlighted item + ImGuiCol_NavWindowingHighlight, // Highlight window when using CTRL+TAB + ImGuiCol_NavWindowingDimBg, // Darken/colorize entire screen behind the 
CTRL+TAB window list, when active + ImGuiCol_ModalWindowDimBg, // Darken/colorize entire screen behind a modal window, when one is active + ImGuiCol_COUNT +}; + +// Enumeration for PushStyleVar() / PopStyleVar() to temporarily modify the ImGuiStyle structure. +// - The enum only refers to fields of ImGuiStyle which makes sense to be pushed/popped inside UI code. +// During initialization or between frames, feel free to just poke into ImGuiStyle directly. +// - Tip: Use your programming IDE navigation facilities on the names in the _second column_ below to find the actual members and their description. +// In Visual Studio IDE: CTRL+comma ("Edit.NavigateTo") can follow symbols in comments, whereas CTRL+F12 ("Edit.GoToImplementation") cannot. +// With Visual Assist installed: ALT+G ("VAssistX.GoToImplementation") can also follow symbols in comments. +// - When changing this enum, you need to update the associated internal table GStyleVarInfo[] accordingly. This is where we link enum values to members offset/type. 
enum ImGuiStyleVar_
{
    // Enum name                        // Type     Member in ImGuiStyle structure (see ImGuiStyle for descriptions)
    ImGuiStyleVar_Alpha,                // float    Alpha
    ImGuiStyleVar_WindowPadding,        // ImVec2   WindowPadding
    ImGuiStyleVar_WindowRounding,       // float    WindowRounding
    ImGuiStyleVar_WindowBorderSize,     // float    WindowBorderSize
    ImGuiStyleVar_WindowMinSize,        // ImVec2   WindowMinSize
    ImGuiStyleVar_WindowTitleAlign,     // ImVec2   WindowTitleAlign
    ImGuiStyleVar_ChildRounding,        // float    ChildRounding
    ImGuiStyleVar_ChildBorderSize,      // float    ChildBorderSize
    ImGuiStyleVar_PopupRounding,        // float    PopupRounding
    ImGuiStyleVar_PopupBorderSize,      // float    PopupBorderSize
    ImGuiStyleVar_FramePadding,         // ImVec2   FramePadding
    ImGuiStyleVar_FrameRounding,        // float    FrameRounding
    ImGuiStyleVar_FrameBorderSize,      // float    FrameBorderSize
    ImGuiStyleVar_ItemSpacing,          // ImVec2   ItemSpacing
    ImGuiStyleVar_ItemInnerSpacing,     // ImVec2   ItemInnerSpacing
    ImGuiStyleVar_IndentSpacing,        // float    IndentSpacing
    ImGuiStyleVar_CellPadding,          // ImVec2   CellPadding
    ImGuiStyleVar_ScrollbarSize,        // float    ScrollbarSize
    ImGuiStyleVar_ScrollbarRounding,    // float    ScrollbarRounding
    ImGuiStyleVar_GrabMinSize,          // float    GrabMinSize
    ImGuiStyleVar_GrabRounding,         // float    GrabRounding
    ImGuiStyleVar_TabRounding,          // float    TabRounding
    ImGuiStyleVar_ButtonTextAlign,      // ImVec2   ButtonTextAlign
    ImGuiStyleVar_SelectableTextAlign,  // ImVec2   SelectableTextAlign
    ImGuiStyleVar_COUNT
};

// Flags for InvisibleButton() [extended in imgui_internal.h]
enum ImGuiButtonFlags_
{
    ImGuiButtonFlags_None                   = 0,
    ImGuiButtonFlags_MouseButtonLeft        = 1 << 0,   // React on left mouse button (default)
    ImGuiButtonFlags_MouseButtonRight       = 1 << 1,   // React on right mouse button
    ImGuiButtonFlags_MouseButtonMiddle      = 1 << 2,   // React on center mouse button

    // [Internal]
    ImGuiButtonFlags_MouseButtonMask_       = ImGuiButtonFlags_MouseButtonLeft
                                            | ImGuiButtonFlags_MouseButtonRight
                                            | ImGuiButtonFlags_MouseButtonMiddle,
    ImGuiButtonFlags_MouseButtonDefault_    = ImGuiButtonFlags_MouseButtonLeft
};

// Flags for ColorEdit3() / ColorEdit4() / ColorPicker3() / ColorPicker4() / ColorButton()
// (Bit 0 and bits 11..15 are not assigned by this enum.)
enum ImGuiColorEditFlags_
{
    ImGuiColorEditFlags_None            = 0,
    ImGuiColorEditFlags_NoAlpha         = 1 << 1,   // ColorEdit, ColorPicker, ColorButton: ignore Alpha component (will only read 3 components from the input pointer).
    ImGuiColorEditFlags_NoPicker        = 1 << 2,   // ColorEdit: disable picker when clicking on color square.
    ImGuiColorEditFlags_NoOptions       = 1 << 3,   // ColorEdit: disable toggling options menu when right-clicking on inputs/small preview.
    ImGuiColorEditFlags_NoSmallPreview  = 1 << 4,   // ColorEdit, ColorPicker: disable color square preview next to the inputs. (e.g. to show only the inputs)
    ImGuiColorEditFlags_NoInputs        = 1 << 5,   // ColorEdit, ColorPicker: disable inputs sliders/text widgets (e.g. to show only the small preview color square).
    ImGuiColorEditFlags_NoTooltip       = 1 << 6,   // ColorEdit, ColorPicker, ColorButton: disable tooltip when hovering the preview.
    ImGuiColorEditFlags_NoLabel         = 1 << 7,   // ColorEdit, ColorPicker: disable display of inline text label (the label is still forwarded to the tooltip and picker).
    ImGuiColorEditFlags_NoSidePreview   = 1 << 8,   // ColorPicker: disable bigger color preview on right side of the picker, use small color square preview instead.
    ImGuiColorEditFlags_NoDragDrop      = 1 << 9,   // ColorEdit: disable drag and drop target. ColorButton: disable drag and drop source.
    ImGuiColorEditFlags_NoBorder        = 1 << 10,  // ColorButton: disable border (which is enforced by default)

    // User Options (right-click on widget to change some of them).
    ImGuiColorEditFlags_AlphaBar        = 1 << 16,  //              ColorEdit, ColorPicker: show vertical alpha bar/gradient in picker.
    ImGuiColorEditFlags_AlphaPreview    = 1 << 17,  //              ColorEdit, ColorPicker, ColorButton: display preview as a transparent color over a checkerboard, instead of opaque.
    ImGuiColorEditFlags_AlphaPreviewHalf= 1 << 18,  //              ColorEdit, ColorPicker, ColorButton: display half opaque / half checkerboard, instead of opaque.
    ImGuiColorEditFlags_HDR             = 1 << 19,  //              (WIP) ColorEdit: Currently only disable 0.0f..1.0f limits in RGBA edition (note: you probably want to use ImGuiColorEditFlags_Float flag as well).
    ImGuiColorEditFlags_DisplayRGB      = 1 << 20,  // [Display]    ColorEdit: override _display_ type among RGB/HSV/Hex. ColorPicker: select any combination using one or more of RGB/HSV/Hex.
    ImGuiColorEditFlags_DisplayHSV      = 1 << 21,  // [Display]    "
    ImGuiColorEditFlags_DisplayHex      = 1 << 22,  // [Display]    "
    ImGuiColorEditFlags_Uint8           = 1 << 23,  // [DataType]   ColorEdit, ColorPicker, ColorButton: _display_ values formatted as 0..255.
    ImGuiColorEditFlags_Float           = 1 << 24,  // [DataType]   ColorEdit, ColorPicker, ColorButton: _display_ values formatted as 0.0f..1.0f floats instead of 0..255 integers. No round-trip of value via integers.
    ImGuiColorEditFlags_PickerHueBar    = 1 << 25,  // [Picker]     ColorPicker: bar for Hue, rectangle for Sat/Value.
    ImGuiColorEditFlags_PickerHueWheel  = 1 << 26,  // [Picker]     ColorPicker: wheel for Hue, triangle for Sat/Value.
    ImGuiColorEditFlags_InputRGB        = 1 << 27,  // [Input]      ColorEdit, ColorPicker: input and output data in RGB format.
    ImGuiColorEditFlags_InputHSV        = 1 << 28,  // [Input]      ColorEdit, ColorPicker: input and output data in HSV format.

    // Defaults Options. You can set application defaults using SetColorEditOptions(). The intent is that you probably don't want to
    // override them in most of your calls. Let the user choose via the option menu and/or call SetColorEditOptions() once during startup.
    ImGuiColorEditFlags__OptionsDefault = ImGuiColorEditFlags_Uint8
                                        | ImGuiColorEditFlags_DisplayRGB
                                        | ImGuiColorEditFlags_InputRGB
                                        | ImGuiColorEditFlags_PickerHueBar,

    // [Internal] Masks: one per group of related options
    ImGuiColorEditFlags__DisplayMask    = ImGuiColorEditFlags_DisplayRGB | ImGuiColorEditFlags_DisplayHSV | ImGuiColorEditFlags_DisplayHex,
    ImGuiColorEditFlags__DataTypeMask   = ImGuiColorEditFlags_Uint8 | ImGuiColorEditFlags_Float,
    ImGuiColorEditFlags__PickerMask     = ImGuiColorEditFlags_PickerHueWheel | ImGuiColorEditFlags_PickerHueBar,
    ImGuiColorEditFlags__InputMask      = ImGuiColorEditFlags_InputRGB | ImGuiColorEditFlags_InputHSV

    // Obsolete names (will be removed). Leading commas keep the enumerator list valid when this section is compiled out.
#ifndef IMGUI_DISABLE_OBSOLETE_FUNCTIONS
    , ImGuiColorEditFlags_RGB = ImGuiColorEditFlags_DisplayRGB  // [renamed in 1.69]
    , ImGuiColorEditFlags_HSV = ImGuiColorEditFlags_DisplayHSV  // [renamed in 1.69]
    , ImGuiColorEditFlags_HEX = ImGuiColorEditFlags_DisplayHex  // [renamed in 1.69]
#endif
};

// Flags for DragFloat(), DragInt(), SliderFloat(), SliderInt() etc.
// We use the same sets of flags for DragXXX() and SliderXXX() functions as the features are the same and it makes it easier to swap them.
enum ImGuiSliderFlags_
{
    ImGuiSliderFlags_None               = 0,
    ImGuiSliderFlags_AlwaysClamp        = 1 << 4,       // Clamp value to min/max bounds when input manually with CTRL+Click. By default CTRL+Click allows going out of bounds.
    ImGuiSliderFlags_Logarithmic        = 1 << 5,       // Make the widget logarithmic (linear otherwise). Consider using ImGuiSliderFlags_NoRoundToFormat with this if using a format-string with small amount of digits.
    ImGuiSliderFlags_NoRoundToFormat    = 1 << 6,       // Disable rounding underlying value to match precision of the display format string (e.g. %.3f values are rounded to those 3 digits)
    ImGuiSliderFlags_NoInput            = 1 << 7,       // Disable CTRL+Click or Enter key allowing to input text directly into the widget
    ImGuiSliderFlags_InvalidMask_       = 0x7000000F    // [Internal] We treat using those bits as being potentially a 'float power' argument from the previous API that has got miscast to this enum, and will trigger an assert if needed.

    // Obsolete names (will be removed)
#ifndef IMGUI_DISABLE_OBSOLETE_FUNCTIONS
    , ImGuiSliderFlags_ClampOnInput = ImGuiSliderFlags_AlwaysClamp // [renamed in 1.79]
#endif
};

// Identify a mouse button.
// Those values are guaranteed to be stable and we frequently use 0/1 directly. Named enums provided for convenience.
enum ImGuiMouseButton_
{
    ImGuiMouseButton_Left   = 0,
    ImGuiMouseButton_Right  = 1,
    ImGuiMouseButton_Middle = 2,
    ImGuiMouseButton_COUNT  = 5     // Larger than the three named buttons: two extra buttons are tracked as well
};

// Enumeration for GetMouseCursor()
// User code may request backend to display given cursor by calling SetMouseCursor(), which is why we have some cursors that are marked unused here
enum ImGuiMouseCursor_
{
    ImGuiMouseCursor_None = -1,
    ImGuiMouseCursor_Arrow = 0,
    ImGuiMouseCursor_TextInput,         // When hovering over InputText, etc.
    ImGuiMouseCursor_ResizeAll,         // (Unused by Dear ImGui functions)
    ImGuiMouseCursor_ResizeNS,          // When hovering over an horizontal border
    ImGuiMouseCursor_ResizeEW,          // When hovering over a vertical border or a column
    ImGuiMouseCursor_ResizeNESW,        // When hovering over the bottom-left corner of a window
    ImGuiMouseCursor_ResizeNWSE,        // When hovering over the bottom-right corner of a window
    ImGuiMouseCursor_Hand,              // (Unused by Dear ImGui functions. Use for e.g. hyperlinks)
    ImGuiMouseCursor_NotAllowed,        // When hovering something with disallowed interaction. Usually a crossed circle.
    ImGuiMouseCursor_COUNT
};

// Enumeration for ImGui::SetWindow***(), SetNextWindow***(), SetNextItem***() functions
// Represent a condition.
// Important: Treat as a regular enum! Do NOT combine multiple values using binary operators! All the functions above treat 0 as a shortcut to ImGuiCond_Always.
enum ImGuiCond_
{
    ImGuiCond_None          = 0,        // No condition (always set the variable), same as _Always
    ImGuiCond_Always        = 1 << 0,   // No condition (always set the variable)
    ImGuiCond_Once          = 1 << 1,   // Set the variable once per runtime session (only the first call will succeed)
    ImGuiCond_FirstUseEver  = 1 << 2,   // Set the variable if the object/window has no persistently saved data (no entry in .ini file)
    ImGuiCond_Appearing     = 1 << 3    // Set the variable if the object/window is appearing after being hidden/inactive (or the first time)
};

//-----------------------------------------------------------------------------
// [SECTION] Helpers: Memory allocations macros, ImVector<>
//-----------------------------------------------------------------------------

//-----------------------------------------------------------------------------
// IM_MALLOC(), IM_FREE(), IM_NEW(), IM_PLACEMENT_NEW(), IM_DELETE()
// We call C++ constructor on own allocated memory via the placement "new(ptr) Type()" syntax.
// Defining a custom placement new() with a custom parameter allows us to bypass including <new> which on some platforms complains when user has disabled exceptions.
+//----------------------------------------------------------------------------- + +struct ImNewWrapper {}; +inline void* operator new(size_t, ImNewWrapper, void* ptr) { return ptr; } +inline void operator delete(void*, ImNewWrapper, void*) {} // This is only required so we can use the symmetrical new() +#define IM_ALLOC(_SIZE) ImGui::MemAlloc(_SIZE) +#define IM_FREE(_PTR) ImGui::MemFree(_PTR) +#define IM_PLACEMENT_NEW(_PTR) new(ImNewWrapper(), _PTR) +#define IM_NEW(_TYPE) new(ImNewWrapper(), ImGui::MemAlloc(sizeof(_TYPE))) _TYPE +template void IM_DELETE(T* p) { if (p) { p->~T(); ImGui::MemFree(p); } } + +//----------------------------------------------------------------------------- +// ImVector<> +// Lightweight std::vector<>-like class to avoid dragging dependencies (also, some implementations of STL with debug enabled are absurdly slow, we bypass it so our code runs fast in debug). +//----------------------------------------------------------------------------- +// - You generally do NOT need to care or use this ever. But we need to make it available in imgui.h because some of our public structures are relying on it. +// - We use std-like naming convention here, which is a little unusual for this codebase. +// - Important: clear() frees memory, resize(0) keep the allocated buffer. We use resize(0) a lot to intentionally recycle allocated buffers across frames and amortize our costs. +// - Important: our implementation does NOT call C++ constructors/destructors, we treat everything as raw data! This is intentional but be extra mindful of that, +// Do NOT use this class as a std::vector replacement in your own code! Many of the structures used by dear imgui can be safely initialized by a zero-memset. +//----------------------------------------------------------------------------- + +template +struct ImVector +{ + int Size; + int Capacity; + T* Data; + + // Provide standard typedefs but we don't use them ourselves. 
+ typedef T value_type; + typedef value_type* iterator; + typedef const value_type* const_iterator; + + // Constructors, destructor + inline ImVector() { Size = Capacity = 0; Data = NULL; } + inline ImVector(const ImVector& src) { Size = Capacity = 0; Data = NULL; operator=(src); } + inline ImVector& operator=(const ImVector& src) { clear(); resize(src.Size); memcpy(Data, src.Data, (size_t)Size * sizeof(T)); return *this; } + inline ~ImVector() { if (Data) IM_FREE(Data); } + + inline bool empty() const { return Size == 0; } + inline int size() const { return Size; } + inline int size_in_bytes() const { return Size * (int)sizeof(T); } + inline int max_size() const { return 0x7FFFFFFF / (int)sizeof(T); } + inline int capacity() const { return Capacity; } + inline T& operator[](int i) { IM_ASSERT(i >= 0 && i < Size); return Data[i]; } + inline const T& operator[](int i) const { IM_ASSERT(i >= 0 && i < Size); return Data[i]; } + + inline void clear() { if (Data) { Size = Capacity = 0; IM_FREE(Data); Data = NULL; } } + inline T* begin() { return Data; } + inline const T* begin() const { return Data; } + inline T* end() { return Data + Size; } + inline const T* end() const { return Data + Size; } + inline T& front() { IM_ASSERT(Size > 0); return Data[0]; } + inline const T& front() const { IM_ASSERT(Size > 0); return Data[0]; } + inline T& back() { IM_ASSERT(Size > 0); return Data[Size - 1]; } + inline const T& back() const { IM_ASSERT(Size > 0); return Data[Size - 1]; } + inline void swap(ImVector& rhs) { int rhs_size = rhs.Size; rhs.Size = Size; Size = rhs_size; int rhs_cap = rhs.Capacity; rhs.Capacity = Capacity; Capacity = rhs_cap; T* rhs_data = rhs.Data; rhs.Data = Data; Data = rhs_data; } + + inline int _grow_capacity(int sz) const { int new_capacity = Capacity ? (Capacity + Capacity / 2) : 8; return new_capacity > sz ? 
new_capacity : sz; } + inline void resize(int new_size) { if (new_size > Capacity) reserve(_grow_capacity(new_size)); Size = new_size; } + inline void resize(int new_size, const T& v) { if (new_size > Capacity) reserve(_grow_capacity(new_size)); if (new_size > Size) for (int n = Size; n < new_size; n++) memcpy(&Data[n], &v, sizeof(v)); Size = new_size; } + inline void shrink(int new_size) { IM_ASSERT(new_size <= Size); Size = new_size; } // Resize a vector to a smaller size, guaranteed not to cause a reallocation + inline void reserve(int new_capacity) { if (new_capacity <= Capacity) return; T* new_data = (T*)IM_ALLOC((size_t)new_capacity * sizeof(T)); if (Data) { memcpy(new_data, Data, (size_t)Size * sizeof(T)); IM_FREE(Data); } Data = new_data; Capacity = new_capacity; } + + // NB: It is illegal to call push_back/push_front/insert with a reference pointing inside the ImVector data itself! e.g. v.push_back(v[10]) is forbidden. + inline void push_back(const T& v) { if (Size == Capacity) reserve(_grow_capacity(Size + 1)); memcpy(&Data[Size], &v, sizeof(v)); Size++; } + inline void pop_back() { IM_ASSERT(Size > 0); Size--; } + inline void push_front(const T& v) { if (Size == 0) push_back(v); else insert(Data, v); } + inline T* erase(const T* it) { IM_ASSERT(it >= Data && it < Data + Size); const ptrdiff_t off = it - Data; memmove(Data + off, Data + off + 1, ((size_t)Size - (size_t)off - 1) * sizeof(T)); Size--; return Data + off; } + inline T* erase(const T* it, const T* it_last){ IM_ASSERT(it >= Data && it < Data + Size && it_last > it && it_last <= Data + Size); const ptrdiff_t count = it_last - it; const ptrdiff_t off = it - Data; memmove(Data + off, Data + off + count, ((size_t)Size - (size_t)off - count) * sizeof(T)); Size -= (int)count; return Data + off; } + inline T* erase_unsorted(const T* it) { IM_ASSERT(it >= Data && it < Data + Size); const ptrdiff_t off = it - Data; if (it < Data + Size - 1) memcpy(Data + off, Data + Size - 1, sizeof(T)); Size--; return 
Data + off; } + inline T* insert(const T* it, const T& v) { IM_ASSERT(it >= Data && it <= Data + Size); const ptrdiff_t off = it - Data; if (Size == Capacity) reserve(_grow_capacity(Size + 1)); if (off < (int)Size) memmove(Data + off + 1, Data + off, ((size_t)Size - (size_t)off) * sizeof(T)); memcpy(&Data[off], &v, sizeof(v)); Size++; return Data + off; } + inline bool contains(const T& v) const { const T* data = Data; const T* data_end = Data + Size; while (data < data_end) if (*data++ == v) return true; return false; } + inline T* find(const T& v) { T* data = Data; const T* data_end = Data + Size; while (data < data_end) if (*data == v) break; else ++data; return data; } + inline const T* find(const T& v) const { const T* data = Data; const T* data_end = Data + Size; while (data < data_end) if (*data == v) break; else ++data; return data; } + inline bool find_erase(const T& v) { const T* it = find(v); if (it < Data + Size) { erase(it); return true; } return false; } + inline bool find_erase_unsorted(const T& v) { const T* it = find(v); if (it < Data + Size) { erase_unsorted(it); return true; } return false; } + inline int index_from_ptr(const T* it) const { IM_ASSERT(it >= Data && it < Data + Size); const ptrdiff_t off = it - Data; return (int)off; } +}; + +//----------------------------------------------------------------------------- +// [SECTION] ImGuiStyle +//----------------------------------------------------------------------------- +// You may modify the ImGui::GetStyle() main instance during initialization and before NewFrame(). +// During the frame, use ImGui::PushStyleVar(ImGuiStyleVar_XXXX)/PopStyleVar() to alter the main style values, +// and ImGui::PushStyleColor(ImGuiCol_XXX)/PopStyleColor() for colors. +//----------------------------------------------------------------------------- + +struct ImGuiStyle +{ + float Alpha; // Global alpha applies to everything in Dear ImGui. + ImVec2 WindowPadding; // Padding within a window. 
+ float WindowRounding; // Radius of window corners rounding. Set to 0.0f to have rectangular windows. Large values tend to lead to variety of artifacts and are not recommended. + float WindowBorderSize; // Thickness of border around windows. Generally set to 0.0f or 1.0f. (Other values are not well tested and more CPU/GPU costly). + ImVec2 WindowMinSize; // Minimum window size. This is a global setting. If you want to constraint individual windows, use SetNextWindowSizeConstraints(). + ImVec2 WindowTitleAlign; // Alignment for title bar text. Defaults to (0.0f,0.5f) for left-aligned,vertically centered. + ImGuiDir WindowMenuButtonPosition; // Side of the collapsing/docking button in the title bar (None/Left/Right). Defaults to ImGuiDir_Left. + float ChildRounding; // Radius of child window corners rounding. Set to 0.0f to have rectangular windows. + float ChildBorderSize; // Thickness of border around child windows. Generally set to 0.0f or 1.0f. (Other values are not well tested and more CPU/GPU costly). + float PopupRounding; // Radius of popup window corners rounding. (Note that tooltip windows use WindowRounding) + float PopupBorderSize; // Thickness of border around popup/tooltip windows. Generally set to 0.0f or 1.0f. (Other values are not well tested and more CPU/GPU costly). + ImVec2 FramePadding; // Padding within a framed rectangle (used by most widgets). + float FrameRounding; // Radius of frame corners rounding. Set to 0.0f to have rectangular frame (used by most widgets). + float FrameBorderSize; // Thickness of border around frames. Generally set to 0.0f or 1.0f. (Other values are not well tested and more CPU/GPU costly). + ImVec2 ItemSpacing; // Horizontal and vertical spacing between widgets/lines. + ImVec2 ItemInnerSpacing; // Horizontal and vertical spacing between within elements of a composed widget (e.g. a slider and its label). 
+ ImVec2 CellPadding; // Padding within a table cell + ImVec2 TouchExtraPadding; // Expand reactive bounding box for touch-based system where touch position is not accurate enough. Unfortunately we don't sort widgets so priority on overlap will always be given to the first widget. So don't grow this too much! + float IndentSpacing; // Horizontal indentation when e.g. entering a tree node. Generally == (FontSize + FramePadding.x*2). + float ColumnsMinSpacing; // Minimum horizontal spacing between two columns. Preferably > (FramePadding.x + 1). + float ScrollbarSize; // Width of the vertical scrollbar, Height of the horizontal scrollbar. + float ScrollbarRounding; // Radius of grab corners for scrollbar. + float GrabMinSize; // Minimum width/height of a grab box for slider/scrollbar. + float GrabRounding; // Radius of grabs corners rounding. Set to 0.0f to have rectangular slider grabs. + float LogSliderDeadzone; // The size in pixels of the dead-zone around zero on logarithmic sliders that cross zero. + float TabRounding; // Radius of upper corners of a tab. Set to 0.0f to have rectangular tabs. + float TabBorderSize; // Thickness of border around tabs. + float TabMinWidthForCloseButton; // Minimum width for close button to appears on an unselected tab when hovered. Set to 0.0f to always show when hovering, set to FLT_MAX to never show close button unless selected. + ImGuiDir ColorButtonPosition; // Side of the color button in the ColorEdit4 widget (left/right). Defaults to ImGuiDir_Right. + ImVec2 ButtonTextAlign; // Alignment of button text when button is larger than text. Defaults to (0.5f, 0.5f) (centered). + ImVec2 SelectableTextAlign; // Alignment of selectable text. Defaults to (0.0f, 0.0f) (top-left aligned). It's generally important to keep this left-aligned if you want to lay multiple items on a same line. + ImVec2 DisplayWindowPadding; // Window position are clamped to be visible within the display area or monitors by at least this amount. 
Only applies to regular windows. + ImVec2 DisplaySafeAreaPadding; // If you cannot see the edges of your screen (e.g. on a TV) increase the safe area padding. Apply to popups/tooltips as well regular windows. NB: Prefer configuring your TV sets correctly! + float MouseCursorScale; // Scale software rendered mouse cursor (when io.MouseDrawCursor is enabled). May be removed later. + bool AntiAliasedLines; // Enable anti-aliased lines/borders. Disable if you are really tight on CPU/GPU. Latched at the beginning of the frame (copied to ImDrawList). + bool AntiAliasedLinesUseTex; // Enable anti-aliased lines/borders using textures where possible. Require backend to render with bilinear filtering. Latched at the beginning of the frame (copied to ImDrawList). + bool AntiAliasedFill; // Enable anti-aliased edges around filled shapes (rounded rectangles, circles, etc.). Disable if you are really tight on CPU/GPU. Latched at the beginning of the frame (copied to ImDrawList). + float CurveTessellationTol; // Tessellation tolerance when using PathBezierCurveTo() without a specific number of segments. Decrease for highly tessellated curves (higher quality, more polygons), increase to reduce quality. + float CircleSegmentMaxError; // Maximum error (in pixels) allowed when using AddCircle()/AddCircleFilled() or drawing rounded corner rectangles with no explicit segment count specified. Decrease for higher quality but more geometry. + ImVec4 Colors[ImGuiCol_COUNT]; + + IMGUI_API ImGuiStyle(); + IMGUI_API void ScaleAllSizes(float scale_factor); +}; + +//----------------------------------------------------------------------------- +// [SECTION] ImGuiIO +//----------------------------------------------------------------------------- +// Communicate most settings and inputs/outputs to Dear ImGui using this structure. +// Access via ImGui::GetIO(). Read 'Programmer guide' section in .cpp file for general usage. 
+//----------------------------------------------------------------------------- + +struct ImGuiIO +{ + //------------------------------------------------------------------ + // Configuration (fill once) // Default value + //------------------------------------------------------------------ + + ImGuiConfigFlags ConfigFlags; // = 0 // See ImGuiConfigFlags_ enum. Set by user/application. Gamepad/keyboard navigation options, etc. + ImGuiBackendFlags BackendFlags; // = 0 // See ImGuiBackendFlags_ enum. Set by backend (imgui_impl_xxx files or custom backend) to communicate features supported by the backend. + ImVec2 DisplaySize; // // Main display size, in pixels. + float DeltaTime; // = 1.0f/60.0f // Time elapsed since last frame, in seconds. + float IniSavingRate; // = 5.0f // Minimum time between saving positions/sizes to .ini file, in seconds. + const char* IniFilename; // = "imgui.ini" // Path to .ini file. Set NULL to disable automatic .ini loading/saving, if e.g. you want to manually load/save from memory. + const char* LogFilename; // = "imgui_log.txt"// Path to .log file (default parameter to ImGui::LogToFile when no file is specified). + float MouseDoubleClickTime; // = 0.30f // Time for a double-click, in seconds. + float MouseDoubleClickMaxDist; // = 6.0f // Distance threshold to stay in to validate a double-click, in pixels. + float MouseDragThreshold; // = 6.0f // Distance threshold before considering we are dragging. + int KeyMap[ImGuiKey_COUNT]; // // Map of indices into the KeysDown[512] entries array which represent your "native" keyboard state. + float KeyRepeatDelay; // = 0.250f // When holding a key/button, time before it starts repeating, in seconds (for buttons in Repeat mode, etc.). + float KeyRepeatRate; // = 0.050f // When holding a key/button, rate at which it repeats, in seconds. + void* UserData; // = NULL // Store your own data for retrieval by callbacks. 
+ + ImFontAtlas*Fonts; // // Font atlas: load, rasterize and pack one or more fonts into a single texture. + float FontGlobalScale; // = 1.0f // Global scale all fonts + bool FontAllowUserScaling; // = false // Allow user scaling text of individual window with CTRL+Wheel. + ImFont* FontDefault; // = NULL // Font to use on NewFrame(). Use NULL to uses Fonts->Fonts[0]. + ImVec2 DisplayFramebufferScale; // = (1, 1) // For retina display or other situations where window coordinates are different from framebuffer coordinates. This generally ends up in ImDrawData::FramebufferScale. + + // Miscellaneous options + bool MouseDrawCursor; // = false // Request ImGui to draw a mouse cursor for you (if you are on a platform without a mouse cursor). Cannot be easily renamed to 'io.ConfigXXX' because this is frequently used by backend implementations. + bool ConfigMacOSXBehaviors; // = defined(__APPLE__) // OS X style: Text editing cursor movement using Alt instead of Ctrl, Shortcuts using Cmd/Super instead of Ctrl, Line/Text Start and End using Cmd+Arrows instead of Home/End, Double click selects by word instead of selecting whole text, Multi-selection in lists uses Cmd/Super instead of Ctrl. + bool ConfigInputTextCursorBlink; // = true // Enable blinking cursor (optional as some users consider it to be distracting). + bool ConfigDragClickToInputText; // = false // [BETA] Enable turning DragXXX widgets into text input with a simple mouse click-release (without moving). Not desirable on devices without a keyboard. + bool ConfigWindowsResizeFromEdges; // = true // Enable resizing of windows from their edges and from the lower-left corner. This requires (io.BackendFlags & ImGuiBackendFlags_HasMouseCursors) because it needs mouse cursor feedback. (This used to be a per-window ImGuiWindowFlags_ResizeFromAnySide flag) + bool ConfigWindowsMoveFromTitleBarOnly; // = false // Enable allowing to move windows only when clicking on their title bar. 
Does not apply to windows without a title bar. + float ConfigMemoryCompactTimer; // = 60.0f // Timer (in seconds) to free transient windows/tables memory buffers when unused. Set to -1.0f to disable. + + //------------------------------------------------------------------ + // Platform Functions + // (the imgui_impl_xxxx backend files are setting those up for you) + //------------------------------------------------------------------ + + // Optional: Platform/Renderer backend name (informational only! will be displayed in About Window) + User data for backend/wrappers to store their own stuff. + const char* BackendPlatformName; // = NULL + const char* BackendRendererName; // = NULL + void* BackendPlatformUserData; // = NULL // User data for platform backend + void* BackendRendererUserData; // = NULL // User data for renderer backend + void* BackendLanguageUserData; // = NULL // User data for non C++ programming language backend + + // Optional: Access OS clipboard + // (default to use native Win32 clipboard on Windows, otherwise uses a private clipboard. Override to access OS clipboard on other architectures) + const char* (*GetClipboardTextFn)(void* user_data); + void (*SetClipboardTextFn)(void* user_data, const char* text); + void* ClipboardUserData; + + // Optional: Notify OS Input Method Editor of the screen position of your cursor for text input position (e.g. when using Japanese/Chinese IME on Windows) + // (default to use native imm32 api on Windows) + void (*ImeSetInputScreenPosFn)(int x, int y); + void* ImeWindowHandle; // = NULL // (Windows) Set this to your HWND to get automatic IME cursor positioning. + + //------------------------------------------------------------------ + // Input - Fill before calling NewFrame() + //------------------------------------------------------------------ + + ImVec2 MousePos; // Mouse position, in pixels. Set to ImVec2(-FLT_MAX, -FLT_MAX) if mouse is unavailable (on another screen, etc.) 
+ bool MouseDown[5]; // Mouse buttons: 0=left, 1=right, 2=middle + extras (ImGuiMouseButton_COUNT == 5). Dear ImGui mostly uses left and right buttons. Others buttons allows us to track if the mouse is being used by your application + available to user as a convenience via IsMouse** API. + float MouseWheel; // Mouse wheel Vertical: 1 unit scrolls about 5 lines text. + float MouseWheelH; // Mouse wheel Horizontal. Most users don't have a mouse with an horizontal wheel, may not be filled by all backends. + bool KeyCtrl; // Keyboard modifier pressed: Control + bool KeyShift; // Keyboard modifier pressed: Shift + bool KeyAlt; // Keyboard modifier pressed: Alt + bool KeySuper; // Keyboard modifier pressed: Cmd/Super/Windows + bool KeysDown[512]; // Keyboard keys that are pressed (ideally left in the "native" order your engine has access to keyboard keys, so you can use your own defines/enums for keys). + float NavInputs[ImGuiNavInput_COUNT]; // Gamepad inputs. Cleared back to zero by EndFrame(). Keyboard keys will be auto-mapped and be written here by NewFrame(). + + // Functions + IMGUI_API void AddInputCharacter(unsigned int c); // Queue new character input + IMGUI_API void AddInputCharacterUTF16(ImWchar16 c); // Queue new character input from an UTF-16 character, it can be a surrogate + IMGUI_API void AddInputCharactersUTF8(const char* str); // Queue new characters input from an UTF-8 string + IMGUI_API void ClearInputCharacters(); // Clear the text input buffer manually + + //------------------------------------------------------------------ + // Output - Updated by NewFrame() or EndFrame()/Render() + // (when reading from the io.WantCaptureMouse, io.WantCaptureKeyboard flags to dispatch your inputs, it is + // generally easier and more correct to use their state BEFORE calling NewFrame(). See FAQ for details!) 
+ //------------------------------------------------------------------ + + bool WantCaptureMouse; // Set when Dear ImGui will use mouse inputs, in this case do not dispatch them to your main game/application (either way, always pass on mouse inputs to imgui). (e.g. unclicked mouse is hovering over an imgui window, widget is active, mouse was clicked over an imgui window, etc.). + bool WantCaptureKeyboard; // Set when Dear ImGui will use keyboard inputs, in this case do not dispatch them to your main game/application (either way, always pass keyboard inputs to imgui). (e.g. InputText active, or an imgui window is focused and navigation is enabled, etc.). + bool WantTextInput; // Mobile/console: when set, you may display an on-screen keyboard. This is set by Dear ImGui when it wants textual keyboard input to happen (e.g. when a InputText widget is active). + bool WantSetMousePos; // MousePos has been altered, backend should reposition mouse on next frame. Rarely used! Set only when ImGuiConfigFlags_NavEnableSetMousePos flag is enabled. + bool WantSaveIniSettings; // When manual .ini load/save is active (io.IniFilename == NULL), this will be set to notify your application that you can call SaveIniSettingsToMemory() and save yourself. Important: clear io.WantSaveIniSettings yourself after saving! + bool NavActive; // Keyboard/Gamepad navigation is currently allowed (will handle ImGuiKey_NavXXX events) = a window is focused and it doesn't use the ImGuiWindowFlags_NoNavInputs flag. + bool NavVisible; // Keyboard/Gamepad navigation is visible and allowed (will handle ImGuiKey_NavXXX events). + float Framerate; // Application framerate estimate, in frame per second. Solely for convenience. Rolling average estimation based on io.DeltaTime over 120 frames. 
+ int MetricsRenderVertices; // Vertices output during last call to Render() + int MetricsRenderIndices; // Indices output during last call to Render() = number of triangles * 3 + int MetricsRenderWindows; // Number of visible windows + int MetricsActiveWindows; // Number of active windows + int MetricsActiveAllocations; // Number of active allocations, updated by MemAlloc/MemFree based on current context. May be off if you have multiple imgui contexts. + ImVec2 MouseDelta; // Mouse delta. Note that this is zero if either current or previous position are invalid (-FLT_MAX,-FLT_MAX), so a disappearing/reappearing mouse won't have a huge delta. + + //------------------------------------------------------------------ + // [Internal] Dear ImGui will maintain those fields. Forward compatibility not guaranteed! + //------------------------------------------------------------------ + + ImGuiKeyModFlags KeyMods; // Key mods flags (same as io.KeyCtrl/KeyShift/KeyAlt/KeySuper but merged into flags), updated by NewFrame() + ImVec2 MousePosPrev; // Previous mouse position (note that MouseDelta is not necessary == MousePos-MousePosPrev, in case either position is invalid) + ImVec2 MouseClickedPos[5]; // Position at time of clicking + double MouseClickedTime[5]; // Time of last click (used to figure out double-click) + bool MouseClicked[5]; // Mouse button went from !Down to Down + bool MouseDoubleClicked[5]; // Has mouse button been double-clicked? + bool MouseReleased[5]; // Mouse button went from Down to !Down + bool MouseDownOwned[5]; // Track if button was clicked inside a dear imgui window. We don't request mouse capture from the application if click started outside ImGui bounds. 
+ bool MouseDownWasDoubleClick[5]; // Track if button down was a double-click + float MouseDownDuration[5]; // Duration the mouse button has been down (0.0f == just clicked) + float MouseDownDurationPrev[5]; // Previous time the mouse button has been down + ImVec2 MouseDragMaxDistanceAbs[5]; // Maximum distance, absolute, on each axis, of how much mouse has traveled from the clicking point + float MouseDragMaxDistanceSqr[5]; // Squared maximum distance of how much mouse has traveled from the clicking point + float KeysDownDuration[512]; // Duration the keyboard key has been down (0.0f == just pressed) + float KeysDownDurationPrev[512]; // Previous duration the key has been down + float NavInputsDownDuration[ImGuiNavInput_COUNT]; + float NavInputsDownDurationPrev[ImGuiNavInput_COUNT]; + float PenPressure; // Touch/Pen pressure (0.0f to 1.0f, should be >0.0f only when MouseDown[0] == true). Helper storage currently unused by Dear ImGui. + ImWchar16 InputQueueSurrogate; // For AddInputCharacterUTF16 + ImVector InputQueueCharacters; // Queue of _characters_ input (obtained by platform backend). Fill using AddInputCharacter() helper. + + IMGUI_API ImGuiIO(); +}; + +//----------------------------------------------------------------------------- +// [SECTION] Misc data structures +//----------------------------------------------------------------------------- + +// Shared state of InputText(), passed as an argument to your callback when a ImGuiInputTextFlags_Callback* flag is used. +// The callback function should return 0 by default. 
+// Callbacks (follow a flag name and see comments in ImGuiInputTextFlags_ declarations for more details)
+// - ImGuiInputTextFlags_CallbackEdit:        Callback on buffer edit (note that InputText() already returns true on edit, the callback is useful mainly to manipulate the underlying buffer while focus is active)
+// - ImGuiInputTextFlags_CallbackAlways:      Callback on each iteration
+// - ImGuiInputTextFlags_CallbackCompletion:  Callback on pressing TAB
+// - ImGuiInputTextFlags_CallbackHistory:     Callback on pressing Up/Down arrows
+// - ImGuiInputTextFlags_CallbackCharFilter:  Callback on character inputs to replace or discard them. Modify 'EventChar' to replace or discard, or return 1 in callback to discard.
+// - ImGuiInputTextFlags_CallbackResize:      Callback on buffer capacity changes request (beyond 'buf_size' parameter value), allowing the string to grow.
+struct ImGuiInputTextCallbackData
+{
+    ImGuiInputTextFlags EventFlag;      // One ImGuiInputTextFlags_Callback*    // Read-only
+    ImGuiInputTextFlags Flags;          // What user passed to InputText()      // Read-only
+    void*               UserData;       // What user passed to InputText()      // Read-only
+
+    // Arguments for the different callback events
+    // - To modify the text buffer in a callback, prefer using the InsertChars() / DeleteChars() function. InsertChars() will take care of calling the resize callback if necessary.
+    // - If you know your edits are not going to resize the underlying buffer allocation, you may modify the contents of 'Buf[]' directly. You need to update 'BufTextLen' accordingly (0 <= BufTextLen < BufSize) and set 'BufDirty' to true so InputText can update its internal state.
+    ImWchar             EventChar;      // Character input                      // Read-write   // [CharFilter] Replace character with another one, or set to zero to drop. return 1 is equivalent to setting EventChar=0;
+    ImGuiKey            EventKey;       // Key pressed (Up/Down/TAB)            // Read-only    // [Completion,History]
+    char*               Buf;            // Text buffer                          // Read-write   // [Resize] Can replace pointer / [Completion,History,Always] Only write to pointed data, don't replace the actual pointer!
+    int                 BufTextLen;     // Text length (in bytes)               // Read-write   // [Resize,Completion,History,Always] Exclude zero-terminator storage. In C land: == strlen(some_text), in C++ land: string.length()
+    int                 BufSize;        // Buffer size (in bytes) = capacity+1  // Read-only    // [Resize,Completion,History,Always] Include zero-terminator storage. In C land == ARRAYSIZE(my_char_array), in C++ land: string.capacity()+1
+    bool                BufDirty;       // Set if you modify Buf/BufTextLen!    // Write        // [Completion,History,Always]
+    int                 CursorPos;      //                                      // Read-write   // [Completion,History,Always]
+    int                 SelectionStart; //                                      // Read-write   // [Completion,History,Always] (== SelectionEnd when no selection)
+    int                 SelectionEnd;   //                                      // Read-write   // [Completion,History,Always]
+
+    // Helper functions for text manipulation.
+    // Use those functions to benefit from the CallbackResize behaviors. Calling those functions resets the selection.
+    IMGUI_API ImGuiInputTextCallbackData();
+    IMGUI_API void      DeleteChars(int pos, int bytes_count);
+    IMGUI_API void      InsertChars(int pos, const char* text, const char* text_end = NULL);
+    void                SelectAll()             { SelectionStart = 0; SelectionEnd = BufTextLen; }
+    void                ClearSelection()        { SelectionStart = SelectionEnd = BufTextLen; }
+    bool                HasSelection() const    { return SelectionStart != SelectionEnd; }
+};
+
+// Resizing callback data to apply custom constraint. As enabled by SetNextWindowSizeConstraints(). Callback is called during the next Begin().
+// NB: For basic min/max size constraint on each axis you don't need to use the callback! The SetNextWindowSizeConstraints() parameters are enough.
+struct ImGuiSizeCallbackData
+{
+    void*   UserData;       // Read-only.   What user passed to SetNextWindowSizeConstraints()
+    ImVec2  Pos;            // Read-only.   Window position, for reference.
+    ImVec2  CurrentSize;    // Read-only.   Current window size.
+    ImVec2  DesiredSize;    // Read-write.  Desired size, based on user's mouse position. Write to this field to restrain resizing.
+};
+
+// Data payload for Drag and Drop operations: AcceptDragDropPayload(), GetDragDropPayload()
+struct ImGuiPayload
+{
+    // Members
+    void*           Data;               // Data (copied and owned by dear imgui)
+    int             DataSize;           // Data size
+
+    // [Internal]
+    ImGuiID         SourceId;           // Source item id
+    ImGuiID         SourceParentId;     // Source parent id (if available)
+    int             DataFrameCount;     // Data timestamp
+    char            DataType[32 + 1];   // Data type tag (short user-supplied string, 32 characters max)
+    bool            Preview;            // Set when AcceptDragDropPayload() was called and mouse has been hovering the target item (nb: handle overlapping drag targets)
+    bool            Delivery;           // Set when AcceptDragDropPayload() was called and mouse button is released over the target item.
+
+    ImGuiPayload()  { Clear(); }
+    void Clear()    { SourceId = SourceParentId = 0; Data = NULL; DataSize = 0; memset(DataType, 0, sizeof(DataType)); DataFrameCount = -1; Preview = Delivery = false; }
+    bool IsDataType(const char* type) const { return DataFrameCount != -1 && strcmp(type, DataType) == 0; }
+    bool IsPreview() const                  { return Preview; }
+    bool IsDelivery() const                 { return Delivery; }
+};
+
+// Sorting specification for one column of a table (sizeof == 12 bytes)
+struct ImGuiTableColumnSortSpecs
+{
+    ImGuiID                     ColumnUserID;       // User id of the column (if specified by a TableSetupColumn() call)
+    ImS16                       ColumnIndex;        // Index of the column
+    ImS16                       SortOrder;          // Index within parent ImGuiTableSortSpecs (always stored in order starting from 0, tables sorted on a single criteria will always have a 0 here)
+    ImGuiSortDirection          SortDirection : 8;  // ImGuiSortDirection_Ascending or ImGuiSortDirection_Descending
+
+    ImGuiTableColumnSortSpecs() { memset(this, 0, sizeof(*this)); }
+};
+
+// Sorting specifications for a table (often handling sort specs for a single column, occasionally more)
+// Obtained by calling TableGetSortSpecs().
+// When 'SpecsDirty == true' you can sort your data. It will be true when sorting specs have changed since last call, or the first time.
+// Make sure to set 'SpecsDirty = false' after sorting, else you may wastefully sort your data every frame!
+struct ImGuiTableSortSpecs
+{
+    const ImGuiTableColumnSortSpecs* Specs;     // Pointer to sort spec array.
+    int                         SpecsCount;     // Sort spec count. Most often 1. May be > 1 when ImGuiTableFlags_SortMulti is enabled. May be == 0 when ImGuiTableFlags_SortTristate is enabled.
+    bool                        SpecsDirty;     // Set to true when specs have changed since last time! Use this to sort again, then clear the flag.
+
+    ImGuiTableSortSpecs()       { memset(this, 0, sizeof(*this)); }
+};
+
+//-----------------------------------------------------------------------------
+// [SECTION] Obsolete functions
+// (Will be removed! Read 'API BREAKING CHANGES' section in imgui.cpp for details)
+// Please keep your copy of dear imgui up to date! Occasionally set '#define IMGUI_DISABLE_OBSOLETE_FUNCTIONS' in imconfig.h to stay ahead.
+//-----------------------------------------------------------------------------
+
+#ifndef IMGUI_DISABLE_OBSOLETE_FUNCTIONS
+namespace ImGui
+{
+    // OBSOLETED in 1.79 (from August 2020)
+    static inline void  OpenPopupContextItem(const char* str_id = NULL, ImGuiMouseButton mb = 1) { OpenPopupOnItemClick(str_id, mb); } // Bool return value removed. Use IsWindowAppearing() in BeginPopup() instead. Renamed in 1.77, renamed back in 1.79. Sorry!
+    // OBSOLETED in 1.78 (from June 2020)
+    // Old drag/sliders functions that took a 'float power = 1.0' argument instead of flags.
+    // For shared code, you can version check at compile-time with `#if IMGUI_VERSION_NUM >= 17704`.
+    IMGUI_API bool      DragScalar(const char* label, ImGuiDataType data_type, void* p_data, float v_speed, const void* p_min, const void* p_max, const char* format, float power);
+    IMGUI_API bool      DragScalarN(const char* label, ImGuiDataType data_type, void* p_data, int components, float v_speed, const void* p_min, const void* p_max, const char* format, float power);
+    static inline bool  DragFloat(const char* label, float* v, float v_speed, float v_min, float v_max, const char* format, float power)    { return DragScalar(label, ImGuiDataType_Float, v, v_speed, &v_min, &v_max, format, power); }
+    static inline bool  DragFloat2(const char* label, float v[2], float v_speed, float v_min, float v_max, const char* format, float power) { return DragScalarN(label, ImGuiDataType_Float, v, 2, v_speed, &v_min, &v_max, format, power); }
+    static inline bool  DragFloat3(const char* label, float v[3], float v_speed, float v_min, float v_max, const char* format, float power) { return DragScalarN(label, ImGuiDataType_Float, v, 3, v_speed, &v_min, &v_max, format, power); }
+    static inline bool  DragFloat4(const char* label, float v[4], float v_speed, float v_min, float v_max, const char* format, float power) { return DragScalarN(label, ImGuiDataType_Float, v, 4, v_speed, &v_min, &v_max, format, power); }
+    IMGUI_API bool      SliderScalar(const char* label, ImGuiDataType data_type, void* p_data, const void* p_min, const void* p_max, const char* format, float power);
+    IMGUI_API bool      SliderScalarN(const char* label, ImGuiDataType data_type, void* p_data, int components, const void* p_min, const void* p_max, const char* format, float power);
+    static inline bool  SliderFloat(const char* label, float* v, float v_min, float v_max, const char* format, float power)                 { return SliderScalar(label, ImGuiDataType_Float, v, &v_min, &v_max, format, power); }
+    static inline bool  SliderFloat2(const char* label, float v[2], float v_min, float v_max, const char* format, float power)              { return SliderScalarN(label, ImGuiDataType_Float, v, 2, &v_min, &v_max, format, power); }
+    static inline bool  SliderFloat3(const char* label, float v[3], float v_min, float v_max, const char* format, float power)              { return SliderScalarN(label, ImGuiDataType_Float, v, 3, &v_min, &v_max, format, power); }
+    static inline bool  SliderFloat4(const char* label, float v[4], float v_min, float v_max, const char* format, float power)              { return SliderScalarN(label, ImGuiDataType_Float, v, 4, &v_min, &v_max, format, power); }
+    // OBSOLETED in 1.77 (from June 2020)
+    static inline bool  BeginPopupContextWindow(const char* str_id, ImGuiMouseButton mb, bool over_items) { return BeginPopupContextWindow(str_id, mb | (over_items ? 0 : ImGuiPopupFlags_NoOpenOverItems)); }
+    // OBSOLETED in 1.72 (from April 2019)
+    static inline void  TreeAdvanceToLabelPos()             { SetCursorPosX(GetCursorPosX() + GetTreeNodeToLabelSpacing()); }
+    // OBSOLETED in 1.71 (from June 2019)
+    static inline void  SetNextTreeNodeOpen(bool open, ImGuiCond cond = 0) { SetNextItemOpen(open, cond); }
+    // OBSOLETED in 1.70 (from May 2019)
+    static inline float GetContentRegionAvailWidth()        { return GetContentRegionAvail().x; }
+    // OBSOLETED in 1.69 (from Mar 2019)
+    static inline ImDrawList* GetOverlayDrawList()          { return GetForegroundDrawList(); }
+    // OBSOLETED in 1.66 (from Sep 2018)
+    static inline void  SetScrollHere(float center_ratio=0.5f){ SetScrollHereY(center_ratio); }
+}
+#endif
+
+//-----------------------------------------------------------------------------
+// [SECTION] Helpers (ImGuiOnceUponAFrame, ImGuiTextFilter, ImGuiTextBuffer, ImGuiStorage, ImGuiListClipper, ImColor)
+//-----------------------------------------------------------------------------
+
+// Helper: Unicode defines
+#define IM_UNICODE_CODEPOINT_INVALID 0xFFFD     // Invalid Unicode code point (standard value).
+#ifdef IMGUI_USE_WCHAR32
+#define IM_UNICODE_CODEPOINT_MAX     0x10FFFF   // Maximum Unicode code point supported by this build.
+#else
+#define IM_UNICODE_CODEPOINT_MAX     0xFFFF     // Maximum Unicode code point supported by this build.
+#endif
+
+// Helper: Execute a block of code at maximum once a frame. Convenient if you want to quickly create a UI within deep-nested code that runs multiple times every frame.
+// Usage: static ImGuiOnceUponAFrame oaf; if (oaf) ImGui::Text("This will be called only once per frame");
+struct ImGuiOnceUponAFrame
+{
+    ImGuiOnceUponAFrame() { RefFrame = -1; }
+    mutable int RefFrame;
+    operator bool() const { int current_frame = ImGui::GetFrameCount(); if (RefFrame == current_frame) return false; RefFrame = current_frame; return true; }
+};
+
+// Helper: Parse and apply text filters. In format "aaaaa[,bbbb][,ccccc]"
+struct ImGuiTextFilter
+{
+    IMGUI_API           ImGuiTextFilter(const char* default_filter = "");
+    IMGUI_API bool      Draw(const char* label = "Filter (inc,-exc)", float width = 0.0f);  // Helper calling InputText+Build
+    IMGUI_API bool      PassFilter(const char* text, const char* text_end = NULL) const;
+    IMGUI_API void      Build();
+    void                Clear()          { InputBuf[0] = 0; Build(); }
+    bool                IsActive() const { return !Filters.empty(); }
+
+    // [Internal]
+    struct ImGuiTextRange
+    {
+        const char*     b;
+        const char*     e;
+
+        ImGuiTextRange()                                { b = e = NULL; }
+        ImGuiTextRange(const char* _b, const char* _e)  { b = _b; e = _e; }
+        bool            empty() const                   { return b == e; }
+        IMGUI_API void  split(char separator, ImVector<ImGuiTextRange>* out) const;
+    };
+    char                    InputBuf[256];
+    ImVector<ImGuiTextRange> Filters;
+    int                     CountGrep;
+};
+
+// Helper: Growable text buffer for logging/accumulating text
+// (this could be called 'ImGuiTextBuilder' / 'ImGuiStringBuilder')
+struct ImGuiTextBuffer
+{
+    ImVector<char>      Buf;
+    IMGUI_API static char EmptyString[1];
+
+    ImGuiTextBuffer()   { }
+    inline char         operator[](int i) const { IM_ASSERT(Buf.Data != NULL); return Buf.Data[i]; }
+    const char*         begin() const           { return Buf.Data ? &Buf.front() : EmptyString; }
+    const char*         end() const             { return Buf.Data ? &Buf.back() : EmptyString; }   // Buf is zero-terminated, so end() will point on the zero-terminator
+    int                 size() const            { return Buf.Size ? Buf.Size - 1 : 0; }
+    bool                empty() const           { return Buf.Size <= 1; }
+    void                clear()                 { Buf.clear(); }
+    void                reserve(int capacity)   { Buf.reserve(capacity); }
+    const char*         c_str() const           { return Buf.Data ? Buf.Data : EmptyString; }
+    IMGUI_API void      append(const char* str, const char* str_end = NULL);
+    IMGUI_API void      appendf(const char* fmt, ...) IM_FMTARGS(2);
+    IMGUI_API void      appendfv(const char* fmt, va_list args) IM_FMTLIST(2);
+};
+
+// Helper: Key->Value storage
+// Typically you don't have to worry about this since a storage is held within each Window.
+// We use it to e.g. store collapse state for a tree (Int 0/1)
+// This is optimized for efficient lookup (dichotomy into a contiguous buffer) and rare insertion (typically tied to user interactions aka max once a frame)
+// You can use it as custom user storage for temporary values. Declare your own storage if, for example:
+// - You want to manipulate the open/close state of a particular sub-tree in your interface (tree node uses Int 0/1 to store their state).
+// - You want to store custom debug data easily without adding or editing structures in your code (probably not efficient, but convenient)
+// Types are NOT stored, so it is up to you to make sure your keys don't collide with different types.
+struct ImGuiStorage
+{
+    // [Internal]
+    struct ImGuiStoragePair
+    {
+        ImGuiID key;
+        union { int val_i; float val_f; void* val_p; };
+        ImGuiStoragePair(ImGuiID _key, int _val_i)      { key = _key; val_i = _val_i; }
+        ImGuiStoragePair(ImGuiID _key, float _val_f)    { key = _key; val_f = _val_f; }
+        ImGuiStoragePair(ImGuiID _key, void* _val_p)    { key = _key; val_p = _val_p; }
+    };
+
+    ImVector<ImGuiStoragePair>  Data;
+
+    // - Get***() functions find pair, never add/allocate. Pairs are sorted so a query is O(log N)
+    // - Set***() functions find pair, insertion on demand if missing.
+    // - Sorted insertion is costly, paid once. A typical frame shouldn't need to insert any new pair.
+    void                Clear() { Data.clear(); }
+    IMGUI_API int       GetInt(ImGuiID key, int default_val = 0) const;
+    IMGUI_API void      SetInt(ImGuiID key, int val);
+    IMGUI_API bool      GetBool(ImGuiID key, bool default_val = false) const;
+    IMGUI_API void      SetBool(ImGuiID key, bool val);
+    IMGUI_API float     GetFloat(ImGuiID key, float default_val = 0.0f) const;
+    IMGUI_API void      SetFloat(ImGuiID key, float val);
+    IMGUI_API void*     GetVoidPtr(ImGuiID key) const; // default_val is NULL
+    IMGUI_API void      SetVoidPtr(ImGuiID key, void* val);
+
+    // - Get***Ref() functions find pair, insert on demand if missing, return pointer. Useful if you intend to do Get+Set.
+    // - References are only valid until a new value is added to the storage. Calling a Set***() function or a Get***Ref() function invalidates the pointer.
+    // - A typical use case where this is convenient for quick hacking (e.g. add storage during a live Edit&Continue session if you can't modify existing struct)
+    //      float* pvar = ImGui::GetFloatRef(key); ImGui::SliderFloat("var", pvar, 0, 100.0f); some_var += *pvar;
+    IMGUI_API int*      GetIntRef(ImGuiID key, int default_val = 0);
+    IMGUI_API bool*     GetBoolRef(ImGuiID key, bool default_val = false);
+    IMGUI_API float*    GetFloatRef(ImGuiID key, float default_val = 0.0f);
+    IMGUI_API void**    GetVoidPtrRef(ImGuiID key, void* default_val = NULL);
+
+    // Use on your own storage if you know only integers are being stored (open/close all tree nodes)
+    IMGUI_API void      SetAllInt(int val);
+
+    // For quicker full rebuild of a storage (instead of an incremental one), you may add all your contents and then sort once.
+    IMGUI_API void      BuildSortByKey();
+};
+
+// Helper: Manually clip large list of items.
+// If you are submitting lots of evenly spaced items and you have a random access to the list, you can perform coarse
+// clipping based on visibility to save yourself from processing those items at all.
+// The clipper calculates the range of visible items and advances the cursor to compensate for the non-visible items we have skipped.
+// (Dear ImGui already clips items based on their bounds but it needs to measure text size to do so, whereas manual coarse clipping before submission makes this cost and your own data fetching/submission cost almost null)
+// Usage:
+//   ImGuiListClipper clipper;
+//   clipper.Begin(1000);         // We have 1000 elements, evenly spaced.
+//   while (clipper.Step())
+//       for (int i = clipper.DisplayStart; i < clipper.DisplayEnd; i++)
+//           ImGui::Text("line number %d", i);
+// Generally what happens is:
+// - Clipper lets you process the first element (DisplayStart = 0, DisplayEnd = 1) regardless of it being visible or not.
+// - User code submits one element.
+// - Clipper can measure the height of the first element
+// - Clipper calculates the actual range of elements to display based on the current clipping rectangle, position the cursor before the first visible element.
+// - User code submits visible elements.
+struct ImGuiListClipper
+{
+    int     DisplayStart;
+    int     DisplayEnd;
+
+    // [Internal]
+    int     ItemsCount;
+    int     StepNo;
+    int     ItemsFrozen;
+    float   ItemsHeight;
+    float   StartPosY;
+
+    IMGUI_API ImGuiListClipper();
+    IMGUI_API ~ImGuiListClipper();
+
+    // items_count: Use INT_MAX if you don't know how many items you have (in which case the cursor won't be advanced in the final step)
+    // items_height: Use -1.0f to be calculated automatically on first step. Otherwise pass in the distance between your items, typically GetTextLineHeightWithSpacing() or GetFrameHeightWithSpacing().
+    IMGUI_API void Begin(int items_count, float items_height = -1.0f);  // Automatically called by constructor if you passed 'items_count' or by Step() in Step 1.
+    IMGUI_API void End();                                               // Automatically called on the last call of Step() that returns false.
+    IMGUI_API bool Step();                                              // Call until it returns false. The DisplayStart/DisplayEnd fields will be set and you can process/draw those items.
+
+#ifndef IMGUI_DISABLE_OBSOLETE_FUNCTIONS
+    inline ImGuiListClipper(int items_count, float items_height = -1.0f) { memset(this, 0, sizeof(*this)); ItemsCount = -1; Begin(items_count, items_height); } // [removed in 1.79]
+#endif
+};
+
+// Helper macros to generate 32-bit encoded colors. NOTE(review): the span from the IM_COL32() body through the start of ImColor(ImU32) was lost in extraction and is reconstructed here; confirm against the vendored header.
+#ifdef IMGUI_USE_BGRA_PACKED_COLOR
+#define IM_COL32_R_SHIFT    16
+#define IM_COL32_G_SHIFT    8
+#define IM_COL32_B_SHIFT    0
+#define IM_COL32_A_SHIFT    24
+#define IM_COL32_A_MASK     0xFF000000
+#else
+#define IM_COL32_R_SHIFT    0
+#define IM_COL32_G_SHIFT    8
+#define IM_COL32_B_SHIFT    16
+#define IM_COL32_A_SHIFT    24
+#define IM_COL32_A_MASK     0xFF000000
+#endif
+#define IM_COL32(R,G,B,A)    (((ImU32)(A)<<IM_COL32_A_SHIFT) | ((ImU32)(B)<<IM_COL32_B_SHIFT) | ((ImU32)(G)<<IM_COL32_G_SHIFT) | ((ImU32)(R)<<IM_COL32_R_SHIFT))
+#define IM_COL32_WHITE       IM_COL32(255,255,255,255)  // Opaque white = 0xFFFFFFFF
+#define IM_COL32_BLACK       IM_COL32(0,0,0,255)        // Opaque black
+#define IM_COL32_BLACK_TRANS IM_COL32(0,0,0,0)          // Transparent black = 0x00000000
+
+// Helper: ImColor() implicitly converts colors to either ImU32 (packed 4x1 byte) or ImVec4 (4x1 float)
+// Prefer using IM_COL32() macros if you want a guaranteed compile-time ImU32 for usage with ImDrawList API.
+// **Avoid storing ImColor! Store either u32 or ImVec4. This is not a full-featured color class. MAY OBSOLETE.
+// **None of the ImGui API are using ImColor directly but you can use it as a convenience to pass colors in either ImU32 or ImVec4 formats. Explicitly cast to ImU32 or ImVec4 if needed.
+struct ImColor
+{
+    ImVec4              Value;
+
+    ImColor()                                                       { Value.x = Value.y = Value.z = Value.w = 0.0f; }
+    ImColor(int r, int g, int b, int a = 255)                       { float sc = 1.0f / 255.0f; Value.x = (float)r * sc; Value.y = (float)g * sc; Value.z = (float)b * sc; Value.w = (float)a * sc; }
+    ImColor(ImU32 rgba)                                             { float sc = 1.0f / 255.0f; Value.x = (float)((rgba >> IM_COL32_R_SHIFT) & 0xFF) * sc; Value.y = (float)((rgba >> IM_COL32_G_SHIFT) & 0xFF) * sc; Value.z = (float)((rgba >> IM_COL32_B_SHIFT) & 0xFF) * sc; Value.w = (float)((rgba >> IM_COL32_A_SHIFT) & 0xFF) * sc; }
+    ImColor(float r, float g, float b, float a = 1.0f)              { Value.x = r; Value.y = g; Value.z = b; Value.w = a; }
+    ImColor(const ImVec4& col)                                      { Value = col; }
+    inline operator ImU32() const                                   { return ImGui::ColorConvertFloat4ToU32(Value); }
+    inline operator ImVec4() const                                  { return Value; }
+
+    // FIXME-OBSOLETE: May need to obsolete/cleanup those helpers.
+    inline void    SetHSV(float h, float s, float v, float a = 1.0f){ ImGui::ColorConvertHSVtoRGB(h, s, v, Value.x, Value.y, Value.z); Value.w = a; }
+    static ImColor HSV(float h, float s, float v, float a = 1.0f)   { float r, g, b; ImGui::ColorConvertHSVtoRGB(h, s, v, r, g, b); return ImColor(r, g, b, a); }
+};
+
+//-----------------------------------------------------------------------------
+// [SECTION] Drawing API (ImDrawCmd, ImDrawIdx, ImDrawVert, ImDrawChannel, ImDrawListSplitter, ImDrawListFlags, ImDrawList, ImDrawData)
+// Hold a series of drawing commands. The user provides a renderer for ImDrawData which essentially contains an array of ImDrawList.
+//-----------------------------------------------------------------------------
+
+// The maximum line width to bake anti-aliased textures for. Build atlas with ImFontAtlasFlags_NoBakedLines to disable baking.
+#ifndef IM_DRAWLIST_TEX_LINES_WIDTH_MAX
+#define IM_DRAWLIST_TEX_LINES_WIDTH_MAX     (63)
+#endif
+
+// ImDrawCallback: Draw callbacks for advanced uses [configurable type: override in imconfig.h]
+// NB: You most likely do NOT need to use draw callbacks just to create your own widget or customized UI rendering,
+// you can poke into the draw list for that! Draw callback may be useful for example to:
+//  A) Change your GPU render state,
+//  B) render a complex 3D scene inside a UI element without an intermediate texture/render target, etc.
+// The expected behavior from your rendering function is 'if (cmd.UserCallback != NULL) { cmd.UserCallback(parent_list, cmd); } else { RenderTriangles() }'
+// If you want to override the signature of ImDrawCallback, you can simply use e.g. '#define ImDrawCallback MyDrawCallback' (in imconfig.h) + update rendering backend accordingly.
+#ifndef ImDrawCallback
+typedef void (*ImDrawCallback)(const ImDrawList* parent_list, const ImDrawCmd* cmd);
+#endif
+
+// Special Draw callback value to request renderer backend to reset the graphics/render state.
+// The renderer backend needs to handle this special value, otherwise it will crash trying to call a function at this address.
+// This is useful for example if you submitted callbacks which you know have altered the render state and you want it to be restored.
+// It is not done by default because there are many perfectly useful ways of altering render state for imgui contents (e.g. changing shader/blending settings before an Image call).
+#define ImDrawCallback_ResetRenderState     (ImDrawCallback)(-1)
+
+// Typically, 1 command = 1 GPU draw call (unless command is a callback)
+// - VtxOffset/IdxOffset: When 'io.BackendFlags & ImGuiBackendFlags_RendererHasVtxOffset' is enabled,
+//   those fields allow us to render meshes larger than 64K vertices while keeping 16-bit indices.
+//   Pre-1.71 backends will typically ignore the VtxOffset/IdxOffset fields.
+// - The ClipRect/TextureId/VtxOffset fields must be contiguous as we memcmp() them together (this is asserted for).
+struct ImDrawCmd
+{
+    ImVec4          ClipRect;           // 4*4  // Clipping rectangle (x1, y1, x2, y2). Subtract ImDrawData->DisplayPos to get clipping rectangle in "viewport" coordinates
+    ImTextureID     TextureId;          // 4-8  // User-provided texture ID. Set by user in ImFontAtlas::SetTexID() for fonts or passed to Image*() functions. Ignore if never using images or multiple fonts atlas.
+    unsigned int    VtxOffset;          // 4    // Start offset in vertex buffer. Always 0 unless ImGuiBackendFlags_RendererHasVtxOffset is enabled, in which case it may be >0 to support meshes larger than 64K vertices with 16-bit indices.
+    unsigned int    IdxOffset;          // 4    // Start offset in index buffer. Always equal to sum of ElemCount drawn so far.
+    unsigned int    ElemCount;          // 4    // Number of indices (multiple of 3) to be rendered as triangles. Vertices are stored in the callee ImDrawList's vtx_buffer[] array, indices in idx_buffer[].
+    ImDrawCallback  UserCallback;       // 4-8  // If != NULL, call the function instead of rendering the vertices. clip_rect and texture_id will be set normally.
+    void*           UserCallbackData;   // 4-8  // The draw callback code can access this.
+
+    ImDrawCmd() { memset(this, 0, sizeof(*this)); } // Also ensure our padding fields are zeroed
+};
+
+// Vertex index, default to 16-bit
+// To allow large meshes with 16-bit indices: set 'io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset' and handle ImDrawCmd::VtxOffset in the renderer backend (recommended).
+// To use 32-bit indices: override with '#define ImDrawIdx unsigned int' in imconfig.h. +#ifndef ImDrawIdx +typedef unsigned short ImDrawIdx; +#endif + +// Vertex layout +#ifndef IMGUI_OVERRIDE_DRAWVERT_STRUCT_LAYOUT +struct ImDrawVert +{ + ImVec2 pos; + ImVec2 uv; + ImU32 col; +}; +#else +// You can override the vertex format layout by defining IMGUI_OVERRIDE_DRAWVERT_STRUCT_LAYOUT in imconfig.h +// The code expect ImVec2 pos (8 bytes), ImVec2 uv (8 bytes), ImU32 col (4 bytes), but you can re-order them or add other fields as needed to simplify integration in your engine. +// The type has to be described within the macro (you can either declare the struct or use a typedef). This is because ImVec2/ImU32 are likely not declared a the time you'd want to set your type up. +// NOTE: IMGUI DOESN'T CLEAR THE STRUCTURE AND DOESN'T CALL A CONSTRUCTOR SO ANY CUSTOM FIELD WILL BE UNINITIALIZED. IF YOU ADD EXTRA FIELDS (SUCH AS A 'Z' COORDINATES) YOU WILL NEED TO CLEAR THEM DURING RENDER OR TO IGNORE THEM. +IMGUI_OVERRIDE_DRAWVERT_STRUCT_LAYOUT; +#endif + +// [Internal] For use by ImDrawList +struct ImDrawCmdHeader +{ + ImVec4 ClipRect; + ImTextureID TextureId; + unsigned int VtxOffset; +}; + +// [Internal] For use by ImDrawListSplitter +struct ImDrawChannel +{ + ImVector _CmdBuffer; + ImVector _IdxBuffer; +}; + + +// Split/Merge functions are used to split the draw list into different layers which can be drawn into out of order. +// This is used by the Columns/Tables API, so items of each column can be batched together in a same draw call. 
+struct ImDrawListSplitter
+{
+    int                         _Current;    // Current channel number (0)
+    int                         _Count;      // Number of active channels (1+)
+    ImVector<ImDrawChannel>     _Channels;   // Draw channels (not resized down so _Count might be < _Channels.Size)
+
+    inline ImDrawListSplitter()  { memset(this, 0, sizeof(*this)); }
+    inline ~ImDrawListSplitter() { ClearFreeMemory(); }
+    inline void                 Clear() { _Current = 0; _Count = 1; } // Do not clear _Channels[] so our allocations are reused next frame
+    IMGUI_API void              ClearFreeMemory();
+    IMGUI_API void              Split(ImDrawList* draw_list, int count);
+    IMGUI_API void              Merge(ImDrawList* draw_list);
+    IMGUI_API void              SetCurrentChannel(ImDrawList* draw_list, int channel_idx);
+};
+
+enum ImDrawCornerFlags_
+{
+    ImDrawCornerFlags_None      = 0,
+    ImDrawCornerFlags_TopLeft   = 1 << 0, // 0x1
+    ImDrawCornerFlags_TopRight  = 1 << 1, // 0x2
+    ImDrawCornerFlags_BotLeft   = 1 << 2, // 0x4
+    ImDrawCornerFlags_BotRight  = 1 << 3, // 0x8
+    ImDrawCornerFlags_Top       = ImDrawCornerFlags_TopLeft | ImDrawCornerFlags_TopRight,   // 0x3
+    ImDrawCornerFlags_Bot       = ImDrawCornerFlags_BotLeft | ImDrawCornerFlags_BotRight,   // 0xC
+    ImDrawCornerFlags_Left      = ImDrawCornerFlags_TopLeft | ImDrawCornerFlags_BotLeft,    // 0x5
+    ImDrawCornerFlags_Right     = ImDrawCornerFlags_TopRight | ImDrawCornerFlags_BotRight,  // 0xA
+    ImDrawCornerFlags_All       = 0xF     // In your function calls you may use ~0 (= all bits set) instead of ImDrawCornerFlags_All, as a convenience
+};
+
+// Flags for ImDrawList. Those are set automatically by ImGui:: functions from ImGuiIO settings, and generally not manipulated directly.
+// It is however possible to temporarily alter flags between calls to ImDrawList:: functions.
+enum ImDrawListFlags_ +{ + ImDrawListFlags_None = 0, + ImDrawListFlags_AntiAliasedLines = 1 << 0, // Enable anti-aliased lines/borders (*2 the number of triangles for 1.0f wide line or lines thin enough to be drawn using textures, otherwise *3 the number of triangles) + ImDrawListFlags_AntiAliasedLinesUseTex = 1 << 1, // Enable anti-aliased lines/borders using textures when possible. Require backend to render with bilinear filtering. + ImDrawListFlags_AntiAliasedFill = 1 << 2, // Enable anti-aliased edge around filled shapes (rounded rectangles, circles). + ImDrawListFlags_AllowVtxOffset = 1 << 3 // Can emit 'VtxOffset > 0' to allow large meshes. Set when 'ImGuiBackendFlags_RendererHasVtxOffset' is enabled. +}; + +// Draw command list +// This is the low-level list of polygons that ImGui:: functions are filling. At the end of the frame, +// all command lists are passed to your ImGuiIO::RenderDrawListFn function for rendering. +// Each dear imgui window contains its own ImDrawList. You can use ImGui::GetWindowDrawList() to +// access the current window draw list and draw custom primitives. +// You can interleave normal ImGui:: calls and adding primitives to the current draw list. +// All positions are generally in pixel coordinates (top-left at (0,0), bottom-right at io.DisplaySize), but you are totally free to apply whatever transformation matrix to want to the data (if you apply such transformation you'll want to apply it to ClipRect as well) +// Important: Primitives are always added to the list and not culled (culling is done at higher-level by ImGui:: functions), if you use this API a lot consider coarse culling your drawn objects. +struct ImDrawList +{ + // This is what you have to render + ImVector CmdBuffer; // Draw commands. Typically 1 command = 1 GPU draw call, unless the command is a callback. + ImVector IdxBuffer; // Index buffer. Each command consume ImDrawCmd::ElemCount of those + ImVector VtxBuffer; // Vertex buffer. 
+ ImDrawListFlags Flags; // Flags, you may poke into these to adjust anti-aliasing settings per-primitive. + + // [Internal, used while building lists] + unsigned int _VtxCurrentIdx; // [Internal] generally == VtxBuffer.Size unless we are past 64K vertices, in which case this gets reset to 0. + const ImDrawListSharedData* _Data; // Pointer to shared draw data (you can use ImGui::GetDrawListSharedData() to get the one from current ImGui context) + const char* _OwnerName; // Pointer to owner window's name for debugging + ImDrawVert* _VtxWritePtr; // [Internal] point within VtxBuffer.Data after each add command (to avoid using the ImVector<> operators too much) + ImDrawIdx* _IdxWritePtr; // [Internal] point within IdxBuffer.Data after each add command (to avoid using the ImVector<> operators too much) + ImVector _ClipRectStack; // [Internal] + ImVector _TextureIdStack; // [Internal] + ImVector _Path; // [Internal] current path building + ImDrawCmdHeader _CmdHeader; // [Internal] template of active commands. Fields should match those of CmdBuffer.back(). + ImDrawListSplitter _Splitter; // [Internal] for channels api (note: prefer using your own persistent instance of ImDrawListSplitter!) + float _FringeScale; // [Internal] anti-alias fringe is scaled by this value, this helps to keep things sharp while zooming at vertex buffer content + + // If you want to create ImDrawList instances, pass them ImGui::GetDrawListSharedData() or create and use your own ImDrawListSharedData (so you can use ImDrawList without ImGui) + ImDrawList(const ImDrawListSharedData* shared_data) { memset(this, 0, sizeof(*this)); _Data = shared_data; } + + ~ImDrawList() { _ClearFreeMemory(); } + IMGUI_API void PushClipRect(ImVec2 clip_rect_min, ImVec2 clip_rect_max, bool intersect_with_current_clip_rect = false); // Render-level scissoring. This is passed down to your render function but not used for CPU-side coarse clipping. 
Prefer using higher-level ImGui::PushClipRect() to affect logic (hit-testing and widget culling) + IMGUI_API void PushClipRectFullScreen(); + IMGUI_API void PopClipRect(); + IMGUI_API void PushTextureID(ImTextureID texture_id); + IMGUI_API void PopTextureID(); + inline ImVec2 GetClipRectMin() const { const ImVec4& cr = _ClipRectStack.back(); return ImVec2(cr.x, cr.y); } + inline ImVec2 GetClipRectMax() const { const ImVec4& cr = _ClipRectStack.back(); return ImVec2(cr.z, cr.w); } + + // Primitives + // - For rectangular primitives, "p_min" and "p_max" represent the upper-left and lower-right corners. + // - For circle primitives, use "num_segments == 0" to automatically calculate tessellation (preferred). + // In older versions (until Dear ImGui 1.77) the AddCircle functions defaulted to num_segments == 12. + // In future versions we will use textures to provide cheaper and higher-quality circles. + // Use AddNgon() and AddNgonFilled() functions if you need to guaranteed a specific number of sides. 
+ IMGUI_API void AddLine(const ImVec2& p1, const ImVec2& p2, ImU32 col, float thickness = 1.0f); + IMGUI_API void AddRect(const ImVec2& p_min, const ImVec2& p_max, ImU32 col, float rounding = 0.0f, ImDrawCornerFlags rounding_corners = ImDrawCornerFlags_All, float thickness = 1.0f); // a: upper-left, b: lower-right (== upper-left + size), rounding_corners_flags: 4 bits corresponding to which corner to round + IMGUI_API void AddRectFilled(const ImVec2& p_min, const ImVec2& p_max, ImU32 col, float rounding = 0.0f, ImDrawCornerFlags rounding_corners = ImDrawCornerFlags_All); // a: upper-left, b: lower-right (== upper-left + size) + IMGUI_API void AddRectFilledMultiColor(const ImVec2& p_min, const ImVec2& p_max, ImU32 col_upr_left, ImU32 col_upr_right, ImU32 col_bot_right, ImU32 col_bot_left); + IMGUI_API void AddQuad(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, ImU32 col, float thickness = 1.0f); + IMGUI_API void AddQuadFilled(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, ImU32 col); + IMGUI_API void AddTriangle(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, ImU32 col, float thickness = 1.0f); + IMGUI_API void AddTriangleFilled(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, ImU32 col); + IMGUI_API void AddCircle(const ImVec2& center, float radius, ImU32 col, int num_segments = 0, float thickness = 1.0f); + IMGUI_API void AddCircleFilled(const ImVec2& center, float radius, ImU32 col, int num_segments = 0); + IMGUI_API void AddNgon(const ImVec2& center, float radius, ImU32 col, int num_segments, float thickness = 1.0f); + IMGUI_API void AddNgonFilled(const ImVec2& center, float radius, ImU32 col, int num_segments); + IMGUI_API void AddText(const ImVec2& pos, ImU32 col, const char* text_begin, const char* text_end = NULL); + IMGUI_API void AddText(const ImFont* font, float font_size, const ImVec2& pos, ImU32 col, const char* text_begin, const char* text_end = NULL, float wrap_width = 0.0f, const ImVec4* 
cpu_fine_clip_rect = NULL); + IMGUI_API void AddPolyline(const ImVec2* points, int num_points, ImU32 col, bool closed, float thickness); + IMGUI_API void AddConvexPolyFilled(const ImVec2* points, int num_points, ImU32 col); // Note: Anti-aliased filling requires points to be in clockwise order. + IMGUI_API void AddBezierCubic(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, ImU32 col, float thickness, int num_segments = 0); // Cubic Bezier (4 control points) + IMGUI_API void AddBezierQuadratic(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, ImU32 col, float thickness, int num_segments = 0); // Quadratic Bezier (3 control points) + + // Image primitives + // - Read FAQ to understand what ImTextureID is. + // - "p_min" and "p_max" represent the upper-left and lower-right corners of the rectangle. + // - "uv_min" and "uv_max" represent the normalized texture coordinates to use for those corners. Using (0,0)->(1,1) texture coordinates will generally display the entire texture. 
+ IMGUI_API void AddImage(ImTextureID user_texture_id, const ImVec2& p_min, const ImVec2& p_max, const ImVec2& uv_min = ImVec2(0, 0), const ImVec2& uv_max = ImVec2(1, 1), ImU32 col = IM_COL32_WHITE); + IMGUI_API void AddImageQuad(ImTextureID user_texture_id, const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, const ImVec2& uv1 = ImVec2(0, 0), const ImVec2& uv2 = ImVec2(1, 0), const ImVec2& uv3 = ImVec2(1, 1), const ImVec2& uv4 = ImVec2(0, 1), ImU32 col = IM_COL32_WHITE); + IMGUI_API void AddImageRounded(ImTextureID user_texture_id, const ImVec2& p_min, const ImVec2& p_max, const ImVec2& uv_min, const ImVec2& uv_max, ImU32 col, float rounding, ImDrawCornerFlags rounding_corners = ImDrawCornerFlags_All); + + // Stateful path API, add points then finish with PathFillConvex() or PathStroke() + inline void PathClear() { _Path.Size = 0; } + inline void PathLineTo(const ImVec2& pos) { _Path.push_back(pos); } + inline void PathLineToMergeDuplicate(const ImVec2& pos) { if (_Path.Size == 0 || memcmp(&_Path.Data[_Path.Size - 1], &pos, 8) != 0) _Path.push_back(pos); } + inline void PathFillConvex(ImU32 col) { AddConvexPolyFilled(_Path.Data, _Path.Size, col); _Path.Size = 0; } // Note: Anti-aliased filling requires points to be in clockwise order. 
+ inline void PathStroke(ImU32 col, bool closed, float thickness = 1.0f) { AddPolyline(_Path.Data, _Path.Size, col, closed, thickness); _Path.Size = 0; } + IMGUI_API void PathArcTo(const ImVec2& center, float radius, float a_min, float a_max, int num_segments = 10); + IMGUI_API void PathArcToFast(const ImVec2& center, float radius, int a_min_of_12, int a_max_of_12); // Use precomputed angles for a 12 steps circle + IMGUI_API void PathBezierCubicCurveTo(const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, int num_segments = 0); // Cubic Bezier (4 control points) + IMGUI_API void PathBezierQuadraticCurveTo(const ImVec2& p2, const ImVec2& p3, int num_segments = 0); // Quadratic Bezier (3 control points) + IMGUI_API void PathRect(const ImVec2& rect_min, const ImVec2& rect_max, float rounding = 0.0f, ImDrawCornerFlags rounding_corners = ImDrawCornerFlags_All); + + // Advanced + IMGUI_API void AddCallback(ImDrawCallback callback, void* callback_data); // Your rendering function must check for 'UserCallback' in ImDrawCmd and call the function instead of rendering triangles. + IMGUI_API void AddDrawCmd(); // This is useful if you need to forcefully create a new draw call (to allow for dependent rendering / blending). Otherwise primitives are merged into the same draw-call as much as possible + IMGUI_API ImDrawList* CloneOutput() const; // Create a clone of the CmdBuffer/IdxBuffer/VtxBuffer. + + // Advanced: Channels + // - Use to split render into layers. By switching channels to can render out-of-order (e.g. submit FG primitives before BG primitives) + // - Use to minimize draw calls (e.g. if going back-and-forth between multiple clipping rectangles, prefer to append into separate channels then merge at the end) + // - FIXME-OBSOLETE: This API shouldn't have been in ImDrawList in the first place! + // Prefer using your own persistent instance of ImDrawListSplitter as you can stack them. + // Using the ImDrawList::ChannelsXXXX you cannot stack a split over another. 
+ inline void ChannelsSplit(int count) { _Splitter.Split(this, count); } + inline void ChannelsMerge() { _Splitter.Merge(this); } + inline void ChannelsSetCurrent(int n) { _Splitter.SetCurrentChannel(this, n); } + + // Advanced: Primitives allocations + // - We render triangles (three vertices) + // - All primitives needs to be reserved via PrimReserve() beforehand. + IMGUI_API void PrimReserve(int idx_count, int vtx_count); + IMGUI_API void PrimUnreserve(int idx_count, int vtx_count); + IMGUI_API void PrimRect(const ImVec2& a, const ImVec2& b, ImU32 col); // Axis aligned rectangle (composed of two triangles) + IMGUI_API void PrimRectUV(const ImVec2& a, const ImVec2& b, const ImVec2& uv_a, const ImVec2& uv_b, ImU32 col); + IMGUI_API void PrimQuadUV(const ImVec2& a, const ImVec2& b, const ImVec2& c, const ImVec2& d, const ImVec2& uv_a, const ImVec2& uv_b, const ImVec2& uv_c, const ImVec2& uv_d, ImU32 col); + inline void PrimWriteVtx(const ImVec2& pos, const ImVec2& uv, ImU32 col) { _VtxWritePtr->pos = pos; _VtxWritePtr->uv = uv; _VtxWritePtr->col = col; _VtxWritePtr++; _VtxCurrentIdx++; } + inline void PrimWriteIdx(ImDrawIdx idx) { *_IdxWritePtr = idx; _IdxWritePtr++; } + inline void PrimVtx(const ImVec2& pos, const ImVec2& uv, ImU32 col) { PrimWriteIdx((ImDrawIdx)_VtxCurrentIdx); PrimWriteVtx(pos, uv, col); } // Write vertex with unique index + +#ifndef IMGUI_DISABLE_OBSOLETE_FUNCTIONS + inline void AddBezierCurve(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, ImU32 col, float thickness, int num_segments = 0) { AddBezierCubic(p1, p2, p3, p4, col, thickness, num_segments); } + inline void PathBezierCurveTo(const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, int num_segments = 0) { PathBezierCubicCurveTo(p2, p3, p4, num_segments); } +#endif + + // [Internal helpers] + IMGUI_API void _ResetForNewFrame(); + IMGUI_API void _ClearFreeMemory(); + IMGUI_API void _PopUnusedDrawCmd(); + IMGUI_API void _OnChangedClipRect(); + IMGUI_API void 
_OnChangedTextureID(); + IMGUI_API void _OnChangedVtxOffset(); +}; + +// All draw data to render a Dear ImGui frame +// (NB: the style and the naming convention here is a little inconsistent, we currently preserve them for backward compatibility purpose, +// as this is one of the oldest structure exposed by the library! Basically, ImDrawList == CmdList) +struct ImDrawData +{ + bool Valid; // Only valid after Render() is called and before the next NewFrame() is called. + ImDrawList** CmdLists; // Array of ImDrawList* to render. The ImDrawList are owned by ImGuiContext and only pointed to from here. + int CmdListsCount; // Number of ImDrawList* to render + int TotalIdxCount; // For convenience, sum of all ImDrawList's IdxBuffer.Size + int TotalVtxCount; // For convenience, sum of all ImDrawList's VtxBuffer.Size + ImVec2 DisplayPos; // Upper-left position of the viewport to render (== upper-left of the orthogonal projection matrix to use) + ImVec2 DisplaySize; // Size of the viewport to render (== io.DisplaySize for the main viewport) (DisplayPos + DisplaySize == lower-right of the orthogonal projection matrix to use) + ImVec2 FramebufferScale; // Amount of pixels for each unit of DisplaySize. Based on io.DisplayFramebufferScale. Generally (1,1) on normal display, (2,2) on OSX with Retina display. + + // Functions + ImDrawData() { Valid = false; Clear(); } + ~ImDrawData() { Clear(); } + void Clear() { Valid = false; CmdLists = NULL; CmdListsCount = TotalVtxCount = TotalIdxCount = 0; DisplayPos = DisplaySize = FramebufferScale = ImVec2(0.f, 0.f); } // The ImDrawList are owned by ImGuiContext! + IMGUI_API void DeIndexAllBuffers(); // Helper to convert all buffers from indexed to non-indexed, in case you cannot render indexed. Note: this is slow and most likely a waste of resources. Always prefer indexed rendering! + IMGUI_API void ScaleClipRects(const ImVec2& fb_scale); // Helper to scale the ClipRect field of each ImDrawCmd. 
Use if your final output buffer is at a different scale than Dear ImGui expects, or if there is a difference between your window resolution and framebuffer resolution. +}; + +//----------------------------------------------------------------------------- +// [SECTION] Font API (ImFontConfig, ImFontGlyph, ImFontAtlasFlags, ImFontAtlas, ImFontGlyphRangesBuilder, ImFont) +//----------------------------------------------------------------------------- + +struct ImFontConfig +{ + void* FontData; // // TTF/OTF data + int FontDataSize; // // TTF/OTF data size + bool FontDataOwnedByAtlas; // true // TTF/OTF data ownership taken by the container ImFontAtlas (will delete memory itself). + int FontNo; // 0 // Index of font within TTF/OTF file + float SizePixels; // // Size in pixels for rasterizer (more or less maps to the resulting font height). + int OversampleH; // 3 // Rasterize at higher quality for sub-pixel positioning. Read https://github.com/nothings/stb/blob/master/tests/oversample/README.md for details. + int OversampleV; // 1 // Rasterize at higher quality for sub-pixel positioning. We don't use sub-pixel positions on the Y axis. + bool PixelSnapH; // false // Align every glyph to pixel boundary. Useful e.g. if you are merging a non-pixel aligned font with the default font. If enabled, you can set OversampleH/V to 1. + ImVec2 GlyphExtraSpacing; // 0, 0 // Extra spacing (in pixels) between glyphs. Only X axis is supported for now. + ImVec2 GlyphOffset; // 0, 0 // Offset all glyphs from this font input. + const ImWchar* GlyphRanges; // NULL // Pointer to a user-provided list of Unicode range (2 value per range, values are inclusive, zero-terminated list). THE ARRAY DATA NEEDS TO PERSIST AS LONG AS THE FONT IS ALIVE. 
+ float GlyphMinAdvanceX; // 0 // Minimum AdvanceX for glyphs, set Min to align font icons, set both Min/Max to enforce mono-space font + float GlyphMaxAdvanceX; // FLT_MAX // Maximum AdvanceX for glyphs + bool MergeMode; // false // Merge into previous ImFont, so you can combine multiple inputs font into one ImFont (e.g. ASCII font + icons + Japanese glyphs). You may want to use GlyphOffset.y when merge font of different heights. + unsigned int FontBuilderFlags; // 0 // Settings for custom font builder. THIS IS BUILDER IMPLEMENTATION DEPENDENT. Leave as zero if unsure. + float RasterizerMultiply; // 1.0f // Brighten (>1.0f) or darken (<1.0f) font output. Brightening small fonts may be a good workaround to make them more readable. + ImWchar EllipsisChar; // -1 // Explicitly specify unicode codepoint of ellipsis character. When fonts are being merged first specified ellipsis will be used. + + // [Internal] + char Name[40]; // Name (strictly to ease debugging) + ImFont* DstFont; + + IMGUI_API ImFontConfig(); +}; + +// Hold rendering data for one glyph. +// (Note: some language parsers may fail to convert the 31+1 bitfield members, in this case maybe drop store a single u32 or we can rework this) +struct ImFontGlyph +{ + unsigned int Codepoint : 31; // 0x0000..0xFFFF + unsigned int Visible : 1; // Flag to allow early out when rendering + float AdvanceX; // Distance to next character (= data from font + ImFontConfig::GlyphExtraSpacing.x baked in) + float X0, Y0, X1, Y1; // Glyph corners + float U0, V0, U1, V1; // Texture coordinates +}; + +// Helper to build glyph ranges from text/string data. Feed your application strings/characters to it then call BuildRanges(). +// This is essentially a tightly packed of vector of 64k booleans = 8KB storage. 
+struct ImFontGlyphRangesBuilder
+{
+    ImVector<ImU32> UsedChars; // Store 1-bit per Unicode code point (0=unused, 1=used)
+
+    ImFontGlyphRangesBuilder() { Clear(); }
+    inline void Clear() { int size_in_bytes = (IM_UNICODE_CODEPOINT_MAX + 1) / 8; UsedChars.resize(size_in_bytes / (int)sizeof(ImU32)); memset(UsedChars.Data, 0, (size_t)size_in_bytes); } // Size the bit array for every code point and zero it
+    inline bool GetBit(size_t n) const { int off = (int)(n >> 5); ImU32 mask = 1u << (n & 31); return (UsedChars[off] & mask) != 0; } // Get bit n in the array (32 bits per ImU32 word)
+    inline void SetBit(size_t n) { int off = (int)(n >> 5); ImU32 mask = 1u << (n & 31); UsedChars[off] |= mask; } // Set bit n in the array (32 bits per ImU32 word)
+    inline void AddChar(ImWchar c) { SetBit(c); } // Add character
+    IMGUI_API void AddText(const char* text, const char* text_end = NULL); // Add string (each character of the UTF-8 string is added)
+    IMGUI_API void AddRanges(const ImWchar* ranges); // Add ranges, e.g. builder.AddRanges(ImFontAtlas::GetGlyphRangesDefault()) to force add all of ASCII/Latin+Ext
+    IMGUI_API void BuildRanges(ImVector<ImWchar>* out_ranges); // Output new ranges
+};
+
+// See ImFontAtlas::AddCustomRectXXX functions.
+struct ImFontAtlasCustomRect
+{
+    unsigned short Width, Height; // Input // Requested rectangle size
+    unsigned short X, Y; // Output // Position inside the atlas once packed (0xFFFF until then)
+    unsigned int GlyphID; // Input // Custom font glyphs only (ID < 0x110000)
+    float GlyphAdvanceX; // Input // Custom font glyphs only: glyph xadvance
+    ImVec2 GlyphOffset; // Input // Custom font glyphs only: glyph display offset
+    ImFont* Font; // Input // Custom font glyphs only: target font
+    ImFontAtlasCustomRect() : Width(0), Height(0), X(0xFFFF), Y(0xFFFF), GlyphID(0), GlyphAdvanceX(0.0f), GlyphOffset(ImVec2(0, 0)), Font(NULL) {}
+    bool IsPacked() const { return !(X == 0xFFFF); } // X keeps the 0xFFFF sentinel until a position is assigned
+};
+
+// Flags for ImFontAtlas build
+enum ImFontAtlasFlags_
+{
+    ImFontAtlasFlags_None = 0,
+    ImFontAtlasFlags_NoPowerOfTwoHeight = 1 << 0, // Don't round the height to next power of two
+    ImFontAtlasFlags_NoMouseCursors = 1 << 1, // Don't build software mouse cursors into the atlas (save a little texture memory)
+    ImFontAtlasFlags_NoBakedLines = 1 << 2 // Don't build thick line textures into the atlas (save a little texture memory). The AntiAliasedLinesUseTex feature uses them, otherwise they will be rendered using polygons (more expensive for CPU/GPU).
+};
+
+// Load and rasterize multiple TTF/OTF fonts into a same texture. The font atlas will build a single texture holding:
+// - One or more fonts.
+// - Custom graphics data needed to render the shapes needed by Dear ImGui.
+// - Mouse cursor shapes for software cursor rendering (unless setting 'Flags |= ImFontAtlasFlags_NoMouseCursors' in the font atlas).
+// It is the user-code responsibility to setup/build the atlas, then upload the pixel data into a texture accessible by your graphics api.
+// - Optionally, call any of the AddFont*** functions. If you don't call any, the default font embedded in the code will be loaded for you.
+// - Call GetTexDataAsAlpha8() or GetTexDataAsRGBA32() to build and retrieve pixels data.
+// - Upload the pixels data into a texture within your graphics system (see imgui_impl_xxxx.cpp examples) +// - Call SetTexID(my_tex_id); and pass the pointer/identifier to your texture in a format natural to your graphics API. +// This value will be passed back to you during rendering to identify the texture. Read FAQ entry about ImTextureID for more details. +// Common pitfalls: +// - If you pass a 'glyph_ranges' array to AddFont*** functions, you need to make sure that your array persist up until the +// atlas is build (when calling GetTexData*** or Build()). We only copy the pointer, not the data. +// - Important: By default, AddFontFromMemoryTTF() takes ownership of the data. Even though we are not writing to it, we will free the pointer on destruction. +// You can set font_cfg->FontDataOwnedByAtlas=false to keep ownership of your data and it won't be freed, +// - Even though many functions are suffixed with "TTF", OTF data is supported just as well. +// - This is an old API and it is currently awkward for those and and various other reasons! We will address them in the future! +struct ImFontAtlas +{ + IMGUI_API ImFontAtlas(); + IMGUI_API ~ImFontAtlas(); + IMGUI_API ImFont* AddFont(const ImFontConfig* font_cfg); + IMGUI_API ImFont* AddFontDefault(const ImFontConfig* font_cfg = NULL); + IMGUI_API ImFont* AddFontFromFileTTF(const char* filename, float size_pixels, const ImFontConfig* font_cfg = NULL, const ImWchar* glyph_ranges = NULL); + IMGUI_API ImFont* AddFontFromMemoryTTF(void* font_data, int font_size, float size_pixels, const ImFontConfig* font_cfg = NULL, const ImWchar* glyph_ranges = NULL); // Note: Transfer ownership of 'ttf_data' to ImFontAtlas! Will be deleted after destruction of the atlas. Set font_cfg->FontDataOwnedByAtlas=false to keep ownership of your data and it won't be freed. 
+ IMGUI_API ImFont* AddFontFromMemoryCompressedTTF(const void* compressed_font_data, int compressed_font_size, float size_pixels, const ImFontConfig* font_cfg = NULL, const ImWchar* glyph_ranges = NULL); // 'compressed_font_data' still owned by caller. Compress with binary_to_compressed_c.cpp. + IMGUI_API ImFont* AddFontFromMemoryCompressedBase85TTF(const char* compressed_font_data_base85, float size_pixels, const ImFontConfig* font_cfg = NULL, const ImWchar* glyph_ranges = NULL); // 'compressed_font_data_base85' still owned by caller. Compress with binary_to_compressed_c.cpp with -base85 parameter. + IMGUI_API void ClearInputData(); // Clear input data (all ImFontConfig structures including sizes, TTF data, glyph ranges, etc.) = all the data used to build the texture and fonts. + IMGUI_API void ClearTexData(); // Clear output texture data (CPU side). Saves RAM once the texture has been copied to graphics memory. + IMGUI_API void ClearFonts(); // Clear output font data (glyphs storage, UV coordinates). + IMGUI_API void Clear(); // Clear all input and output. + + // Build atlas, retrieve pixel data. + // User is in charge of copying the pixels into graphics memory (e.g. create a texture with your engine). Then store your texture handle with SetTexID(). + // The pitch is always = Width * BytesPerPixels (1 or 4) + // Building in RGBA32 format is provided for convenience and compatibility, but note that unless you manually manipulate or copy color data into + // the texture (e.g. when using the AddCustomRect*** api), then the RGB pixels emitted will always be white (~75% of memory/bandwidth waste. + IMGUI_API bool Build(); // Build pixels data. This is called automatically for you by the GetTexData*** functions. 
+ IMGUI_API void GetTexDataAsAlpha8(unsigned char** out_pixels, int* out_width, int* out_height, int* out_bytes_per_pixel = NULL); // 1 byte per-pixel + IMGUI_API void GetTexDataAsRGBA32(unsigned char** out_pixels, int* out_width, int* out_height, int* out_bytes_per_pixel = NULL); // 4 bytes-per-pixel + bool IsBuilt() const { return Fonts.Size > 0 && (TexPixelsAlpha8 != NULL || TexPixelsRGBA32 != NULL); } + void SetTexID(ImTextureID id) { TexID = id; } + + //------------------------------------------- + // Glyph Ranges + //------------------------------------------- + + // Helpers to retrieve list of common Unicode ranges (2 value per range, values are inclusive, zero-terminated list) + // NB: Make sure that your string are UTF-8 and NOT in your local code page. In C++11, you can create UTF-8 string literal using the u8"Hello world" syntax. See FAQ for details. + // NB: Consider using ImFontGlyphRangesBuilder to build glyph ranges from textual data. + IMGUI_API const ImWchar* GetGlyphRangesDefault(); // Basic Latin, Extended Latin + IMGUI_API const ImWchar* GetGlyphRangesKorean(); // Default + Korean characters + IMGUI_API const ImWchar* GetGlyphRangesJapanese(); // Default + Hiragana, Katakana, Half-Width, Selection of 2999 Ideographs + IMGUI_API const ImWchar* GetGlyphRangesChineseFull(); // Default + Half-Width + Japanese Hiragana/Katakana + full set of about 21000 CJK Unified Ideographs + IMGUI_API const ImWchar* GetGlyphRangesChineseSimplifiedCommon();// Default + Half-Width + Japanese Hiragana/Katakana + set of 2500 CJK Unified Ideographs for common simplified Chinese + IMGUI_API const ImWchar* GetGlyphRangesCyrillic(); // Default + about 400 Cyrillic characters + IMGUI_API const ImWchar* GetGlyphRangesThai(); // Default + Thai characters + IMGUI_API const ImWchar* GetGlyphRangesVietnamese(); // Default + Vietnamese characters + + //------------------------------------------- + // [BETA] Custom Rectangles/Glyphs API + 
//------------------------------------------- + + // You can request arbitrary rectangles to be packed into the atlas, for your own purposes. + // After calling Build(), you can query the rectangle position and render your pixels. + // You can also request your rectangles to be mapped as font glyph (given a font + Unicode point), + // so you can render e.g. custom colorful icons and use them as regular glyphs. + // Read docs/FONTS.md for more details about using colorful icons. + // Note: this API may be redesigned later in order to support multi-monitor varying DPI settings. + IMGUI_API int AddCustomRectRegular(int width, int height); + IMGUI_API int AddCustomRectFontGlyph(ImFont* font, ImWchar id, int width, int height, float advance_x, const ImVec2& offset = ImVec2(0, 0)); + ImFontAtlasCustomRect* GetCustomRectByIndex(int index) { IM_ASSERT(index >= 0); return &CustomRects[index]; } + + // [Internal] + IMGUI_API void CalcCustomRectUV(const ImFontAtlasCustomRect* rect, ImVec2* out_uv_min, ImVec2* out_uv_max) const; + IMGUI_API bool GetMouseCursorTexData(ImGuiMouseCursor cursor, ImVec2* out_offset, ImVec2* out_size, ImVec2 out_uv_border[2], ImVec2 out_uv_fill[2]); + + //------------------------------------------- + // Members + //------------------------------------------- + + bool Locked; // Marked as Locked by ImGui::NewFrame() so attempt to modify the atlas will assert. + ImFontAtlasFlags Flags; // Build flags (see ImFontAtlasFlags_) + ImTextureID TexID; // User data to refer to the texture once it has been uploaded to user's graphic systems. It is passed back to you during rendering via the ImDrawCmd structure. + int TexDesiredWidth; // Texture width desired by user before Build(). Must be a power-of-two. If have many glyphs your graphics API have texture size restrictions you may want to increase texture width to decrease height. + int TexGlyphPadding; // Padding between glyphs within texture in pixels. Defaults to 1. 
If your rendering method doesn't rely on bilinear filtering you may set this to 0. + + // [Internal] + // NB: Access texture data via GetTexData*() calls! Which will setup a default font for you. + unsigned char* TexPixelsAlpha8; // 1 component per pixel, each component is unsigned 8-bit. Total size = TexWidth * TexHeight + unsigned int* TexPixelsRGBA32; // 4 component per pixel, each component is unsigned 8-bit. Total size = TexWidth * TexHeight * 4 + int TexWidth; // Texture width calculated during Build(). + int TexHeight; // Texture height calculated during Build(). + ImVec2 TexUvScale; // = (1.0f/TexWidth, 1.0f/TexHeight) + ImVec2 TexUvWhitePixel; // Texture coordinates to a white pixel + ImVector Fonts; // Hold all the fonts returned by AddFont*. Fonts[0] is the default font upon calling ImGui::NewFrame(), use ImGui::PushFont()/PopFont() to change the current font. + ImVector CustomRects; // Rectangles for packing custom texture data into the atlas. + ImVector ConfigData; // Configuration data + ImVec4 TexUvLines[IM_DRAWLIST_TEX_LINES_WIDTH_MAX + 1]; // UVs for baked anti-aliased lines + + // [Internal] Font builder + const ImFontBuilderIO* FontBuilderIO; // Opaque interface to a font builder (default to stb_truetype, can be changed to use FreeType by defining IMGUI_ENABLE_FREETYPE). + unsigned int FontBuilderFlags; // Shared flags (for all fonts) for custom font builder. THIS IS BUILD IMPLEMENTATION DEPENDENT. Per-font override is also available in ImFontConfig. 
+ + // [Internal] Packing data + int PackIdMouseCursors; // Custom texture rectangle ID for white pixel and mouse cursors + int PackIdLines; // Custom texture rectangle ID for baked anti-aliased lines + +#ifndef IMGUI_DISABLE_OBSOLETE_FUNCTIONS + typedef ImFontAtlasCustomRect CustomRect; // OBSOLETED in 1.72+ + typedef ImFontGlyphRangesBuilder GlyphRangesBuilder; // OBSOLETED in 1.67+ +#endif +}; + +// Font runtime data and rendering +// ImFontAtlas automatically loads a default embedded font for you when you call GetTexDataAsAlpha8() or GetTexDataAsRGBA32(). +struct ImFont +{ + // Members: Hot ~20/24 bytes (for CalcTextSize) + ImVector IndexAdvanceX; // 12-16 // out // // Sparse. Glyphs->AdvanceX in a directly indexable way (cache-friendly for CalcTextSize functions which only this this info, and are often bottleneck in large UI). + float FallbackAdvanceX; // 4 // out // = FallbackGlyph->AdvanceX + float FontSize; // 4 // in // // Height of characters/line, set during loading (don't change after loading) + + // Members: Hot ~28/40 bytes (for CalcTextSize + render loop) + ImVector IndexLookup; // 12-16 // out // // Sparse. Index glyphs by Unicode code-point. + ImVector Glyphs; // 12-16 // out // // All glyphs. + const ImFontGlyph* FallbackGlyph; // 4-8 // out // = FindGlyph(FontFallbackChar) + + // Members: Cold ~32/40 bytes + ImFontAtlas* ContainerAtlas; // 4-8 // out // // What we has been loaded into + const ImFontConfig* ConfigData; // 4-8 // in // // Pointer within ContainerAtlas->ConfigData + short ConfigDataCount; // 2 // in // ~ 1 // Number of ImFontConfig involved in creating this font. Bigger than 1 when merging multiple font sources into one ImFont. + ImWchar FallbackChar; // 2 // in // = '?' // Replacement character if a glyph isn't found. Only set via SetFallbackChar() + ImWchar EllipsisChar; // 2 // out // = -1 // Character used for ellipsis rendering. 
+ bool DirtyLookupTables; // 1 // out // + float Scale; // 4 // in // = 1.f // Base font scale, multiplied by the per-window font scale which you can adjust with SetWindowFontScale() + float Ascent, Descent; // 4+4 // out // // Ascent: distance from top to bottom of e.g. 'A' [0..FontSize] + int MetricsTotalSurface;// 4 // out // // Total surface in pixels to get an idea of the font rasterization/texture cost (not exact, we approximate the cost of padding between glyphs) + ImU8 Used4kPagesMap[(IM_UNICODE_CODEPOINT_MAX+1)/4096/8]; // 2 bytes if ImWchar=ImWchar16, 34 bytes if ImWchar==ImWchar32. Store 1-bit for each block of 4K codepoints that has one active glyph. This is mainly used to facilitate iterations across all used codepoints. + + // Methods + IMGUI_API ImFont(); + IMGUI_API ~ImFont(); + IMGUI_API const ImFontGlyph*FindGlyph(ImWchar c) const; + IMGUI_API const ImFontGlyph*FindGlyphNoFallback(ImWchar c) const; + float GetCharAdvance(ImWchar c) const { return ((int)c < IndexAdvanceX.Size) ? IndexAdvanceX[(int)c] : FallbackAdvanceX; } + bool IsLoaded() const { return ContainerAtlas != NULL; } + const char* GetDebugName() const { return ConfigData ? ConfigData->Name : ""; } + + // 'max_width' stops rendering after a certain width (could be turned into a 2d size). FLT_MAX to disable. + // 'wrap_width' enable automatic word-wrapping across multiple lines to fit into given width. 0.0f to disable. 
+ IMGUI_API ImVec2 CalcTextSizeA(float size, float max_width, float wrap_width, const char* text_begin, const char* text_end = NULL, const char** remaining = NULL) const; // utf8 + IMGUI_API const char* CalcWordWrapPositionA(float scale, const char* text, const char* text_end, float wrap_width) const; + IMGUI_API void RenderChar(ImDrawList* draw_list, float size, ImVec2 pos, ImU32 col, ImWchar c) const; + IMGUI_API void RenderText(ImDrawList* draw_list, float size, ImVec2 pos, ImU32 col, const ImVec4& clip_rect, const char* text_begin, const char* text_end, float wrap_width = 0.0f, bool cpu_fine_clip = false) const; + + // [Internal] Don't use! + IMGUI_API void BuildLookupTable(); + IMGUI_API void ClearOutputData(); + IMGUI_API void GrowIndex(int new_size); + IMGUI_API void AddGlyph(const ImFontConfig* src_cfg, ImWchar c, float x0, float y0, float x1, float y1, float u0, float v0, float u1, float v1, float advance_x); + IMGUI_API void AddRemapChar(ImWchar dst, ImWchar src, bool overwrite_dst = true); // Makes 'dst' character/glyph points to 'src' character/glyph. Currently needs to be called AFTER fonts have been built. 
+ IMGUI_API void SetGlyphVisible(ImWchar c, bool visible); + IMGUI_API void SetFallbackChar(ImWchar c); + IMGUI_API bool IsGlyphRangeUnused(unsigned int c_begin, unsigned int c_last); +}; + +#if defined(__clang__) +#pragma clang diagnostic pop +#elif defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + +// Include imgui_user.h at the end of imgui.h (convenient for user to only explicitly include vanilla imgui.h) +#ifdef IMGUI_INCLUDE_IMGUI_USER_H +#include "imgui_user.h" +#endif + +#endif // #ifndef IMGUI_DISABLE diff --git a/cpp-projects/3d-engine/imgui/imgui_demo.cpp b/cpp-projects/3d-engine/imgui/imgui_demo.cpp new file mode 100644 index 0000000..8d52ea3 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/imgui_demo.cpp @@ -0,0 +1,7533 @@ +// dear imgui, v1.81 WIP +// (demo code) + +// Help: +// - Read FAQ at http://dearimgui.org/faq +// - Newcomers, read 'Programmer guide' in imgui.cpp for notes on how to setup Dear ImGui in your codebase. +// - Call and read ImGui::ShowDemoWindow() in imgui_demo.cpp. All applications in examples/ are doing that. +// Read imgui.cpp for more details, documentation and comments. +// Get latest version at https://github.com/ocornut/imgui + +// Message to the person tempted to delete this file when integrating Dear ImGui into their code base: +// Do NOT remove this file from your project! Think again! It is the most useful reference code that you and other +// coders will want to refer to and call. Have the ImGui::ShowDemoWindow() function wired in an always-available +// debug menu of your game/app! Removing this file from your project is hindering access to documentation for everyone +// in your team, likely leading you to poorer usage of the library. +// Everything in this file will be stripped out by the linker if you don't call ImGui::ShowDemoWindow(). 
+// If you want to link core Dear ImGui in your shipped builds but want a thorough guarantee that the demo will not be +// linked, you can setup your imconfig.h with #define IMGUI_DISABLE_DEMO_WINDOWS and those functions will be empty. +// In other situations, whenever you have Dear ImGui available you probably want this to be available for reference. +// Thank you, +// -Your beloved friend, imgui_demo.cpp (which you won't delete) + +// Message to beginner C/C++ programmers about the meaning of the 'static' keyword: +// In this demo code, we frequently use 'static' variables inside functions. A static variable persists across calls, +// so it is essentially like a global variable but declared inside the scope of the function. We do this as a way to +// gather code and data in the same place, to make the demo source code faster to read, faster to write, and smaller +// in size. It also happens to be a convenient way of storing simple UI related information as long as your function +// doesn't need to be reentrant or used in multiple threads. This might be a pattern you will want to use in your code, +// but most of the real data you would be editing is likely going to be stored outside your functions. + +// The Demo code in this file is designed to be easy to copy-and-paste into your application! +// Because of this: +// - We never omit the ImGui:: prefix when calling functions, even though most code here is in the same namespace. +// - We try to declare static variables in the local scope, as close as possible to the code using them. +// - We never use any of the helpers/facilities used internally by Dear ImGui, unless available in the public API. +// - We never use maths operators on ImVec2/ImVec4. For our other source files we use them, and they are provided +// by imgui_internal.h using the IMGUI_DEFINE_MATH_OPERATORS define.
For your own sources file they are optional +// and require you either enable those, either provide your own via IM_VEC2_CLASS_EXTRA in imconfig.h. +// Because we can't assume anything about your support of maths operators, we cannot use them in imgui_demo.cpp. + +// Navigating this file: +// - In Visual Studio IDE: CTRL+comma ("Edit.NavigateTo") can follow symbols in comments, whereas CTRL+F12 ("Edit.GoToImplementation") cannot. +// - With Visual Assist installed: ALT+G ("VAssistX.GoToImplementation") can also follow symbols in comments. + +/* + +Index of this file: + +// [SECTION] Forward Declarations, Helpers +// [SECTION] Demo Window / ShowDemoWindow() +// - sub section: ShowDemoWindowWidgets() +// - sub section: ShowDemoWindowLayout() +// - sub section: ShowDemoWindowPopups() +// - sub section: ShowDemoWindowTables() +// - sub section: ShowDemoWindowMisc() +// [SECTION] About Window / ShowAboutWindow() +// [SECTION] Style Editor / ShowStyleEditor() +// [SECTION] Example App: Main Menu Bar / ShowExampleAppMainMenuBar() +// [SECTION] Example App: Debug Console / ShowExampleAppConsole() +// [SECTION] Example App: Debug Log / ShowExampleAppLog() +// [SECTION] Example App: Simple Layout / ShowExampleAppLayout() +// [SECTION] Example App: Property Editor / ShowExampleAppPropertyEditor() +// [SECTION] Example App: Long Text / ShowExampleAppLongText() +// [SECTION] Example App: Auto Resize / ShowExampleAppAutoResize() +// [SECTION] Example App: Constrained Resize / ShowExampleAppConstrainedResize() +// [SECTION] Example App: Simple Overlay / ShowExampleAppSimpleOverlay() +// [SECTION] Example App: Manipulating Window Titles / ShowExampleAppWindowTitles() +// [SECTION] Example App: Custom Rendering using ImDrawList API / ShowExampleAppCustomRendering() +// [SECTION] Example App: Documents Handling / ShowExampleAppDocuments() + +*/ + +#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS) +#define _CRT_SECURE_NO_WARNINGS +#endif + +#include "imgui.h" +#ifndef 
IMGUI_DISABLE + +// System includes +#include // toupper +#include // INT_MIN, INT_MAX +#include // sqrtf, powf, cosf, sinf, floorf, ceilf +#include // vsnprintf, sscanf, printf +#include // NULL, malloc, free, atoi +#if defined(_MSC_VER) && _MSC_VER <= 1500 // MSVC 2008 or earlier +#include // intptr_t +#else +#include // intptr_t +#endif + +// Visual Studio warnings +#ifdef _MSC_VER +#pragma warning (disable: 4996) // 'This function or variable may be unsafe': strcpy, strdup, sprintf, vsnprintf, sscanf, fopen +#endif + +// Clang/GCC warnings with -Weverything +#if defined(__clang__) +#if __has_warning("-Wunknown-warning-option") +#pragma clang diagnostic ignored "-Wunknown-warning-option" // warning: unknown warning group 'xxx' // not all warnings are known by all Clang versions and they tend to be rename-happy.. so ignoring warnings triggers new warnings on some configuration. Great! +#endif +#pragma clang diagnostic ignored "-Wunknown-pragmas" // warning: unknown warning group 'xxx' +#pragma clang diagnostic ignored "-Wold-style-cast" // warning: use of old-style cast // yes, they are more terse. +#pragma clang diagnostic ignored "-Wdeprecated-declarations" // warning: 'xx' is deprecated: The POSIX name for this.. // for strdup used in demo code (so user can copy & paste the code) +#pragma clang diagnostic ignored "-Wint-to-void-pointer-cast" // warning: cast to 'void *' from smaller integer type +#pragma clang diagnostic ignored "-Wformat-security" // warning: format string is not a string literal +#pragma clang diagnostic ignored "-Wexit-time-destructors" // warning: declaration requires an exit-time destructor // exit-time destruction order is undefined. if MemFree() leads to users code that has been disabled before exit it might cause problems. ImGui coding style welcomes static/globals. +#pragma clang diagnostic ignored "-Wunused-macros" // warning: macro is not used // we define snprintf/vsnprintf on Windows so they are available, but not always used. 
+#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" // warning: zero as null pointer constant // some standard header variations use #define NULL 0 +#pragma clang diagnostic ignored "-Wdouble-promotion" // warning: implicit conversion from 'float' to 'double' when passing argument to function // using printf() is a misery with this as C++ va_arg ellipsis changes float to double. +#pragma clang diagnostic ignored "-Wreserved-id-macro" // warning: macro name is a reserved identifier +#pragma clang diagnostic ignored "-Wimplicit-int-float-conversion" // warning: implicit conversion from 'xxx' to 'float' may lose precision +#elif defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wpragmas" // warning: unknown option after '#pragma GCC diagnostic' kind +#pragma GCC diagnostic ignored "-Wint-to-pointer-cast" // warning: cast to pointer from integer of different size +#pragma GCC diagnostic ignored "-Wformat-security" // warning: format string is not a string literal (potentially insecure) +#pragma GCC diagnostic ignored "-Wdouble-promotion" // warning: implicit conversion from 'float' to 'double' when passing argument to function +#pragma GCC diagnostic ignored "-Wconversion" // warning: conversion to 'xxxx' from 'xxxx' may alter its value +#pragma GCC diagnostic ignored "-Wmisleading-indentation" // [__GNUC__ >= 6] warning: this 'if' clause does not guard this statement // GCC 6.0+ only. See #883 on GitHub. +#endif + +// Play it nice with Windows users (Update: May 2018, Notepad now supports Unix-style carriage returns!) 
+#ifdef _WIN32 +#define IM_NEWLINE "\r\n" +#else +#define IM_NEWLINE "\n" +#endif + +// Helpers +#if defined(_MSC_VER) && !defined(snprintf) +#define snprintf _snprintf +#endif +#if defined(_MSC_VER) && !defined(vsnprintf) +#define vsnprintf _vsnprintf +#endif + +// Helpers macros +// We normally try to not use many helpers in imgui_demo.cpp in order to make code easier to copy and paste, +// but making an exception here as those are largely simplifying code... +// In other imgui sources we can use nicer internal functions from imgui_internal.h (ImMin/ImMax) but not in the demo. +#define IM_MIN(A, B) (((A) < (B)) ? (A) : (B)) +#define IM_MAX(A, B) (((A) >= (B)) ? (A) : (B)) +#define IM_CLAMP(V, MN, MX) ((V) < (MN) ? (MN) : (V) > (MX) ? (MX) : (V)) + +// Enforce cdecl calling convention for functions called by the standard library, in case compilation settings changed the default to e.g. __vectorcall +#ifndef IMGUI_CDECL +#ifdef _MSC_VER +#define IMGUI_CDECL __cdecl +#else +#define IMGUI_CDECL +#endif +#endif + +//----------------------------------------------------------------------------- +// [SECTION] Forward Declarations, Helpers +//----------------------------------------------------------------------------- + +#if !defined(IMGUI_DISABLE_DEMO_WINDOWS) + +// Forward Declarations +static void ShowExampleAppDocuments(bool* p_open); +static void ShowExampleAppMainMenuBar(); +static void ShowExampleAppConsole(bool* p_open); +static void ShowExampleAppLog(bool* p_open); +static void ShowExampleAppLayout(bool* p_open); +static void ShowExampleAppPropertyEditor(bool* p_open); +static void ShowExampleAppLongText(bool* p_open); +static void ShowExampleAppAutoResize(bool* p_open); +static void ShowExampleAppConstrainedResize(bool* p_open); +static void ShowExampleAppSimpleOverlay(bool* p_open); +static void ShowExampleAppWindowTitles(bool* p_open); +static void ShowExampleAppCustomRendering(bool* p_open); +static void ShowExampleMenuFile(); + +// Helper to display a 
little (?) mark which shows a tooltip when hovered. +// In your own code you may want to display an actual icon if you are using a merged icon fonts (see docs/FONTS.md) +static void HelpMarker(const char* desc) +{ + ImGui::TextDisabled("(?)"); + if (ImGui::IsItemHovered()) + { + ImGui::BeginTooltip(); + ImGui::PushTextWrapPos(ImGui::GetFontSize() * 35.0f); + ImGui::TextUnformatted(desc); + ImGui::PopTextWrapPos(); + ImGui::EndTooltip(); + } +} + +// Helper to display basic user controls. +void ImGui::ShowUserGuide() +{ + ImGuiIO& io = ImGui::GetIO(); + ImGui::BulletText("Double-click on title bar to collapse window."); + ImGui::BulletText( + "Click and drag on lower corner to resize window\n" + "(double-click to auto fit window to its contents)."); + ImGui::BulletText("CTRL+Click on a slider or drag box to input value as text."); + ImGui::BulletText("TAB/SHIFT+TAB to cycle through keyboard editable fields."); + if (io.FontAllowUserScaling) + ImGui::BulletText("CTRL+Mouse Wheel to zoom window contents."); + ImGui::BulletText("While inputing text:\n"); + ImGui::Indent(); + ImGui::BulletText("CTRL+Left/Right to word jump."); + ImGui::BulletText("CTRL+A or double-click to select all."); + ImGui::BulletText("CTRL+X/C/V to use clipboard cut/copy/paste."); + ImGui::BulletText("CTRL+Z,CTRL+Y to undo/redo."); + ImGui::BulletText("ESCAPE to revert."); + ImGui::BulletText("You can apply arithmetic operators +,*,/ on numerical values.\nUse +- to subtract."); + ImGui::Unindent(); + ImGui::BulletText("With keyboard navigation enabled:"); + ImGui::Indent(); + ImGui::BulletText("Arrow keys to navigate."); + ImGui::BulletText("Space to activate a widget."); + ImGui::BulletText("Return to input text into a widget."); + ImGui::BulletText("Escape to deactivate a widget, close popup, exit child window."); + ImGui::BulletText("Alt to jump to the menu layer of a window."); + ImGui::BulletText("CTRL+Tab to select a window."); + ImGui::Unindent(); +} + 
+//----------------------------------------------------------------------------- +// [SECTION] Demo Window / ShowDemoWindow() +//----------------------------------------------------------------------------- +// - ShowDemoWindowWidgets() +// - ShowDemoWindowLayout() +// - ShowDemoWindowPopups() +// - ShowDemoWindowTables() +// - ShowDemoWindowColumns() +// - ShowDemoWindowMisc() +//----------------------------------------------------------------------------- + +// We split the contents of the big ShowDemoWindow() function into smaller functions +// (because the link time of very large functions grow non-linearly) +static void ShowDemoWindowWidgets(); +static void ShowDemoWindowLayout(); +static void ShowDemoWindowPopups(); +static void ShowDemoWindowTables(); +static void ShowDemoWindowColumns(); +static void ShowDemoWindowMisc(); + +// Demonstrate most Dear ImGui features (this is big function!) +// You may execute this function to experiment with the UI and understand what it does. +// You may then search for keywords in the code when you are interested by a specific feature. +void ImGui::ShowDemoWindow(bool* p_open) +{ + // Exceptionally add an extra assert here for people confused about initial Dear ImGui setup + // Most ImGui functions would normally just crash if the context is missing. + IM_ASSERT(ImGui::GetCurrentContext() != NULL && "Missing dear imgui context. 
Refer to examples app!"); + + // Examples Apps (accessible from the "Examples" menu) + static bool show_app_main_menu_bar = false; + static bool show_app_documents = false; + static bool show_app_console = false; + static bool show_app_log = false; + static bool show_app_layout = false; + static bool show_app_property_editor = false; + static bool show_app_long_text = false; + static bool show_app_auto_resize = false; + static bool show_app_constrained_resize = false; + static bool show_app_simple_overlay = false; + static bool show_app_window_titles = false; + static bool show_app_custom_rendering = false; + + if (show_app_main_menu_bar) ShowExampleAppMainMenuBar(); + if (show_app_documents) ShowExampleAppDocuments(&show_app_documents); + + if (show_app_console) ShowExampleAppConsole(&show_app_console); + if (show_app_log) ShowExampleAppLog(&show_app_log); + if (show_app_layout) ShowExampleAppLayout(&show_app_layout); + if (show_app_property_editor) ShowExampleAppPropertyEditor(&show_app_property_editor); + if (show_app_long_text) ShowExampleAppLongText(&show_app_long_text); + if (show_app_auto_resize) ShowExampleAppAutoResize(&show_app_auto_resize); + if (show_app_constrained_resize) ShowExampleAppConstrainedResize(&show_app_constrained_resize); + if (show_app_simple_overlay) ShowExampleAppSimpleOverlay(&show_app_simple_overlay); + if (show_app_window_titles) ShowExampleAppWindowTitles(&show_app_window_titles); + if (show_app_custom_rendering) ShowExampleAppCustomRendering(&show_app_custom_rendering); + + // Dear ImGui Apps (accessible from the "Tools" menu) + static bool show_app_metrics = false; + static bool show_app_style_editor = false; + static bool show_app_about = false; + + if (show_app_metrics) { ImGui::ShowMetricsWindow(&show_app_metrics); } + if (show_app_about) { ImGui::ShowAboutWindow(&show_app_about); } + if (show_app_style_editor) + { + ImGui::Begin("Dear ImGui Style Editor", &show_app_style_editor); + ImGui::ShowStyleEditor(); + ImGui::End(); + } 
+ + // Demonstrate the various window flags. Typically you would just use the default! + static bool no_titlebar = false; + static bool no_scrollbar = false; + static bool no_menu = false; + static bool no_move = false; + static bool no_resize = false; + static bool no_collapse = false; + static bool no_close = false; + static bool no_nav = false; + static bool no_background = false; + static bool no_bring_to_front = false; + + ImGuiWindowFlags window_flags = 0; + if (no_titlebar) window_flags |= ImGuiWindowFlags_NoTitleBar; + if (no_scrollbar) window_flags |= ImGuiWindowFlags_NoScrollbar; + if (!no_menu) window_flags |= ImGuiWindowFlags_MenuBar; + if (no_move) window_flags |= ImGuiWindowFlags_NoMove; + if (no_resize) window_flags |= ImGuiWindowFlags_NoResize; + if (no_collapse) window_flags |= ImGuiWindowFlags_NoCollapse; + if (no_nav) window_flags |= ImGuiWindowFlags_NoNav; + if (no_background) window_flags |= ImGuiWindowFlags_NoBackground; + if (no_bring_to_front) window_flags |= ImGuiWindowFlags_NoBringToFrontOnFocus; + if (no_close) p_open = NULL; // Don't pass our bool* to Begin + + // We specify a default position/size in case there's no data in the .ini file. + // We only do it to make the demo applications a little more welcoming, but typically this isn't required. + ImGui::SetNextWindowPos(ImVec2(650, 20), ImGuiCond_FirstUseEver); + ImGui::SetNextWindowSize(ImVec2(550, 680), ImGuiCond_FirstUseEver); + + // Main body of the Demo window starts here. + if (!ImGui::Begin("Dear ImGui Demo", p_open, window_flags)) + { + // Early out if the window is collapsed, as an optimization. + ImGui::End(); + return; + } + + // Most "big" widgets share a common width settings by default. See 'Demo->Layout->Widgets Width' for details. + + // e.g. Use 2/3 of the space for widgets and 1/3 for labels (right align) + //ImGui::PushItemWidth(-ImGui::GetWindowWidth() * 0.35f); + + // e.g. 
Leave a fixed amount of width for labels (by passing a negative value), the rest goes to widgets. + ImGui::PushItemWidth(ImGui::GetFontSize() * -12); + + // Menu Bar + if (ImGui::BeginMenuBar()) + { + if (ImGui::BeginMenu("Menu")) + { + ShowExampleMenuFile(); + ImGui::EndMenu(); + } + if (ImGui::BeginMenu("Examples")) + { + ImGui::MenuItem("Main menu bar", NULL, &show_app_main_menu_bar); + ImGui::MenuItem("Console", NULL, &show_app_console); + ImGui::MenuItem("Log", NULL, &show_app_log); + ImGui::MenuItem("Simple layout", NULL, &show_app_layout); + ImGui::MenuItem("Property editor", NULL, &show_app_property_editor); + ImGui::MenuItem("Long text display", NULL, &show_app_long_text); + ImGui::MenuItem("Auto-resizing window", NULL, &show_app_auto_resize); + ImGui::MenuItem("Constrained-resizing window", NULL, &show_app_constrained_resize); + ImGui::MenuItem("Simple overlay", NULL, &show_app_simple_overlay); + ImGui::MenuItem("Manipulating window titles", NULL, &show_app_window_titles); + ImGui::MenuItem("Custom rendering", NULL, &show_app_custom_rendering); + ImGui::MenuItem("Documents", NULL, &show_app_documents); + ImGui::EndMenu(); + } + if (ImGui::BeginMenu("Tools")) + { + ImGui::MenuItem("Metrics/Debugger", NULL, &show_app_metrics); + ImGui::MenuItem("Style Editor", NULL, &show_app_style_editor); + ImGui::MenuItem("About Dear ImGui", NULL, &show_app_about); + ImGui::EndMenu(); + } + ImGui::EndMenuBar(); + } + + ImGui::Text("dear imgui says hello. 
(%s)", IMGUI_VERSION); + ImGui::Spacing(); + + if (ImGui::CollapsingHeader("Help")) + { + ImGui::Text("ABOUT THIS DEMO:"); + ImGui::BulletText("Sections below are demonstrating many aspects of the library."); + ImGui::BulletText("The \"Examples\" menu above leads to more demo contents."); + ImGui::BulletText("The \"Tools\" menu above gives access to: About Box, Style Editor,\n" + "and Metrics/Debugger (general purpose Dear ImGui debugging tool)."); + ImGui::Separator(); + + ImGui::Text("PROGRAMMER GUIDE:"); + ImGui::BulletText("See the ShowDemoWindow() code in imgui_demo.cpp. <- you are here!"); + ImGui::BulletText("See comments in imgui.cpp."); + ImGui::BulletText("See example applications in the examples/ folder."); + ImGui::BulletText("Read the FAQ at http://www.dearimgui.org/faq/"); + ImGui::BulletText("Set 'io.ConfigFlags |= NavEnableKeyboard' for keyboard controls."); + ImGui::BulletText("Set 'io.ConfigFlags |= NavEnableGamepad' for gamepad controls."); + ImGui::Separator(); + + ImGui::Text("USER GUIDE:"); + ImGui::ShowUserGuide(); + } + + if (ImGui::CollapsingHeader("Configuration")) + { + ImGuiIO& io = ImGui::GetIO(); + + if (ImGui::TreeNode("Configuration##2")) + { + ImGui::CheckboxFlags("io.ConfigFlags: NavEnableKeyboard", &io.ConfigFlags, ImGuiConfigFlags_NavEnableKeyboard); + ImGui::SameLine(); HelpMarker("Enable keyboard controls."); + ImGui::CheckboxFlags("io.ConfigFlags: NavEnableGamepad", &io.ConfigFlags, ImGuiConfigFlags_NavEnableGamepad); + ImGui::SameLine(); HelpMarker("Enable gamepad controls. Require backend to set io.BackendFlags |= ImGuiBackendFlags_HasGamepad.\n\nRead instructions in imgui.cpp for details."); + ImGui::CheckboxFlags("io.ConfigFlags: NavEnableSetMousePos", &io.ConfigFlags, ImGuiConfigFlags_NavEnableSetMousePos); + ImGui::SameLine(); HelpMarker("Instruct navigation to move the mouse cursor. 
See comment for ImGuiConfigFlags_NavEnableSetMousePos."); + ImGui::CheckboxFlags("io.ConfigFlags: NoMouse", &io.ConfigFlags, ImGuiConfigFlags_NoMouse); + if (io.ConfigFlags & ImGuiConfigFlags_NoMouse) + { + // The "NoMouse" option can get us stuck with a disabled mouse! Let's provide an alternative way to fix it: + if (fmodf((float)ImGui::GetTime(), 0.40f) < 0.20f) + { + ImGui::SameLine(); + ImGui::Text("<>"); + } + if (ImGui::IsKeyPressed(ImGui::GetKeyIndex(ImGuiKey_Space))) + io.ConfigFlags &= ~ImGuiConfigFlags_NoMouse; + } + ImGui::CheckboxFlags("io.ConfigFlags: NoMouseCursorChange", &io.ConfigFlags, ImGuiConfigFlags_NoMouseCursorChange); + ImGui::SameLine(); HelpMarker("Instruct backend to not alter mouse cursor shape and visibility."); + ImGui::Checkbox("io.ConfigInputTextCursorBlink", &io.ConfigInputTextCursorBlink); + ImGui::SameLine(); HelpMarker("Enable blinking cursor (optional as some users consider it to be distracting)"); + ImGui::Checkbox("io.ConfigDragClickToInputText", &io.ConfigDragClickToInputText); + ImGui::SameLine(); HelpMarker("Enable turning DragXXX widgets into text input with a simple mouse click-release (without moving)."); + ImGui::Checkbox("io.ConfigWindowsResizeFromEdges", &io.ConfigWindowsResizeFromEdges); + ImGui::SameLine(); HelpMarker("Enable resizing of windows from their edges and from the lower-left corner.\nThis requires (io.BackendFlags & ImGuiBackendFlags_HasMouseCursors) because it needs mouse cursor feedback."); + ImGui::Checkbox("io.ConfigWindowsMoveFromTitleBarOnly", &io.ConfigWindowsMoveFromTitleBarOnly); + ImGui::Checkbox("io.MouseDrawCursor", &io.MouseDrawCursor); + ImGui::SameLine(); HelpMarker("Instruct Dear ImGui to render a mouse cursor itself. Note that a mouse cursor rendered via your application GPU rendering path will feel more laggy than hardware cursor, but will be more in sync with your other visuals.\n\nSome desktop applications may use both kinds of cursors (e.g. 
enable software cursor only when resizing/dragging something)."); + ImGui::Text("Also see Style->Rendering for rendering options."); + ImGui::TreePop(); + ImGui::Separator(); + } + + if (ImGui::TreeNode("Backend Flags")) + { + HelpMarker( + "Those flags are set by the backends (imgui_impl_xxx files) to specify their capabilities.\n" + "Here we expose then as read-only fields to avoid breaking interactions with your backend."); + + // Make a local copy to avoid modifying actual backend flags. + ImGuiBackendFlags backend_flags = io.BackendFlags; + ImGui::CheckboxFlags("io.BackendFlags: HasGamepad", &backend_flags, ImGuiBackendFlags_HasGamepad); + ImGui::CheckboxFlags("io.BackendFlags: HasMouseCursors", &backend_flags, ImGuiBackendFlags_HasMouseCursors); + ImGui::CheckboxFlags("io.BackendFlags: HasSetMousePos", &backend_flags, ImGuiBackendFlags_HasSetMousePos); + ImGui::CheckboxFlags("io.BackendFlags: RendererHasVtxOffset", &backend_flags, ImGuiBackendFlags_RendererHasVtxOffset); + ImGui::TreePop(); + ImGui::Separator(); + } + + if (ImGui::TreeNode("Style")) + { + HelpMarker("The same contents can be accessed in 'Tools->Style Editor' or by calling the ShowStyleEditor() function."); + ImGui::ShowStyleEditor(); + ImGui::TreePop(); + ImGui::Separator(); + } + + if (ImGui::TreeNode("Capture/Logging")) + { + HelpMarker( + "The logging API redirects all text output so you can easily capture the content of " + "a window or a block. 
Tree nodes can be automatically expanded.\n" + "Try opening any of the contents below in this window and then click one of the \"Log To\" button."); + ImGui::LogButtons(); + + HelpMarker("You can also call ImGui::LogText() to output directly to the log without a visual output."); + if (ImGui::Button("Copy \"Hello, world!\" to clipboard")) + { + ImGui::LogToClipboard(); + ImGui::LogText("Hello, world!"); + ImGui::LogFinish(); + } + ImGui::TreePop(); + } + } + + if (ImGui::CollapsingHeader("Window options")) + { + if (ImGui::BeginTable("split", 3)) + { + ImGui::TableNextColumn(); ImGui::Checkbox("No titlebar", &no_titlebar); + ImGui::TableNextColumn(); ImGui::Checkbox("No scrollbar", &no_scrollbar); + ImGui::TableNextColumn(); ImGui::Checkbox("No menu", &no_menu); + ImGui::TableNextColumn(); ImGui::Checkbox("No move", &no_move); + ImGui::TableNextColumn(); ImGui::Checkbox("No resize", &no_resize); + ImGui::TableNextColumn(); ImGui::Checkbox("No collapse", &no_collapse); + ImGui::TableNextColumn(); ImGui::Checkbox("No close", &no_close); + ImGui::TableNextColumn(); ImGui::Checkbox("No nav", &no_nav); + ImGui::TableNextColumn(); ImGui::Checkbox("No background", &no_background); + ImGui::TableNextColumn(); ImGui::Checkbox("No bring to front", &no_bring_to_front); + ImGui::EndTable(); + } + } + + // All demo contents + ShowDemoWindowWidgets(); + ShowDemoWindowLayout(); + ShowDemoWindowPopups(); + ShowDemoWindowTables(); + ShowDemoWindowMisc(); + + // End of ShowDemoWindow() + ImGui::PopItemWidth(); + ImGui::End(); +} + +static void ShowDemoWindowWidgets() +{ + if (!ImGui::CollapsingHeader("Widgets")) + return; + + if (ImGui::TreeNode("Basic")) + { + static int clicked = 0; + if (ImGui::Button("Button")) + clicked++; + if (clicked & 1) + { + ImGui::SameLine(); + ImGui::Text("Thanks for clicking me!"); + } + + static bool check = true; + ImGui::Checkbox("checkbox", &check); + + static int e = 0; + ImGui::RadioButton("radio a", &e, 0); ImGui::SameLine(); + 
ImGui::RadioButton("radio b", &e, 1); ImGui::SameLine(); + ImGui::RadioButton("radio c", &e, 2); + + // Color buttons, demonstrate using PushID() to add unique identifier in the ID stack, and changing style. + for (int i = 0; i < 7; i++) + { + if (i > 0) + ImGui::SameLine(); + ImGui::PushID(i); + ImGui::PushStyleColor(ImGuiCol_Button, (ImVec4)ImColor::HSV(i / 7.0f, 0.6f, 0.6f)); + ImGui::PushStyleColor(ImGuiCol_ButtonHovered, (ImVec4)ImColor::HSV(i / 7.0f, 0.7f, 0.7f)); + ImGui::PushStyleColor(ImGuiCol_ButtonActive, (ImVec4)ImColor::HSV(i / 7.0f, 0.8f, 0.8f)); + ImGui::Button("Click"); + ImGui::PopStyleColor(3); + ImGui::PopID(); + } + + // Use AlignTextToFramePadding() to align text baseline to the baseline of framed widgets elements + // (otherwise a Text+SameLine+Button sequence will have the text a little too high by default!) + // See 'Demo->Layout->Text Baseline Alignment' for details. + ImGui::AlignTextToFramePadding(); + ImGui::Text("Hold to repeat:"); + ImGui::SameLine(); + + // Arrow buttons with Repeater + static int counter = 0; + float spacing = ImGui::GetStyle().ItemInnerSpacing.x; + ImGui::PushButtonRepeat(true); + if (ImGui::ArrowButton("##left", ImGuiDir_Left)) { counter--; } + ImGui::SameLine(0.0f, spacing); + if (ImGui::ArrowButton("##right", ImGuiDir_Right)) { counter++; } + ImGui::PopButtonRepeat(); + ImGui::SameLine(); + ImGui::Text("%d", counter); + + ImGui::Text("Hover over me"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("I am a tooltip"); + + ImGui::SameLine(); + ImGui::Text("- or me"); + if (ImGui::IsItemHovered()) + { + ImGui::BeginTooltip(); + ImGui::Text("I am a fancy tooltip"); + static float arr[] = { 0.6f, 0.1f, 1.0f, 0.5f, 0.92f, 0.1f, 0.2f }; + ImGui::PlotLines("Curve", arr, IM_ARRAYSIZE(arr)); + ImGui::EndTooltip(); + } + + ImGui::Separator(); + + ImGui::LabelText("label", "Value"); + + { + // Using the _simplified_ one-liner Combo() api here + // See "Combo" section for examples of how to use the more complete 
BeginCombo()/EndCombo() api. + const char* items[] = { "AAAA", "BBBB", "CCCC", "DDDD", "EEEE", "FFFF", "GGGG", "HHHH", "IIIIIII", "JJJJ", "KKKKKKK" }; + static int item_current = 0; + ImGui::Combo("combo", &item_current, items, IM_ARRAYSIZE(items)); + ImGui::SameLine(); HelpMarker( + "Refer to the \"Combo\" section below for an explanation of the full BeginCombo/EndCombo API, " + "and demonstration of various flags.\n"); + } + + { + // To wire InputText() with std::string or any other custom string type, + // see the "Text Input > Resize Callback" section of this demo, and the misc/cpp/imgui_stdlib.h file. + static char str0[128] = "Hello, world!"; + ImGui::InputText("input text", str0, IM_ARRAYSIZE(str0)); + ImGui::SameLine(); HelpMarker( + "USER:\n" + "Hold SHIFT or use mouse to select text.\n" + "CTRL+Left/Right to word jump.\n" + "CTRL+A or double-click to select all.\n" + "CTRL+X,CTRL+C,CTRL+V clipboard.\n" + "CTRL+Z,CTRL+Y undo/redo.\n" + "ESCAPE to revert.\n\n" + "PROGRAMMER:\n" + "You can use the ImGuiInputTextFlags_CallbackResize facility if you need to wire InputText() " + "to a dynamic string type. See misc/cpp/imgui_stdlib.h for an example (this is not demonstrated " + "in imgui_demo.cpp)."); + + static char str1[128] = ""; + ImGui::InputTextWithHint("input text (w/ hint)", "enter text here", str1, IM_ARRAYSIZE(str1)); + + static int i0 = 123; + ImGui::InputInt("input int", &i0); + ImGui::SameLine(); HelpMarker( + "You can apply arithmetic operators +,*,/ on numerical values.\n" + " e.g. [ 100 ], input \'*2\', result becomes [ 200 ]\n" + "Use +- to subtract."); + + static float f0 = 0.001f; + ImGui::InputFloat("input float", &f0, 0.01f, 1.0f, "%.3f"); + + static double d0 = 999999.00000001; + ImGui::InputDouble("input double", &d0, 0.01f, 1.0f, "%.8f"); + + static float f1 = 1.e10f; + ImGui::InputFloat("input scientific", &f1, 0.0f, 0.0f, "%e"); + ImGui::SameLine(); HelpMarker( + "You can input value using the scientific notation,\n" + " e.g. 
\"1e+8\" becomes \"100000000\"."); + + static float vec4a[4] = { 0.10f, 0.20f, 0.30f, 0.44f }; + ImGui::InputFloat3("input float3", vec4a); + } + + { + static int i1 = 50, i2 = 42; + ImGui::DragInt("drag int", &i1, 1); + ImGui::SameLine(); HelpMarker( + "Click and drag to edit value.\n" + "Hold SHIFT/ALT for faster/slower edit.\n" + "Double-click or CTRL+click to input value."); + + ImGui::DragInt("drag int 0..100", &i2, 1, 0, 100, "%d%%", ImGuiSliderFlags_AlwaysClamp); + + static float f1 = 1.00f, f2 = 0.0067f; + ImGui::DragFloat("drag float", &f1, 0.005f); + ImGui::DragFloat("drag small float", &f2, 0.0001f, 0.0f, 0.0f, "%.06f ns"); + } + + { + static int i1 = 0; + ImGui::SliderInt("slider int", &i1, -1, 3); + ImGui::SameLine(); HelpMarker("CTRL+click to input value."); + + static float f1 = 0.123f, f2 = 0.0f; + ImGui::SliderFloat("slider float", &f1, 0.0f, 1.0f, "ratio = %.3f"); + ImGui::SliderFloat("slider float (log)", &f2, -10.0f, 10.0f, "%.4f", ImGuiSliderFlags_Logarithmic); + + static float angle = 0.0f; + ImGui::SliderAngle("slider angle", &angle); + + // Using the format string to display a name instead of an integer. + // Here we completely omit '%d' from the format string, so it'll only display a name. + // This technique can also be used with DragInt(). + enum Element { Element_Fire, Element_Earth, Element_Air, Element_Water, Element_COUNT }; + static int elem = Element_Fire; + const char* elems_names[Element_COUNT] = { "Fire", "Earth", "Air", "Water" }; + const char* elem_name = (elem >= 0 && elem < Element_COUNT) ? 
elems_names[elem] : "Unknown"; + ImGui::SliderInt("slider enum", &elem, 0, Element_COUNT - 1, elem_name); + ImGui::SameLine(); HelpMarker("Using the format string parameter to display a name instead of the underlying integer."); + } + + { + static float col1[3] = { 1.0f, 0.0f, 0.2f }; + static float col2[4] = { 0.4f, 0.7f, 0.0f, 0.5f }; + ImGui::ColorEdit3("color 1", col1); + ImGui::SameLine(); HelpMarker( + "Click on the color square to open a color picker.\n" + "Click and hold to use drag and drop.\n" + "Right-click on the color square to show options.\n" + "CTRL+click on individual component to input value.\n"); + + ImGui::ColorEdit4("color 2", col2); + } + + { + // List box + const char* items[] = { "Apple", "Banana", "Cherry", "Kiwi", "Mango", "Orange", "Pineapple", "Strawberry", "Watermelon" }; + static int item_current = 1; + ImGui::ListBox("listbox\n(single select)", &item_current, items, IM_ARRAYSIZE(items), 4); + + //static int listbox_item_current2 = 2; + //ImGui::SetNextItemWidth(-1); + //ImGui::ListBox("##listbox2", &listbox_item_current2, listbox_items, IM_ARRAYSIZE(listbox_items), 4); + } + + ImGui::TreePop(); + } + + // Testing ImGuiOnceUponAFrame helper. + //static ImGuiOnceUponAFrame once; + //for (int i = 0; i < 5; i++) + // if (once) + // ImGui::Text("This will be displayed only once."); + + if (ImGui::TreeNode("Trees")) + { + if (ImGui::TreeNode("Basic trees")) + { + for (int i = 0; i < 5; i++) + { + // Use SetNextItemOpen() to set the default state of a node to be open. We could + // also use TreeNodeEx() with the ImGuiTreeNodeFlags_DefaultOpen flag to achieve the same thing! 
+ if (i == 0) + ImGui::SetNextItemOpen(true, ImGuiCond_Once); + + if (ImGui::TreeNode((void*)(intptr_t)i, "Child %d", i)) + { + ImGui::Text("blah blah"); + ImGui::SameLine(); + if (ImGui::SmallButton("button")) {} + ImGui::TreePop(); + } + } + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Advanced, with Selectable nodes")) + { + HelpMarker( + "This is a more typical looking tree with selectable nodes.\n" + "Click to select, CTRL+Click to toggle, click on arrows or double-click to open."); + static ImGuiTreeNodeFlags base_flags = ImGuiTreeNodeFlags_OpenOnArrow | ImGuiTreeNodeFlags_OpenOnDoubleClick | ImGuiTreeNodeFlags_SpanAvailWidth; + static bool align_label_with_current_x_position = false; + static bool test_drag_and_drop = false; + ImGui::CheckboxFlags("ImGuiTreeNodeFlags_OpenOnArrow", &base_flags, ImGuiTreeNodeFlags_OpenOnArrow); + ImGui::CheckboxFlags("ImGuiTreeNodeFlags_OpenOnDoubleClick", &base_flags, ImGuiTreeNodeFlags_OpenOnDoubleClick); + ImGui::CheckboxFlags("ImGuiTreeNodeFlags_SpanAvailWidth", &base_flags, ImGuiTreeNodeFlags_SpanAvailWidth); ImGui::SameLine(); HelpMarker("Extend hit area to all available width instead of allowing more items to be laid out after the node."); + ImGui::CheckboxFlags("ImGuiTreeNodeFlags_SpanFullWidth", &base_flags, ImGuiTreeNodeFlags_SpanFullWidth); + ImGui::Checkbox("Align label with current X position", &align_label_with_current_x_position); + ImGui::Checkbox("Test tree node as drag source", &test_drag_and_drop); + ImGui::Text("Hello!"); + if (align_label_with_current_x_position) + ImGui::Unindent(ImGui::GetTreeNodeToLabelSpacing()); + + // 'selection_mask' is a dumb representation of what may be user-side selection state. + // You may retain selection state inside or outside your objects in whatever format you see fit. + // 'node_clicked' is temporary storage of what node we have clicked to process selection at the end + // of the loop. May be a pointer to your own node type, etc. 
+ static int selection_mask = (1 << 2); + int node_clicked = -1; + for (int i = 0; i < 6; i++) + { + // Disable the default "open on single-click behavior" + set Selected flag according to our selection. + ImGuiTreeNodeFlags node_flags = base_flags; + const bool is_selected = (selection_mask & (1 << i)) != 0; + if (is_selected) + node_flags |= ImGuiTreeNodeFlags_Selected; + if (i < 3) + { + // Items 0..2 are Tree Node + bool node_open = ImGui::TreeNodeEx((void*)(intptr_t)i, node_flags, "Selectable Node %d", i); + if (ImGui::IsItemClicked()) + node_clicked = i; + if (test_drag_and_drop && ImGui::BeginDragDropSource()) + { + ImGui::SetDragDropPayload("_TREENODE", NULL, 0); + ImGui::Text("This is a drag and drop source"); + ImGui::EndDragDropSource(); + } + if (node_open) + { + ImGui::BulletText("Blah blah\nBlah Blah"); + ImGui::TreePop(); + } + } + else + { + // Items 3..5 are Tree Leaves + // The only reason we use TreeNode at all is to allow selection of the leaf. Otherwise we can + // use BulletText() or advance the cursor by GetTreeNodeToLabelSpacing() and call Text(). 
+ node_flags |= ImGuiTreeNodeFlags_Leaf | ImGuiTreeNodeFlags_NoTreePushOnOpen; // ImGuiTreeNodeFlags_Bullet + ImGui::TreeNodeEx((void*)(intptr_t)i, node_flags, "Selectable Leaf %d", i); + if (ImGui::IsItemClicked()) + node_clicked = i; + if (test_drag_and_drop && ImGui::BeginDragDropSource()) + { + ImGui::SetDragDropPayload("_TREENODE", NULL, 0); + ImGui::Text("This is a drag and drop source"); + ImGui::EndDragDropSource(); + } + } + } + if (node_clicked != -1) + { + // Update selection state + // (process outside of tree loop to avoid visual inconsistencies during the clicking frame) + if (ImGui::GetIO().KeyCtrl) + selection_mask ^= (1 << node_clicked); // CTRL+click to toggle + else //if (!(selection_mask & (1 << node_clicked))) // Depending on selection behavior you want, may want to preserve selection when clicking on item that is part of the selection + selection_mask = (1 << node_clicked); // Click to single-select + } + if (align_label_with_current_x_position) + ImGui::Indent(ImGui::GetTreeNodeToLabelSpacing()); + ImGui::TreePop(); + } + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Collapsing Headers")) + { + static bool closable_group = true; + ImGui::Checkbox("Show 2nd header", &closable_group); + if (ImGui::CollapsingHeader("Header", ImGuiTreeNodeFlags_None)) + { + ImGui::Text("IsItemHovered: %d", ImGui::IsItemHovered()); + for (int i = 0; i < 5; i++) + ImGui::Text("Some content %d", i); + } + if (ImGui::CollapsingHeader("Header with a close button", &closable_group)) + { + ImGui::Text("IsItemHovered: %d", ImGui::IsItemHovered()); + for (int i = 0; i < 5; i++) + ImGui::Text("More content %d", i); + } + /* + if (ImGui::CollapsingHeader("Header with a bullet", ImGuiTreeNodeFlags_Bullet)) + ImGui::Text("IsItemHovered: %d", ImGui::IsItemHovered()); + */ + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Bullets")) + { + ImGui::BulletText("Bullet point 1"); + ImGui::BulletText("Bullet point 2\nOn multiple lines"); + if (ImGui::TreeNode("Tree node")) + { + 
ImGui::BulletText("Another bullet point"); + ImGui::TreePop(); + } + ImGui::Bullet(); ImGui::Text("Bullet point 3 (two calls)"); + ImGui::Bullet(); ImGui::SmallButton("Button"); + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Text")) + { + if (ImGui::TreeNode("Colorful Text")) + { + // Using shortcut. You can use PushStyleColor()/PopStyleColor() for more flexibility. + ImGui::TextColored(ImVec4(1.0f, 0.0f, 1.0f, 1.0f), "Pink"); + ImGui::TextColored(ImVec4(1.0f, 1.0f, 0.0f, 1.0f), "Yellow"); + ImGui::TextDisabled("Disabled"); + ImGui::SameLine(); HelpMarker("The TextDisabled color is stored in ImGuiStyle."); + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Word Wrapping")) + { + // Using shortcut. You can use PushTextWrapPos()/PopTextWrapPos() for more flexibility. + ImGui::TextWrapped( + "This text should automatically wrap on the edge of the window. The current implementation " + "for text wrapping follows simple rules suitable for English and possibly other languages."); + ImGui::Spacing(); + + static float wrap_width = 200.0f; + ImGui::SliderFloat("Wrap width", &wrap_width, -20, 600, "%.0f"); + + ImDrawList* draw_list = ImGui::GetWindowDrawList(); + for (int n = 0; n < 2; n++) + { + ImGui::Text("Test paragraph %d:", n); + ImVec2 pos = ImGui::GetCursorScreenPos(); + ImVec2 marker_min = ImVec2(pos.x + wrap_width, pos.y); + ImVec2 marker_max = ImVec2(pos.x + wrap_width + 10, pos.y + ImGui::GetTextLineHeight()); + ImGui::PushTextWrapPos(ImGui::GetCursorPos().x + wrap_width); + if (n == 0) + ImGui::Text("The lazy dog is a good dog. This paragraph should fit within %.0f pixels. Testing a 1 character word. The quick brown fox jumps over the lazy dog.", wrap_width); + else + ImGui::Text("aaaaaaaa bbbbbbbb, c cccccccc,dddddddd. d eeeeeeee ffffffff. gggggggg!hhhhhhhh"); + + // Draw actual text bounding box, followed by a marker of our expected limit (should not overlap!) 
+ draw_list->AddRect(ImGui::GetItemRectMin(), ImGui::GetItemRectMax(), IM_COL32(255, 255, 0, 255)); + draw_list->AddRectFilled(marker_min, marker_max, IM_COL32(255, 0, 255, 255)); + ImGui::PopTextWrapPos(); + } + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("UTF-8 Text")) + { + // UTF-8 test with Japanese characters + // (Needs a suitable font? Try "Google Noto" or "Arial Unicode". See docs/FONTS.md for details.) + // - From C++11 you can use the u8"my text" syntax to encode literal strings as UTF-8 + // - For earlier compiler, you may be able to encode your sources as UTF-8 (e.g. in Visual Studio, you + // can save your source files as 'UTF-8 without signature'). + // - FOR THIS DEMO FILE ONLY, BECAUSE WE WANT TO SUPPORT OLD COMPILERS, WE ARE *NOT* INCLUDING RAW UTF-8 + // CHARACTERS IN THIS SOURCE FILE. Instead we are encoding a few strings with hexadecimal constants. + // Don't do this in your application! Please use u8"text in any language" in your application! + // Note that characters values are preserved even by InputText() if the font cannot be displayed, + // so you can safely copy & paste garbled characters into another application. + ImGui::TextWrapped( + "CJK text will only appears if the font was loaded with the appropriate CJK character ranges. " + "Call io.Font->AddFontFromFileTTF() manually to load extra character ranges. " + "Read docs/FONTS.md for details."); + ImGui::Text("Hiragana: \xe3\x81\x8b\xe3\x81\x8d\xe3\x81\x8f\xe3\x81\x91\xe3\x81\x93 (kakikukeko)"); // Normally we would use u8"blah blah" with the proper characters directly in the string. 
+ ImGui::Text("Kanjis: \xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e (nihongo)"); + static char buf[32] = "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e"; + //static char buf[32] = u8"NIHONGO"; // <- this is how you would write it with C++11, using real kanjis + ImGui::InputText("UTF-8 input", buf, IM_ARRAYSIZE(buf)); + ImGui::TreePop(); + } + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Images")) + { + ImGuiIO& io = ImGui::GetIO(); + ImGui::TextWrapped( + "Below we are displaying the font texture (which is the only texture we have access to in this demo). " + "Use the 'ImTextureID' type as storage to pass pointers or identifier to your own texture data. " + "Hover the texture for a zoomed view!"); + + // Below we are displaying the font texture because it is the only texture we have access to inside the demo! + // Remember that ImTextureID is just storage for whatever you want it to be. It is essentially a value that + // will be passed to the rendering backend via the ImDrawCmd structure. + // If you use one of the default imgui_impl_XXXX.cpp rendering backend, they all have comments at the top + // of their respective source file to specify what they expect to be stored in ImTextureID, for example: + // - The imgui_impl_dx11.cpp renderer expect a 'ID3D11ShaderResourceView*' pointer + // - The imgui_impl_opengl3.cpp renderer expect a GLuint OpenGL texture identifier, etc. + // More: + // - If you decided that ImTextureID = MyEngineTexture*, then you can pass your MyEngineTexture* pointers + // to ImGui::Image(), and gather width/height through your own functions, etc. + // - You can use ShowMetricsWindow() to inspect the draw data that are being passed to your renderer, + // it will help you debug issues if you are confused about it. + // - Consider using the lower-level ImDrawList::AddImage() API, via ImGui::GetWindowDrawList()->AddImage(). 
+ // - Read https://github.com/ocornut/imgui/blob/master/docs/FAQ.md + // - Read https://github.com/ocornut/imgui/wiki/Image-Loading-and-Displaying-Examples + ImTextureID my_tex_id = io.Fonts->TexID; + float my_tex_w = (float)io.Fonts->TexWidth; + float my_tex_h = (float)io.Fonts->TexHeight; + { + ImGui::Text("%.0fx%.0f", my_tex_w, my_tex_h); + ImVec2 pos = ImGui::GetCursorScreenPos(); + ImVec2 uv_min = ImVec2(0.0f, 0.0f); // Top-left + ImVec2 uv_max = ImVec2(1.0f, 1.0f); // Lower-right + ImVec4 tint_col = ImVec4(1.0f, 1.0f, 1.0f, 1.0f); // No tint + ImVec4 border_col = ImVec4(1.0f, 1.0f, 1.0f, 0.5f); // 50% opaque white + ImGui::Image(my_tex_id, ImVec2(my_tex_w, my_tex_h), uv_min, uv_max, tint_col, border_col); + if (ImGui::IsItemHovered()) + { + ImGui::BeginTooltip(); + float region_sz = 32.0f; + float region_x = io.MousePos.x - pos.x - region_sz * 0.5f; + float region_y = io.MousePos.y - pos.y - region_sz * 0.5f; + float zoom = 4.0f; + if (region_x < 0.0f) { region_x = 0.0f; } + else if (region_x > my_tex_w - region_sz) { region_x = my_tex_w - region_sz; } + if (region_y < 0.0f) { region_y = 0.0f; } + else if (region_y > my_tex_h - region_sz) { region_y = my_tex_h - region_sz; } + ImGui::Text("Min: (%.2f, %.2f)", region_x, region_y); + ImGui::Text("Max: (%.2f, %.2f)", region_x + region_sz, region_y + region_sz); + ImVec2 uv0 = ImVec2((region_x) / my_tex_w, (region_y) / my_tex_h); + ImVec2 uv1 = ImVec2((region_x + region_sz) / my_tex_w, (region_y + region_sz) / my_tex_h); + ImGui::Image(my_tex_id, ImVec2(region_sz * zoom, region_sz * zoom), uv0, uv1, tint_col, border_col); + ImGui::EndTooltip(); + } + } + ImGui::TextWrapped("And now some textured buttons.."); + static int pressed_count = 0; + for (int i = 0; i < 8; i++) + { + ImGui::PushID(i); + int frame_padding = -1 + i; // -1 == uses default padding (style.FramePadding) + ImVec2 size = ImVec2(32.0f, 32.0f); // Size of the image we want to make visible + ImVec2 uv0 = ImVec2(0.0f, 0.0f); // UV coordinates for 
lower-left + ImVec2 uv1 = ImVec2(32.0f / my_tex_w, 32.0f / my_tex_h);// UV coordinates for (32,32) in our texture + ImVec4 bg_col = ImVec4(0.0f, 0.0f, 0.0f, 1.0f); // Black background + ImVec4 tint_col = ImVec4(1.0f, 1.0f, 1.0f, 1.0f); // No tint + if (ImGui::ImageButton(my_tex_id, size, uv0, uv1, frame_padding, bg_col, tint_col)) + pressed_count += 1; + ImGui::PopID(); + ImGui::SameLine(); + } + ImGui::NewLine(); + ImGui::Text("Pressed %d times.", pressed_count); + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Combo")) + { + // Expose flags as checkbox for the demo + static ImGuiComboFlags flags = 0; + ImGui::CheckboxFlags("ImGuiComboFlags_PopupAlignLeft", &flags, ImGuiComboFlags_PopupAlignLeft); + ImGui::SameLine(); HelpMarker("Only makes a difference if the popup is larger than the combo"); + if (ImGui::CheckboxFlags("ImGuiComboFlags_NoArrowButton", &flags, ImGuiComboFlags_NoArrowButton)) + flags &= ~ImGuiComboFlags_NoPreview; // Clear the other flag, as we cannot combine both + if (ImGui::CheckboxFlags("ImGuiComboFlags_NoPreview", &flags, ImGuiComboFlags_NoPreview)) + flags &= ~ImGuiComboFlags_NoArrowButton; // Clear the other flag, as we cannot combine both + + // Using the generic BeginCombo() API, you have full control over how to display the combo contents. + // (your selection data could be an index, a pointer to the object, an id for the object, a flag intrusively + // stored in the object itself, etc.) + const char* items[] = { "AAAA", "BBBB", "CCCC", "DDDD", "EEEE", "FFFF", "GGGG", "HHHH", "IIII", "JJJJ", "KKKK", "LLLLLLL", "MMMM", "OOOOOOO" }; + static int item_current_idx = 0; // Here our selection data is an index. 
+ const char* combo_label = items[item_current_idx]; // Label to preview before opening the combo (technically it could be anything) + if (ImGui::BeginCombo("combo 1", combo_label, flags)) + { + for (int n = 0; n < IM_ARRAYSIZE(items); n++) + { + const bool is_selected = (item_current_idx == n); + if (ImGui::Selectable(items[n], is_selected)) + item_current_idx = n; + + // Set the initial focus when opening the combo (scrolling + keyboard navigation focus) + if (is_selected) + ImGui::SetItemDefaultFocus(); + } + ImGui::EndCombo(); + } + + // Simplified one-liner Combo() API, using values packed in a single constant string + static int item_current_2 = 0; + ImGui::Combo("combo 2 (one-liner)", &item_current_2, "aaaa\0bbbb\0cccc\0dddd\0eeee\0\0"); + + // Simplified one-liner Combo() using an array of const char* + static int item_current_3 = -1; // If the selection isn't within 0..count, Combo won't display a preview + ImGui::Combo("combo 3 (array)", &item_current_3, items, IM_ARRAYSIZE(items)); + + // Simplified one-liner Combo() using an accessor function + struct Funcs { static bool ItemGetter(void* data, int n, const char** out_str) { *out_str = ((const char**)data)[n]; return true; } }; + static int item_current_4 = 0; + ImGui::Combo("combo 4 (function)", &item_current_4, &Funcs::ItemGetter, items, IM_ARRAYSIZE(items)); + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Selectables")) + { + // Selectable() has 2 overloads: + // - The one taking "bool selected" as a read-only selection information. + // When Selectable() has been clicked it returns true and you can alter selection state accordingly. + // - The one taking "bool* p_selected" as a read-write selection information (convenient in some cases) + // The earlier is more flexible, as in real application your selection may be stored in many different ways + // and not necessarily inside a bool value (e.g. in flags within objects, as an external list, etc). 
+ if (ImGui::TreeNode("Basic")) + { + static bool selection[5] = { false, true, false, false, false }; + ImGui::Selectable("1. I am selectable", &selection[0]); + ImGui::Selectable("2. I am selectable", &selection[1]); + ImGui::Text("3. I am not selectable"); + ImGui::Selectable("4. I am selectable", &selection[3]); + if (ImGui::Selectable("5. I am double clickable", selection[4], ImGuiSelectableFlags_AllowDoubleClick)) + if (ImGui::IsMouseDoubleClicked(0)) + selection[4] = !selection[4]; + ImGui::TreePop(); + } + if (ImGui::TreeNode("Selection State: Single Selection")) + { + static int selected = -1; + for (int n = 0; n < 5; n++) + { + char buf[32]; + sprintf(buf, "Object %d", n); + if (ImGui::Selectable(buf, selected == n)) + selected = n; + } + ImGui::TreePop(); + } + if (ImGui::TreeNode("Selection State: Multiple Selection")) + { + HelpMarker("Hold CTRL and click to select multiple items."); + static bool selection[5] = { false, false, false, false, false }; + for (int n = 0; n < 5; n++) + { + char buf[32]; + sprintf(buf, "Object %d", n); + if (ImGui::Selectable(buf, selection[n])) + { + if (!ImGui::GetIO().KeyCtrl) // Clear selection when CTRL is not held + memset(selection, 0, sizeof(selection)); + selection[n] ^= 1; + } + } + ImGui::TreePop(); + } + if (ImGui::TreeNode("Rendering more text into the same line")) + { + // Using the Selectable() overload that takes the "bool* p_selected" parameter, + // this function toggles your bool value automatically. 
+ static bool selected[3] = { false, false, false }; + ImGui::Selectable("main.c", &selected[0]); ImGui::SameLine(300); ImGui::Text(" 2,345 bytes"); + ImGui::Selectable("Hello.cpp", &selected[1]); ImGui::SameLine(300); ImGui::Text("12,345 bytes"); + ImGui::Selectable("Hello.h", &selected[2]); ImGui::SameLine(300); ImGui::Text(" 2,345 bytes"); + ImGui::TreePop(); + } + if (ImGui::TreeNode("In columns")) + { + static bool selected[10] = {}; + + if (ImGui::BeginTable("split1", 3, ImGuiTableFlags_Resizable | ImGuiTableFlags_NoSavedSettings)) + { + for (int i = 0; i < 10; i++) + { + char label[32]; + sprintf(label, "Item %d", i); + ImGui::TableNextColumn(); + ImGui::Selectable(label, &selected[i]); // FIXME-TABLE: Selection overlap + } + ImGui::EndTable(); + } + ImGui::Separator(); + if (ImGui::BeginTable("split2", 3, ImGuiTableFlags_Resizable | ImGuiTableFlags_NoSavedSettings)) + { + for (int i = 0; i < 10; i++) + { + char label[32]; + sprintf(label, "Item %d", i); + ImGui::TableNextRow(); + ImGui::TableNextColumn(); + ImGui::Selectable(label, &selected[i], ImGuiSelectableFlags_SpanAllColumns); + ImGui::TableNextColumn(); + ImGui::Text("Some other contents"); + ImGui::TableNextColumn(); + ImGui::Text("123456"); + } + ImGui::EndTable(); + } + ImGui::TreePop(); + } + if (ImGui::TreeNode("Grid")) + { + static char selected[4][4] = { { 1, 0, 0, 0 }, { 0, 1, 0, 0 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 } }; + + // Add in a bit of silly fun... + const float time = (float)ImGui::GetTime(); + const bool winning_state = memchr(selected, 0, sizeof(selected)) == NULL; // If all cells are selected... 
+ if (winning_state) + ImGui::PushStyleVar(ImGuiStyleVar_SelectableTextAlign, ImVec2(0.5f + 0.5f * cosf(time * 2.0f), 0.5f + 0.5f * sinf(time * 3.0f))); + + for (int y = 0; y < 4; y++) + for (int x = 0; x < 4; x++) + { + if (x > 0) + ImGui::SameLine(); + ImGui::PushID(y * 4 + x); + if (ImGui::Selectable("Sailor", selected[y][x] != 0, 0, ImVec2(50, 50))) + { + // Toggle clicked cell + toggle neighbors + selected[y][x] ^= 1; + if (x > 0) { selected[y][x - 1] ^= 1; } + if (x < 3) { selected[y][x + 1] ^= 1; } + if (y > 0) { selected[y - 1][x] ^= 1; } + if (y < 3) { selected[y + 1][x] ^= 1; } + } + ImGui::PopID(); + } + + if (winning_state) + ImGui::PopStyleVar(); + ImGui::TreePop(); + } + if (ImGui::TreeNode("Alignment")) + { + HelpMarker( + "By default, Selectables uses style.SelectableTextAlign but it can be overridden on a per-item " + "basis using PushStyleVar(). You'll probably want to always keep your default situation to " + "left-align otherwise it becomes difficult to layout multiple items on a same line"); + static bool selected[3 * 3] = { true, false, true, false, true, false, true, false, true }; + for (int y = 0; y < 3; y++) + { + for (int x = 0; x < 3; x++) + { + ImVec2 alignment = ImVec2((float)x / 2.0f, (float)y / 2.0f); + char name[32]; + sprintf(name, "(%.1f,%.1f)", alignment.x, alignment.y); + if (x > 0) ImGui::SameLine(); + ImGui::PushStyleVar(ImGuiStyleVar_SelectableTextAlign, alignment); + ImGui::Selectable(name, &selected[3 * y + x], ImGuiSelectableFlags_None, ImVec2(80, 80)); + ImGui::PopStyleVar(); + } + } + ImGui::TreePop(); + } + ImGui::TreePop(); + } + + // To wire InputText() with std::string or any other custom string type, + // see the "Text Input > Resize Callback" section of this demo, and the misc/cpp/imgui_stdlib.h file. + if (ImGui::TreeNode("Text Input")) + { + if (ImGui::TreeNode("Multi-line Text Input")) + { + // Note: we are using a fixed-sized buffer for simplicity here. 
See ImGuiInputTextFlags_CallbackResize + // and the code in misc/cpp/imgui_stdlib.h for how to setup InputText() for dynamically resizing strings. + static char text[1024 * 16] = + "/*\n" + " The Pentium F00F bug, shorthand for F0 0F C7 C8,\n" + " the hexadecimal encoding of one offending instruction,\n" + " more formally, the invalid operand with locked CMPXCHG8B\n" + " instruction bug, is a design flaw in the majority of\n" + " Intel Pentium, Pentium MMX, and Pentium OverDrive\n" + " processors (all in the P5 microarchitecture).\n" + "*/\n\n" + "label:\n" + "\tlock cmpxchg8b eax\n"; + + static ImGuiInputTextFlags flags = ImGuiInputTextFlags_AllowTabInput; + HelpMarker("You can use the ImGuiInputTextFlags_CallbackResize facility if you need to wire InputTextMultiline() to a dynamic string type. See misc/cpp/imgui_stdlib.h for an example. (This is not demonstrated in imgui_demo.cpp because we don't want to include <string> in here)"); + ImGui::CheckboxFlags("ImGuiInputTextFlags_ReadOnly", &flags, ImGuiInputTextFlags_ReadOnly); + ImGui::CheckboxFlags("ImGuiInputTextFlags_AllowTabInput", &flags, ImGuiInputTextFlags_AllowTabInput); + ImGui::CheckboxFlags("ImGuiInputTextFlags_CtrlEnterForNewLine", &flags, ImGuiInputTextFlags_CtrlEnterForNewLine); + ImGui::InputTextMultiline("##source", text, IM_ARRAYSIZE(text), ImVec2(-FLT_MIN, ImGui::GetTextLineHeight() * 16), flags); + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Filtered Text Input")) + { + struct TextFilters + { + // Return 0 (pass) if the character is 'i' or 'm' or 'g' or 'u' or 'i' + static int FilterImGuiLetters(ImGuiInputTextCallbackData* data) + { + if (data->EventChar < 256 && strchr("imgui", (char)data->EventChar)) + return 0; + return 1; + } + }; + + static char buf1[64] = ""; ImGui::InputText("default", buf1, 64); + static char buf2[64] = ""; ImGui::InputText("decimal", buf2, 64, ImGuiInputTextFlags_CharsDecimal); + static char buf3[64] = ""; ImGui::InputText("hexadecimal", buf3, 64, 
ImGuiInputTextFlags_CharsHexadecimal | ImGuiInputTextFlags_CharsUppercase); + static char buf4[64] = ""; ImGui::InputText("uppercase", buf4, 64, ImGuiInputTextFlags_CharsUppercase); + static char buf5[64] = ""; ImGui::InputText("no blank", buf5, 64, ImGuiInputTextFlags_CharsNoBlank); + static char buf6[64] = ""; ImGui::InputText("\"imgui\" letters", buf6, 64, ImGuiInputTextFlags_CallbackCharFilter, TextFilters::FilterImGuiLetters); + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Password Input")) + { + static char password[64] = "password123"; + ImGui::InputText("password", password, IM_ARRAYSIZE(password), ImGuiInputTextFlags_Password); + ImGui::SameLine(); HelpMarker("Display all characters as '*'.\nDisable clipboard cut and copy.\nDisable logging.\n"); + ImGui::InputTextWithHint("password (w/ hint)", "", password, IM_ARRAYSIZE(password), ImGuiInputTextFlags_Password); + ImGui::InputText("password (clear)", password, IM_ARRAYSIZE(password)); + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Completion, History, Edit Callbacks")) + { + struct Funcs + { + static int MyCallback(ImGuiInputTextCallbackData* data) + { + if (data->EventFlag == ImGuiInputTextFlags_CallbackCompletion) + { + data->InsertChars(data->CursorPos, ".."); + } + else if (data->EventFlag == ImGuiInputTextFlags_CallbackHistory) + { + if (data->EventKey == ImGuiKey_UpArrow) + { + data->DeleteChars(0, data->BufTextLen); + data->InsertChars(0, "Pressed Up!"); + data->SelectAll(); + } + else if (data->EventKey == ImGuiKey_DownArrow) + { + data->DeleteChars(0, data->BufTextLen); + data->InsertChars(0, "Pressed Down!"); + data->SelectAll(); + } + } + else if (data->EventFlag == ImGuiInputTextFlags_CallbackEdit) + { + // Toggle casing of first character + char c = data->Buf[0]; + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) data->Buf[0] ^= 32; + data->BufDirty = true; + + // Increment a counter + int* p_int = (int*)data->UserData; + *p_int = *p_int + 1; + } + return 0; + } + }; + static char 
buf1[64]; + ImGui::InputText("Completion", buf1, 64, ImGuiInputTextFlags_CallbackCompletion, Funcs::MyCallback); + ImGui::SameLine(); HelpMarker("Here we append \"..\" each time Tab is pressed. See 'Examples>Console' for a more meaningful demonstration of using this callback."); + + static char buf2[64]; + ImGui::InputText("History", buf2, 64, ImGuiInputTextFlags_CallbackHistory, Funcs::MyCallback); + ImGui::SameLine(); HelpMarker("Here we replace and select text each time Up/Down are pressed. See 'Examples>Console' for a more meaningful demonstration of using this callback."); + + static char buf3[64]; + static int edit_count = 0; + ImGui::InputText("Edit", buf3, 64, ImGuiInputTextFlags_CallbackEdit, Funcs::MyCallback, (void*)&edit_count); + ImGui::SameLine(); HelpMarker("Here we toggle the casing of the first character on every edits + count edits."); + ImGui::SameLine(); ImGui::Text("(%d)", edit_count); + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Resize Callback")) + { + // To wire InputText() with std::string or any other custom string type, + // you can use the ImGuiInputTextFlags_CallbackResize flag + create a custom ImGui::InputText() wrapper + // using your preferred type. See misc/cpp/imgui_stdlib.h for an implementation of this using std::string. + HelpMarker( + "Using ImGuiInputTextFlags_CallbackResize to wire your custom string type to InputText().\n\n" + "See misc/cpp/imgui_stdlib.h for an implementation of this for std::string."); + struct Funcs + { + static int MyResizeCallback(ImGuiInputTextCallbackData* data) + { + if (data->EventFlag == ImGuiInputTextFlags_CallbackResize) + { + ImVector<char>* my_str = (ImVector<char>*)data->UserData; + IM_ASSERT(my_str->begin() == data->Buf); + my_str->resize(data->BufSize); // NB: On resizing calls, generally data->BufSize == data->BufTextLen + 1 + data->Buf = my_str->begin(); + } + return 0; + } + + // Note: Because ImGui:: is a namespace you would typically add your own function into the namespace. 
+ // For example, your code may declare a function 'ImGui::InputText(const char* label, MyString* my_str)' + static bool MyInputTextMultiline(const char* label, ImVector<char>* my_str, const ImVec2& size = ImVec2(0, 0), ImGuiInputTextFlags flags = 0) + { + IM_ASSERT((flags & ImGuiInputTextFlags_CallbackResize) == 0); + return ImGui::InputTextMultiline(label, my_str->begin(), (size_t)my_str->size(), size, flags | ImGuiInputTextFlags_CallbackResize, Funcs::MyResizeCallback, (void*)my_str); + } + }; + + // For this demo we are using ImVector<char> as a string container. + // Note that because we need to store a terminating zero character, our size/capacity are 1 more + // than usually reported by a typical string class. + static ImVector<char> my_str; + if (my_str.empty()) + my_str.push_back(0); + Funcs::MyInputTextMultiline("##MyStr", &my_str, ImVec2(-FLT_MIN, ImGui::GetTextLineHeight() * 16)); + ImGui::Text("Data: %p\nSize: %d\nCapacity: %d", (void*)my_str.begin(), my_str.size(), my_str.capacity()); + ImGui::TreePop(); + } + + ImGui::TreePop(); + } + + // Tabs + if (ImGui::TreeNode("Tabs")) + { + if (ImGui::TreeNode("Basic")) + { + ImGuiTabBarFlags tab_bar_flags = ImGuiTabBarFlags_None; + if (ImGui::BeginTabBar("MyTabBar", tab_bar_flags)) + { + if (ImGui::BeginTabItem("Avocado")) + { + ImGui::Text("This is the Avocado tab!\nblah blah blah blah blah"); + ImGui::EndTabItem(); + } + if (ImGui::BeginTabItem("Broccoli")) + { + ImGui::Text("This is the Broccoli tab!\nblah blah blah blah blah"); + ImGui::EndTabItem(); + } + if (ImGui::BeginTabItem("Cucumber")) + { + ImGui::Text("This is the Cucumber tab!\nblah blah blah blah blah"); + ImGui::EndTabItem(); + } + ImGui::EndTabBar(); + } + ImGui::Separator(); + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Advanced & Close Button")) + { + // Expose a couple of the available flags. In most cases you may just call BeginTabBar() with no flags (0). 
+ static ImGuiTabBarFlags tab_bar_flags = ImGuiTabBarFlags_Reorderable; + ImGui::CheckboxFlags("ImGuiTabBarFlags_Reorderable", &tab_bar_flags, ImGuiTabBarFlags_Reorderable); + ImGui::CheckboxFlags("ImGuiTabBarFlags_AutoSelectNewTabs", &tab_bar_flags, ImGuiTabBarFlags_AutoSelectNewTabs); + ImGui::CheckboxFlags("ImGuiTabBarFlags_TabListPopupButton", &tab_bar_flags, ImGuiTabBarFlags_TabListPopupButton); + ImGui::CheckboxFlags("ImGuiTabBarFlags_NoCloseWithMiddleMouseButton", &tab_bar_flags, ImGuiTabBarFlags_NoCloseWithMiddleMouseButton); + if ((tab_bar_flags & ImGuiTabBarFlags_FittingPolicyMask_) == 0) + tab_bar_flags |= ImGuiTabBarFlags_FittingPolicyDefault_; + if (ImGui::CheckboxFlags("ImGuiTabBarFlags_FittingPolicyResizeDown", &tab_bar_flags, ImGuiTabBarFlags_FittingPolicyResizeDown)) + tab_bar_flags &= ~(ImGuiTabBarFlags_FittingPolicyMask_ ^ ImGuiTabBarFlags_FittingPolicyResizeDown); + if (ImGui::CheckboxFlags("ImGuiTabBarFlags_FittingPolicyScroll", &tab_bar_flags, ImGuiTabBarFlags_FittingPolicyScroll)) + tab_bar_flags &= ~(ImGuiTabBarFlags_FittingPolicyMask_ ^ ImGuiTabBarFlags_FittingPolicyScroll); + + // Tab Bar + const char* names[4] = { "Artichoke", "Beetroot", "Celery", "Daikon" }; + static bool opened[4] = { true, true, true, true }; // Persistent user state + for (int n = 0; n < IM_ARRAYSIZE(opened); n++) + { + if (n > 0) { ImGui::SameLine(); } + ImGui::Checkbox(names[n], &opened[n]); + } + + // Passing a bool* to BeginTabItem() is similar to passing one to Begin(): + // the underlying bool will be set to false when the tab is closed. 
+ if (ImGui::BeginTabBar("MyTabBar", tab_bar_flags)) + { + for (int n = 0; n < IM_ARRAYSIZE(opened); n++) + if (opened[n] && ImGui::BeginTabItem(names[n], &opened[n], ImGuiTabItemFlags_None)) + { + ImGui::Text("This is the %s tab!", names[n]); + if (n & 1) + ImGui::Text("I am an odd tab."); + ImGui::EndTabItem(); + } + ImGui::EndTabBar(); + } + ImGui::Separator(); + ImGui::TreePop(); + } + + if (ImGui::TreeNode("TabItemButton & Leading/Trailing flags")) + { + static ImVector active_tabs; + static int next_tab_id = 0; + if (next_tab_id == 0) // Initialize with some default tabs + for (int i = 0; i < 3; i++) + active_tabs.push_back(next_tab_id++); + + // TabItemButton() and Leading/Trailing flags are distinct features which we will demo together. + // (It is possible to submit regular tabs with Leading/Trailing flags, or TabItemButton tabs without Leading/Trailing flags... + // but they tend to make more sense together) + static bool show_leading_button = true; + static bool show_trailing_button = true; + ImGui::Checkbox("Show Leading TabItemButton()", &show_leading_button); + ImGui::Checkbox("Show Trailing TabItemButton()", &show_trailing_button); + + // Expose some other flags which are useful to showcase how they interact with Leading/Trailing tabs + static ImGuiTabBarFlags tab_bar_flags = ImGuiTabBarFlags_AutoSelectNewTabs | ImGuiTabBarFlags_Reorderable | ImGuiTabBarFlags_FittingPolicyResizeDown; + ImGui::CheckboxFlags("ImGuiTabBarFlags_TabListPopupButton", &tab_bar_flags, ImGuiTabBarFlags_TabListPopupButton); + if (ImGui::CheckboxFlags("ImGuiTabBarFlags_FittingPolicyResizeDown", &tab_bar_flags, ImGuiTabBarFlags_FittingPolicyResizeDown)) + tab_bar_flags &= ~(ImGuiTabBarFlags_FittingPolicyMask_ ^ ImGuiTabBarFlags_FittingPolicyResizeDown); + if (ImGui::CheckboxFlags("ImGuiTabBarFlags_FittingPolicyScroll", &tab_bar_flags, ImGuiTabBarFlags_FittingPolicyScroll)) + tab_bar_flags &= ~(ImGuiTabBarFlags_FittingPolicyMask_ ^ ImGuiTabBarFlags_FittingPolicyScroll); + + if 
(ImGui::BeginTabBar("MyTabBar", tab_bar_flags)) + { + // Demo a Leading TabItemButton(): click the "?" button to open a menu + if (show_leading_button) + if (ImGui::TabItemButton("?", ImGuiTabItemFlags_Leading | ImGuiTabItemFlags_NoTooltip)) + ImGui::OpenPopup("MyHelpMenu"); + if (ImGui::BeginPopup("MyHelpMenu")) + { + ImGui::Selectable("Hello!"); + ImGui::EndPopup(); + } + + // Demo Trailing Tabs: click the "+" button to add a new tab (in your app you may want to use a font icon instead of the "+") + // Note that we submit it before the regular tabs, but because of the ImGuiTabItemFlags_Trailing flag it will always appear at the end. + if (show_trailing_button) + if (ImGui::TabItemButton("+", ImGuiTabItemFlags_Trailing | ImGuiTabItemFlags_NoTooltip)) + active_tabs.push_back(next_tab_id++); // Add new tab + + // Submit our regular tabs + for (int n = 0; n < active_tabs.Size; ) + { + bool open = true; + char name[16]; + snprintf(name, IM_ARRAYSIZE(name), "%04d", active_tabs[n]); + if (ImGui::BeginTabItem(name, &open, ImGuiTabItemFlags_None)) + { + ImGui::Text("This is the %s tab!", name); + ImGui::EndTabItem(); + } + + if (!open) + active_tabs.erase(active_tabs.Data + n); + else + n++; + } + + ImGui::EndTabBar(); + } + ImGui::Separator(); + ImGui::TreePop(); + } + ImGui::TreePop(); + } + + // Plot/Graph widgets are not very good. 
+ // Consider writing your own, or using a third-party one, see: + // - ImPlot https://github.com/epezent/implot + // - others https://github.com/ocornut/imgui/wiki/Useful-Widgets + if (ImGui::TreeNode("Plots Widgets")) + { + static bool animate = true; + ImGui::Checkbox("Animate", &animate); + + static float arr[] = { 0.6f, 0.1f, 1.0f, 0.5f, 0.92f, 0.1f, 0.2f }; + ImGui::PlotLines("Frame Times", arr, IM_ARRAYSIZE(arr)); + + // Fill an array of contiguous float values to plot + // Tip: If your float aren't contiguous but part of a structure, you can pass a pointer to your first float + // and the sizeof() of your structure in the "stride" parameter. + static float values[90] = {}; + static int values_offset = 0; + static double refresh_time = 0.0; + if (!animate || refresh_time == 0.0) + refresh_time = ImGui::GetTime(); + while (refresh_time < ImGui::GetTime()) // Create data at fixed 60 Hz rate for the demo + { + static float phase = 0.0f; + values[values_offset] = cosf(phase); + values_offset = (values_offset + 1) % IM_ARRAYSIZE(values); + phase += 0.10f * values_offset; + refresh_time += 1.0f / 60.0f; + } + + // Plots can display overlay texts + // (in this example, we will display an average value) + { + float average = 0.0f; + for (int n = 0; n < IM_ARRAYSIZE(values); n++) + average += values[n]; + average /= (float)IM_ARRAYSIZE(values); + char overlay[32]; + sprintf(overlay, "avg %f", average); + ImGui::PlotLines("Lines", values, IM_ARRAYSIZE(values), values_offset, overlay, -1.0f, 1.0f, ImVec2(0, 80.0f)); + } + ImGui::PlotHistogram("Histogram", arr, IM_ARRAYSIZE(arr), 0, NULL, 0.0f, 1.0f, ImVec2(0, 80.0f)); + + // Use functions to generate output + // FIXME: This is rather awkward because current plot API only pass in indices. + // We probably want an API passing floats and user provide sample rate/count. + struct Funcs + { + static float Sin(void*, int i) { return sinf(i * 0.1f); } + static float Saw(void*, int i) { return (i & 1) ? 
1.0f : -1.0f; } + }; + static int func_type = 0, display_count = 70; + ImGui::Separator(); + ImGui::SetNextItemWidth(100); + ImGui::Combo("func", &func_type, "Sin\0Saw\0"); + ImGui::SameLine(); + ImGui::SliderInt("Sample count", &display_count, 1, 400); + float (*func)(void*, int) = (func_type == 0) ? Funcs::Sin : Funcs::Saw; + ImGui::PlotLines("Lines", func, NULL, display_count, 0, NULL, -1.0f, 1.0f, ImVec2(0, 80)); + ImGui::PlotHistogram("Histogram", func, NULL, display_count, 0, NULL, -1.0f, 1.0f, ImVec2(0, 80)); + ImGui::Separator(); + + // Animate a simple progress bar + static float progress = 0.0f, progress_dir = 1.0f; + if (animate) + { + progress += progress_dir * 0.4f * ImGui::GetIO().DeltaTime; + if (progress >= +1.1f) { progress = +1.1f; progress_dir *= -1.0f; } + if (progress <= -0.1f) { progress = -0.1f; progress_dir *= -1.0f; } + } + + // Typically we would use ImVec2(-1.0f,0.0f) or ImVec2(-FLT_MIN,0.0f) to use all available width, + // or ImVec2(width,0.0f) for a specified width. ImVec2(0.0f,0.0f) uses ItemWidth. 
+ ImGui::ProgressBar(progress, ImVec2(0.0f, 0.0f)); + ImGui::SameLine(0.0f, ImGui::GetStyle().ItemInnerSpacing.x); + ImGui::Text("Progress Bar"); + + float progress_saturated = IM_CLAMP(progress, 0.0f, 1.0f); + char buf[32]; + sprintf(buf, "%d/%d", (int)(progress_saturated * 1753), 1753); + ImGui::ProgressBar(progress, ImVec2(0.f, 0.f), buf); + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Color/Picker Widgets")) + { + static ImVec4 color = ImVec4(114.0f / 255.0f, 144.0f / 255.0f, 154.0f / 255.0f, 200.0f / 255.0f); + + static bool alpha_preview = true; + static bool alpha_half_preview = false; + static bool drag_and_drop = true; + static bool options_menu = true; + static bool hdr = false; + ImGui::Checkbox("With Alpha Preview", &alpha_preview); + ImGui::Checkbox("With Half Alpha Preview", &alpha_half_preview); + ImGui::Checkbox("With Drag and Drop", &drag_and_drop); + ImGui::Checkbox("With Options Menu", &options_menu); ImGui::SameLine(); HelpMarker("Right-click on the individual color widget to show options."); + ImGui::Checkbox("With HDR", &hdr); ImGui::SameLine(); HelpMarker("Currently all this does is to lift the 0..1 limits on dragging widgets."); + ImGuiColorEditFlags misc_flags = (hdr ? ImGuiColorEditFlags_HDR : 0) | (drag_and_drop ? 0 : ImGuiColorEditFlags_NoDragDrop) | (alpha_half_preview ? ImGuiColorEditFlags_AlphaPreviewHalf : (alpha_preview ? ImGuiColorEditFlags_AlphaPreview : 0)) | (options_menu ? 
0 : ImGuiColorEditFlags_NoOptions); + + ImGui::Text("Color widget:"); + ImGui::SameLine(); HelpMarker( + "Click on the color square to open a color picker.\n" + "CTRL+click on individual component to input value.\n"); + ImGui::ColorEdit3("MyColor##1", (float*)&color, misc_flags); + + ImGui::Text("Color widget HSV with Alpha:"); + ImGui::ColorEdit4("MyColor##2", (float*)&color, ImGuiColorEditFlags_DisplayHSV | misc_flags); + + ImGui::Text("Color widget with Float Display:"); + ImGui::ColorEdit4("MyColor##2f", (float*)&color, ImGuiColorEditFlags_Float | misc_flags); + + ImGui::Text("Color button with Picker:"); + ImGui::SameLine(); HelpMarker( + "With the ImGuiColorEditFlags_NoInputs flag you can hide all the slider/text inputs.\n" + "With the ImGuiColorEditFlags_NoLabel flag you can pass a non-empty label which will only " + "be used for the tooltip and picker popup."); + ImGui::ColorEdit4("MyColor##3", (float*)&color, ImGuiColorEditFlags_NoInputs | ImGuiColorEditFlags_NoLabel | misc_flags); + + ImGui::Text("Color button with Custom Picker Popup:"); + + // Generate a default palette. The palette will persist and can be edited. 
+ static bool saved_palette_init = true; + static ImVec4 saved_palette[32] = {}; + if (saved_palette_init) + { + for (int n = 0; n < IM_ARRAYSIZE(saved_palette); n++) + { + ImGui::ColorConvertHSVtoRGB(n / 31.0f, 0.8f, 0.8f, + saved_palette[n].x, saved_palette[n].y, saved_palette[n].z); + saved_palette[n].w = 1.0f; // Alpha + } + saved_palette_init = false; + } + + static ImVec4 backup_color; + bool open_popup = ImGui::ColorButton("MyColor##3b", color, misc_flags); + ImGui::SameLine(0, ImGui::GetStyle().ItemInnerSpacing.x); + open_popup |= ImGui::Button("Palette"); + if (open_popup) + { + ImGui::OpenPopup("mypicker"); + backup_color = color; + } + if (ImGui::BeginPopup("mypicker")) + { + ImGui::Text("MY CUSTOM COLOR PICKER WITH AN AMAZING PALETTE!"); + ImGui::Separator(); + ImGui::ColorPicker4("##picker", (float*)&color, misc_flags | ImGuiColorEditFlags_NoSidePreview | ImGuiColorEditFlags_NoSmallPreview); + ImGui::SameLine(); + + ImGui::BeginGroup(); // Lock X position + ImGui::Text("Current"); + ImGui::ColorButton("##current", color, ImGuiColorEditFlags_NoPicker | ImGuiColorEditFlags_AlphaPreviewHalf, ImVec2(60, 40)); + ImGui::Text("Previous"); + if (ImGui::ColorButton("##previous", backup_color, ImGuiColorEditFlags_NoPicker | ImGuiColorEditFlags_AlphaPreviewHalf, ImVec2(60, 40))) + color = backup_color; + ImGui::Separator(); + ImGui::Text("Palette"); + for (int n = 0; n < IM_ARRAYSIZE(saved_palette); n++) + { + ImGui::PushID(n); + if ((n % 8) != 0) + ImGui::SameLine(0.0f, ImGui::GetStyle().ItemSpacing.y); + + ImGuiColorEditFlags palette_button_flags = ImGuiColorEditFlags_NoAlpha | ImGuiColorEditFlags_NoPicker | ImGuiColorEditFlags_NoTooltip; + if (ImGui::ColorButton("##palette", saved_palette[n], palette_button_flags, ImVec2(20, 20))) + color = ImVec4(saved_palette[n].x, saved_palette[n].y, saved_palette[n].z, color.w); // Preserve alpha! + + // Allow user to drop colors into each palette entry. 
Note that ColorButton() is already a + // drag source by default, unless specifying the ImGuiColorEditFlags_NoDragDrop flag. + if (ImGui::BeginDragDropTarget()) + { + if (const ImGuiPayload* payload = ImGui::AcceptDragDropPayload(IMGUI_PAYLOAD_TYPE_COLOR_3F)) + memcpy((float*)&saved_palette[n], payload->Data, sizeof(float) * 3); + if (const ImGuiPayload* payload = ImGui::AcceptDragDropPayload(IMGUI_PAYLOAD_TYPE_COLOR_4F)) + memcpy((float*)&saved_palette[n], payload->Data, sizeof(float) * 4); + ImGui::EndDragDropTarget(); + } + + ImGui::PopID(); + } + ImGui::EndGroup(); + ImGui::EndPopup(); + } + + ImGui::Text("Color button only:"); + static bool no_border = false; + ImGui::Checkbox("ImGuiColorEditFlags_NoBorder", &no_border); + ImGui::ColorButton("MyColor##3c", *(ImVec4*)&color, misc_flags | (no_border ? ImGuiColorEditFlags_NoBorder : 0), ImVec2(80, 80)); + + ImGui::Text("Color picker:"); + static bool alpha = true; + static bool alpha_bar = true; + static bool side_preview = true; + static bool ref_color = false; + static ImVec4 ref_color_v(1.0f, 0.0f, 1.0f, 0.5f); + static int display_mode = 0; + static int picker_mode = 0; + ImGui::Checkbox("With Alpha", &alpha); + ImGui::Checkbox("With Alpha Bar", &alpha_bar); + ImGui::Checkbox("With Side Preview", &side_preview); + if (side_preview) + { + ImGui::SameLine(); + ImGui::Checkbox("With Ref Color", &ref_color); + if (ref_color) + { + ImGui::SameLine(); + ImGui::ColorEdit4("##RefColor", &ref_color_v.x, ImGuiColorEditFlags_NoInputs | misc_flags); + } + } + ImGui::Combo("Display Mode", &display_mode, "Auto/Current\0None\0RGB Only\0HSV Only\0Hex Only\0"); + ImGui::SameLine(); HelpMarker( + "ColorEdit defaults to displaying RGB inputs if you don't specify a display mode, " + "but the user can change it with a right-click.\n\nColorPicker defaults to displaying RGB+HSV+Hex " + "if you don't specify a display mode.\n\nYou can change the defaults using SetColorEditOptions()."); + ImGui::Combo("Picker Mode", &picker_mode, 
"Auto/Current\0Hue bar + SV rect\0Hue wheel + SV triangle\0"); + ImGui::SameLine(); HelpMarker("User can right-click the picker to change mode."); + ImGuiColorEditFlags flags = misc_flags; + if (!alpha) flags |= ImGuiColorEditFlags_NoAlpha; // This is by default if you call ColorPicker3() instead of ColorPicker4() + if (alpha_bar) flags |= ImGuiColorEditFlags_AlphaBar; + if (!side_preview) flags |= ImGuiColorEditFlags_NoSidePreview; + if (picker_mode == 1) flags |= ImGuiColorEditFlags_PickerHueBar; + if (picker_mode == 2) flags |= ImGuiColorEditFlags_PickerHueWheel; + if (display_mode == 1) flags |= ImGuiColorEditFlags_NoInputs; // Disable all RGB/HSV/Hex displays + if (display_mode == 2) flags |= ImGuiColorEditFlags_DisplayRGB; // Override display mode + if (display_mode == 3) flags |= ImGuiColorEditFlags_DisplayHSV; + if (display_mode == 4) flags |= ImGuiColorEditFlags_DisplayHex; + ImGui::ColorPicker4("MyColor##4", (float*)&color, flags, ref_color ? &ref_color_v.x : NULL); + + ImGui::Text("Set defaults in code:"); + ImGui::SameLine(); HelpMarker( + "SetColorEditOptions() is designed to allow you to set boot-time default.\n" + "We don't have Push/Pop functions because you can force options on a per-widget basis if needed," + "and the user can change non-forced ones with the options menu.\nWe don't have a getter to avoid" + "encouraging you to persistently save values that aren't forward-compatible."); + if (ImGui::Button("Default: Uint8 + HSV + Hue Bar")) + ImGui::SetColorEditOptions(ImGuiColorEditFlags_Uint8 | ImGuiColorEditFlags_DisplayHSV | ImGuiColorEditFlags_PickerHueBar); + if (ImGui::Button("Default: Float + HDR + Hue Wheel")) + ImGui::SetColorEditOptions(ImGuiColorEditFlags_Float | ImGuiColorEditFlags_HDR | ImGuiColorEditFlags_PickerHueWheel); + + // HSV encoded support (to avoid RGB<>HSV round trips and singularities when S==0 or V==0) + static ImVec4 color_hsv(0.23f, 1.0f, 1.0f, 1.0f); // Stored as HSV! 
+ ImGui::Spacing(); + ImGui::Text("HSV encoded colors"); + ImGui::SameLine(); HelpMarker( + "By default, colors are given to ColorEdit and ColorPicker in RGB, but ImGuiColorEditFlags_InputHSV" + "allows you to store colors as HSV and pass them to ColorEdit and ColorPicker as HSV. This comes with the" + "added benefit that you can manipulate hue values with the picker even when saturation or value are zero."); + ImGui::Text("Color widget with InputHSV:"); + ImGui::ColorEdit4("HSV shown as RGB##1", (float*)&color_hsv, ImGuiColorEditFlags_DisplayRGB | ImGuiColorEditFlags_InputHSV | ImGuiColorEditFlags_Float); + ImGui::ColorEdit4("HSV shown as HSV##1", (float*)&color_hsv, ImGuiColorEditFlags_DisplayHSV | ImGuiColorEditFlags_InputHSV | ImGuiColorEditFlags_Float); + ImGui::DragFloat4("Raw HSV values", (float*)&color_hsv, 0.01f, 0.0f, 1.0f); + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Drag/Slider Flags")) + { + // Demonstrate using advanced flags for DragXXX and SliderXXX functions. Note that the flags are the same! + static ImGuiSliderFlags flags = ImGuiSliderFlags_None; + ImGui::CheckboxFlags("ImGuiSliderFlags_AlwaysClamp", &flags, ImGuiSliderFlags_AlwaysClamp); + ImGui::SameLine(); HelpMarker("Always clamp value to min/max bounds (if any) when input manually with CTRL+Click."); + ImGui::CheckboxFlags("ImGuiSliderFlags_Logarithmic", &flags, ImGuiSliderFlags_Logarithmic); + ImGui::SameLine(); HelpMarker("Enable logarithmic editing (more precision for small values)."); + ImGui::CheckboxFlags("ImGuiSliderFlags_NoRoundToFormat", &flags, ImGuiSliderFlags_NoRoundToFormat); + ImGui::SameLine(); HelpMarker("Disable rounding underlying value to match precision of the format string (e.g. 
%.3f values are rounded to those 3 digits)."); + ImGui::CheckboxFlags("ImGuiSliderFlags_NoInput", &flags, ImGuiSliderFlags_NoInput); + ImGui::SameLine(); HelpMarker("Disable CTRL+Click or Enter key allowing to input text directly into the widget."); + + // Drags + static float drag_f = 0.5f; + static int drag_i = 50; + ImGui::Text("Underlying float value: %f", drag_f); + ImGui::DragFloat("DragFloat (0 -> 1)", &drag_f, 0.005f, 0.0f, 1.0f, "%.3f", flags); + ImGui::DragFloat("DragFloat (0 -> +inf)", &drag_f, 0.005f, 0.0f, FLT_MAX, "%.3f", flags); + ImGui::DragFloat("DragFloat (-inf -> 1)", &drag_f, 0.005f, -FLT_MAX, 1.0f, "%.3f", flags); + ImGui::DragFloat("DragFloat (-inf -> +inf)", &drag_f, 0.005f, -FLT_MAX, +FLT_MAX, "%.3f", flags); + ImGui::DragInt("DragInt (0 -> 100)", &drag_i, 0.5f, 0, 100, "%d", flags); + + // Sliders + static float slider_f = 0.5f; + static int slider_i = 50; + ImGui::Text("Underlying float value: %f", slider_f); + ImGui::SliderFloat("SliderFloat (0 -> 1)", &slider_f, 0.0f, 1.0f, "%.3f", flags); + ImGui::SliderInt("SliderInt (0 -> 100)", &slider_i, 0, 100, "%d", flags); + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Range Widgets")) + { + static float begin = 10, end = 90; + static int begin_i = 100, end_i = 1000; + ImGui::DragFloatRange2("range float", &begin, &end, 0.25f, 0.0f, 100.0f, "Min: %.1f %%", "Max: %.1f %%", ImGuiSliderFlags_AlwaysClamp); + ImGui::DragIntRange2("range int", &begin_i, &end_i, 5, 0, 1000, "Min: %d units", "Max: %d units"); + ImGui::DragIntRange2("range int (no bounds)", &begin_i, &end_i, 5, 0, 0, "Min: %d units", "Max: %d units"); + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Data Types")) + { + // DragScalar/InputScalar/SliderScalar functions allow various data types + // - signed/unsigned + // - 8/16/32/64-bits + // - integer/float/double + // To avoid polluting the public API with all possible combinations, we use the ImGuiDataType enum + // to pass the type, and passing all arguments by pointer. 
+ // This is the reason the test code below creates local variables to hold "zero" "one" etc. for each types. + // In practice, if you frequently use a given type that is not covered by the normal API entry points, + // you can wrap it yourself inside a 1 line function which can take typed argument as value instead of void*, + // and then pass their address to the generic function. For example: + // bool MySliderU64(const char *label, u64* value, u64 min = 0, u64 max = 0, const char* format = "%lld") + // { + // return SliderScalar(label, ImGuiDataType_U64, value, &min, &max, format); + // } + + // Setup limits (as helper variables so we can take their address, as explained above) + // Note: SliderScalar() functions have a maximum usable range of half the natural type maximum, hence the /2. + #ifndef LLONG_MIN + ImS64 LLONG_MIN = -9223372036854775807LL - 1; + ImS64 LLONG_MAX = 9223372036854775807LL; + ImU64 ULLONG_MAX = (2ULL * 9223372036854775807LL + 1); + #endif + const char s8_zero = 0, s8_one = 1, s8_fifty = 50, s8_min = -128, s8_max = 127; + const ImU8 u8_zero = 0, u8_one = 1, u8_fifty = 50, u8_min = 0, u8_max = 255; + const short s16_zero = 0, s16_one = 1, s16_fifty = 50, s16_min = -32768, s16_max = 32767; + const ImU16 u16_zero = 0, u16_one = 1, u16_fifty = 50, u16_min = 0, u16_max = 65535; + const ImS32 s32_zero = 0, s32_one = 1, s32_fifty = 50, s32_min = INT_MIN/2, s32_max = INT_MAX/2, s32_hi_a = INT_MAX/2 - 100, s32_hi_b = INT_MAX/2; + const ImU32 u32_zero = 0, u32_one = 1, u32_fifty = 50, u32_min = 0, u32_max = UINT_MAX/2, u32_hi_a = UINT_MAX/2 - 100, u32_hi_b = UINT_MAX/2; + const ImS64 s64_zero = 0, s64_one = 1, s64_fifty = 50, s64_min = LLONG_MIN/2, s64_max = LLONG_MAX/2, s64_hi_a = LLONG_MAX/2 - 100, s64_hi_b = LLONG_MAX/2; + const ImU64 u64_zero = 0, u64_one = 1, u64_fifty = 50, u64_min = 0, u64_max = ULLONG_MAX/2, u64_hi_a = ULLONG_MAX/2 - 100, u64_hi_b = ULLONG_MAX/2; + const float f32_zero = 0.f, f32_one = 1.f, f32_lo_a = -10000000000.0f, 
f32_hi_a = +10000000000.0f; + const double f64_zero = 0., f64_one = 1., f64_lo_a = -1000000000000000.0, f64_hi_a = +1000000000000000.0; + + // State + static char s8_v = 127; + static ImU8 u8_v = 255; + static short s16_v = 32767; + static ImU16 u16_v = 65535; + static ImS32 s32_v = -1; + static ImU32 u32_v = (ImU32)-1; + static ImS64 s64_v = -1; + static ImU64 u64_v = (ImU64)-1; + static float f32_v = 0.123f; + static double f64_v = 90000.01234567890123456789; + + const float drag_speed = 0.2f; + static bool drag_clamp = false; + ImGui::Text("Drags:"); + ImGui::Checkbox("Clamp integers to 0..50", &drag_clamp); + ImGui::SameLine(); HelpMarker( + "As with every widgets in dear imgui, we never modify values unless there is a user interaction.\n" + "You can override the clamping limits by using CTRL+Click to input a value."); + ImGui::DragScalar("drag s8", ImGuiDataType_S8, &s8_v, drag_speed, drag_clamp ? &s8_zero : NULL, drag_clamp ? &s8_fifty : NULL); + ImGui::DragScalar("drag u8", ImGuiDataType_U8, &u8_v, drag_speed, drag_clamp ? &u8_zero : NULL, drag_clamp ? &u8_fifty : NULL, "%u ms"); + ImGui::DragScalar("drag s16", ImGuiDataType_S16, &s16_v, drag_speed, drag_clamp ? &s16_zero : NULL, drag_clamp ? &s16_fifty : NULL); + ImGui::DragScalar("drag u16", ImGuiDataType_U16, &u16_v, drag_speed, drag_clamp ? &u16_zero : NULL, drag_clamp ? &u16_fifty : NULL, "%u ms"); + ImGui::DragScalar("drag s32", ImGuiDataType_S32, &s32_v, drag_speed, drag_clamp ? &s32_zero : NULL, drag_clamp ? &s32_fifty : NULL); + ImGui::DragScalar("drag u32", ImGuiDataType_U32, &u32_v, drag_speed, drag_clamp ? &u32_zero : NULL, drag_clamp ? &u32_fifty : NULL, "%u ms"); + ImGui::DragScalar("drag s64", ImGuiDataType_S64, &s64_v, drag_speed, drag_clamp ? &s64_zero : NULL, drag_clamp ? &s64_fifty : NULL); + ImGui::DragScalar("drag u64", ImGuiDataType_U64, &u64_v, drag_speed, drag_clamp ? &u64_zero : NULL, drag_clamp ? 
&u64_fifty : NULL); + ImGui::DragScalar("drag float", ImGuiDataType_Float, &f32_v, 0.005f, &f32_zero, &f32_one, "%f"); + ImGui::DragScalar("drag float log", ImGuiDataType_Float, &f32_v, 0.005f, &f32_zero, &f32_one, "%f", ImGuiSliderFlags_Logarithmic); + ImGui::DragScalar("drag double", ImGuiDataType_Double, &f64_v, 0.0005f, &f64_zero, NULL, "%.10f grams"); + ImGui::DragScalar("drag double log",ImGuiDataType_Double, &f64_v, 0.0005f, &f64_zero, &f64_one, "0 < %.10f < 1", ImGuiSliderFlags_Logarithmic); + + ImGui::Text("Sliders"); + ImGui::SliderScalar("slider s8 full", ImGuiDataType_S8, &s8_v, &s8_min, &s8_max, "%d"); + ImGui::SliderScalar("slider u8 full", ImGuiDataType_U8, &u8_v, &u8_min, &u8_max, "%u"); + ImGui::SliderScalar("slider s16 full", ImGuiDataType_S16, &s16_v, &s16_min, &s16_max, "%d"); + ImGui::SliderScalar("slider u16 full", ImGuiDataType_U16, &u16_v, &u16_min, &u16_max, "%u"); + ImGui::SliderScalar("slider s32 low", ImGuiDataType_S32, &s32_v, &s32_zero, &s32_fifty,"%d"); + ImGui::SliderScalar("slider s32 high", ImGuiDataType_S32, &s32_v, &s32_hi_a, &s32_hi_b, "%d"); + ImGui::SliderScalar("slider s32 full", ImGuiDataType_S32, &s32_v, &s32_min, &s32_max, "%d"); + ImGui::SliderScalar("slider u32 low", ImGuiDataType_U32, &u32_v, &u32_zero, &u32_fifty,"%u"); + ImGui::SliderScalar("slider u32 high", ImGuiDataType_U32, &u32_v, &u32_hi_a, &u32_hi_b, "%u"); + ImGui::SliderScalar("slider u32 full", ImGuiDataType_U32, &u32_v, &u32_min, &u32_max, "%u"); + ImGui::SliderScalar("slider s64 low", ImGuiDataType_S64, &s64_v, &s64_zero, &s64_fifty,"%I64d"); + ImGui::SliderScalar("slider s64 high", ImGuiDataType_S64, &s64_v, &s64_hi_a, &s64_hi_b, "%I64d"); + ImGui::SliderScalar("slider s64 full", ImGuiDataType_S64, &s64_v, &s64_min, &s64_max, "%I64d"); + ImGui::SliderScalar("slider u64 low", ImGuiDataType_U64, &u64_v, &u64_zero, &u64_fifty,"%I64u ms"); + ImGui::SliderScalar("slider u64 high", ImGuiDataType_U64, &u64_v, &u64_hi_a, &u64_hi_b, "%I64u ms"); + 
ImGui::SliderScalar("slider u64 full", ImGuiDataType_U64, &u64_v, &u64_min, &u64_max, "%I64u ms"); + ImGui::SliderScalar("slider float low", ImGuiDataType_Float, &f32_v, &f32_zero, &f32_one); + ImGui::SliderScalar("slider float low log", ImGuiDataType_Float, &f32_v, &f32_zero, &f32_one, "%.10f", ImGuiSliderFlags_Logarithmic); + ImGui::SliderScalar("slider float high", ImGuiDataType_Float, &f32_v, &f32_lo_a, &f32_hi_a, "%e"); + ImGui::SliderScalar("slider double low", ImGuiDataType_Double, &f64_v, &f64_zero, &f64_one, "%.10f grams"); + ImGui::SliderScalar("slider double low log",ImGuiDataType_Double, &f64_v, &f64_zero, &f64_one, "%.10f", ImGuiSliderFlags_Logarithmic); + ImGui::SliderScalar("slider double high", ImGuiDataType_Double, &f64_v, &f64_lo_a, &f64_hi_a, "%e grams"); + + ImGui::Text("Sliders (reverse)"); + ImGui::SliderScalar("slider s8 reverse", ImGuiDataType_S8, &s8_v, &s8_max, &s8_min, "%d"); + ImGui::SliderScalar("slider u8 reverse", ImGuiDataType_U8, &u8_v, &u8_max, &u8_min, "%u"); + ImGui::SliderScalar("slider s32 reverse", ImGuiDataType_S32, &s32_v, &s32_fifty, &s32_zero, "%d"); + ImGui::SliderScalar("slider u32 reverse", ImGuiDataType_U32, &u32_v, &u32_fifty, &u32_zero, "%u"); + ImGui::SliderScalar("slider s64 reverse", ImGuiDataType_S64, &s64_v, &s64_fifty, &s64_zero, "%I64d"); + ImGui::SliderScalar("slider u64 reverse", ImGuiDataType_U64, &u64_v, &u64_fifty, &u64_zero, "%I64u ms"); + + static bool inputs_step = true; + ImGui::Text("Inputs"); + ImGui::Checkbox("Show step buttons", &inputs_step); + ImGui::InputScalar("input s8", ImGuiDataType_S8, &s8_v, inputs_step ? &s8_one : NULL, NULL, "%d"); + ImGui::InputScalar("input u8", ImGuiDataType_U8, &u8_v, inputs_step ? &u8_one : NULL, NULL, "%u"); + ImGui::InputScalar("input s16", ImGuiDataType_S16, &s16_v, inputs_step ? &s16_one : NULL, NULL, "%d"); + ImGui::InputScalar("input u16", ImGuiDataType_U16, &u16_v, inputs_step ? 
&u16_one : NULL, NULL, "%u"); + ImGui::InputScalar("input s32", ImGuiDataType_S32, &s32_v, inputs_step ? &s32_one : NULL, NULL, "%d"); + ImGui::InputScalar("input s32 hex", ImGuiDataType_S32, &s32_v, inputs_step ? &s32_one : NULL, NULL, "%08X", ImGuiInputTextFlags_CharsHexadecimal); + ImGui::InputScalar("input u32", ImGuiDataType_U32, &u32_v, inputs_step ? &u32_one : NULL, NULL, "%u"); + ImGui::InputScalar("input u32 hex", ImGuiDataType_U32, &u32_v, inputs_step ? &u32_one : NULL, NULL, "%08X", ImGuiInputTextFlags_CharsHexadecimal); + ImGui::InputScalar("input s64", ImGuiDataType_S64, &s64_v, inputs_step ? &s64_one : NULL); + ImGui::InputScalar("input u64", ImGuiDataType_U64, &u64_v, inputs_step ? &u64_one : NULL); + ImGui::InputScalar("input float", ImGuiDataType_Float, &f32_v, inputs_step ? &f32_one : NULL); + ImGui::InputScalar("input double", ImGuiDataType_Double, &f64_v, inputs_step ? &f64_one : NULL); + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Multi-component Widgets")) + { + static float vec4f[4] = { 0.10f, 0.20f, 0.30f, 0.44f }; + static int vec4i[4] = { 1, 5, 100, 255 }; + + ImGui::InputFloat2("input float2", vec4f); + ImGui::DragFloat2("drag float2", vec4f, 0.01f, 0.0f, 1.0f); + ImGui::SliderFloat2("slider float2", vec4f, 0.0f, 1.0f); + ImGui::InputInt2("input int2", vec4i); + ImGui::DragInt2("drag int2", vec4i, 1, 0, 255); + ImGui::SliderInt2("slider int2", vec4i, 0, 255); + ImGui::Spacing(); + + ImGui::InputFloat3("input float3", vec4f); + ImGui::DragFloat3("drag float3", vec4f, 0.01f, 0.0f, 1.0f); + ImGui::SliderFloat3("slider float3", vec4f, 0.0f, 1.0f); + ImGui::InputInt3("input int3", vec4i); + ImGui::DragInt3("drag int3", vec4i, 1, 0, 255); + ImGui::SliderInt3("slider int3", vec4i, 0, 255); + ImGui::Spacing(); + + ImGui::InputFloat4("input float4", vec4f); + ImGui::DragFloat4("drag float4", vec4f, 0.01f, 0.0f, 1.0f); + ImGui::SliderFloat4("slider float4", vec4f, 0.0f, 1.0f); + ImGui::InputInt4("input int4", vec4i); + ImGui::DragInt4("drag 
int4", vec4i, 1, 0, 255); + ImGui::SliderInt4("slider int4", vec4i, 0, 255); + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Vertical Sliders")) + { + const float spacing = 4; + ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(spacing, spacing)); + + static int int_value = 0; + ImGui::VSliderInt("##int", ImVec2(18, 160), &int_value, 0, 5); + ImGui::SameLine(); + + static float values[7] = { 0.0f, 0.60f, 0.35f, 0.9f, 0.70f, 0.20f, 0.0f }; + ImGui::PushID("set1"); + for (int i = 0; i < 7; i++) + { + if (i > 0) ImGui::SameLine(); + ImGui::PushID(i); + ImGui::PushStyleColor(ImGuiCol_FrameBg, (ImVec4)ImColor::HSV(i / 7.0f, 0.5f, 0.5f)); + ImGui::PushStyleColor(ImGuiCol_FrameBgHovered, (ImVec4)ImColor::HSV(i / 7.0f, 0.6f, 0.5f)); + ImGui::PushStyleColor(ImGuiCol_FrameBgActive, (ImVec4)ImColor::HSV(i / 7.0f, 0.7f, 0.5f)); + ImGui::PushStyleColor(ImGuiCol_SliderGrab, (ImVec4)ImColor::HSV(i / 7.0f, 0.9f, 0.9f)); + ImGui::VSliderFloat("##v", ImVec2(18, 160), &values[i], 0.0f, 1.0f, ""); + if (ImGui::IsItemActive() || ImGui::IsItemHovered()) + ImGui::SetTooltip("%.3f", values[i]); + ImGui::PopStyleColor(4); + ImGui::PopID(); + } + ImGui::PopID(); + + ImGui::SameLine(); + ImGui::PushID("set2"); + static float values2[4] = { 0.20f, 0.80f, 0.40f, 0.25f }; + const int rows = 3; + const ImVec2 small_slider_size(18, (float)(int)((160.0f - (rows - 1) * spacing) / rows)); + for (int nx = 0; nx < 4; nx++) + { + if (nx > 0) ImGui::SameLine(); + ImGui::BeginGroup(); + for (int ny = 0; ny < rows; ny++) + { + ImGui::PushID(nx * rows + ny); + ImGui::VSliderFloat("##v", small_slider_size, &values2[nx], 0.0f, 1.0f, ""); + if (ImGui::IsItemActive() || ImGui::IsItemHovered()) + ImGui::SetTooltip("%.3f", values2[nx]); + ImGui::PopID(); + } + ImGui::EndGroup(); + } + ImGui::PopID(); + + ImGui::SameLine(); + ImGui::PushID("set3"); + for (int i = 0; i < 4; i++) + { + if (i > 0) ImGui::SameLine(); + ImGui::PushID(i); + ImGui::PushStyleVar(ImGuiStyleVar_GrabMinSize, 40); + 
ImGui::VSliderFloat("##v", ImVec2(40, 160), &values[i], 0.0f, 1.0f, "%.2f\nsec"); + ImGui::PopStyleVar(); + ImGui::PopID(); + } + ImGui::PopID(); + ImGui::PopStyleVar(); + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Drag and Drop")) + { + if (ImGui::TreeNode("Drag and drop in standard widgets")) + { + // ColorEdit widgets automatically act as drag source and drag target. + // They are using standardized payload strings IMGUI_PAYLOAD_TYPE_COLOR_3F and IMGUI_PAYLOAD_TYPE_COLOR_4F + // to allow your own widgets to use colors in their drag and drop interaction. + // Also see 'Demo->Widgets->Color/Picker Widgets->Palette' demo. + HelpMarker("You can drag from the color squares."); + static float col1[3] = { 1.0f, 0.0f, 0.2f }; + static float col2[4] = { 0.4f, 0.7f, 0.0f, 0.5f }; + ImGui::ColorEdit3("color 1", col1); + ImGui::ColorEdit4("color 2", col2); + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Drag and drop to copy/swap items")) + { + enum Mode + { + Mode_Copy, + Mode_Move, + Mode_Swap + }; + static int mode = 0; + if (ImGui::RadioButton("Copy", mode == Mode_Copy)) { mode = Mode_Copy; } ImGui::SameLine(); + if (ImGui::RadioButton("Move", mode == Mode_Move)) { mode = Mode_Move; } ImGui::SameLine(); + if (ImGui::RadioButton("Swap", mode == Mode_Swap)) { mode = Mode_Swap; } + static const char* names[9] = + { + "Bobby", "Beatrice", "Betty", + "Brianna", "Barry", "Bernard", + "Bibi", "Blaine", "Bryn" + }; + for (int n = 0; n < IM_ARRAYSIZE(names); n++) + { + ImGui::PushID(n); + if ((n % 3) != 0) + ImGui::SameLine(); + ImGui::Button(names[n], ImVec2(60, 60)); + + // Our buttons are both drag sources and drag targets here! + if (ImGui::BeginDragDropSource(ImGuiDragDropFlags_None)) + { + // Set payload to carry the index of our item (could be anything) + ImGui::SetDragDropPayload("DND_DEMO_CELL", &n, sizeof(int)); + + // Display preview (could be anything, e.g. 
when dragging an image we could decide to display + // the filename and a small preview of the image, etc.) + if (mode == Mode_Copy) { ImGui::Text("Copy %s", names[n]); } + if (mode == Mode_Move) { ImGui::Text("Move %s", names[n]); } + if (mode == Mode_Swap) { ImGui::Text("Swap %s", names[n]); } + ImGui::EndDragDropSource(); + } + if (ImGui::BeginDragDropTarget()) + { + if (const ImGuiPayload* payload = ImGui::AcceptDragDropPayload("DND_DEMO_CELL")) + { + IM_ASSERT(payload->DataSize == sizeof(int)); + int payload_n = *(const int*)payload->Data; + if (mode == Mode_Copy) + { + names[n] = names[payload_n]; + } + if (mode == Mode_Move) + { + names[n] = names[payload_n]; + names[payload_n] = ""; + } + if (mode == Mode_Swap) + { + const char* tmp = names[n]; + names[n] = names[payload_n]; + names[payload_n] = tmp; + } + } + ImGui::EndDragDropTarget(); + } + ImGui::PopID(); + } + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Drag to reorder items (simple)")) + { + // Simple reordering + HelpMarker( + "We don't use the drag and drop api at all here! " + "Instead we query when the item is held but not hovered, and order items accordingly."); + static const char* item_names[] = { "Item One", "Item Two", "Item Three", "Item Four", "Item Five" }; + for (int n = 0; n < IM_ARRAYSIZE(item_names); n++) + { + const char* item = item_names[n]; + ImGui::Selectable(item); + + if (ImGui::IsItemActive() && !ImGui::IsItemHovered()) + { + int n_next = n + (ImGui::GetMouseDragDelta(0).y < 0.f ? 
-1 : 1); + if (n_next >= 0 && n_next < IM_ARRAYSIZE(item_names)) + { + item_names[n] = item_names[n_next]; + item_names[n_next] = item; + ImGui::ResetMouseDragDelta(); + } + } + } + ImGui::TreePop(); + } + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Querying Status (Edited/Active/Focused/Hovered etc.)")) + { + // Select an item type + const char* item_names[] = + { + "Text", "Button", "Button (w/ repeat)", "Checkbox", "SliderFloat", "InputText", "InputFloat", + "InputFloat3", "ColorEdit4", "MenuItem", "TreeNode", "TreeNode (w/ double-click)", "Combo", "ListBox" + }; + static int item_type = 1; + ImGui::Combo("Item Type", &item_type, item_names, IM_ARRAYSIZE(item_names), IM_ARRAYSIZE(item_names)); + ImGui::SameLine(); + HelpMarker("Testing how various types of items are interacting with the IsItemXXX functions. Note that the bool return value of most ImGui function is generally equivalent to calling ImGui::IsItemHovered()."); + + // Submit selected item item so we can query their status in the code following it. 
+ bool ret = false; + static bool b = false; + static float col4f[4] = { 1.0f, 0.5, 0.0f, 1.0f }; + static char str[16] = {}; + if (item_type == 0) { ImGui::Text("ITEM: Text"); } // Testing text items with no identifier/interaction + if (item_type == 1) { ret = ImGui::Button("ITEM: Button"); } // Testing button + if (item_type == 2) { ImGui::PushButtonRepeat(true); ret = ImGui::Button("ITEM: Button"); ImGui::PopButtonRepeat(); } // Testing button (with repeater) + if (item_type == 3) { ret = ImGui::Checkbox("ITEM: Checkbox", &b); } // Testing checkbox + if (item_type == 4) { ret = ImGui::SliderFloat("ITEM: SliderFloat", &col4f[0], 0.0f, 1.0f); } // Testing basic item + if (item_type == 5) { ret = ImGui::InputText("ITEM: InputText", &str[0], IM_ARRAYSIZE(str)); } // Testing input text (which handles tabbing) + if (item_type == 6) { ret = ImGui::InputFloat("ITEM: InputFloat", col4f, 1.0f); } // Testing +/- buttons on scalar input + if (item_type == 7) { ret = ImGui::InputFloat3("ITEM: InputFloat3", col4f); } // Testing multi-component items (IsItemXXX flags are reported merged) + if (item_type == 8) { ret = ImGui::ColorEdit4("ITEM: ColorEdit4", col4f); } // Testing multi-component items (IsItemXXX flags are reported merged) + if (item_type == 9) { ret = ImGui::MenuItem("ITEM: MenuItem"); } // Testing menu item (they use ImGuiButtonFlags_PressedOnRelease button policy) + if (item_type == 10){ ret = ImGui::TreeNode("ITEM: TreeNode"); if (ret) ImGui::TreePop(); } // Testing tree node + if (item_type == 11){ ret = ImGui::TreeNodeEx("ITEM: TreeNode w/ ImGuiTreeNodeFlags_OpenOnDoubleClick", ImGuiTreeNodeFlags_OpenOnDoubleClick | ImGuiTreeNodeFlags_NoTreePushOnOpen); } // Testing tree node with ImGuiButtonFlags_PressedOnDoubleClick button policy. 
+ if (item_type == 12){ const char* items[] = { "Apple", "Banana", "Cherry", "Kiwi" }; static int current = 1; ret = ImGui::Combo("ITEM: Combo", ¤t, items, IM_ARRAYSIZE(items)); } + if (item_type == 13){ const char* items[] = { "Apple", "Banana", "Cherry", "Kiwi" }; static int current = 1; ret = ImGui::ListBox("ITEM: ListBox", ¤t, items, IM_ARRAYSIZE(items), IM_ARRAYSIZE(items)); } + + // Display the values of IsItemHovered() and other common item state functions. + // Note that the ImGuiHoveredFlags_XXX flags can be combined. + // Because BulletText is an item itself and that would affect the output of IsItemXXX functions, + // we query every state in a single call to avoid storing them and to simplify the code. + ImGui::BulletText( + "Return value = %d\n" + "IsItemFocused() = %d\n" + "IsItemHovered() = %d\n" + "IsItemHovered(_AllowWhenBlockedByPopup) = %d\n" + "IsItemHovered(_AllowWhenBlockedByActiveItem) = %d\n" + "IsItemHovered(_AllowWhenOverlapped) = %d\n" + "IsItemHovered(_RectOnly) = %d\n" + "IsItemActive() = %d\n" + "IsItemEdited() = %d\n" + "IsItemActivated() = %d\n" + "IsItemDeactivated() = %d\n" + "IsItemDeactivatedAfterEdit() = %d\n" + "IsItemVisible() = %d\n" + "IsItemClicked() = %d\n" + "IsItemToggledOpen() = %d\n" + "GetItemRectMin() = (%.1f, %.1f)\n" + "GetItemRectMax() = (%.1f, %.1f)\n" + "GetItemRectSize() = (%.1f, %.1f)", + ret, + ImGui::IsItemFocused(), + ImGui::IsItemHovered(), + ImGui::IsItemHovered(ImGuiHoveredFlags_AllowWhenBlockedByPopup), + ImGui::IsItemHovered(ImGuiHoveredFlags_AllowWhenBlockedByActiveItem), + ImGui::IsItemHovered(ImGuiHoveredFlags_AllowWhenOverlapped), + ImGui::IsItemHovered(ImGuiHoveredFlags_RectOnly), + ImGui::IsItemActive(), + ImGui::IsItemEdited(), + ImGui::IsItemActivated(), + ImGui::IsItemDeactivated(), + ImGui::IsItemDeactivatedAfterEdit(), + ImGui::IsItemVisible(), + ImGui::IsItemClicked(), + ImGui::IsItemToggledOpen(), + ImGui::GetItemRectMin().x, ImGui::GetItemRectMin().y, + ImGui::GetItemRectMax().x, 
ImGui::GetItemRectMax().y, + ImGui::GetItemRectSize().x, ImGui::GetItemRectSize().y + ); + + static bool embed_all_inside_a_child_window = false; + ImGui::Checkbox("Embed everything inside a child window (for additional testing)", &embed_all_inside_a_child_window); + if (embed_all_inside_a_child_window) + ImGui::BeginChild("outer_child", ImVec2(0, ImGui::GetFontSize() * 20.0f), true); + + // Testing IsWindowFocused() function with its various flags. + // Note that the ImGuiFocusedFlags_XXX flags can be combined. + ImGui::BulletText( + "IsWindowFocused() = %d\n" + "IsWindowFocused(_ChildWindows) = %d\n" + "IsWindowFocused(_ChildWindows|_RootWindow) = %d\n" + "IsWindowFocused(_RootWindow) = %d\n" + "IsWindowFocused(_AnyWindow) = %d\n", + ImGui::IsWindowFocused(), + ImGui::IsWindowFocused(ImGuiFocusedFlags_ChildWindows), + ImGui::IsWindowFocused(ImGuiFocusedFlags_ChildWindows | ImGuiFocusedFlags_RootWindow), + ImGui::IsWindowFocused(ImGuiFocusedFlags_RootWindow), + ImGui::IsWindowFocused(ImGuiFocusedFlags_AnyWindow)); + + // Testing IsWindowHovered() function with its various flags. + // Note that the ImGuiHoveredFlags_XXX flags can be combined. 
+ ImGui::BulletText( + "IsWindowHovered() = %d\n" + "IsWindowHovered(_AllowWhenBlockedByPopup) = %d\n" + "IsWindowHovered(_AllowWhenBlockedByActiveItem) = %d\n" + "IsWindowHovered(_ChildWindows) = %d\n" + "IsWindowHovered(_ChildWindows|_RootWindow) = %d\n" + "IsWindowHovered(_ChildWindows|_AllowWhenBlockedByPopup) = %d\n" + "IsWindowHovered(_RootWindow) = %d\n" + "IsWindowHovered(_AnyWindow) = %d\n", + ImGui::IsWindowHovered(), + ImGui::IsWindowHovered(ImGuiHoveredFlags_AllowWhenBlockedByPopup), + ImGui::IsWindowHovered(ImGuiHoveredFlags_AllowWhenBlockedByActiveItem), + ImGui::IsWindowHovered(ImGuiHoveredFlags_ChildWindows), + ImGui::IsWindowHovered(ImGuiHoveredFlags_ChildWindows | ImGuiHoveredFlags_RootWindow), + ImGui::IsWindowHovered(ImGuiHoveredFlags_ChildWindows | ImGuiHoveredFlags_AllowWhenBlockedByPopup), + ImGui::IsWindowHovered(ImGuiHoveredFlags_RootWindow), + ImGui::IsWindowHovered(ImGuiHoveredFlags_AnyWindow)); + + ImGui::BeginChild("child", ImVec2(0, 50), true); + ImGui::Text("This is another child window for testing the _ChildWindows flag."); + ImGui::EndChild(); + if (embed_all_inside_a_child_window) + ImGui::EndChild(); + + static char unused_str[] = "This widget is only here to be able to tab-out of the widgets above."; + ImGui::InputText("unused", unused_str, IM_ARRAYSIZE(unused_str), ImGuiInputTextFlags_ReadOnly); + + // Calling IsItemHovered() after begin returns the hovered status of the title bar. + // This is useful in particular if you want to create a context menu associated to the title bar of a window. 
+ static bool test_window = false; + ImGui::Checkbox("Hovered/Active tests after Begin() for title bar testing", &test_window); + if (test_window) + { + ImGui::Begin("Title bar Hovered/Active tests", &test_window); + if (ImGui::BeginPopupContextItem()) // <-- This is using IsItemHovered() + { + if (ImGui::MenuItem("Close")) { test_window = false; } + ImGui::EndPopup(); + } + ImGui::Text( + "IsItemHovered() after begin = %d (== is title bar hovered)\n" + "IsItemActive() after begin = %d (== is window being clicked/moved)\n", + ImGui::IsItemHovered(), ImGui::IsItemActive()); + ImGui::End(); + } + + ImGui::TreePop(); + } +} + +static void ShowDemoWindowLayout() +{ + if (!ImGui::CollapsingHeader("Layout & Scrolling")) + return; + + if (ImGui::TreeNode("Child windows")) + { + HelpMarker("Use child windows to begin into a self-contained independent scrolling/clipping regions within a host window."); + static bool disable_mouse_wheel = false; + static bool disable_menu = false; + ImGui::Checkbox("Disable Mouse Wheel", &disable_mouse_wheel); + ImGui::Checkbox("Disable Menu", &disable_menu); + + // Child 1: no border, enable horizontal scrollbar + { + ImGuiWindowFlags window_flags = ImGuiWindowFlags_HorizontalScrollbar; + if (disable_mouse_wheel) + window_flags |= ImGuiWindowFlags_NoScrollWithMouse; + ImGui::BeginChild("ChildL", ImVec2(ImGui::GetWindowContentRegionWidth() * 0.5f, 260), false, window_flags); + for (int i = 0; i < 100; i++) + ImGui::Text("%04d: scrollable region", i); + ImGui::EndChild(); + } + + ImGui::SameLine(); + + // Child 2: rounded border + { + ImGuiWindowFlags window_flags = ImGuiWindowFlags_None; + if (disable_mouse_wheel) + window_flags |= ImGuiWindowFlags_NoScrollWithMouse; + if (!disable_menu) + window_flags |= ImGuiWindowFlags_MenuBar; + ImGui::PushStyleVar(ImGuiStyleVar_ChildRounding, 5.0f); + ImGui::BeginChild("ChildR", ImVec2(0, 260), true, window_flags); + if (!disable_menu && ImGui::BeginMenuBar()) + { + if (ImGui::BeginMenu("Menu")) + { + 
ShowExampleMenuFile(); + ImGui::EndMenu(); + } + ImGui::EndMenuBar(); + } + if (ImGui::BeginTable("split", 2, ImGuiTableFlags_Resizable | ImGuiTableFlags_NoSavedSettings)) + { + for (int i = 0; i < 100; i++) + { + char buf[32]; + sprintf(buf, "%03d", i); + ImGui::TableNextColumn(); + ImGui::Button(buf, ImVec2(-FLT_MIN, 0.0f)); + } + ImGui::EndTable(); + } + ImGui::EndChild(); + ImGui::PopStyleVar(); + } + + ImGui::Separator(); + + // Demonstrate a few extra things + // - Changing ImGuiCol_ChildBg (which is transparent black in default styles) + // - Using SetCursorPos() to position child window (the child window is an item from the POV of parent window) + // You can also call SetNextWindowPos() to position the child window. The parent window will effectively + // layout from this position. + // - Using ImGui::GetItemRectMin/Max() to query the "item" state (because the child window is an item from + // the POV of the parent window). See 'Demo->Querying Status (Active/Focused/Hovered etc.)' for details. + { + static int offset_x = 0; + ImGui::SetNextItemWidth(100); + ImGui::DragInt("Offset X", &offset_x, 1.0f, -1000, 1000); + + ImGui::SetCursorPosX(ImGui::GetCursorPosX() + (float)offset_x); + ImGui::PushStyleColor(ImGuiCol_ChildBg, IM_COL32(255, 0, 0, 100)); + ImGui::BeginChild("Red", ImVec2(200, 100), true, ImGuiWindowFlags_None); + for (int n = 0; n < 50; n++) + ImGui::Text("Some test %d", n); + ImGui::EndChild(); + bool child_is_hovered = ImGui::IsItemHovered(); + ImVec2 child_rect_min = ImGui::GetItemRectMin(); + ImVec2 child_rect_max = ImGui::GetItemRectMax(); + ImGui::PopStyleColor(); + ImGui::Text("Hovered: %d", child_is_hovered); + ImGui::Text("Rect of child window is: (%.0f,%.0f) (%.0f,%.0f)", child_rect_min.x, child_rect_min.y, child_rect_max.x, child_rect_max.y); + } + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Widgets Width")) + { + // Use SetNextItemWidth() to set the width of a single upcoming item. 
+ // Use PushItemWidth()/PopItemWidth() to set the width of a group of items. + // In real code use you'll probably want to choose width values that are proportional to your font size + // e.g. Using '20.0f * GetFontSize()' as width instead of '200.0f', etc. + + static float f = 0.0f; + static bool show_indented_items = true; + ImGui::Checkbox("Show indented items", &show_indented_items); + + ImGui::Text("SetNextItemWidth/PushItemWidth(100)"); + ImGui::SameLine(); HelpMarker("Fixed width."); + ImGui::PushItemWidth(100); + ImGui::DragFloat("float##1b", &f); + if (show_indented_items) + { + ImGui::Indent(); + ImGui::DragFloat("float (indented)##1b", &f); + ImGui::Unindent(); + } + ImGui::PopItemWidth(); + + ImGui::Text("SetNextItemWidth/PushItemWidth(-100)"); + ImGui::SameLine(); HelpMarker("Align to right edge minus 100"); + ImGui::PushItemWidth(-100); + ImGui::DragFloat("float##2a", &f); + if (show_indented_items) + { + ImGui::Indent(); + ImGui::DragFloat("float (indented)##2b", &f); + ImGui::Unindent(); + } + ImGui::PopItemWidth(); + + ImGui::Text("SetNextItemWidth/PushItemWidth(GetContentRegionAvail().x * 0.5f)"); + ImGui::SameLine(); HelpMarker("Half of available width.\n(~ right-cursor_pos)\n(works within a column set)"); + ImGui::PushItemWidth(ImGui::GetContentRegionAvail().x * 0.5f); + ImGui::DragFloat("float##3a", &f); + if (show_indented_items) + { + ImGui::Indent(); + ImGui::DragFloat("float (indented)##3b", &f); + ImGui::Unindent(); + } + ImGui::PopItemWidth(); + + ImGui::Text("SetNextItemWidth/PushItemWidth(-GetContentRegionAvail().x * 0.5f)"); + ImGui::SameLine(); HelpMarker("Align to right edge minus half"); + ImGui::PushItemWidth(-ImGui::GetContentRegionAvail().x * 0.5f); + ImGui::DragFloat("float##4a", &f); + if (show_indented_items) + { + ImGui::Indent(); + ImGui::DragFloat("float (indented)##4b", &f); + ImGui::Unindent(); + } + ImGui::PopItemWidth(); + + // Demonstrate using PushItemWidth to surround three items. 
+ // Calling SetNextItemWidth() before each of them would have the same effect. + ImGui::Text("SetNextItemWidth/PushItemWidth(-FLT_MIN)"); + ImGui::SameLine(); HelpMarker("Align to right edge"); + ImGui::PushItemWidth(-FLT_MIN); + ImGui::DragFloat("##float5a", &f); + if (show_indented_items) + { + ImGui::Indent(); + ImGui::DragFloat("float (indented)##5b", &f); + ImGui::Unindent(); + } + ImGui::PopItemWidth(); + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Basic Horizontal Layout")) + { + ImGui::TextWrapped("(Use ImGui::SameLine() to keep adding items to the right of the preceding item)"); + + // Text + ImGui::Text("Two items: Hello"); ImGui::SameLine(); + ImGui::TextColored(ImVec4(1,1,0,1), "Sailor"); + + // Adjust spacing + ImGui::Text("More spacing: Hello"); ImGui::SameLine(0, 20); + ImGui::TextColored(ImVec4(1,1,0,1), "Sailor"); + + // Button + ImGui::AlignTextToFramePadding(); + ImGui::Text("Normal buttons"); ImGui::SameLine(); + ImGui::Button("Banana"); ImGui::SameLine(); + ImGui::Button("Apple"); ImGui::SameLine(); + ImGui::Button("Corniflower"); + + // Button + ImGui::Text("Small buttons"); ImGui::SameLine(); + ImGui::SmallButton("Like this one"); ImGui::SameLine(); + ImGui::Text("can fit within a text block."); + + // Aligned to arbitrary position. Easy/cheap column. 
+ ImGui::Text("Aligned"); + ImGui::SameLine(150); ImGui::Text("x=150"); + ImGui::SameLine(300); ImGui::Text("x=300"); + ImGui::Text("Aligned"); + ImGui::SameLine(150); ImGui::SmallButton("x=150"); + ImGui::SameLine(300); ImGui::SmallButton("x=300"); + + // Checkbox + static bool c1 = false, c2 = false, c3 = false, c4 = false; + ImGui::Checkbox("My", &c1); ImGui::SameLine(); + ImGui::Checkbox("Tailor", &c2); ImGui::SameLine(); + ImGui::Checkbox("Is", &c3); ImGui::SameLine(); + ImGui::Checkbox("Rich", &c4); + + // Various + static float f0 = 1.0f, f1 = 2.0f, f2 = 3.0f; + ImGui::PushItemWidth(80); + const char* items[] = { "AAAA", "BBBB", "CCCC", "DDDD" }; + static int item = -1; + ImGui::Combo("Combo", &item, items, IM_ARRAYSIZE(items)); ImGui::SameLine(); + ImGui::SliderFloat("X", &f0, 0.0f, 5.0f); ImGui::SameLine(); + ImGui::SliderFloat("Y", &f1, 0.0f, 5.0f); ImGui::SameLine(); + ImGui::SliderFloat("Z", &f2, 0.0f, 5.0f); + ImGui::PopItemWidth(); + + ImGui::PushItemWidth(80); + ImGui::Text("Lists:"); + static int selection[4] = { 0, 1, 2, 3 }; + for (int i = 0; i < 4; i++) + { + if (i > 0) ImGui::SameLine(); + ImGui::PushID(i); + ImGui::ListBox("", &selection[i], items, IM_ARRAYSIZE(items)); + ImGui::PopID(); + //if (ImGui::IsItemHovered()) ImGui::SetTooltip("ListBox %d hovered", i); + } + ImGui::PopItemWidth(); + + // Dummy + ImVec2 button_sz(40, 40); + ImGui::Button("A", button_sz); ImGui::SameLine(); + ImGui::Dummy(button_sz); ImGui::SameLine(); + ImGui::Button("B", button_sz); + + // Manually wrapping + // (we should eventually provide this as an automatic layout feature, but for now you can do it manually) + ImGui::Text("Manually wrapping:"); + ImGuiStyle& style = ImGui::GetStyle(); + int buttons_count = 20; + float window_visible_x2 = ImGui::GetWindowPos().x + ImGui::GetWindowContentRegionMax().x; + for (int n = 0; n < buttons_count; n++) + { + ImGui::PushID(n); + ImGui::Button("Box", button_sz); + float last_button_x2 = ImGui::GetItemRectMax().x; + float 
next_button_x2 = last_button_x2 + style.ItemSpacing.x + button_sz.x; // Expected position if next button was on same line + if (n + 1 < buttons_count && next_button_x2 < window_visible_x2) + ImGui::SameLine(); + ImGui::PopID(); + } + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Groups")) + { + HelpMarker( + "BeginGroup() basically locks the horizontal position for new line. " + "EndGroup() bundles the whole group so that you can use \"item\" functions such as " + "IsItemHovered()/IsItemActive() or SameLine() etc. on the whole group."); + ImGui::BeginGroup(); + { + ImGui::BeginGroup(); + ImGui::Button("AAA"); + ImGui::SameLine(); + ImGui::Button("BBB"); + ImGui::SameLine(); + ImGui::BeginGroup(); + ImGui::Button("CCC"); + ImGui::Button("DDD"); + ImGui::EndGroup(); + ImGui::SameLine(); + ImGui::Button("EEE"); + ImGui::EndGroup(); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("First group hovered"); + } + // Capture the group size and create widgets using the same size + ImVec2 size = ImGui::GetItemRectSize(); + const float values[5] = { 0.5f, 0.20f, 0.80f, 0.60f, 0.25f }; + ImGui::PlotHistogram("##values", values, IM_ARRAYSIZE(values), 0, NULL, 0.0f, 1.0f, size); + + ImGui::Button("ACTION", ImVec2((size.x - ImGui::GetStyle().ItemSpacing.x) * 0.5f, size.y)); + ImGui::SameLine(); + ImGui::Button("REACTION", ImVec2((size.x - ImGui::GetStyle().ItemSpacing.x) * 0.5f, size.y)); + ImGui::EndGroup(); + ImGui::SameLine(); + + ImGui::Button("LEVERAGE\nBUZZWORD", size); + ImGui::SameLine(); + + if (ImGui::ListBoxHeader("List", size)) + { + ImGui::Selectable("Selected", true); + ImGui::Selectable("Not Selected", false); + ImGui::ListBoxFooter(); + } + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Text Baseline Alignment")) + { + { + ImGui::BulletText("Text baseline:"); + ImGui::SameLine(); HelpMarker( + "This is testing the vertical alignment that gets applied on text to keep it aligned with widgets. 
" + "Lines only composed of text or \"small\" widgets use less vertical space than lines with framed widgets."); + ImGui::Indent(); + + ImGui::Text("KO Blahblah"); ImGui::SameLine(); + ImGui::Button("Some framed item"); ImGui::SameLine(); + HelpMarker("Baseline of button will look misaligned with text.."); + + // If your line starts with text, call AlignTextToFramePadding() to align text to upcoming widgets. + // (because we don't know what's coming after the Text() statement, we need to move the text baseline + // down by FramePadding.y ahead of time) + ImGui::AlignTextToFramePadding(); + ImGui::Text("OK Blahblah"); ImGui::SameLine(); + ImGui::Button("Some framed item"); ImGui::SameLine(); + HelpMarker("We call AlignTextToFramePadding() to vertically align the text baseline by +FramePadding.y"); + + // SmallButton() uses the same vertical padding as Text + ImGui::Button("TEST##1"); ImGui::SameLine(); + ImGui::Text("TEST"); ImGui::SameLine(); + ImGui::SmallButton("TEST##2"); + + // If your line starts with text, call AlignTextToFramePadding() to align text to upcoming widgets. 
+ ImGui::AlignTextToFramePadding(); + ImGui::Text("Text aligned to framed item"); ImGui::SameLine(); + ImGui::Button("Item##1"); ImGui::SameLine(); + ImGui::Text("Item"); ImGui::SameLine(); + ImGui::SmallButton("Item##2"); ImGui::SameLine(); + ImGui::Button("Item##3"); + + ImGui::Unindent(); + } + + ImGui::Spacing(); + + { + ImGui::BulletText("Multi-line text:"); + ImGui::Indent(); + ImGui::Text("One\nTwo\nThree"); ImGui::SameLine(); + ImGui::Text("Hello\nWorld"); ImGui::SameLine(); + ImGui::Text("Banana"); + + ImGui::Text("Banana"); ImGui::SameLine(); + ImGui::Text("Hello\nWorld"); ImGui::SameLine(); + ImGui::Text("One\nTwo\nThree"); + + ImGui::Button("HOP##1"); ImGui::SameLine(); + ImGui::Text("Banana"); ImGui::SameLine(); + ImGui::Text("Hello\nWorld"); ImGui::SameLine(); + ImGui::Text("Banana"); + + ImGui::Button("HOP##2"); ImGui::SameLine(); + ImGui::Text("Hello\nWorld"); ImGui::SameLine(); + ImGui::Text("Banana"); + ImGui::Unindent(); + } + + ImGui::Spacing(); + + { + ImGui::BulletText("Misc items:"); + ImGui::Indent(); + + // SmallButton() sets FramePadding to zero. Text baseline is aligned to match baseline of previous Button. + ImGui::Button("80x80", ImVec2(80, 80)); + ImGui::SameLine(); + ImGui::Button("50x50", ImVec2(50, 50)); + ImGui::SameLine(); + ImGui::Button("Button()"); + ImGui::SameLine(); + ImGui::SmallButton("SmallButton()"); + + // Tree + const float spacing = ImGui::GetStyle().ItemInnerSpacing.x; + ImGui::Button("Button##1"); + ImGui::SameLine(0.0f, spacing); + if (ImGui::TreeNode("Node##1")) + { + // Placeholder tree data + for (int i = 0; i < 6; i++) + ImGui::BulletText("Item %d..", i); + ImGui::TreePop(); + } + + // Vertically align text node a bit lower so it'll be vertically centered with upcoming widget. + // Otherwise you can use SmallButton() (smaller fit). + ImGui::AlignTextToFramePadding(); + + // Common mistake to avoid: if we want to SameLine after TreeNode we need to do it before we add + // other contents below the node. 
+ bool node_open = ImGui::TreeNode("Node##2"); + ImGui::SameLine(0.0f, spacing); ImGui::Button("Button##2"); + if (node_open) + { + // Placeholder tree data + for (int i = 0; i < 6; i++) + ImGui::BulletText("Item %d..", i); + ImGui::TreePop(); + } + + // Bullet + ImGui::Button("Button##3"); + ImGui::SameLine(0.0f, spacing); + ImGui::BulletText("Bullet text"); + + ImGui::AlignTextToFramePadding(); + ImGui::BulletText("Node"); + ImGui::SameLine(0.0f, spacing); ImGui::Button("Button##4"); + ImGui::Unindent(); + } + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Scrolling")) + { + // Vertical scroll functions + HelpMarker("Use SetScrollHereY() or SetScrollFromPosY() to scroll to a given vertical position."); + + static int track_item = 50; + static bool enable_track = true; + static bool enable_extra_decorations = false; + static float scroll_to_off_px = 0.0f; + static float scroll_to_pos_px = 200.0f; + + ImGui::Checkbox("Decoration", &enable_extra_decorations); + + ImGui::Checkbox("Track", &enable_track); + ImGui::PushItemWidth(100); + ImGui::SameLine(140); enable_track |= ImGui::DragInt("##item", &track_item, 0.25f, 0, 99, "Item = %d"); + + bool scroll_to_off = ImGui::Button("Scroll Offset"); + ImGui::SameLine(140); scroll_to_off |= ImGui::DragFloat("##off", &scroll_to_off_px, 1.00f, 0, FLT_MAX, "+%.0f px"); + + bool scroll_to_pos = ImGui::Button("Scroll To Pos"); + ImGui::SameLine(140); scroll_to_pos |= ImGui::DragFloat("##pos", &scroll_to_pos_px, 1.00f, -10, FLT_MAX, "X/Y = %.0f px"); + ImGui::PopItemWidth(); + + if (scroll_to_off || scroll_to_pos) + enable_track = false; + + ImGuiStyle& style = ImGui::GetStyle(); + float child_w = (ImGui::GetContentRegionAvail().x - 4 * style.ItemSpacing.x) / 5; + if (child_w < 1.0f) + child_w = 1.0f; + ImGui::PushID("##VerticalScrolling"); + for (int i = 0; i < 5; i++) + { + if (i > 0) ImGui::SameLine(); + ImGui::BeginGroup(); + const char* names[] = { "Top", "25%", "Center", "75%", "Bottom" }; + 
ImGui::TextUnformatted(names[i]); + + const ImGuiWindowFlags child_flags = enable_extra_decorations ? ImGuiWindowFlags_MenuBar : 0; + const ImGuiID child_id = ImGui::GetID((void*)(intptr_t)i); + const bool child_is_visible = ImGui::BeginChild(child_id, ImVec2(child_w, 200.0f), true, child_flags); + if (ImGui::BeginMenuBar()) + { + ImGui::TextUnformatted("abc"); + ImGui::EndMenuBar(); + } + if (scroll_to_off) + ImGui::SetScrollY(scroll_to_off_px); + if (scroll_to_pos) + ImGui::SetScrollFromPosY(ImGui::GetCursorStartPos().y + scroll_to_pos_px, i * 0.25f); + if (child_is_visible) // Avoid calling SetScrollHereY when running with culled items + { + for (int item = 0; item < 100; item++) + { + if (enable_track && item == track_item) + { + ImGui::TextColored(ImVec4(1, 1, 0, 1), "Item %d", item); + ImGui::SetScrollHereY(i * 0.25f); // 0.0f:top, 0.5f:center, 1.0f:bottom + } + else + { + ImGui::Text("Item %d", item); + } + } + } + float scroll_y = ImGui::GetScrollY(); + float scroll_max_y = ImGui::GetScrollMaxY(); + ImGui::EndChild(); + ImGui::Text("%.0f/%.0f", scroll_y, scroll_max_y); + ImGui::EndGroup(); + } + ImGui::PopID(); + + // Horizontal scroll functions + ImGui::Spacing(); + HelpMarker( + "Use SetScrollHereX() or SetScrollFromPosX() to scroll to a given horizontal position.\n\n" + "Because the clipping rectangle of most window hides half worth of WindowPadding on the " + "left/right, using SetScrollFromPosX(+1) will usually result in clipped text whereas the " + "equivalent SetScrollFromPosY(+1) wouldn't."); + ImGui::PushID("##HorizontalScrolling"); + for (int i = 0; i < 5; i++) + { + float child_height = ImGui::GetTextLineHeight() + style.ScrollbarSize + style.WindowPadding.y * 2.0f; + ImGuiWindowFlags child_flags = ImGuiWindowFlags_HorizontalScrollbar | (enable_extra_decorations ? 
ImGuiWindowFlags_AlwaysVerticalScrollbar : 0); + ImGuiID child_id = ImGui::GetID((void*)(intptr_t)i); + bool child_is_visible = ImGui::BeginChild(child_id, ImVec2(-100, child_height), true, child_flags); + if (scroll_to_off) + ImGui::SetScrollX(scroll_to_off_px); + if (scroll_to_pos) + ImGui::SetScrollFromPosX(ImGui::GetCursorStartPos().x + scroll_to_pos_px, i * 0.25f); + if (child_is_visible) // Avoid calling SetScrollHereY when running with culled items + { + for (int item = 0; item < 100; item++) + { + if (enable_track && item == track_item) + { + ImGui::TextColored(ImVec4(1, 1, 0, 1), "Item %d", item); + ImGui::SetScrollHereX(i * 0.25f); // 0.0f:left, 0.5f:center, 1.0f:right + } + else + { + ImGui::Text("Item %d", item); + } + ImGui::SameLine(); + } + } + float scroll_x = ImGui::GetScrollX(); + float scroll_max_x = ImGui::GetScrollMaxX(); + ImGui::EndChild(); + ImGui::SameLine(); + const char* names[] = { "Left", "25%", "Center", "75%", "Right" }; + ImGui::Text("%s\n%.0f/%.0f", names[i], scroll_x, scroll_max_x); + ImGui::Spacing(); + } + ImGui::PopID(); + + // Miscellaneous Horizontal Scrolling Demo + HelpMarker( + "Horizontal scrolling for a window is enabled via the ImGuiWindowFlags_HorizontalScrollbar flag.\n\n" + "You may want to also explicitly specify content width by using SetNextWindowContentWidth() before Begin()."); + static int lines = 7; + ImGui::SliderInt("Lines", &lines, 1, 15); + ImGui::PushStyleVar(ImGuiStyleVar_FrameRounding, 3.0f); + ImGui::PushStyleVar(ImGuiStyleVar_FramePadding, ImVec2(2.0f, 1.0f)); + ImVec2 scrolling_child_size = ImVec2(0, ImGui::GetFrameHeightWithSpacing() * 7 + 30); + ImGui::BeginChild("scrolling", scrolling_child_size, true, ImGuiWindowFlags_HorizontalScrollbar); + for (int line = 0; line < lines; line++) + { + // Display random stuff. 
For the sake of this trivial demo we are using basic Button() + SameLine() + // If you want to create your own time line for a real application you may be better off manipulating + // the cursor position yourself, aka using SetCursorPos/SetCursorScreenPos to position the widgets + // yourself. You may also want to use the lower-level ImDrawList API. + int num_buttons = 10 + ((line & 1) ? line * 9 : line * 3); + for (int n = 0; n < num_buttons; n++) + { + if (n > 0) ImGui::SameLine(); + ImGui::PushID(n + line * 1000); + char num_buf[16]; + sprintf(num_buf, "%d", n); + const char* label = (!(n % 15)) ? "FizzBuzz" : (!(n % 3)) ? "Fizz" : (!(n % 5)) ? "Buzz" : num_buf; + float hue = n * 0.05f; + ImGui::PushStyleColor(ImGuiCol_Button, (ImVec4)ImColor::HSV(hue, 0.6f, 0.6f)); + ImGui::PushStyleColor(ImGuiCol_ButtonHovered, (ImVec4)ImColor::HSV(hue, 0.7f, 0.7f)); + ImGui::PushStyleColor(ImGuiCol_ButtonActive, (ImVec4)ImColor::HSV(hue, 0.8f, 0.8f)); + ImGui::Button(label, ImVec2(40.0f + sinf((float)(line + n)) * 20.0f, 0.0f)); + ImGui::PopStyleColor(3); + ImGui::PopID(); + } + } + float scroll_x = ImGui::GetScrollX(); + float scroll_max_x = ImGui::GetScrollMaxX(); + ImGui::EndChild(); + ImGui::PopStyleVar(2); + float scroll_x_delta = 0.0f; + ImGui::SmallButton("<<"); + if (ImGui::IsItemActive()) + scroll_x_delta = -ImGui::GetIO().DeltaTime * 1000.0f; + ImGui::SameLine(); + ImGui::Text("Scroll from code"); ImGui::SameLine(); + ImGui::SmallButton(">>"); + if (ImGui::IsItemActive()) + scroll_x_delta = +ImGui::GetIO().DeltaTime * 1000.0f; + ImGui::SameLine(); + ImGui::Text("%.0f/%.0f", scroll_x, scroll_max_x); + if (scroll_x_delta != 0.0f) + { + // Demonstrate a trick: you can use Begin to set yourself in the context of another window + // (here we are already out of your child window) + ImGui::BeginChild("scrolling"); + ImGui::SetScrollX(ImGui::GetScrollX() + scroll_x_delta); + ImGui::EndChild(); + } + ImGui::Spacing(); + + static bool show_horizontal_contents_size_demo_window 
= false; + ImGui::Checkbox("Show Horizontal contents size demo window", &show_horizontal_contents_size_demo_window); + + if (show_horizontal_contents_size_demo_window) + { + static bool show_h_scrollbar = true; + static bool show_button = true; + static bool show_tree_nodes = true; + static bool show_text_wrapped = false; + static bool show_columns = true; + static bool show_tab_bar = true; + static bool show_child = false; + static bool explicit_content_size = false; + static float contents_size_x = 300.0f; + if (explicit_content_size) + ImGui::SetNextWindowContentSize(ImVec2(contents_size_x, 0.0f)); + ImGui::Begin("Horizontal contents size demo window", &show_horizontal_contents_size_demo_window, show_h_scrollbar ? ImGuiWindowFlags_HorizontalScrollbar : 0); + ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(2, 0)); + ImGui::PushStyleVar(ImGuiStyleVar_FramePadding, ImVec2(2, 0)); + HelpMarker("Test of different widgets react and impact the work rectangle growing when horizontal scrolling is enabled.\n\nUse 'Metrics->Tools->Show windows rectangles' to visualize rectangles."); + ImGui::Checkbox("H-scrollbar", &show_h_scrollbar); + ImGui::Checkbox("Button", &show_button); // Will grow contents size (unless explicitly overwritten) + ImGui::Checkbox("Tree nodes", &show_tree_nodes); // Will grow contents size and display highlight over full width + ImGui::Checkbox("Text wrapped", &show_text_wrapped);// Will grow and use contents size + ImGui::Checkbox("Columns", &show_columns); // Will use contents size + ImGui::Checkbox("Tab bar", &show_tab_bar); // Will use contents size + ImGui::Checkbox("Child", &show_child); // Will grow and use contents size + ImGui::Checkbox("Explicit content size", &explicit_content_size); + ImGui::Text("Scroll %.1f/%.1f %.1f/%.1f", ImGui::GetScrollX(), ImGui::GetScrollMaxX(), ImGui::GetScrollY(), ImGui::GetScrollMaxY()); + if (explicit_content_size) + { + ImGui::SameLine(); + ImGui::SetNextItemWidth(100); + ImGui::DragFloat("##csx", 
&contents_size_x); + ImVec2 p = ImGui::GetCursorScreenPos(); + ImGui::GetWindowDrawList()->AddRectFilled(p, ImVec2(p.x + 10, p.y + 10), IM_COL32_WHITE); + ImGui::GetWindowDrawList()->AddRectFilled(ImVec2(p.x + contents_size_x - 10, p.y), ImVec2(p.x + contents_size_x, p.y + 10), IM_COL32_WHITE); + ImGui::Dummy(ImVec2(0, 10)); + } + ImGui::PopStyleVar(2); + ImGui::Separator(); + if (show_button) + { + ImGui::Button("this is a 300-wide button", ImVec2(300, 0)); + } + if (show_tree_nodes) + { + bool open = true; + if (ImGui::TreeNode("this is a tree node")) + { + if (ImGui::TreeNode("another one of those tree node...")) + { + ImGui::Text("Some tree contents"); + ImGui::TreePop(); + } + ImGui::TreePop(); + } + ImGui::CollapsingHeader("CollapsingHeader", &open); + } + if (show_text_wrapped) + { + ImGui::TextWrapped("This text should automatically wrap on the edge of the work rectangle."); + } + if (show_columns) + { + ImGui::Text("Tables:"); + if (ImGui::BeginTable("table", 4, ImGuiTableFlags_Borders)) + { + for (int n = 0; n < 4; n++) + { + ImGui::TableNextColumn(); + ImGui::Text("Width %.2f", ImGui::GetContentRegionAvail().x); + } + ImGui::EndTable(); + } + ImGui::Text("Columns:"); + ImGui::Columns(4); + for (int n = 0; n < 4; n++) + { + ImGui::Text("Width %.2f", ImGui::GetColumnWidth()); + ImGui::NextColumn(); + } + ImGui::Columns(1); + } + if (show_tab_bar && ImGui::BeginTabBar("Hello")) + { + if (ImGui::BeginTabItem("OneOneOne")) { ImGui::EndTabItem(); } + if (ImGui::BeginTabItem("TwoTwoTwo")) { ImGui::EndTabItem(); } + if (ImGui::BeginTabItem("ThreeThreeThree")) { ImGui::EndTabItem(); } + if (ImGui::BeginTabItem("FourFourFour")) { ImGui::EndTabItem(); } + ImGui::EndTabBar(); + } + if (show_child) + { + ImGui::BeginChild("child", ImVec2(0, 0), true); + ImGui::EndChild(); + } + ImGui::End(); + } + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Clipping")) + { + static ImVec2 size(100.0f, 100.0f); + static ImVec2 offset(30.0f, 30.0f); + ImGui::DragFloat2("size", 
(float*)&size, 0.5f, 1.0f, 200.0f, "%.0f"); + ImGui::TextWrapped("(Click and drag to scroll)"); + + for (int n = 0; n < 3; n++) + { + if (n > 0) + ImGui::SameLine(); + ImGui::PushID(n); + ImGui::BeginGroup(); // Lock X position + + ImGui::InvisibleButton("##empty", size); + if (ImGui::IsItemActive() && ImGui::IsMouseDragging(ImGuiMouseButton_Left)) + { + offset.x += ImGui::GetIO().MouseDelta.x; + offset.y += ImGui::GetIO().MouseDelta.y; + } + const ImVec2 p0 = ImGui::GetItemRectMin(); + const ImVec2 p1 = ImGui::GetItemRectMax(); + const char* text_str = "Line 1 hello\nLine 2 clip me!"; + const ImVec2 text_pos = ImVec2(p0.x + offset.x, p0.y + offset.y); + ImDrawList* draw_list = ImGui::GetWindowDrawList(); + + switch (n) + { + case 0: + HelpMarker( + "Using ImGui::PushClipRect():\n" + "Will alter ImGui hit-testing logic + ImDrawList rendering.\n" + "(use this if you want your clipping rectangle to affect interactions)"); + ImGui::PushClipRect(p0, p1, true); + draw_list->AddRectFilled(p0, p1, IM_COL32(90, 90, 120, 255)); + draw_list->AddText(text_pos, IM_COL32_WHITE, text_str); + ImGui::PopClipRect(); + break; + case 1: + HelpMarker( + "Using ImDrawList::PushClipRect():\n" + "Will alter ImDrawList rendering only.\n" + "(use this as a shortcut if you are only using ImDrawList calls)"); + draw_list->PushClipRect(p0, p1, true); + draw_list->AddRectFilled(p0, p1, IM_COL32(90, 90, 120, 255)); + draw_list->AddText(text_pos, IM_COL32_WHITE, text_str); + draw_list->PopClipRect(); + break; + case 2: + HelpMarker( + "Using ImDrawList::AddText() with a fine ClipRect:\n" + "Will alter only this specific ImDrawList::AddText() rendering.\n" + "(this is often used internally to avoid altering the clipping rectangle and minimize draw calls)"); + ImVec4 clip_rect(p0.x, p0.y, p1.x, p1.y); // AddText() takes a ImVec4* here so let's convert. 
+ draw_list->AddRectFilled(p0, p1, IM_COL32(90, 90, 120, 255)); + draw_list->AddText(ImGui::GetFont(), ImGui::GetFontSize(), text_pos, IM_COL32_WHITE, text_str, NULL, 0.0f, &clip_rect); + break; + } + ImGui::EndGroup(); + ImGui::PopID(); + } + + ImGui::TreePop(); + } +} + +static void ShowDemoWindowPopups() +{ + if (!ImGui::CollapsingHeader("Popups & Modal windows")) + return; + + // The properties of popups windows are: + // - They block normal mouse hovering detection outside them. (*) + // - Unless modal, they can be closed by clicking anywhere outside them, or by pressing ESCAPE. + // - Their visibility state (~bool) is held internally by Dear ImGui instead of being held by the programmer as + // we are used to with regular Begin() calls. User can manipulate the visibility state by calling OpenPopup(). + // (*) One can use IsItemHovered(ImGuiHoveredFlags_AllowWhenBlockedByPopup) to bypass it and detect hovering even + // when normally blocked by a popup. + // Those three properties are connected. The library needs to hold their visibility state BECAUSE it can close + // popups at any time. + + // Typical use for regular windows: + // bool my_tool_is_active = false; if (ImGui::Button("Open")) my_tool_is_active = true; [...] if (my_tool_is_active) Begin("My Tool", &my_tool_is_active) { [...] } End(); + // Typical use for popups: + // if (ImGui::Button("Open")) ImGui::OpenPopup("MyPopup"); if (ImGui::BeginPopup("MyPopup") { [...] EndPopup(); } + + // With popups we have to go through a library call (here OpenPopup) to manipulate the visibility state. + // This may be a bit confusing at first but it should quickly make sense. Follow on the examples below. + + if (ImGui::TreeNode("Popups")) + { + ImGui::TextWrapped( + "When a popup is active, it inhibits interacting with windows that are behind the popup. 
" + "Clicking outside the popup closes it."); + + static int selected_fish = -1; + const char* names[] = { "Bream", "Haddock", "Mackerel", "Pollock", "Tilefish" }; + static bool toggles[] = { true, false, false, false, false }; + + // Simple selection popup (if you want to show the current selection inside the Button itself, + // you may want to build a string using the "###" operator to preserve a constant ID with a variable label) + if (ImGui::Button("Select..")) + ImGui::OpenPopup("my_select_popup"); + ImGui::SameLine(); + ImGui::TextUnformatted(selected_fish == -1 ? "" : names[selected_fish]); + if (ImGui::BeginPopup("my_select_popup")) + { + ImGui::Text("Aquarium"); + ImGui::Separator(); + for (int i = 0; i < IM_ARRAYSIZE(names); i++) + if (ImGui::Selectable(names[i])) + selected_fish = i; + ImGui::EndPopup(); + } + + // Showing a menu with toggles + if (ImGui::Button("Toggle..")) + ImGui::OpenPopup("my_toggle_popup"); + if (ImGui::BeginPopup("my_toggle_popup")) + { + for (int i = 0; i < IM_ARRAYSIZE(names); i++) + ImGui::MenuItem(names[i], "", &toggles[i]); + if (ImGui::BeginMenu("Sub-menu")) + { + ImGui::MenuItem("Click me"); + ImGui::EndMenu(); + } + + ImGui::Separator(); + ImGui::Text("Tooltip here"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("I am a tooltip over a popup"); + + if (ImGui::Button("Stacked Popup")) + ImGui::OpenPopup("another popup"); + if (ImGui::BeginPopup("another popup")) + { + for (int i = 0; i < IM_ARRAYSIZE(names); i++) + ImGui::MenuItem(names[i], "", &toggles[i]); + if (ImGui::BeginMenu("Sub-menu")) + { + ImGui::MenuItem("Click me"); + if (ImGui::Button("Stacked Popup")) + ImGui::OpenPopup("another popup"); + if (ImGui::BeginPopup("another popup")) + { + ImGui::Text("I am the last one here."); + ImGui::EndPopup(); + } + ImGui::EndMenu(); + } + ImGui::EndPopup(); + } + ImGui::EndPopup(); + } + + // Call the more complete ShowExampleMenuFile which we use in various places of this demo + if (ImGui::Button("File Menu..")) + 
ImGui::OpenPopup("my_file_popup"); + if (ImGui::BeginPopup("my_file_popup")) + { + ShowExampleMenuFile(); + ImGui::EndPopup(); + } + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Context menus")) + { + // BeginPopupContextItem() is a helper to provide common/simple popup behavior of essentially doing: + // if (IsItemHovered() && IsMouseReleased(ImGuiMouseButton_Right)) + // OpenPopup(id); + // return BeginPopup(id); + // For more advanced uses you may want to replicate and customize this code. + // See details in BeginPopupContextItem(). + static float value = 0.5f; + ImGui::Text("Value = %.3f (<-- right-click here)", value); + if (ImGui::BeginPopupContextItem("item context menu")) + { + if (ImGui::Selectable("Set to zero")) value = 0.0f; + if (ImGui::Selectable("Set to PI")) value = 3.1415f; + ImGui::SetNextItemWidth(-1); + ImGui::DragFloat("##Value", &value, 0.1f, 0.0f, 0.0f); + ImGui::EndPopup(); + } + + // We can also use OpenPopupOnItemClick() which is the same as BeginPopupContextItem() but without the + // Begin() call. So here we will make it that clicking on the text field with the right mouse button (1) + // will toggle the visibility of the popup above. + ImGui::Text("(You can also right-click me to open the same popup as above.)"); + ImGui::OpenPopupOnItemClick("item context menu", 1); + + // When used after an item that has an ID (e.g.Button), we can skip providing an ID to BeginPopupContextItem(). + // BeginPopupContextItem() will use the last item ID as the popup ID. + // In addition here, we want to include your editable label inside the button label. 
+ // We use the ### operator to override the ID (read FAQ about ID for details) + static char name[32] = "Label1"; + char buf[64]; + sprintf(buf, "Button: %s###Button", name); // ### operator override ID ignoring the preceding label + ImGui::Button(buf); + if (ImGui::BeginPopupContextItem()) + { + ImGui::Text("Edit name:"); + ImGui::InputText("##edit", name, IM_ARRAYSIZE(name)); + if (ImGui::Button("Close")) + ImGui::CloseCurrentPopup(); + ImGui::EndPopup(); + } + ImGui::SameLine(); ImGui::Text("(<-- right-click here)"); + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Modals")) + { + ImGui::TextWrapped("Modal windows are like popups but the user cannot close them by clicking outside."); + + if (ImGui::Button("Delete..")) + ImGui::OpenPopup("Delete?"); + + // Always center this window when appearing + ImVec2 center(ImGui::GetIO().DisplaySize.x * 0.5f, ImGui::GetIO().DisplaySize.y * 0.5f); + ImGui::SetNextWindowPos(center, ImGuiCond_Appearing, ImVec2(0.5f, 0.5f)); + + if (ImGui::BeginPopupModal("Delete?", NULL, ImGuiWindowFlags_AlwaysAutoResize)) + { + ImGui::Text("All those beautiful files will be deleted.\nThis operation cannot be undone!\n\n"); + ImGui::Separator(); + + //static int unused_i = 0; + //ImGui::Combo("Combo", &unused_i, "Delete\0Delete harder\0"); + + static bool dont_ask_me_next_time = false; + ImGui::PushStyleVar(ImGuiStyleVar_FramePadding, ImVec2(0, 0)); + ImGui::Checkbox("Don't ask me next time", &dont_ask_me_next_time); + ImGui::PopStyleVar(); + + if (ImGui::Button("OK", ImVec2(120, 0))) { ImGui::CloseCurrentPopup(); } + ImGui::SetItemDefaultFocus(); + ImGui::SameLine(); + if (ImGui::Button("Cancel", ImVec2(120, 0))) { ImGui::CloseCurrentPopup(); } + ImGui::EndPopup(); + } + + if (ImGui::Button("Stacked modals..")) + ImGui::OpenPopup("Stacked 1"); + if (ImGui::BeginPopupModal("Stacked 1", NULL, ImGuiWindowFlags_MenuBar)) + { + if (ImGui::BeginMenuBar()) + { + if (ImGui::BeginMenu("File")) + { + if (ImGui::MenuItem("Some menu item")) {} + 
+ ImGui::EndMenu(); + } + ImGui::EndMenuBar(); + } + ImGui::Text("Hello from Stacked The First\nUsing style.Colors[ImGuiCol_ModalWindowDimBg] behind it."); + + // Testing behavior of widgets stacking their own regular popups over the modal. + static int item = 1; + static float color[4] = { 0.4f, 0.7f, 0.0f, 0.5f }; + ImGui::Combo("Combo", &item, "aaaa\0bbbb\0cccc\0dddd\0eeee\0\0"); + ImGui::ColorEdit4("color", color); + + if (ImGui::Button("Add another modal..")) + ImGui::OpenPopup("Stacked 2"); + + // Also demonstrate passing a bool* to BeginPopupModal(), this will create a regular close button which + // will close the popup. Note that the visibility state of popups is owned by imgui, so the input value + // of the bool actually doesn't matter here. + bool unused_open = true; + if (ImGui::BeginPopupModal("Stacked 2", &unused_open)) + { + ImGui::Text("Hello from Stacked The Second!"); + if (ImGui::Button("Close")) + ImGui::CloseCurrentPopup(); + ImGui::EndPopup(); + } + + if (ImGui::Button("Close")) + ImGui::CloseCurrentPopup(); + ImGui::EndPopup(); + } + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Menus inside a regular window")) + { + ImGui::TextWrapped("Below we are testing adding menu items to a regular window. It's rather unusual but should work!"); + ImGui::Separator(); + + // Note: As a quirk in this very specific example, we want to differentiate the parent of this menu from the + // parent of the various popup menus above. To do so we are enclosing the items in a PushID()/PopID() block + // to make them two different menusets. If we don't, opening any popup above and hovering our menu here would + // open it. This is because once a menu is active, we allow switching to a sibling menu by just hovering on it, + // which is the desired behavior for regular menus.
+ ImGui::PushID("foo"); + ImGui::MenuItem("Menu item", "CTRL+M"); + if (ImGui::BeginMenu("Menu inside a regular window")) + { + ShowExampleMenuFile(); + ImGui::EndMenu(); + } + ImGui::PopID(); + ImGui::Separator(); + ImGui::TreePop(); + } +} + +// Dummy data structure that we use for the Table demo. +// (pre-C++11 doesn't allow us to instantiate ImVector template if this structure is defined inside the demo function) +namespace +{ +// We are passing our own identifier to TableSetupColumn() to facilitate identifying columns in the sorting code. +// This identifier will be passed down into ImGuiTableColumnSortSpecs::ColumnUserID. +// But it is possible to omit the user id parameter of TableSetupColumn() and just use the column index instead! (ImGuiTableColumnSortSpecs::ColumnIndex) +// If you don't use sorting, you will generally never care about giving a column an ID! +enum MyItemColumnID +{ + MyItemColumnID_ID, + MyItemColumnID_Name, + MyItemColumnID_Action, + MyItemColumnID_Quantity, + MyItemColumnID_Description +}; + +struct MyItem +{ + int ID; + const char* Name; + int Quantity; + + // We have a problem which is affecting _only this demo_ and should not affect your code: + // As we don't rely on std:: or other third-party library to compile dear imgui, we only have reliable access to qsort(), + // however qsort doesn't allow passing user data to comparing function. + // As a workaround, we are storing the sort specs in a static/global for the comparing function to access. + // In your own use case you would probably pass the sort specs to your sorting/comparing functions directly and not use a global. + // We could technically call ImGui::TableGetSortSpecs() in CompareWithSortSpecs(), but considering that this function is called + // very often by the sorting algorithm it would be a little wasteful.
+ static const ImGuiTableSortSpecs* s_current_sort_specs; + + // Compare function to be used by qsort() + static int IMGUI_CDECL CompareWithSortSpecs(const void* lhs, const void* rhs) + { + const MyItem* a = (const MyItem*)lhs; + const MyItem* b = (const MyItem*)rhs; + for (int n = 0; n < s_current_sort_specs->SpecsCount; n++) + { + // Here we identify columns using the ColumnUserID value that we ourselves passed to TableSetupColumn() + // We could also choose to identify columns based on their index (sort_spec->ColumnIndex), which is simpler! + const ImGuiTableColumnSortSpecs* sort_spec = &s_current_sort_specs->Specs[n]; + int delta = 0; + switch (sort_spec->ColumnUserID) + { + case MyItemColumnID_ID: delta = (a->ID - b->ID); break; + case MyItemColumnID_Name: delta = (strcmp(a->Name, b->Name)); break; + case MyItemColumnID_Quantity: delta = (a->Quantity - b->Quantity); break; + case MyItemColumnID_Description: delta = (strcmp(a->Name, b->Name)); break; + default: IM_ASSERT(0); break; + } + if (delta > 0) + return (sort_spec->SortDirection == ImGuiSortDirection_Ascending) ? +1 : -1; + if (delta < 0) + return (sort_spec->SortDirection == ImGuiSortDirection_Ascending) ? -1 : +1; + } + + // qsort() is unstable so always return a way to differentiate items. + // Your own compare function may want to avoid fallback on implicit sort specs e.g. a Name compare if it wasn't already part of the sort specs.
+ return (a->ID - b->ID); + } +}; +const ImGuiTableSortSpecs* MyItem::s_current_sort_specs = NULL; +} + +// Make the UI compact because there are so many fields +static void PushStyleCompact() +{ + ImGuiStyle& style = ImGui::GetStyle(); + ImGui::PushStyleVar(ImGuiStyleVar_FramePadding, ImVec2(style.FramePadding.x, (float)(int)(style.FramePadding.y * 0.60f))); + ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(style.ItemSpacing.x, (float)(int)(style.ItemSpacing.y * 0.60f))); +} + +static void PopStyleCompact() +{ + ImGui::PopStyleVar(2); +} + +// Show a combo box with a choice of sizing policies +static void EditTableSizingFlags(ImGuiTableFlags* p_flags) +{ + struct EnumDesc { ImGuiTableFlags Value; const char* Name; const char* Tooltip; }; + static const EnumDesc policies[] = + { + { ImGuiTableFlags_None, "Default", "Use default sizing policy:\n- ImGuiTableFlags_SizingFixedFit if ScrollX is on or if host window has ImGuiWindowFlags_AlwaysAutoResize.\n- ImGuiTableFlags_SizingStretchSame otherwise." }, + { ImGuiTableFlags_SizingFixedFit, "ImGuiTableFlags_SizingFixedFit", "Columns default to _WidthFixed (if resizable) or _WidthAuto (if not resizable), matching contents width." }, + { ImGuiTableFlags_SizingFixedSame, "ImGuiTableFlags_SizingFixedSame", "Columns are all the same width, matching the maximum contents width.\nImplicitly disable ImGuiTableFlags_Resizable and enable ImGuiTableFlags_NoKeepColumnsVisible." }, + { ImGuiTableFlags_SizingStretchProp, "ImGuiTableFlags_SizingStretchProp", "Columns default to _WidthStretch with weights proportional to their widths." }, + { ImGuiTableFlags_SizingStretchSame, "ImGuiTableFlags_SizingStretchSame", "Columns default to _WidthStretch with same weights." } + }; + int idx; + for (idx = 0; idx < IM_ARRAYSIZE(policies); idx++) + if (policies[idx].Value == (*p_flags & ImGuiTableFlags_SizingMask_)) + break; + const char* preview_text = (idx < IM_ARRAYSIZE(policies)) ? policies[idx].Name + (idx > 0 ? 
strlen("ImGuiTableFlags") : 0) : ""; + if (ImGui::BeginCombo("Sizing Policy", preview_text)) + { + for (int n = 0; n < IM_ARRAYSIZE(policies); n++) + if (ImGui::Selectable(policies[n].Name, idx == n)) + *p_flags = (*p_flags & ~ImGuiTableFlags_SizingMask_) | policies[n].Value; + ImGui::EndCombo(); + } + ImGui::SameLine(); + ImGui::TextDisabled("(?)"); + if (ImGui::IsItemHovered()) + { + ImGui::BeginTooltip(); + ImGui::PushTextWrapPos(ImGui::GetFontSize() * 50.0f); + for (int m = 0; m < IM_ARRAYSIZE(policies); m++) + { + ImGui::Separator(); + ImGui::Text("%s:", policies[m].Name); + ImGui::Separator(); + ImGui::SetCursorPosX(ImGui::GetCursorPosX() + ImGui::GetStyle().IndentSpacing * 0.5f); + ImGui::TextUnformatted(policies[m].Tooltip); + } + ImGui::PopTextWrapPos(); + ImGui::EndTooltip(); + } +} + +static void EditTableColumnsFlags(ImGuiTableColumnFlags* p_flags) +{ + ImGui::CheckboxFlags("_DefaultHide", p_flags, ImGuiTableColumnFlags_DefaultHide); + ImGui::CheckboxFlags("_DefaultSort", p_flags, ImGuiTableColumnFlags_DefaultSort); + if (ImGui::CheckboxFlags("_WidthStretch", p_flags, ImGuiTableColumnFlags_WidthStretch)) + *p_flags &= ~(ImGuiTableColumnFlags_WidthMask_ ^ ImGuiTableColumnFlags_WidthStretch); + if (ImGui::CheckboxFlags("_WidthFixed", p_flags, ImGuiTableColumnFlags_WidthFixed)) + *p_flags &= ~(ImGuiTableColumnFlags_WidthMask_ ^ ImGuiTableColumnFlags_WidthFixed); + ImGui::CheckboxFlags("_NoResize", p_flags, ImGuiTableColumnFlags_NoResize); + ImGui::CheckboxFlags("_NoReorder", p_flags, ImGuiTableColumnFlags_NoReorder); + ImGui::CheckboxFlags("_NoHide", p_flags, ImGuiTableColumnFlags_NoHide); + ImGui::CheckboxFlags("_NoClip", p_flags, ImGuiTableColumnFlags_NoClip); + ImGui::CheckboxFlags("_NoSort", p_flags, ImGuiTableColumnFlags_NoSort); + ImGui::CheckboxFlags("_NoSortAscending", p_flags, ImGuiTableColumnFlags_NoSortAscending); + ImGui::CheckboxFlags("_NoSortDescending", p_flags, ImGuiTableColumnFlags_NoSortDescending); + 
ImGui::CheckboxFlags("_NoHeaderWidth", p_flags, ImGuiTableColumnFlags_NoHeaderWidth); + ImGui::CheckboxFlags("_PreferSortAscending", p_flags, ImGuiTableColumnFlags_PreferSortAscending); + ImGui::CheckboxFlags("_PreferSortDescending", p_flags, ImGuiTableColumnFlags_PreferSortDescending); + ImGui::CheckboxFlags("_IndentEnable", p_flags, ImGuiTableColumnFlags_IndentEnable); ImGui::SameLine(); HelpMarker("Default for column 0"); + ImGui::CheckboxFlags("_IndentDisable", p_flags, ImGuiTableColumnFlags_IndentDisable); ImGui::SameLine(); HelpMarker("Default for column >0"); +} + +static void ShowTableColumnsStatusFlags(ImGuiTableColumnFlags flags) +{ + ImGui::CheckboxFlags("_IsEnabled", &flags, ImGuiTableColumnFlags_IsEnabled); + ImGui::CheckboxFlags("_IsVisible", &flags, ImGuiTableColumnFlags_IsVisible); + ImGui::CheckboxFlags("_IsSorted", &flags, ImGuiTableColumnFlags_IsSorted); + ImGui::CheckboxFlags("_IsHovered", &flags, ImGuiTableColumnFlags_IsHovered); +} + +static void ShowDemoWindowTables() +{ + //ImGui::SetNextItemOpen(true, ImGuiCond_Once); + if (!ImGui::CollapsingHeader("Tables & Columns")) + return; + + // Using those as a base value to create width/height that are factor of the size of our font + const float TEXT_BASE_WIDTH = ImGui::CalcTextSize("A").x; + const float TEXT_BASE_HEIGHT = ImGui::GetTextLineHeightWithSpacing(); + + ImGui::PushID("Tables"); + + int open_action = -1; + if (ImGui::Button("Open all")) + open_action = 1; + ImGui::SameLine(); + if (ImGui::Button("Close all")) + open_action = 0; + ImGui::SameLine(); + + // Options + static bool disable_indent = false; + ImGui::Checkbox("Disable tree indentation", &disable_indent); + ImGui::SameLine(); + HelpMarker("Disable the indenting of tree nodes so demo tables can use the full window width."); + ImGui::Separator(); + if (disable_indent) + ImGui::PushStyleVar(ImGuiStyleVar_IndentSpacing, 0.0f); + + // About Styling of tables + // Most settings are configured on a per-table basis via the flags passed 
to BeginTable() and TableSetupColumns APIs. + // There are however a few settings that a shared and part of the ImGuiStyle structure: + // style.CellPadding // Padding within each cell + // style.Colors[ImGuiCol_TableHeaderBg] // Table header background + // style.Colors[ImGuiCol_TableBorderStrong] // Table outer and header borders + // style.Colors[ImGuiCol_TableBorderLight] // Table inner borders + // style.Colors[ImGuiCol_TableRowBg] // Table row background when ImGuiTableFlags_RowBg is enabled (even rows) + // style.Colors[ImGuiCol_TableRowBgAlt] // Table row background when ImGuiTableFlags_RowBg is enabled (odds rows) + + // Demos + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Basic")) + { + // Here we will showcase three different ways to output a table. + // They are very simple variations of a same thing! + + // [Method 1] Using TableNextRow() to create a new row, and TableSetColumnIndex() to select the column. + // In many situations, this is the most flexible and easy to use pattern. + HelpMarker("Using TableNextRow() + calling TableSetColumnIndex() _before_ each cell, in a loop."); + if (ImGui::BeginTable("table1", 3)) + { + for (int row = 0; row < 4; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 3; column++) + { + ImGui::TableSetColumnIndex(column); + ImGui::Text("Row %d Column %d", row, column); + } + } + ImGui::EndTable(); + } + + // [Method 2] Using TableNextColumn() called multiple times, instead of using a for loop + TableSetColumnIndex(). + // This is generally more convenient when you have code manually submitting the contents of each columns. 
+ HelpMarker("Using TableNextRow() + calling TableNextColumn() _before_ each cell, manually."); + if (ImGui::BeginTable("table2", 3)) + { + for (int row = 0; row < 4; row++) + { + ImGui::TableNextRow(); + ImGui::TableNextColumn(); + ImGui::Text("Row %d", row); + ImGui::TableNextColumn(); + ImGui::Text("Some contents"); + ImGui::TableNextColumn(); + ImGui::Text("123.456"); + } + ImGui::EndTable(); + } + + // [Method 3] We call TableNextColumn() _before_ each cell. We never call TableNextRow(), + // as TableNextColumn() will automatically wrap around and create new roes as needed. + // This is generally more convenient when your cells all contains the same type of data. + HelpMarker( + "Only using TableNextColumn(), which tends to be convenient for tables where every cells contains the same type of contents.\n" + "This is also more similar to the old NextColumn() function of the Columns API, and provided to facilitate the Columns->Tables API transition."); + if (ImGui::BeginTable("table3", 3)) + { + for (int item = 0; item < 14; item++) + { + ImGui::TableNextColumn(); + ImGui::Text("Item %d", item); + } + ImGui::EndTable(); + } + + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Borders, background")) + { + // Expose a few Borders related flags interactively + enum ContentsType { CT_Text, CT_FillButton }; + static ImGuiTableFlags flags = ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg; + static bool display_headers = false; + static int contents_type = CT_Text; + + PushStyleCompact(); + ImGui::CheckboxFlags("ImGuiTableFlags_RowBg", &flags, ImGuiTableFlags_RowBg); + ImGui::CheckboxFlags("ImGuiTableFlags_Borders", &flags, ImGuiTableFlags_Borders); + ImGui::SameLine(); HelpMarker("ImGuiTableFlags_Borders\n = ImGuiTableFlags_BordersInnerV\n | ImGuiTableFlags_BordersOuterV\n | ImGuiTableFlags_BordersInnerV\n | ImGuiTableFlags_BordersOuterH"); + ImGui::Indent(); + + 
ImGui::CheckboxFlags("ImGuiTableFlags_BordersH", &flags, ImGuiTableFlags_BordersH); + ImGui::Indent(); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersOuterH", &flags, ImGuiTableFlags_BordersOuterH); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersInnerH", &flags, ImGuiTableFlags_BordersInnerH); + ImGui::Unindent(); + + ImGui::CheckboxFlags("ImGuiTableFlags_BordersV", &flags, ImGuiTableFlags_BordersV); + ImGui::Indent(); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersOuterV", &flags, ImGuiTableFlags_BordersOuterV); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersInnerV", &flags, ImGuiTableFlags_BordersInnerV); + ImGui::Unindent(); + + ImGui::CheckboxFlags("ImGuiTableFlags_BordersOuter", &flags, ImGuiTableFlags_BordersOuter); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersInner", &flags, ImGuiTableFlags_BordersInner); + ImGui::Unindent(); + + ImGui::AlignTextToFramePadding(); ImGui::Text("Cell contents:"); + ImGui::SameLine(); ImGui::RadioButton("Text", &contents_type, CT_Text); + ImGui::SameLine(); ImGui::RadioButton("FillButton", &contents_type, CT_FillButton); + ImGui::Checkbox("Display headers", &display_headers); + ImGui::CheckboxFlags("ImGuiTableFlags_NoBordersInBody", &flags, ImGuiTableFlags_NoBordersInBody); ImGui::SameLine(); HelpMarker("Disable vertical borders in columns Body (borders will always appears in Headers"); + PopStyleCompact(); + + if (ImGui::BeginTable("table1", 3, flags)) + { + // Display headers so we can inspect their interaction with borders. + // (Headers are not the main purpose of this section of the demo, so we are not elaborating on them too much. 
See other sections for details) + if (display_headers) + { + ImGui::TableSetupColumn("One"); + ImGui::TableSetupColumn("Two"); + ImGui::TableSetupColumn("Three"); + ImGui::TableHeadersRow(); + } + + for (int row = 0; row < 5; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 3; column++) + { + ImGui::TableSetColumnIndex(column); + char buf[32]; + sprintf(buf, "Hello %d,%d", column, row); + if (contents_type == CT_Text) + ImGui::TextUnformatted(buf); + else if (contents_type) + ImGui::Button(buf, ImVec2(-FLT_MIN, 0.0f)); + } + } + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Resizable, stretch")) + { + // By default, if we don't enable ScrollX the sizing policy for each columns is "Stretch" + // Each columns maintain a sizing weight, and they will occupy all available width. + static ImGuiTableFlags flags = ImGuiTableFlags_SizingStretchSame | ImGuiTableFlags_Resizable | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_ContextMenuInBody; + PushStyleCompact(); + ImGui::CheckboxFlags("ImGuiTableFlags_Resizable", &flags, ImGuiTableFlags_Resizable); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersV", &flags, ImGuiTableFlags_BordersV); + ImGui::SameLine(); HelpMarker("Using the _Resizable flag automatically enables the _BordersInnerV flag as well, this is why the resize borders are still showing when unchecking this."); + PopStyleCompact(); + + if (ImGui::BeginTable("table1", 3, flags)) + { + for (int row = 0; row < 5; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 3; column++) + { + ImGui::TableSetColumnIndex(column); + ImGui::Text("Hello %d,%d", column, row); + } + } + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Resizable, fixed")) + { + // Here we use ImGuiTableFlags_SizingFixedFit (even though _ScrollX is not set) + 
// So columns will adopt the "Fixed" policy and will maintain a fixed width regardless of the whole available width (unless table is small) + // If there is not enough available width to fit all columns, they will however be resized down. + // FIXME-TABLE: Providing a stretch-on-init would make sense especially for tables which don't have saved settings + HelpMarker( + "Using _Resizable + _SizingFixedFit flags.\n" + "Fixed-width columns generally makes more sense if you want to use horizontal scrolling.\n\n" + "Double-click a column border to auto-fit the column to its contents."); + PushStyleCompact(); + static ImGuiTableFlags flags = ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_Resizable | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_ContextMenuInBody; + ImGui::CheckboxFlags("ImGuiTableFlags_NoHostExtendX", &flags, ImGuiTableFlags_NoHostExtendX); + PopStyleCompact(); + + if (ImGui::BeginTable("table1", 3, flags)) + { + for (int row = 0; row < 5; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 3; column++) + { + ImGui::TableSetColumnIndex(column); + ImGui::Text("Hello %d,%d", column, row); + } + } + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Resizable, mixed")) + { + HelpMarker( + "Using TableSetupColumn() to alter resizing policy on a per-column basis.\n\n" + "When combining Fixed and Stretch columns, generally you only want one, maybe two trailing columns to use _WidthStretch."); + static ImGuiTableFlags flags = ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_RowBg | ImGuiTableFlags_Borders | ImGuiTableFlags_Resizable | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable; + + if (ImGui::BeginTable("table1", 3, flags)) + { + ImGui::TableSetupColumn("AAA", ImGuiTableColumnFlags_WidthFixed); + ImGui::TableSetupColumn("BBB", ImGuiTableColumnFlags_WidthFixed); + ImGui::TableSetupColumn("CCC", 
ImGuiTableColumnFlags_WidthStretch); + ImGui::TableHeadersRow(); + for (int row = 0; row < 5; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 3; column++) + { + ImGui::TableSetColumnIndex(column); + ImGui::Text("%s %d,%d", (column == 2) ? "Stretch" : "Fixed", column, row); + } + } + ImGui::EndTable(); + } + if (ImGui::BeginTable("table2", 6, flags)) + { + ImGui::TableSetupColumn("AAA", ImGuiTableColumnFlags_WidthFixed); + ImGui::TableSetupColumn("BBB", ImGuiTableColumnFlags_WidthFixed); + ImGui::TableSetupColumn("CCC", ImGuiTableColumnFlags_WidthFixed | ImGuiTableColumnFlags_DefaultHide); + ImGui::TableSetupColumn("DDD", ImGuiTableColumnFlags_WidthStretch); + ImGui::TableSetupColumn("EEE", ImGuiTableColumnFlags_WidthStretch); + ImGui::TableSetupColumn("FFF", ImGuiTableColumnFlags_WidthStretch | ImGuiTableColumnFlags_DefaultHide); + ImGui::TableHeadersRow(); + for (int row = 0; row < 5; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 6; column++) + { + ImGui::TableSetColumnIndex(column); + ImGui::Text("%s %d,%d", (column >= 3) ? 
"Stretch" : "Fixed", column, row); + } + } + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Reorderable, hideable, with headers")) + { + HelpMarker( + "Click and drag column headers to reorder columns.\n\n" + "Right-click on a header to open a context menu."); + static ImGuiTableFlags flags = ImGuiTableFlags_Resizable | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV; + PushStyleCompact(); + ImGui::CheckboxFlags("ImGuiTableFlags_Resizable", &flags, ImGuiTableFlags_Resizable); + ImGui::CheckboxFlags("ImGuiTableFlags_Reorderable", &flags, ImGuiTableFlags_Reorderable); + ImGui::CheckboxFlags("ImGuiTableFlags_Hideable", &flags, ImGuiTableFlags_Hideable); + ImGui::CheckboxFlags("ImGuiTableFlags_NoBordersInBody", &flags, ImGuiTableFlags_NoBordersInBody); + ImGui::CheckboxFlags("ImGuiTableFlags_NoBordersInBodyUntilResize", &flags, ImGuiTableFlags_NoBordersInBodyUntilResize); ImGui::SameLine(); HelpMarker("Disable vertical borders in columns Body until hovered for resize (borders will always appears in Headers)"); + PopStyleCompact(); + + if (ImGui::BeginTable("table1", 3, flags)) + { + // Submit columns name with TableSetupColumn() and call TableHeadersRow() to create a row with a header in each column. + // (Later we will show how TableSetupColumn() has other uses, optional flags, sizing weight etc.) 
+ ImGui::TableSetupColumn("One"); + ImGui::TableSetupColumn("Two"); + ImGui::TableSetupColumn("Three"); + ImGui::TableHeadersRow(); + for (int row = 0; row < 6; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 3; column++) + { + ImGui::TableSetColumnIndex(column); + ImGui::Text("Hello %d,%d", column, row); + } + } + ImGui::EndTable(); + } + + // Use outer_size.x == 0.0f instead of default to make the table as tight as possible (only valid when no scrolling and no stretch column) + if (ImGui::BeginTable("table2", 3, flags | ImGuiTableFlags_SizingFixedFit, ImVec2(0.0f, 0.0f))) + { + ImGui::TableSetupColumn("One"); + ImGui::TableSetupColumn("Two"); + ImGui::TableSetupColumn("Three"); + ImGui::TableHeadersRow(); + for (int row = 0; row < 6; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 3; column++) + { + ImGui::TableSetColumnIndex(column); + ImGui::Text("Fixed %d,%d", column, row); + } + } + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Padding")) + { + // First example: showcase use of padding flags and effect of BorderOuterV/BorderInnerV on X padding. + // We don't expose BorderOuterH/BorderInnerH here because they have no effect on X padding. + HelpMarker( + "We often want outer padding activated when any using features which makes the edges of a column visible:\n" + "e.g.:\n" + "- BorderOuterV\n" + "- any form of row selection\n" + "Because of this, activating BorderOuterV sets the default to PadOuterX. 
Using PadOuterX or NoPadOuterX you can override the default.\n\n" + "Actual padding values are using style.CellPadding.\n\n" + "In this demo we don't show horizontal borders to emphasis how they don't affect default horizontal padding."); + + static ImGuiTableFlags flags1 = ImGuiTableFlags_BordersV; + PushStyleCompact(); + ImGui::CheckboxFlags("ImGuiTableFlags_PadOuterX", &flags1, ImGuiTableFlags_PadOuterX); + ImGui::SameLine(); HelpMarker("Enable outer-most padding (default if ImGuiTableFlags_BordersOuterV is set)"); + ImGui::CheckboxFlags("ImGuiTableFlags_NoPadOuterX", &flags1, ImGuiTableFlags_NoPadOuterX); + ImGui::SameLine(); HelpMarker("Disable outer-most padding (default if ImGuiTableFlags_BordersOuterV is not set)"); + ImGui::CheckboxFlags("ImGuiTableFlags_NoPadInnerX", &flags1, ImGuiTableFlags_NoPadInnerX); + ImGui::SameLine(); HelpMarker("Disable inner padding between columns (double inner padding if BordersOuterV is on, single inner padding if BordersOuterV is off)"); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersOuterV", &flags1, ImGuiTableFlags_BordersOuterV); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersInnerV", &flags1, ImGuiTableFlags_BordersInnerV); + static bool show_headers = false; + ImGui::Checkbox("show_headers", &show_headers); + PopStyleCompact(); + + if (ImGui::BeginTable("table_padding", 3, flags1)) + { + if (show_headers) + { + ImGui::TableSetupColumn("One"); + ImGui::TableSetupColumn("Two"); + ImGui::TableSetupColumn("Three"); + ImGui::TableHeadersRow(); + } + + for (int row = 0; row < 5; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 3; column++) + { + ImGui::TableSetColumnIndex(column); + if (row == 0) + { + ImGui::Text("Avail %.2f", ImGui::GetContentRegionAvail().x); + } + else + { + char buf[32]; + sprintf(buf, "Hello %d,%d", column, row); + ImGui::Button(buf, ImVec2(-FLT_MIN, 0.0f)); + } + //if (ImGui::TableGetColumnFlags() & ImGuiTableColumnFlags_IsHovered) + // 
ImGui::TableSetBgColor(ImGuiTableBgTarget_CellBg, IM_COL32(0, 100, 0, 255)); + } + } + ImGui::EndTable(); + } + + // Second example: set style.CellPadding to (0.0) or a custom value. + // FIXME-TABLE: Vertical border effectively not displayed the same way as horizontal one... + HelpMarker("Setting style.CellPadding to (0,0) or a custom value."); + static ImGuiTableFlags flags2 = ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg; + static ImVec2 cell_padding(0.0f, 0.0f); + static bool show_widget_frame_bg = true; + + PushStyleCompact(); + ImGui::CheckboxFlags("ImGuiTableFlags_Borders", &flags2, ImGuiTableFlags_Borders); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersH", &flags2, ImGuiTableFlags_BordersH); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersV", &flags2, ImGuiTableFlags_BordersV); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersInner", &flags2, ImGuiTableFlags_BordersInner); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersOuter", &flags2, ImGuiTableFlags_BordersOuter); + ImGui::CheckboxFlags("ImGuiTableFlags_RowBg", &flags2, ImGuiTableFlags_RowBg); + ImGui::CheckboxFlags("ImGuiTableFlags_Resizable", &flags2, ImGuiTableFlags_Resizable); + ImGui::Checkbox("show_widget_frame_bg", &show_widget_frame_bg); + ImGui::SliderFloat2("CellPadding", &cell_padding.x, 0.0f, 10.0f, "%.0f"); + PopStyleCompact(); + + ImGui::PushStyleVar(ImGuiStyleVar_CellPadding, cell_padding); + if (ImGui::BeginTable("table_padding_2", 3, flags2)) + { + static char text_bufs[3 * 5][16]; // Mini text storage for 3x5 cells + static bool init = true; + if (!show_widget_frame_bg) + ImGui::PushStyleColor(ImGuiCol_FrameBg, 0); + for (int cell = 0; cell < 3 * 5; cell++) + { + ImGui::TableNextColumn(); + if (init) + strcpy(text_bufs[cell], "edit me"); + ImGui::SetNextItemWidth(-FLT_MIN); + ImGui::PushID(cell); + ImGui::InputText("##cell", text_bufs[cell], IM_ARRAYSIZE(text_bufs[cell])); + ImGui::PopID(); + } + if (!show_widget_frame_bg) + ImGui::PopStyleColor(); + init = false; + ImGui::EndTable(); + 
} + ImGui::PopStyleVar(); + + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Sizing policies")) + { + static ImGuiTableFlags flags1 = ImGuiTableFlags_BordersV | ImGuiTableFlags_BordersOuterH | ImGuiTableFlags_RowBg | ImGuiTableFlags_ContextMenuInBody; + PushStyleCompact(); + ImGui::CheckboxFlags("ImGuiTableFlags_Resizable", &flags1, ImGuiTableFlags_Resizable); + ImGui::CheckboxFlags("ImGuiTableFlags_NoHostExtendX", &flags1, ImGuiTableFlags_NoHostExtendX); + PopStyleCompact(); + + static ImGuiTableFlags sizing_policy_flags[4] = { ImGuiTableFlags_SizingFixedFit, ImGuiTableFlags_SizingFixedSame, ImGuiTableFlags_SizingStretchProp, ImGuiTableFlags_SizingStretchSame }; + for (int table_n = 0; table_n < 4; table_n++) + { + ImGui::PushID(table_n); + ImGui::SetNextItemWidth(TEXT_BASE_WIDTH * 30); + EditTableSizingFlags(&sizing_policy_flags[table_n]); + + // To make it easier to understand the different sizing policy, + // For each policy: we display one table where the columns have equal contents width, and one where the columns have different contents width. 
+ if (ImGui::BeginTable("table1", 3, sizing_policy_flags[table_n] | flags1)) + { + for (int row = 0; row < 3; row++) + { + ImGui::TableNextRow(); + ImGui::TableNextColumn(); ImGui::Text("Oh dear"); + ImGui::TableNextColumn(); ImGui::Text("Oh dear"); + ImGui::TableNextColumn(); ImGui::Text("Oh dear"); + } + ImGui::EndTable(); + } + if (ImGui::BeginTable("table2", 3, sizing_policy_flags[table_n] | flags1)) + { + for (int row = 0; row < 3; row++) + { + ImGui::TableNextRow(); + ImGui::TableNextColumn(); ImGui::Text("AAAA"); + ImGui::TableNextColumn(); ImGui::Text("BBBBBBBB"); + ImGui::TableNextColumn(); ImGui::Text("CCCCCCCCCCCC"); + } + ImGui::EndTable(); + } + ImGui::PopID(); + } + + ImGui::Spacing(); + ImGui::TextUnformatted("Advanced"); + ImGui::SameLine(); + HelpMarker("This section allows you to interact and see the effect of various sizing policies depending on whether Scroll is enabled and the contents of your columns."); + + enum ContentsType { CT_ShowWidth, CT_ShortText, CT_LongText, CT_Button, CT_FillButton, CT_InputText }; + static ImGuiTableFlags flags = ImGuiTableFlags_ScrollY | ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg | ImGuiTableFlags_Resizable; + static int contents_type = CT_ShowWidth; + static int column_count = 3; + + PushStyleCompact(); + ImGui::PushID("Advanced"); + ImGui::PushItemWidth(TEXT_BASE_WIDTH * 30); + EditTableSizingFlags(&flags); + ImGui::Combo("Contents", &contents_type, "Show width\0Short Text\0Long Text\0Button\0Fill Button\0InputText\0"); + if (contents_type == CT_FillButton) + { + ImGui::SameLine(); + HelpMarker("Be mindful that using right-alignment (e.g. 
size.x = -FLT_MIN) creates a feedback loop where contents width can feed into auto-column width can feed into contents width."); + } + ImGui::DragInt("Columns", &column_count, 0.1f, 1, 64, "%d", ImGuiSliderFlags_AlwaysClamp); + ImGui::CheckboxFlags("ImGuiTableFlags_Resizable", &flags, ImGuiTableFlags_Resizable); + ImGui::CheckboxFlags("ImGuiTableFlags_PreciseWidths", &flags, ImGuiTableFlags_PreciseWidths); + ImGui::SameLine(); HelpMarker("Disable distributing remainder width to stretched columns (width allocation on a 100-wide table with 3 columns: Without this flag: 33,33,34. With this flag: 33,33,33). With larger number of columns, resizing will appear to be less smooth."); + ImGui::CheckboxFlags("ImGuiTableFlags_ScrollX", &flags, ImGuiTableFlags_ScrollX); + ImGui::CheckboxFlags("ImGuiTableFlags_ScrollY", &flags, ImGuiTableFlags_ScrollY); + ImGui::CheckboxFlags("ImGuiTableFlags_NoClip", &flags, ImGuiTableFlags_NoClip); + ImGui::PopItemWidth(); + ImGui::PopID(); + PopStyleCompact(); + + if (ImGui::BeginTable("table2", column_count, flags, ImVec2(0.0f, TEXT_BASE_HEIGHT * 7))) + { + for (int cell = 0; cell < 10 * column_count; cell++) + { + ImGui::TableNextColumn(); + int column = ImGui::TableGetColumnIndex(); + int row = ImGui::TableGetRowIndex(); + + ImGui::PushID(cell); + char label[32]; + static char text_buf[32] = ""; + sprintf(label, "Hello %d,%d", column, row); + switch (contents_type) + { + case CT_ShortText: ImGui::TextUnformatted(label); break; + case CT_LongText: ImGui::Text("Some %s text %d,%d\nOver two lines..", column == 0 ? 
"long" : "longeeer", column, row); break; + case CT_ShowWidth: ImGui::Text("W: %.1f", ImGui::GetContentRegionAvail().x); break; + case CT_Button: ImGui::Button(label); break; + case CT_FillButton: ImGui::Button(label, ImVec2(-FLT_MIN, 0.0f)); break; + case CT_InputText: ImGui::SetNextItemWidth(-FLT_MIN); ImGui::InputText("##", text_buf, IM_ARRAYSIZE(text_buf)); break; + } + ImGui::PopID(); + } + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Vertical scrolling, with clipping")) + { + HelpMarker("Here we activate ScrollY, which will create a child window container to allow hosting scrollable contents.\n\nWe also demonstrate using ImGuiListClipper to virtualize the submission of many items."); + static ImGuiTableFlags flags = ImGuiTableFlags_ScrollY | ImGuiTableFlags_RowBg | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_Resizable | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable; + + PushStyleCompact(); + ImGui::CheckboxFlags("ImGuiTableFlags_ScrollY", &flags, ImGuiTableFlags_ScrollY); + PopStyleCompact(); + + // When using ScrollX or ScrollY we need to specify a size for our table container! + // Otherwise by default the table will fit all available space, like a BeginChild() call. 
+ ImVec2 outer_size = ImVec2(0.0f, TEXT_BASE_HEIGHT * 8); + if (ImGui::BeginTable("table_scrolly", 3, flags, outer_size)) + { + ImGui::TableSetupScrollFreeze(0, 1); // Make top row always visible + ImGui::TableSetupColumn("One", ImGuiTableColumnFlags_None); + ImGui::TableSetupColumn("Two", ImGuiTableColumnFlags_None); + ImGui::TableSetupColumn("Three", ImGuiTableColumnFlags_None); + ImGui::TableHeadersRow(); + + // Demonstrate using clipper for large vertical lists + ImGuiListClipper clipper; + clipper.Begin(1000); + while (clipper.Step()) + { + for (int row = clipper.DisplayStart; row < clipper.DisplayEnd; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 3; column++) + { + ImGui::TableSetColumnIndex(column); + ImGui::Text("Hello %d,%d", column, row); + } + } + } + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Horizontal scrolling")) + { + HelpMarker( + "When ScrollX is enabled, the default sizing policy becomes ImGuiTableFlags_SizingFixedFit, " + "as automatically stretching columns doesn't make much sense with horizontal scrolling.\n\n" + "Also note that as of the current version, you will almost always want to enable ScrollY along with ScrollX," + "because the container window won't automatically extend vertically to fix contents (this may be improved in future versions)."); + static ImGuiTableFlags flags = ImGuiTableFlags_ScrollX | ImGuiTableFlags_ScrollY | ImGuiTableFlags_RowBg | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_Resizable | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable; + static int freeze_cols = 1; + static int freeze_rows = 1; + + PushStyleCompact(); + ImGui::CheckboxFlags("ImGuiTableFlags_Resizable", &flags, ImGuiTableFlags_Resizable); + ImGui::CheckboxFlags("ImGuiTableFlags_ScrollX", &flags, ImGuiTableFlags_ScrollX); + ImGui::CheckboxFlags("ImGuiTableFlags_ScrollY", &flags, 
ImGuiTableFlags_ScrollY); + ImGui::SetNextItemWidth(ImGui::GetFrameHeight()); + ImGui::DragInt("freeze_cols", &freeze_cols, 0.2f, 0, 9, NULL, ImGuiSliderFlags_NoInput); + ImGui::SetNextItemWidth(ImGui::GetFrameHeight()); + ImGui::DragInt("freeze_rows", &freeze_rows, 0.2f, 0, 9, NULL, ImGuiSliderFlags_NoInput); + PopStyleCompact(); + + // When using ScrollX or ScrollY we need to specify a size for our table container! + // Otherwise by default the table will fit all available space, like a BeginChild() call. + ImVec2 outer_size = ImVec2(0.0f, TEXT_BASE_HEIGHT * 8); + if (ImGui::BeginTable("table_scrollx", 7, flags, outer_size)) + { + ImGui::TableSetupScrollFreeze(freeze_cols, freeze_rows); + ImGui::TableSetupColumn("Line #", ImGuiTableColumnFlags_NoHide); // Make the first column not hideable to match our use of TableSetupScrollFreeze() + ImGui::TableSetupColumn("One"); + ImGui::TableSetupColumn("Two"); + ImGui::TableSetupColumn("Three"); + ImGui::TableSetupColumn("Four"); + ImGui::TableSetupColumn("Five"); + ImGui::TableSetupColumn("Six"); + ImGui::TableHeadersRow(); + for (int row = 0; row < 20; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 7; column++) + { + // Both TableNextColumn() and TableSetColumnIndex() return true when a column is visible or performing width measurement. + // Because here we know that: + // - A) all our columns are contributing the same to row height + // - B) column 0 is always visible, + // We only always submit this one column and can skip others. + // More advanced per-column clipping behaviors may benefit from polling the status flags via TableGetColumnFlags(). 
+ if (!ImGui::TableSetColumnIndex(column) && column > 0) + continue; + if (column == 0) + ImGui::Text("Line %d", row); + else + ImGui::Text("Hello world %d,%d", column, row); + } + } + ImGui::EndTable(); + } + + ImGui::Spacing(); + ImGui::TextUnformatted("Stretch + ScrollX"); + ImGui::SameLine(); + HelpMarker( + "Showcase using Stretch columns + ScrollX together: " + "this is rather unusual and only makes sense when specifying an 'inner_width' for the table!\n" + "Without an explicit value, inner_width is == outer_size.x and therefore using Stretch columns + ScrollX together doesn't make sense."); + static ImGuiTableFlags flags2 = ImGuiTableFlags_SizingStretchSame | ImGuiTableFlags_ScrollX | ImGuiTableFlags_ScrollY | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_RowBg | ImGuiTableFlags_ContextMenuInBody; + static float inner_width = 1000.0f; + PushStyleCompact(); + ImGui::PushID("flags3"); + ImGui::PushItemWidth(TEXT_BASE_WIDTH * 30); + ImGui::CheckboxFlags("ImGuiTableFlags_ScrollX", &flags2, ImGuiTableFlags_ScrollX); + ImGui::DragFloat("inner_width", &inner_width, 1.0f, 0.0f, FLT_MAX, "%.1f"); + ImGui::PopItemWidth(); + ImGui::PopID(); + PopStyleCompact(); + if (ImGui::BeginTable("table2", 7, flags2, outer_size, inner_width)) + { + for (int cell = 0; cell < 20 * 7; cell++) + { + ImGui::TableNextColumn(); + ImGui::Text("Hello world %d,%d", ImGui::TableGetColumnIndex(), ImGui::TableGetRowIndex()); + } + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Columns flags")) + { + // Create a first table just to show all the options/flags we want to make visible in our example! 
+ const int column_count = 3; + const char* column_names[column_count] = { "One", "Two", "Three" }; + static ImGuiTableColumnFlags column_flags[column_count] = { ImGuiTableColumnFlags_DefaultSort, ImGuiTableColumnFlags_None, ImGuiTableColumnFlags_DefaultHide }; + static ImGuiTableColumnFlags column_flags_out[column_count] = { 0, 0, 0 }; // Output from TableGetColumnFlags() + + if (ImGui::BeginTable("table_columns_flags_checkboxes", column_count, ImGuiTableFlags_None)) + { + PushStyleCompact(); + for (int column = 0; column < column_count; column++) + { + ImGui::TableNextColumn(); + ImGui::PushID(column); + ImGui::AlignTextToFramePadding(); // FIXME-TABLE: Workaround for wrong text baseline propagation + ImGui::Text("'%s'", column_names[column]); + ImGui::Spacing(); + ImGui::Text("Input flags:"); + EditTableColumnsFlags(&column_flags[column]); + ImGui::Spacing(); + ImGui::Text("Output flags:"); + ShowTableColumnsStatusFlags(column_flags_out[column]); + ImGui::PopID(); + } + PopStyleCompact(); + ImGui::EndTable(); + } + + // Create the real table we care about for the example! 
+ // We use a scrolling table to be able to showcase the difference between the _IsEnabled and _IsVisible flags above, otherwise in + // a non-scrolling table columns are always visible (unless using ImGuiTableFlags_NoKeepColumnsVisible + resizing the parent window down) + const ImGuiTableFlags flags + = ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_ScrollX | ImGuiTableFlags_ScrollY + | ImGuiTableFlags_RowBg | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV + | ImGuiTableFlags_Resizable | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable | ImGuiTableFlags_Sortable; + ImVec2 outer_size = ImVec2(0.0f, TEXT_BASE_HEIGHT * 9); + if (ImGui::BeginTable("table_columns_flags", column_count, flags, outer_size)) + { + for (int column = 0; column < column_count; column++) + ImGui::TableSetupColumn(column_names[column], column_flags[column]); + ImGui::TableHeadersRow(); + for (int column = 0; column < column_count; column++) + column_flags_out[column] = ImGui::TableGetColumnFlags(column); + float indent_step = (float)((int)TEXT_BASE_WIDTH / 2); + for (int row = 0; row < 8; row++) + { + ImGui::Indent(indent_step); // Add some indentation to demonstrate usage of per-column IndentEnable/IndentDisable flags. + ImGui::TableNextRow(); + for (int column = 0; column < column_count; column++) + { + ImGui::TableSetColumnIndex(column); + ImGui::Text("%s %s", (column == 0) ? 
"Indented" : "Hello", ImGui::TableGetColumnName(column)); + } + } + ImGui::Unindent(indent_step * 8.0f); + + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Columns widths")) + { + HelpMarker("Using TableSetupColumn() to setup default width."); + + static ImGuiTableFlags flags1 = ImGuiTableFlags_Borders | ImGuiTableFlags_NoBordersInBodyUntilResize; + PushStyleCompact(); + ImGui::CheckboxFlags("ImGuiTableFlags_Resizable", &flags1, ImGuiTableFlags_Resizable); + ImGui::CheckboxFlags("ImGuiTableFlags_NoBordersInBodyUntilResize", &flags1, ImGuiTableFlags_NoBordersInBodyUntilResize); + PopStyleCompact(); + if (ImGui::BeginTable("table1", 3, flags1)) + { + // We could also set ImGuiTableFlags_SizingFixedFit on the table and all columns will default to ImGuiTableColumnFlags_WidthFixed. + ImGui::TableSetupColumn("one", ImGuiTableColumnFlags_WidthFixed, 100.0f); // Default to 100.0f + ImGui::TableSetupColumn("two", ImGuiTableColumnFlags_WidthFixed, 200.0f); // Default to 200.0f + ImGui::TableSetupColumn("three", ImGuiTableColumnFlags_WidthFixed); // Default to auto + ImGui::TableHeadersRow(); + for (int row = 0; row < 4; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 3; column++) + { + ImGui::TableSetColumnIndex(column); + if (row == 0) + ImGui::Text("(w: %5.1f)", ImGui::GetContentRegionAvail().x); + else + ImGui::Text("Hello %d,%d", column, row); + } + } + ImGui::EndTable(); + } + + HelpMarker("Using TableSetupColumn() to setup explicit width.\n\nUnless _NoKeepColumnsVisible is set, fixed columns with set width may still be shrunk down if there's not enough space in the host."); + + static ImGuiTableFlags flags2 = ImGuiTableFlags_None; + PushStyleCompact(); + ImGui::CheckboxFlags("ImGuiTableFlags_NoKeepColumnsVisible", &flags2, ImGuiTableFlags_NoKeepColumnsVisible); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersInnerV", &flags2, ImGuiTableFlags_BordersInnerV); 
+ ImGui::CheckboxFlags("ImGuiTableFlags_BordersOuterV", &flags2, ImGuiTableFlags_BordersOuterV); + PopStyleCompact(); + if (ImGui::BeginTable("table2", 4, flags2)) + { + // We could also set ImGuiTableFlags_SizingFixedFit on the table and all columns will default to ImGuiTableColumnFlags_WidthFixed. + ImGui::TableSetupColumn("", ImGuiTableColumnFlags_WidthFixed, 100.0f); + ImGui::TableSetupColumn("", ImGuiTableColumnFlags_WidthFixed, TEXT_BASE_WIDTH * 15.0f); + ImGui::TableSetupColumn("", ImGuiTableColumnFlags_WidthFixed, TEXT_BASE_WIDTH * 30.0f); + ImGui::TableSetupColumn("", ImGuiTableColumnFlags_WidthFixed, TEXT_BASE_WIDTH * 15.0f); + for (int row = 0; row < 5; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 4; column++) + { + ImGui::TableSetColumnIndex(column); + if (row == 0) + ImGui::Text("(w: %5.1f)", ImGui::GetContentRegionAvail().x); + else + ImGui::Text("Hello %d,%d", column, row); + } + } + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Nested tables")) + { + HelpMarker("This demonstrate embedding a table into another table cell."); + + if (ImGui::BeginTable("table_nested1", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_Resizable | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable)) + { + ImGui::TableSetupColumn("A0"); + ImGui::TableSetupColumn("A1"); + ImGui::TableHeadersRow(); + + ImGui::TableNextColumn(); + ImGui::Text("A0 Row 0"); + { + float rows_height = TEXT_BASE_HEIGHT * 2; + if (ImGui::BeginTable("table_nested2", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_Resizable | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable)) + { + ImGui::TableSetupColumn("B0"); + ImGui::TableSetupColumn("B1"); + ImGui::TableHeadersRow(); + + ImGui::TableNextRow(ImGuiTableRowFlags_None, rows_height); + ImGui::TableNextColumn(); + ImGui::Text("B0 Row 0"); + ImGui::TableNextColumn(); + ImGui::Text("B1 Row 0"); + 
ImGui::TableNextRow(ImGuiTableRowFlags_None, rows_height); + ImGui::TableNextColumn(); + ImGui::Text("B0 Row 1"); + ImGui::TableNextColumn(); + ImGui::Text("B1 Row 1"); + + ImGui::EndTable(); + } + } + ImGui::TableNextColumn(); ImGui::Text("A1 Row 0"); + ImGui::TableNextColumn(); ImGui::Text("A0 Row 1"); + ImGui::TableNextColumn(); ImGui::Text("A1 Row 1"); + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Row height")) + { + HelpMarker("You can pass a 'min_row_height' to TableNextRow().\n\nRows are padded with 'style.CellPadding.y' on top and bottom, so effectively the minimum row height will always be >= 'style.CellPadding.y * 2.0f'.\n\nWe cannot honor a _maximum_ row height as that would requires a unique clipping rectangle per row."); + if (ImGui::BeginTable("table_row_height", 1, ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersInnerV)) + { + for (int row = 0; row < 10; row++) + { + float min_row_height = (float)(int)(TEXT_BASE_HEIGHT * 0.30f * row); + ImGui::TableNextRow(ImGuiTableRowFlags_None, min_row_height); + ImGui::TableNextColumn(); + ImGui::Text("min_row_height = %.2f", min_row_height); + } + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Outer size")) + { + // Showcasing use of ImGuiTableFlags_NoHostExtendX and ImGuiTableFlags_NoHostExtendY + // Important to that note how the two flags have slightly different behaviors! 
+ ImGui::Text("Using NoHostExtendX and NoHostExtendY:"); + PushStyleCompact(); + static ImGuiTableFlags flags = ImGuiTableFlags_Borders | ImGuiTableFlags_Resizable | ImGuiTableFlags_ContextMenuInBody | ImGuiTableFlags_RowBg | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_NoHostExtendX; + ImGui::CheckboxFlags("ImGuiTableFlags_NoHostExtendX", &flags, ImGuiTableFlags_NoHostExtendX); + ImGui::SameLine(); HelpMarker("Make outer width auto-fit to columns, overriding outer_size.x value.\n\nOnly available when ScrollX/ScrollY are disabled and Stretch columns are not used."); + ImGui::CheckboxFlags("ImGuiTableFlags_NoHostExtendY", &flags, ImGuiTableFlags_NoHostExtendY); + ImGui::SameLine(); HelpMarker("Make outer height stop exactly at outer_size.y (prevent auto-extending table past the limit).\n\nOnly available when ScrollX/ScrollY are disabled. Data below the limit will be clipped and not visible."); + PopStyleCompact(); + + ImVec2 outer_size = ImVec2(0.0f, TEXT_BASE_HEIGHT * 5.5f); + if (ImGui::BeginTable("table1", 3, flags, outer_size)) + { + for (int row = 0; row < 10; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 3; column++) + { + ImGui::TableNextColumn(); + ImGui::Text("Cell %d,%d", column, row); + } + } + ImGui::EndTable(); + } + ImGui::SameLine(); + ImGui::Text("Hello!"); + + ImGui::Spacing(); + + ImGui::Text("Using explicit size:"); + if (ImGui::BeginTable("table2", 3, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg, ImVec2(TEXT_BASE_WIDTH * 30, 0.0f))) + { + for (int row = 0; row < 5; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 3; column++) + { + ImGui::TableNextColumn(); + ImGui::Text("Cell %d,%d", column, row); + } + } + ImGui::EndTable(); + } + ImGui::SameLine(); + if (ImGui::BeginTable("table3", 3, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg, ImVec2(TEXT_BASE_WIDTH * 30, 0.0f))) + { + for (int row = 0; row < 3; row++) + { + ImGui::TableNextRow(0, TEXT_BASE_HEIGHT * 1.5f); + for (int column = 0; column < 3; 
column++) + { + ImGui::TableNextColumn(); + ImGui::Text("Cell %d,%d", column, row); + } + } + ImGui::EndTable(); + } + + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Background color")) + { + static ImGuiTableFlags flags = ImGuiTableFlags_RowBg; + static int row_bg_type = 1; + static int row_bg_target = 1; + static int cell_bg_type = 1; + + PushStyleCompact(); + ImGui::CheckboxFlags("ImGuiTableFlags_Borders", &flags, ImGuiTableFlags_Borders); + ImGui::CheckboxFlags("ImGuiTableFlags_RowBg", &flags, ImGuiTableFlags_RowBg); + ImGui::SameLine(); HelpMarker("ImGuiTableFlags_RowBg automatically sets RowBg0 to alternative colors pulled from the Style."); + ImGui::Combo("row bg type", (int*)&row_bg_type, "None\0Red\0Gradient\0"); + ImGui::Combo("row bg target", (int*)&row_bg_target, "RowBg0\0RowBg1\0"); ImGui::SameLine(); HelpMarker("Target RowBg0 to override the alternating odd/even colors,\nTarget RowBg1 to blend with them."); + ImGui::Combo("cell bg type", (int*)&cell_bg_type, "None\0Blue\0"); ImGui::SameLine(); HelpMarker("We are colorizing cells to B1->C2 here."); + IM_ASSERT(row_bg_type >= 0 && row_bg_type <= 2); + IM_ASSERT(row_bg_target >= 0 && row_bg_target <= 1); + IM_ASSERT(cell_bg_type >= 0 && cell_bg_type <= 1); + PopStyleCompact(); + + if (ImGui::BeginTable("table1", 5, flags)) + { + for (int row = 0; row < 6; row++) + { + ImGui::TableNextRow(); + + // Demonstrate setting a row background color with 'ImGui::TableSetBgColor(ImGuiTableBgTarget_RowBgX, ...)' + // We use a transparent color so we can see the one behind in case our target is RowBg1 and RowBg0 was already targeted by the ImGuiTableFlags_RowBg flag. + if (row_bg_type != 0) + { + ImU32 row_bg_color = ImGui::GetColorU32(row_bg_type == 1 ? ImVec4(0.7f, 0.3f, 0.3f, 0.65f) : ImVec4(0.2f + row * 0.1f, 0.2f, 0.2f, 0.65f)); // Flat or Gradient? 
+ ImGui::TableSetBgColor(ImGuiTableBgTarget_RowBg0 + row_bg_target, row_bg_color); + } + + // Fill cells + for (int column = 0; column < 5; column++) + { + ImGui::TableSetColumnIndex(column); + ImGui::Text("%c%c", 'A' + row, '0' + column); + + // Change background of Cells B1->C2 + // Demonstrate setting a cell background color with 'ImGui::TableSetBgColor(ImGuiTableBgTarget_CellBg, ...)' + // (the CellBg color will be blended over the RowBg and ColumnBg colors) + // We can also pass a column number as a third parameter to TableSetBgColor() and do this outside the column loop. + if (row >= 1 && row <= 2 && column >= 1 && column <= 2 && cell_bg_type == 1) + { + ImU32 cell_bg_color = ImGui::GetColorU32(ImVec4(0.3f, 0.3f, 0.7f, 0.65f)); + ImGui::TableSetBgColor(ImGuiTableBgTarget_CellBg, cell_bg_color); + } + } + } + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Tree view")) + { + static ImGuiTableFlags flags = ImGuiTableFlags_BordersV | ImGuiTableFlags_BordersOuterH | ImGuiTableFlags_Resizable | ImGuiTableFlags_RowBg | ImGuiTableFlags_NoBordersInBody; + + if (ImGui::BeginTable("3ways", 3, flags)) + { + // The first column will use the default _WidthStretch when ScrollX is Off and _WidthFixed when ScrollX is On + ImGui::TableSetupColumn("Name", ImGuiTableColumnFlags_NoHide); + ImGui::TableSetupColumn("Size", ImGuiTableColumnFlags_WidthFixed, TEXT_BASE_WIDTH * 12.0f); + ImGui::TableSetupColumn("Type", ImGuiTableColumnFlags_WidthFixed, TEXT_BASE_WIDTH * 18.0f); + ImGui::TableHeadersRow(); + + // Simple storage to output a dummy file-system. 
+ struct MyTreeNode + { + const char* Name; + const char* Type; + int Size; + int ChildIdx; + int ChildCount; + static void DisplayNode(const MyTreeNode* node, const MyTreeNode* all_nodes) + { + ImGui::TableNextRow(); + ImGui::TableNextColumn(); + const bool is_folder = (node->ChildCount > 0); + if (is_folder) + { + bool open = ImGui::TreeNodeEx(node->Name, ImGuiTreeNodeFlags_SpanFullWidth); + ImGui::TableNextColumn(); + ImGui::TextDisabled("--"); + ImGui::TableNextColumn(); + ImGui::TextUnformatted(node->Type); + if (open) + { + for (int child_n = 0; child_n < node->ChildCount; child_n++) + DisplayNode(&all_nodes[node->ChildIdx + child_n], all_nodes); + ImGui::TreePop(); + } + } + else + { + ImGui::TreeNodeEx(node->Name, ImGuiTreeNodeFlags_Leaf | ImGuiTreeNodeFlags_Bullet | ImGuiTreeNodeFlags_NoTreePushOnOpen | ImGuiTreeNodeFlags_SpanFullWidth); + ImGui::TableNextColumn(); + ImGui::Text("%d", node->Size); + ImGui::TableNextColumn(); + ImGui::TextUnformatted(node->Type); + } + } + }; + static const MyTreeNode nodes[] = + { + { "Root", "Folder", -1, 1, 3 }, // 0 + { "Music", "Folder", -1, 4, 2 }, // 1 + { "Textures", "Folder", -1, 6, 3 }, // 2 + { "desktop.ini", "System file", 1024, -1,-1 }, // 3 + { "File1_a.wav", "Audio file", 123000, -1,-1 }, // 4 + { "File1_b.wav", "Audio file", 456000, -1,-1 }, // 5 + { "Image001.png", "Image file", 203128, -1,-1 }, // 6 + { "Copy of Image001.png", "Image file", 203256, -1,-1 }, // 7 + { "Copy of Image001 (Final2).png","Image file", 203512, -1,-1 }, // 8 + }; + + MyTreeNode::DisplayNode(&nodes[0], nodes); + + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Item width")) + { + HelpMarker( + "Showcase using PushItemWidth() and how it is preserved on a per-column basis.\n\n" + "Note that on auto-resizing non-resizable fixed columns, querying the content width for e.g. 
right-alignment doesn't make sense."); + if (ImGui::BeginTable("table_item_width", 3, ImGuiTableFlags_Borders)) + { + ImGui::TableSetupColumn("small"); + ImGui::TableSetupColumn("half"); + ImGui::TableSetupColumn("right-align"); + ImGui::TableHeadersRow(); + + for (int row = 0; row < 3; row++) + { + ImGui::TableNextRow(); + if (row == 0) + { + // Setup ItemWidth once (instead of setting up every time, which is also possible but less efficient) + ImGui::TableSetColumnIndex(0); + ImGui::PushItemWidth(TEXT_BASE_WIDTH * 3.0f); // Small + ImGui::TableSetColumnIndex(1); + ImGui::PushItemWidth(-ImGui::GetContentRegionAvail().x * 0.5f); + ImGui::TableSetColumnIndex(2); + ImGui::PushItemWidth(-FLT_MIN); // Right-aligned + } + + // Draw our contents + static float dummy_f = 0.0f; + ImGui::PushID(row); + ImGui::TableSetColumnIndex(0); + ImGui::SliderFloat("float0", &dummy_f, 0.0f, 1.0f); + ImGui::TableSetColumnIndex(1); + ImGui::SliderFloat("float1", &dummy_f, 0.0f, 1.0f); + ImGui::TableSetColumnIndex(2); + ImGui::SliderFloat("float2", &dummy_f, 0.0f, 1.0f); + ImGui::PopID(); + } + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + // Demonstrate using TableHeader() calls instead of TableHeadersRow() + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Custom headers")) + { + const int COLUMNS_COUNT = 3; + if (ImGui::BeginTable("table_custom_headers", COLUMNS_COUNT, ImGuiTableFlags_Borders | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable)) + { + ImGui::TableSetupColumn("Apricot"); + ImGui::TableSetupColumn("Banana"); + ImGui::TableSetupColumn("Cherry"); + + // Dummy entire-column selection storage + // FIXME: It would be nice to actually demonstrate full-featured selection using those checkbox. 
+ static bool column_selected[3] = {}; + + // Instead of calling TableHeadersRow() we'll submit custom headers ourselves + ImGui::TableNextRow(ImGuiTableRowFlags_Headers); + for (int column = 0; column < COLUMNS_COUNT; column++) + { + ImGui::TableSetColumnIndex(column); + const char* column_name = ImGui::TableGetColumnName(column); // Retrieve name passed to TableSetupColumn() + ImGui::PushID(column); + ImGui::PushStyleVar(ImGuiStyleVar_FramePadding, ImVec2(0, 0)); + ImGui::Checkbox("##checkall", &column_selected[column]); + ImGui::PopStyleVar(); + ImGui::SameLine(0.0f, ImGui::GetStyle().ItemInnerSpacing.x); + ImGui::TableHeader(column_name); + ImGui::PopID(); + } + + for (int row = 0; row < 5; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < 3; column++) + { + char buf[32]; + sprintf(buf, "Cell %d,%d", column, row); + ImGui::TableSetColumnIndex(column); + ImGui::Selectable(buf, column_selected[column]); + } + } + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + // Demonstrate creating custom context menus inside columns, while playing it nice with context menus provided by TableHeadersRow()/TableHeader() + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Context menus")) + { + HelpMarker("By default, right-clicking over a TableHeadersRow()/TableHeader() line will open the default context-menu.\nUsing ImGuiTableFlags_ContextMenuInBody we also allow right-clicking over columns body."); + static ImGuiTableFlags flags1 = ImGuiTableFlags_Resizable | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable | ImGuiTableFlags_Borders | ImGuiTableFlags_ContextMenuInBody; + + PushStyleCompact(); + ImGui::CheckboxFlags("ImGuiTableFlags_ContextMenuInBody", &flags1, ImGuiTableFlags_ContextMenuInBody); + PopStyleCompact(); + + // Context Menus: first example + // [1.1] Right-click on the TableHeadersRow() line to open the default table context menu. 
+ // [1.2] Right-click in columns also open the default table context menu (if ImGuiTableFlags_ContextMenuInBody is set) + const int COLUMNS_COUNT = 3; + if (ImGui::BeginTable("table_context_menu", COLUMNS_COUNT, flags1)) + { + ImGui::TableSetupColumn("One"); + ImGui::TableSetupColumn("Two"); + ImGui::TableSetupColumn("Three"); + + // [1.1]] Right-click on the TableHeadersRow() line to open the default table context menu. + ImGui::TableHeadersRow(); + + // Submit dummy contents + for (int row = 0; row < 4; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < COLUMNS_COUNT; column++) + { + ImGui::TableSetColumnIndex(column); + ImGui::Text("Cell %d,%d", column, row); + } + } + ImGui::EndTable(); + } + + // Context Menus: second example + // [2.1] Right-click on the TableHeadersRow() line to open the default table context menu. + // [2.2] Right-click on the ".." to open a custom popup + // [2.3] Right-click in columns to open another custom popup + HelpMarker("Demonstrate mixing table context menu (over header), item context button (over button) and custom per-colum context menu (over column body)."); + ImGuiTableFlags flags2 = ImGuiTableFlags_Resizable | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable | ImGuiTableFlags_Borders; + if (ImGui::BeginTable("table_context_menu_2", COLUMNS_COUNT, flags2)) + { + ImGui::TableSetupColumn("One"); + ImGui::TableSetupColumn("Two"); + ImGui::TableSetupColumn("Three"); + + // [2.1] Right-click on the TableHeadersRow() line to open the default table context menu. + ImGui::TableHeadersRow(); + for (int row = 0; row < 4; row++) + { + ImGui::TableNextRow(); + for (int column = 0; column < COLUMNS_COUNT; column++) + { + // Submit dummy contents + ImGui::TableSetColumnIndex(column); + ImGui::Text("Cell %d,%d", column, row); + ImGui::SameLine(); + + // [2.2] Right-click on the ".." 
to open a custom popup + ImGui::PushID(row * COLUMNS_COUNT + column); + ImGui::SmallButton(".."); + if (ImGui::BeginPopupContextItem()) + { + ImGui::Text("This is the popup for Button(\"..\") in Cell %d,%d", column, row); + if (ImGui::Button("Close")) + ImGui::CloseCurrentPopup(); + ImGui::EndPopup(); + } + ImGui::PopID(); + } + } + + // [2.3] Right-click anywhere in columns to open another custom popup + // (instead of testing for !IsAnyItemHovered() we could also call OpenPopup() with ImGuiPopupFlags_NoOpenOverExistingPopup + // to manage popup priority as the popups triggers, here "are we hovering a column" are overlapping) + int hovered_column = -1; + for (int column = 0; column < COLUMNS_COUNT + 1; column++) + { + ImGui::PushID(column); + if (ImGui::TableGetColumnFlags(column) & ImGuiTableColumnFlags_IsHovered) + hovered_column = column; + if (hovered_column == column && !ImGui::IsAnyItemHovered() && ImGui::IsMouseReleased(1)) + ImGui::OpenPopup("MyPopup"); + if (ImGui::BeginPopup("MyPopup")) + { + if (column == COLUMNS_COUNT) + ImGui::Text("This is a custom popup for unused space after the last column."); + else + ImGui::Text("This is a custom popup for Column %d", column); + if (ImGui::Button("Close")) + ImGui::CloseCurrentPopup(); + ImGui::EndPopup(); + } + ImGui::PopID(); + } + + ImGui::EndTable(); + ImGui::Text("Hovered column: %d", hovered_column); + } + ImGui::TreePop(); + } + + // Demonstrate creating multiple tables with the same ID + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Synced instances")) + { + HelpMarker("Multiple tables with the same identifier will share their settings, width, visibility, order etc."); + for (int n = 0; n < 3; n++) + { + char buf[32]; + sprintf(buf, "Synced Table %d", n); + bool open = ImGui::CollapsingHeader(buf, ImGuiTreeNodeFlags_DefaultOpen); + if (open && ImGui::BeginTable("Table", 3, ImGuiTableFlags_Resizable | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable | 
ImGuiTableFlags_Borders | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_NoSavedSettings)) + { + ImGui::TableSetupColumn("One"); + ImGui::TableSetupColumn("Two"); + ImGui::TableSetupColumn("Three"); + ImGui::TableHeadersRow(); + for (int cell = 0; cell < 9; cell++) + { + ImGui::TableNextColumn(); + ImGui::Text("this cell %d", cell); + } + ImGui::EndTable(); + } + } + ImGui::TreePop(); + } + + // Demonstrate using Sorting facilities + // This is a simplified version of the "Advanced" example, where we mostly focus on the code necessary to handle sorting. + // Note that the "Advanced" example also showcase manually triggering a sort (e.g. if item quantities have been modified) + static const char* template_items_names[] = + { + "Banana", "Apple", "Cherry", "Watermelon", "Grapefruit", "Strawberry", "Mango", + "Kiwi", "Orange", "Pineapple", "Blueberry", "Plum", "Coconut", "Pear", "Apricot" + }; + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Sorting")) + { + // Create item list + static ImVector items; + if (items.Size == 0) + { + items.resize(50, MyItem()); + for (int n = 0; n < items.Size; n++) + { + const int template_n = n % IM_ARRAYSIZE(template_items_names); + MyItem& item = items[n]; + item.ID = n; + item.Name = template_items_names[template_n]; + item.Quantity = (n * n - n) % 20; // Assign default quantities + } + } + + // Options + static ImGuiTableFlags flags = + ImGuiTableFlags_Resizable | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable | ImGuiTableFlags_Sortable | ImGuiTableFlags_SortMulti + | ImGuiTableFlags_RowBg | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_NoBordersInBody + | ImGuiTableFlags_ScrollY; + PushStyleCompact(); + ImGui::CheckboxFlags("ImGuiTableFlags_SortMulti", &flags, ImGuiTableFlags_SortMulti); + ImGui::SameLine(); HelpMarker("When sorting is enabled: hold shift when clicking headers to sort on multiple column. 
TableGetSortSpecs() may return specs where (SpecsCount > 1)."); + ImGui::CheckboxFlags("ImGuiTableFlags_SortTristate", &flags, ImGuiTableFlags_SortTristate); + ImGui::SameLine(); HelpMarker("When sorting is enabled: allow no sorting, disable default sorting. TableGetSortSpecs() may return specs where (SpecsCount == 0)."); + PopStyleCompact(); + + if (ImGui::BeginTable("table_sorting", 4, flags, ImVec2(0.0f, TEXT_BASE_HEIGHT * 15), 0.0f)) + { + // Declare columns + // We use the "user_id" parameter of TableSetupColumn() to specify a user id that will be stored in the sort specifications. + // This is so our sort function can identify a column given our own identifier. We could also identify them based on their index! + // Demonstrate using a mixture of flags among available sort-related flags: + // - ImGuiTableColumnFlags_DefaultSort + // - ImGuiTableColumnFlags_NoSort / ImGuiTableColumnFlags_NoSortAscending / ImGuiTableColumnFlags_NoSortDescending + // - ImGuiTableColumnFlags_PreferSortAscending / ImGuiTableColumnFlags_PreferSortDescending + ImGui::TableSetupColumn("ID", ImGuiTableColumnFlags_DefaultSort | ImGuiTableColumnFlags_WidthFixed, 0.0f, MyItemColumnID_ID); + ImGui::TableSetupColumn("Name", ImGuiTableColumnFlags_WidthFixed, 0.0f, MyItemColumnID_Name); + ImGui::TableSetupColumn("Action", ImGuiTableColumnFlags_NoSort | ImGuiTableColumnFlags_WidthFixed, 0.0f, MyItemColumnID_Action); + ImGui::TableSetupColumn("Quantity", ImGuiTableColumnFlags_PreferSortDescending | ImGuiTableColumnFlags_WidthStretch, 0.0f, MyItemColumnID_Quantity); + ImGui::TableSetupScrollFreeze(0, 1); // Make row always visible + ImGui::TableHeadersRow(); + + // Sort our data if sort specs have been changed! + if (ImGuiTableSortSpecs* sorts_specs = ImGui::TableGetSortSpecs()) + if (sorts_specs->SpecsDirty) + { + MyItem::s_current_sort_specs = sorts_specs; // Store in variable accessible by the sort function. 
+ if (items.Size > 1) + qsort(&items[0], (size_t)items.Size, sizeof(items[0]), MyItem::CompareWithSortSpecs); + MyItem::s_current_sort_specs = NULL; + sorts_specs->SpecsDirty = false; + } + + // Demonstrate using clipper for large vertical lists + ImGuiListClipper clipper; + clipper.Begin(items.Size); + while (clipper.Step()) + for (int row_n = clipper.DisplayStart; row_n < clipper.DisplayEnd; row_n++) + { + // Display a data item + MyItem* item = &items[row_n]; + ImGui::PushID(item->ID); + ImGui::TableNextRow(); + ImGui::TableNextColumn(); + ImGui::Text("%04d", item->ID); + ImGui::TableNextColumn(); + ImGui::TextUnformatted(item->Name); + ImGui::TableNextColumn(); + ImGui::SmallButton("None"); + ImGui::TableNextColumn(); + ImGui::Text("%d", item->Quantity); + ImGui::PopID(); + } + ImGui::EndTable(); + } + ImGui::TreePop(); + } + + //ImGui::SetNextItemOpen(true, ImGuiCond_Once); // [DEBUG] + if (open_action != -1) + ImGui::SetNextItemOpen(open_action != 0); + if (ImGui::TreeNode("Advanced")) + { + static ImGuiTableFlags flags = + ImGuiTableFlags_Resizable | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable + | ImGuiTableFlags_Sortable | ImGuiTableFlags_SortMulti + | ImGuiTableFlags_RowBg | ImGuiTableFlags_Borders | ImGuiTableFlags_NoBordersInBody + | ImGuiTableFlags_ScrollX | ImGuiTableFlags_ScrollY + | ImGuiTableFlags_SizingFixedFit; + + enum ContentsType { CT_Text, CT_Button, CT_SmallButton, CT_FillButton, CT_Selectable, CT_SelectableSpanRow }; + static int contents_type = CT_SelectableSpanRow; + const char* contents_type_names[] = { "Text", "Button", "SmallButton", "FillButton", "Selectable", "Selectable (span row)" }; + static int freeze_cols = 1; + static int freeze_rows = 1; + static int items_count = IM_ARRAYSIZE(template_items_names) * 2; + static ImVec2 outer_size_value = ImVec2(0.0f, TEXT_BASE_HEIGHT * 12); + static float row_min_height = 0.0f; // Auto + static float inner_width_with_scroll = 0.0f; // Auto-extend + static bool outer_size_enabled = 
true; + static bool show_headers = true; + static bool show_wrapped_text = false; + //static ImGuiTextFilter filter; + //ImGui::SetNextItemOpen(true, ImGuiCond_Once); // FIXME-TABLE: Enabling this results in initial clipped first pass on table which tend to affects column sizing + if (ImGui::TreeNode("Options")) + { + // Make the UI compact because there are so many fields + PushStyleCompact(); + ImGui::PushItemWidth(TEXT_BASE_WIDTH * 28.0f); + + if (ImGui::TreeNodeEx("Features:", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::CheckboxFlags("ImGuiTableFlags_Resizable", &flags, ImGuiTableFlags_Resizable); + ImGui::CheckboxFlags("ImGuiTableFlags_Reorderable", &flags, ImGuiTableFlags_Reorderable); + ImGui::CheckboxFlags("ImGuiTableFlags_Hideable", &flags, ImGuiTableFlags_Hideable); + ImGui::CheckboxFlags("ImGuiTableFlags_Sortable", &flags, ImGuiTableFlags_Sortable); + ImGui::CheckboxFlags("ImGuiTableFlags_NoSavedSettings", &flags, ImGuiTableFlags_NoSavedSettings); + ImGui::CheckboxFlags("ImGuiTableFlags_ContextMenuInBody", &flags, ImGuiTableFlags_ContextMenuInBody); + ImGui::TreePop(); + } + + if (ImGui::TreeNodeEx("Decorations:", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::CheckboxFlags("ImGuiTableFlags_RowBg", &flags, ImGuiTableFlags_RowBg); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersV", &flags, ImGuiTableFlags_BordersV); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersOuterV", &flags, ImGuiTableFlags_BordersOuterV); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersInnerV", &flags, ImGuiTableFlags_BordersInnerV); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersH", &flags, ImGuiTableFlags_BordersH); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersOuterH", &flags, ImGuiTableFlags_BordersOuterH); + ImGui::CheckboxFlags("ImGuiTableFlags_BordersInnerH", &flags, ImGuiTableFlags_BordersInnerH); + ImGui::CheckboxFlags("ImGuiTableFlags_NoBordersInBody", &flags, ImGuiTableFlags_NoBordersInBody); ImGui::SameLine(); HelpMarker("Disable vertical borders in columns Body (borders 
will always appears in Headers"); + ImGui::CheckboxFlags("ImGuiTableFlags_NoBordersInBodyUntilResize", &flags, ImGuiTableFlags_NoBordersInBodyUntilResize); ImGui::SameLine(); HelpMarker("Disable vertical borders in columns Body until hovered for resize (borders will always appears in Headers)"); + ImGui::TreePop(); + } + + if (ImGui::TreeNodeEx("Sizing:", ImGuiTreeNodeFlags_DefaultOpen)) + { + EditTableSizingFlags(&flags); + ImGui::SameLine(); HelpMarker("In the Advanced demo we override the policy of each column so those table-wide settings have less effect that typical."); + ImGui::CheckboxFlags("ImGuiTableFlags_NoHostExtendX", &flags, ImGuiTableFlags_NoHostExtendX); + ImGui::SameLine(); HelpMarker("Make outer width auto-fit to columns, overriding outer_size.x value.\n\nOnly available when ScrollX/ScrollY are disabled and Stretch columns are not used."); + ImGui::CheckboxFlags("ImGuiTableFlags_NoHostExtendY", &flags, ImGuiTableFlags_NoHostExtendY); + ImGui::SameLine(); HelpMarker("Make outer height stop exactly at outer_size.y (prevent auto-extending table past the limit).\n\nOnly available when ScrollX/ScrollY are disabled. Data below the limit will be clipped and not visible."); + ImGui::CheckboxFlags("ImGuiTableFlags_NoKeepColumnsVisible", &flags, ImGuiTableFlags_NoKeepColumnsVisible); + ImGui::SameLine(); HelpMarker("Only available if ScrollX is disabled."); + ImGui::CheckboxFlags("ImGuiTableFlags_PreciseWidths", &flags, ImGuiTableFlags_PreciseWidths); + ImGui::SameLine(); HelpMarker("Disable distributing remainder width to stretched columns (width allocation on a 100-wide table with 3 columns: Without this flag: 33,33,34. With this flag: 33,33,33). 
With larger number of columns, resizing will appear to be less smooth."); + ImGui::CheckboxFlags("ImGuiTableFlags_NoClip", &flags, ImGuiTableFlags_NoClip); + ImGui::SameLine(); HelpMarker("Disable clipping rectangle for every individual columns (reduce draw command count, items will be able to overflow into other columns). Generally incompatible with ScrollFreeze options."); + ImGui::TreePop(); + } + + if (ImGui::TreeNodeEx("Padding:", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::CheckboxFlags("ImGuiTableFlags_PadOuterX", &flags, ImGuiTableFlags_PadOuterX); + ImGui::CheckboxFlags("ImGuiTableFlags_NoPadOuterX", &flags, ImGuiTableFlags_NoPadOuterX); + ImGui::CheckboxFlags("ImGuiTableFlags_NoPadInnerX", &flags, ImGuiTableFlags_NoPadInnerX); + ImGui::TreePop(); + } + + if (ImGui::TreeNodeEx("Scrolling:", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::CheckboxFlags("ImGuiTableFlags_ScrollX", &flags, ImGuiTableFlags_ScrollX); + ImGui::SameLine(); + ImGui::SetNextItemWidth(ImGui::GetFrameHeight()); + ImGui::DragInt("freeze_cols", &freeze_cols, 0.2f, 0, 9, NULL, ImGuiSliderFlags_NoInput); + ImGui::CheckboxFlags("ImGuiTableFlags_ScrollY", &flags, ImGuiTableFlags_ScrollY); + ImGui::SameLine(); + ImGui::SetNextItemWidth(ImGui::GetFrameHeight()); + ImGui::DragInt("freeze_rows", &freeze_rows, 0.2f, 0, 9, NULL, ImGuiSliderFlags_NoInput); + ImGui::TreePop(); + } + + if (ImGui::TreeNodeEx("Sorting:", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::CheckboxFlags("ImGuiTableFlags_SortMulti", &flags, ImGuiTableFlags_SortMulti); + ImGui::SameLine(); HelpMarker("When sorting is enabled: hold shift when clicking headers to sort on multiple column. TableGetSortSpecs() may return specs where (SpecsCount > 1)."); + ImGui::CheckboxFlags("ImGuiTableFlags_SortTristate", &flags, ImGuiTableFlags_SortTristate); + ImGui::SameLine(); HelpMarker("When sorting is enabled: allow no sorting, disable default sorting. 
TableGetSortSpecs() may return specs where (SpecsCount == 0)."); + ImGui::TreePop(); + } + + if (ImGui::TreeNodeEx("Other:", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Checkbox("show_headers", &show_headers); + ImGui::Checkbox("show_wrapped_text", &show_wrapped_text); + + ImGui::DragFloat2("##OuterSize", &outer_size_value.x); + ImGui::SameLine(0.0f, ImGui::GetStyle().ItemInnerSpacing.x); + ImGui::Checkbox("outer_size", &outer_size_enabled); + ImGui::SameLine(); + HelpMarker("If scrolling is disabled (ScrollX and ScrollY not set):\n" + "- The table is output directly in the parent window.\n" + "- OuterSize.x < 0.0f will right-align the table.\n" + "- OuterSize.x = 0.0f will narrow fit the table unless there are any Stretch column.\n" + "- OuterSize.y then becomes the minimum size for the table, which will extend vertically if there are more rows (unless NoHostExtendY is set)."); + + // From a user point of view we will tend to use 'inner_width' differently depending on whether our table is embedding scrolling. + // To facilitate toying with this demo we will actually pass 0.0f to the BeginTable() when ScrollX is disabled. 
+ ImGui::DragFloat("inner_width (when ScrollX active)", &inner_width_with_scroll, 1.0f, 0.0f, FLT_MAX); + + ImGui::DragFloat("row_min_height", &row_min_height, 1.0f, 0.0f, FLT_MAX); + ImGui::SameLine(); HelpMarker("Specify height of the Selectable item."); + + ImGui::DragInt("items_count", &items_count, 0.1f, 0, 9999); + ImGui::Combo("items_type (first column)", &contents_type, contents_type_names, IM_ARRAYSIZE(contents_type_names)); + //filter.Draw("filter"); + ImGui::TreePop(); + } + + ImGui::PopItemWidth(); + PopStyleCompact(); + ImGui::Spacing(); + ImGui::TreePop(); + } + + // Recreate/reset item list if we changed the number of items + static ImVector items; + static ImVector selection; + static bool items_need_sort = false; + if (items.Size != items_count) + { + items.resize(items_count, MyItem()); + for (int n = 0; n < items_count; n++) + { + const int template_n = n % IM_ARRAYSIZE(template_items_names); + MyItem& item = items[n]; + item.ID = n; + item.Name = template_items_names[template_n]; + item.Quantity = (template_n == 3) ? 10 : (template_n == 4) ? 20 : 0; // Assign default quantities + } + } + + const ImDrawList* parent_draw_list = ImGui::GetWindowDrawList(); + const int parent_draw_list_draw_cmd_count = parent_draw_list->CmdBuffer.Size; + ImVec2 table_scroll_cur, table_scroll_max; // For debug display + const ImDrawList* table_draw_list = NULL; // " + + const float inner_width_to_use = (flags & ImGuiTableFlags_ScrollX) ? inner_width_with_scroll : 0.0f; + if (ImGui::BeginTable("table_advanced", 6, flags, outer_size_enabled ? outer_size_value : ImVec2(0, 0), inner_width_to_use)) + { + // Declare columns + // We use the "user_id" parameter of TableSetupColumn() to specify a user id that will be stored in the sort specifications. + // This is so our sort function can identify a column given our own identifier. We could also identify them based on their index! 
+ ImGui::TableSetupColumn("ID", ImGuiTableColumnFlags_DefaultSort | ImGuiTableColumnFlags_WidthFixed | ImGuiTableColumnFlags_NoHide, 0.0f, MyItemColumnID_ID); + ImGui::TableSetupColumn("Name", ImGuiTableColumnFlags_WidthFixed, 0.0f, MyItemColumnID_Name); + ImGui::TableSetupColumn("Action", ImGuiTableColumnFlags_NoSort | ImGuiTableColumnFlags_WidthFixed, 0.0f, MyItemColumnID_Action); + ImGui::TableSetupColumn("Quantity", ImGuiTableColumnFlags_PreferSortDescending, 0.0f, MyItemColumnID_Quantity); + ImGui::TableSetupColumn("Description", (flags & ImGuiTableFlags_NoHostExtendX) ? 0 : ImGuiTableColumnFlags_WidthStretch, 0.0f, MyItemColumnID_Description); + ImGui::TableSetupColumn("Hidden", ImGuiTableColumnFlags_DefaultHide | ImGuiTableColumnFlags_NoSort); + ImGui::TableSetupScrollFreeze(freeze_cols, freeze_rows); + + // Sort our data if sort specs have been changed! + ImGuiTableSortSpecs* sorts_specs = ImGui::TableGetSortSpecs(); + if (sorts_specs && sorts_specs->SpecsDirty) + items_need_sort = true; + if (sorts_specs && items_need_sort && items.Size > 1) + { + MyItem::s_current_sort_specs = sorts_specs; // Store in variable accessible by the sort function. + qsort(&items[0], (size_t)items.Size, sizeof(items[0]), MyItem::CompareWithSortSpecs); + MyItem::s_current_sort_specs = NULL; + sorts_specs->SpecsDirty = false; + } + items_need_sort = false; + + // Take note of whether we are currently sorting based on the Quantity field, + // we will use this to trigger sorting when we know the data of this column has been modified. + const bool sorts_specs_using_quantity = (ImGui::TableGetColumnFlags(3) & ImGuiTableColumnFlags_IsSorted) != 0; + + // Show headers + if (show_headers) + ImGui::TableHeadersRow(); + + // Show data + // FIXME-TABLE FIXME-NAV: How we can get decent up/down even though we have the buttons here? 
+ ImGui::PushButtonRepeat(true); +#if 1 + // Demonstrate using clipper for large vertical lists + ImGuiListClipper clipper; + clipper.Begin(items.Size); + while (clipper.Step()) + { + for (int row_n = clipper.DisplayStart; row_n < clipper.DisplayEnd; row_n++) +#else + // Without clipper + { + for (int row_n = 0; row_n < items.Size; row_n++) +#endif + { + MyItem* item = &items[row_n]; + //if (!filter.PassFilter(item->Name)) + // continue; + + const bool item_is_selected = selection.contains(item->ID); + ImGui::PushID(item->ID); + ImGui::TableNextRow(ImGuiTableRowFlags_None, row_min_height); + ImGui::TableNextColumn(); + + // For the demo purpose we can select among different type of items submitted in the first column + char label[32]; + sprintf(label, "%04d", item->ID); + if (contents_type == CT_Text) + ImGui::TextUnformatted(label); + else if (contents_type == CT_Button) + ImGui::Button(label); + else if (contents_type == CT_SmallButton) + ImGui::SmallButton(label); + else if (contents_type == CT_FillButton) + ImGui::Button(label, ImVec2(-FLT_MIN, 0.0f)); + else if (contents_type == CT_Selectable || contents_type == CT_SelectableSpanRow) + { + ImGuiSelectableFlags selectable_flags = (contents_type == CT_SelectableSpanRow) ? ImGuiSelectableFlags_SpanAllColumns | ImGuiSelectableFlags_AllowItemOverlap : ImGuiSelectableFlags_None; + if (ImGui::Selectable(label, item_is_selected, selectable_flags, ImVec2(0, row_min_height))) + { + if (ImGui::GetIO().KeyCtrl) + { + if (item_is_selected) + selection.find_erase_unsorted(item->ID); + else + selection.push_back(item->ID); + } + else + { + selection.clear(); + selection.push_back(item->ID); + } + } + } + + if (ImGui::TableNextColumn()) + ImGui::TextUnformatted(item->Name); + + // Here we demonstrate marking our data set as needing to be sorted again if we modified a quantity, + // and we are currently sorting on the column showing the Quantity. 
+ // To avoid triggering a sort while holding the button, we only trigger it when the button has been released. + // You will probably need a more advanced system in your code if you want to automatically sort when a specific entry changes. + if (ImGui::TableNextColumn()) + { + if (ImGui::SmallButton("Chop")) { item->Quantity += 1; } + if (sorts_specs_using_quantity && ImGui::IsItemDeactivated()) { items_need_sort = true; } + ImGui::SameLine(); + if (ImGui::SmallButton("Eat")) { item->Quantity -= 1; } + if (sorts_specs_using_quantity && ImGui::IsItemDeactivated()) { items_need_sort = true; } + } + + if (ImGui::TableNextColumn()) + ImGui::Text("%d", item->Quantity); + + ImGui::TableNextColumn(); + if (show_wrapped_text) + ImGui::TextWrapped("Lorem ipsum dolor sit amet"); + else + ImGui::Text("Lorem ipsum dolor sit amet"); + + if (ImGui::TableNextColumn()) + ImGui::Text("1234"); + + ImGui::PopID(); + } + } + ImGui::PopButtonRepeat(); + + // Store some info to display debug details below + table_scroll_cur = ImVec2(ImGui::GetScrollX(), ImGui::GetScrollY()); + table_scroll_max = ImVec2(ImGui::GetScrollMaxX(), ImGui::GetScrollMaxY()); + table_draw_list = ImGui::GetWindowDrawList(); + ImGui::EndTable(); + } + static bool show_debug_details = false; + ImGui::Checkbox("Debug details", &show_debug_details); + if (show_debug_details && table_draw_list) + { + ImGui::SameLine(0.0f, 0.0f); + const int table_draw_list_draw_cmd_count = table_draw_list->CmdBuffer.Size; + if (table_draw_list == parent_draw_list) + ImGui::Text(": DrawCmd: +%d (in same window)", + table_draw_list_draw_cmd_count - parent_draw_list_draw_cmd_count); + else + ImGui::Text(": DrawCmd: +%d (in child window), Scroll: (%.f/%.f) (%.f/%.f)", + table_draw_list_draw_cmd_count - 1, table_scroll_cur.x, table_scroll_max.x, table_scroll_cur.y, table_scroll_max.y); + } + ImGui::TreePop(); + } + + ImGui::PopID(); + + ShowDemoWindowColumns(); + + if (disable_indent) + ImGui::PopStyleVar(); +} + +// Demonstrate 
old/legacy Columns API!
+// [2020: Columns are under-featured and not maintained. Prefer using the more flexible and powerful BeginTable() API!]
+//
+// Submits a "Legacy Columns API" tree node followed by a help marker. When the node is open, one
+// sub-node per use case is shown: "Basic", "Borders", "Mixed items", "Word-wrapping",
+// "Horizontal Scrolling" and "Tree". Each sub-node switches back to a single column with Columns(1)
+// before popping its tree node.
+static void ShowDemoWindowColumns()
+{
+    // The help marker is submitted on the same line as the node whether or not the node is open,
+    // which is why the early return only happens after it.
+    bool open = ImGui::TreeNode("Legacy Columns API");
+    ImGui::SameLine();
+    HelpMarker("Columns() is an old API! Prefer using the more flexible and powerful BeginTable() API!");
+    if (!open)
+        return;
+
+    // Basic columns
+    if (ImGui::TreeNode("Basic"))
+    {
+        ImGui::Text("Without border:");
+        ImGui::Columns(3, "mycolumns3", false);  // 3-ways, no border
+        ImGui::Separator();
+        for (int n = 0; n < 14; n++)
+        {
+            char label[32];
+            sprintf(label, "Item %d", n);
+            if (ImGui::Selectable(label)) {}
+            //if (ImGui::Button(label, ImVec2(-FLT_MIN,0.0f))) {}
+            ImGui::NextColumn();
+        }
+        ImGui::Columns(1);
+        ImGui::Separator();
+
+        ImGui::Text("With border:");
+        ImGui::Columns(4, "mycolumns"); // 4-ways, with border
+        ImGui::Separator();
+        // Header cells, one per column
+        ImGui::Text("ID"); ImGui::NextColumn();
+        ImGui::Text("Name"); ImGui::NextColumn();
+        ImGui::Text("Path"); ImGui::NextColumn();
+        ImGui::Text("Hovered"); ImGui::NextColumn();
+        ImGui::Separator();
+        const char* names[3] = { "One", "Two", "Three" };
+        const char* paths[3] = { "/path/one", "/path/two", "/path/three" };
+        static int selected = -1; // Index of the selected row, -1 while nothing is selected
+        for (int i = 0; i < 3; i++)
+        {
+            char label[32];
+            sprintf(label, "%04d", i);
+            if (ImGui::Selectable(label, selected == i, ImGuiSelectableFlags_SpanAllColumns))
+                selected = i;
+            // Query the hover state right after the Selectable, before other items are submitted
+            bool hovered = ImGui::IsItemHovered();
+            ImGui::NextColumn();
+            // names/paths are data, not format strings: emit them verbatim so that a '%' in an entry
+            // could never be interpreted as a conversion specifier.
+            ImGui::TextUnformatted(names[i]); ImGui::NextColumn();
+            ImGui::TextUnformatted(paths[i]); ImGui::NextColumn();
+            ImGui::Text("%d", hovered); ImGui::NextColumn();
+        }
+        ImGui::Columns(1);
+        ImGui::Separator();
+        ImGui::TreePop();
+    }
+
+    // Borders: toggle horizontal/vertical borders and edit the number of columns
+    if (ImGui::TreeNode("Borders"))
+    {
+        // NB: Future columns API should allow automatic horizontal borders.
+        static bool h_borders = true;
+        static bool v_borders = true;
+        static int columns_count = 4;
+        const int lines_count = 3;
+        ImGui::SetNextItemWidth(ImGui::GetFontSize() * 8);
+        ImGui::DragInt("##columns_count", &columns_count, 0.1f, 2, 10, "%d columns");
+        // Defensive lower bound: Columns() below is always called with at least 2 columns
+        if (columns_count < 2)
+            columns_count = 2;
+        ImGui::SameLine();
+        ImGui::Checkbox("horizontal", &h_borders);
+        ImGui::SameLine();
+        ImGui::Checkbox("vertical", &v_borders);
+        ImGui::Columns(columns_count, NULL, v_borders);
+        for (int i = 0; i < columns_count * lines_count; i++)
+        {
+            // Horizontal borders are drawn by hand: one separator whenever we are back in the first column
+            if (h_borders && ImGui::GetColumnIndex() == 0)
+                ImGui::Separator();
+            ImGui::Text("%c%c%c", 'a' + i, 'a' + i, 'a' + i);
+            ImGui::Text("Width %.2f", ImGui::GetColumnWidth());
+            ImGui::Text("Avail %.2f", ImGui::GetContentRegionAvail().x);
+            ImGui::Text("Offset %.2f", ImGui::GetColumnOffset());
+            ImGui::Text("Long text that is likely to clip");
+            ImGui::Button("Button", ImVec2(-FLT_MIN, 0.0f));
+            ImGui::NextColumn();
+        }
+        ImGui::Columns(1);
+        if (h_borders)
+            ImGui::Separator();
+        ImGui::TreePop();
+    }
+
+    // Create multiple items in a same cell before switching to next column
+    if (ImGui::TreeNode("Mixed items"))
+    {
+        ImGui::Columns(3, "mixed");
+        ImGui::Separator();
+
+        ImGui::Text("Hello");
+        ImGui::Button("Banana");
+        ImGui::NextColumn();
+
+        ImGui::Text("ImGui");
+        ImGui::Button("Apple");
+        static float foo = 1.0f;
+        ImGui::InputFloat("red", &foo, 0.05f, 0, "%.3f");
+        ImGui::Text("An extra line here.");
+        ImGui::NextColumn();
+
+        ImGui::Text("Sailor");
+        ImGui::Button("Corniflower");
+        static float bar = 1.0f;
+        ImGui::InputFloat("blue", &bar, 0.05f, 0, "%.3f");
+        ImGui::NextColumn();
+
+        // Second row: one collapsing header per column
+        if (ImGui::CollapsingHeader("Category A")) { ImGui::Text("Blah blah blah"); } ImGui::NextColumn();
+        if (ImGui::CollapsingHeader("Category B")) { ImGui::Text("Blah blah blah"); } ImGui::NextColumn();
+        if (ImGui::CollapsingHeader("Category C")) { ImGui::Text("Blah blah blah"); } ImGui::NextColumn();
+        ImGui::Columns(1);
+        ImGui::Separator();
+        ImGui::TreePop();
+    }
+
+    // Word wrapping
+    if (ImGui::TreeNode("Word-wrapping"))
+    {
+        ImGui::Columns(2, "word-wrapping");
+        ImGui::Separator();
+        ImGui::TextWrapped("The quick brown fox jumps over the lazy dog.");
+        ImGui::TextWrapped("Hello Left");
+        ImGui::NextColumn();
+        ImGui::TextWrapped("The quick brown fox jumps over the lazy dog.");
+        ImGui::TextWrapped("Hello Right");
+        ImGui::Columns(1);
+        ImGui::Separator();
+        ImGui::TreePop();
+    }
+
+    // Columns inside a child window created with the horizontal scrollbar flag
+    if (ImGui::TreeNode("Horizontal Scrolling"))
+    {
+        // Request a 1500-wide content area for the next window, i.e. the child created just below
+        ImGui::SetNextWindowContentSize(ImVec2(1500.0f, 0.0f));
+        ImVec2 child_size = ImVec2(0, ImGui::GetFontSize() * 20.0f);
+        ImGui::BeginChild("##ScrollingRegion", child_size, false, ImGuiWindowFlags_HorizontalScrollbar);
+        const int COLUMNS_COUNT = 10; // Shared by Columns() and the per-line cell loop so they cannot drift apart
+        ImGui::Columns(COLUMNS_COUNT);
+
+        // Also demonstrate using clipper for large vertical lists
+        const int ITEMS_COUNT = 2000;
+        ImGuiListClipper clipper;
+        clipper.Begin(ITEMS_COUNT);
+        while (clipper.Step())
+        {
+            // Only the lines in [DisplayStart, DisplayEnd) reported by the clipper are submitted
+            for (int i = clipper.DisplayStart; i < clipper.DisplayEnd; i++)
+                for (int j = 0; j < COLUMNS_COUNT; j++)
+                {
+                    ImGui::Text("Line %d Column %d...", i, j);
+                    ImGui::NextColumn();
+                }
+        }
+        ImGui::Columns(1);
+        ImGui::EndChild();
+        ImGui::TreePop();
+    }
+
+    // Tree nodes in the first column, their contents in the second one
+    if (ImGui::TreeNode("Tree"))
+    {
+        ImGui::Columns(2, "tree", true);
+        for (int x = 0; x < 3; x++)
+        {
+            // The loop index is used as the node identifier
+            bool open1 = ImGui::TreeNode((void*)(intptr_t)x, "Node%d", x);
+            ImGui::NextColumn();
+            ImGui::Text("Node contents");
+            ImGui::NextColumn();
+            if (open1)
+            {
+                for (int y = 0; y < 3; y++)
+                {
+                    bool open2 = ImGui::TreeNode((void*)(intptr_t)y, "Node%d.%d", x, y);
+                    ImGui::NextColumn();
+                    ImGui::Text("Node contents");
+                    if (open2)
+                    {
+                        ImGui::Text("Even more contents");
+                        if (ImGui::TreeNode("Tree in column"))
+                        {
+                            ImGui::Text("The quick brown fox jumps over the lazy dog");
+                            ImGui::TreePop();
+                        }
+                    }
+                    ImGui::NextColumn();
+                    // TreePop() for open2 is deferred until we are back in the first column
+                    if (open2)
+                        ImGui::TreePop();
+                }
+                ImGui::TreePop();
+            }
+        }
+        ImGui::Columns(1);
+        ImGui::TreePop();
+    }
+
+    // Matches the "Legacy Columns API" node opened at the top of the function
+    ImGui::TreePop();
+}
+
+static void ShowDemoWindowMisc()
+{
+ 
if (ImGui::CollapsingHeader("Filtering")) + { + // Helper class to easy setup a text filter. + // You may want to implement a more feature-full filtering scheme in your own application. + static ImGuiTextFilter filter; + ImGui::Text("Filter usage:\n" + " \"\" display all lines\n" + " \"xxx\" display lines containing \"xxx\"\n" + " \"xxx,yyy\" display lines containing \"xxx\" or \"yyy\"\n" + " \"-xxx\" hide lines containing \"xxx\""); + filter.Draw(); + const char* lines[] = { "aaa1.c", "bbb1.c", "ccc1.c", "aaa2.cpp", "bbb2.cpp", "ccc2.cpp", "abc.h", "hello, world" }; + for (int i = 0; i < IM_ARRAYSIZE(lines); i++) + if (filter.PassFilter(lines[i])) + ImGui::BulletText("%s", lines[i]); + } + + if (ImGui::CollapsingHeader("Inputs, Navigation & Focus")) + { + ImGuiIO& io = ImGui::GetIO(); + + // Display ImGuiIO output flags + ImGui::Text("WantCaptureMouse: %d", io.WantCaptureMouse); + ImGui::Text("WantCaptureKeyboard: %d", io.WantCaptureKeyboard); + ImGui::Text("WantTextInput: %d", io.WantTextInput); + ImGui::Text("WantSetMousePos: %d", io.WantSetMousePos); + ImGui::Text("NavActive: %d, NavVisible: %d", io.NavActive, io.NavVisible); + + // Display Keyboard/Mouse state + if (ImGui::TreeNode("Keyboard, Mouse & Navigation State")) + { + if (ImGui::IsMousePosValid()) + ImGui::Text("Mouse pos: (%g, %g)", io.MousePos.x, io.MousePos.y); + else + ImGui::Text("Mouse pos: "); + ImGui::Text("Mouse delta: (%g, %g)", io.MouseDelta.x, io.MouseDelta.y); + ImGui::Text("Mouse down:"); for (int i = 0; i < IM_ARRAYSIZE(io.MouseDown); i++) if (io.MouseDownDuration[i] >= 0.0f) { ImGui::SameLine(); ImGui::Text("b%d (%.02f secs)", i, io.MouseDownDuration[i]); } + ImGui::Text("Mouse clicked:"); for (int i = 0; i < IM_ARRAYSIZE(io.MouseDown); i++) if (ImGui::IsMouseClicked(i)) { ImGui::SameLine(); ImGui::Text("b%d", i); } + ImGui::Text("Mouse dblclick:"); for (int i = 0; i < IM_ARRAYSIZE(io.MouseDown); i++) if (ImGui::IsMouseDoubleClicked(i)) { ImGui::SameLine(); ImGui::Text("b%d", i); } + 
ImGui::Text("Mouse released:"); for (int i = 0; i < IM_ARRAYSIZE(io.MouseDown); i++) if (ImGui::IsMouseReleased(i)) { ImGui::SameLine(); ImGui::Text("b%d", i); } + ImGui::Text("Mouse wheel: %.1f", io.MouseWheel); + + ImGui::Text("Keys down:"); for (int i = 0; i < IM_ARRAYSIZE(io.KeysDown); i++) if (io.KeysDownDuration[i] >= 0.0f) { ImGui::SameLine(); ImGui::Text("%d (0x%X) (%.02f secs)", i, i, io.KeysDownDuration[i]); } + ImGui::Text("Keys pressed:"); for (int i = 0; i < IM_ARRAYSIZE(io.KeysDown); i++) if (ImGui::IsKeyPressed(i)) { ImGui::SameLine(); ImGui::Text("%d (0x%X)", i, i); } + ImGui::Text("Keys release:"); for (int i = 0; i < IM_ARRAYSIZE(io.KeysDown); i++) if (ImGui::IsKeyReleased(i)) { ImGui::SameLine(); ImGui::Text("%d (0x%X)", i, i); } + ImGui::Text("Keys mods: %s%s%s%s", io.KeyCtrl ? "CTRL " : "", io.KeyShift ? "SHIFT " : "", io.KeyAlt ? "ALT " : "", io.KeySuper ? "SUPER " : ""); + ImGui::Text("Chars queue:"); for (int i = 0; i < io.InputQueueCharacters.Size; i++) { ImWchar c = io.InputQueueCharacters[i]; ImGui::SameLine(); ImGui::Text("\'%c\' (0x%04X)", (c > ' ' && c <= 255) ? (char)c : '?', c); } // FIXME: We should convert 'c' to UTF-8 here but the functions are not public. 
+ + ImGui::Text("NavInputs down:"); for (int i = 0; i < IM_ARRAYSIZE(io.NavInputs); i++) if (io.NavInputs[i] > 0.0f) { ImGui::SameLine(); ImGui::Text("[%d] %.2f", i, io.NavInputs[i]); } + ImGui::Text("NavInputs pressed:"); for (int i = 0; i < IM_ARRAYSIZE(io.NavInputs); i++) if (io.NavInputsDownDuration[i] == 0.0f) { ImGui::SameLine(); ImGui::Text("[%d]", i); } + ImGui::Text("NavInputs duration:"); for (int i = 0; i < IM_ARRAYSIZE(io.NavInputs); i++) if (io.NavInputsDownDuration[i] >= 0.0f) { ImGui::SameLine(); ImGui::Text("[%d] %.2f", i, io.NavInputsDownDuration[i]); } + + ImGui::Button("Hovering me sets the\nkeyboard capture flag"); + if (ImGui::IsItemHovered()) + ImGui::CaptureKeyboardFromApp(true); + ImGui::SameLine(); + ImGui::Button("Holding me clears the\nthe keyboard capture flag"); + if (ImGui::IsItemActive()) + ImGui::CaptureKeyboardFromApp(false); + + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Tabbing")) + { + ImGui::Text("Use TAB/SHIFT+TAB to cycle through keyboard editable fields."); + static char buf[32] = "hello"; + ImGui::InputText("1", buf, IM_ARRAYSIZE(buf)); + ImGui::InputText("2", buf, IM_ARRAYSIZE(buf)); + ImGui::InputText("3", buf, IM_ARRAYSIZE(buf)); + ImGui::PushAllowKeyboardFocus(false); + ImGui::InputText("4 (tab skip)", buf, IM_ARRAYSIZE(buf)); + //ImGui::SameLine(); HelpMarker("Use ImGui::PushAllowKeyboardFocus(bool) to disable tabbing through certain widgets."); + ImGui::PopAllowKeyboardFocus(); + ImGui::InputText("5", buf, IM_ARRAYSIZE(buf)); + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Focus from code")) + { + bool focus_1 = ImGui::Button("Focus on 1"); ImGui::SameLine(); + bool focus_2 = ImGui::Button("Focus on 2"); ImGui::SameLine(); + bool focus_3 = ImGui::Button("Focus on 3"); + int has_focus = 0; + static char buf[128] = "click on a button to set focus"; + + if (focus_1) ImGui::SetKeyboardFocusHere(); + ImGui::InputText("1", buf, IM_ARRAYSIZE(buf)); + if (ImGui::IsItemActive()) has_focus = 1; + + if (focus_2) 
ImGui::SetKeyboardFocusHere(); + ImGui::InputText("2", buf, IM_ARRAYSIZE(buf)); + if (ImGui::IsItemActive()) has_focus = 2; + + ImGui::PushAllowKeyboardFocus(false); + if (focus_3) ImGui::SetKeyboardFocusHere(); + ImGui::InputText("3 (tab skip)", buf, IM_ARRAYSIZE(buf)); + if (ImGui::IsItemActive()) has_focus = 3; + ImGui::PopAllowKeyboardFocus(); + + if (has_focus) + ImGui::Text("Item with focus: %d", has_focus); + else + ImGui::Text("Item with focus: "); + + // Use >= 0 parameter to SetKeyboardFocusHere() to focus an upcoming item + static float f3[3] = { 0.0f, 0.0f, 0.0f }; + int focus_ahead = -1; + if (ImGui::Button("Focus on X")) { focus_ahead = 0; } ImGui::SameLine(); + if (ImGui::Button("Focus on Y")) { focus_ahead = 1; } ImGui::SameLine(); + if (ImGui::Button("Focus on Z")) { focus_ahead = 2; } + if (focus_ahead != -1) ImGui::SetKeyboardFocusHere(focus_ahead); + ImGui::SliderFloat3("Float3", &f3[0], 0.0f, 1.0f); + + ImGui::TextWrapped("NB: Cursor & selection are preserved when refocusing last used item in code."); + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Dragging")) + { + ImGui::TextWrapped("You can use ImGui::GetMouseDragDelta(0) to query for the dragged amount on any widget."); + for (int button = 0; button < 3; button++) + { + ImGui::Text("IsMouseDragging(%d):", button); + ImGui::Text(" w/ default threshold: %d,", ImGui::IsMouseDragging(button)); + ImGui::Text(" w/ zero threshold: %d,", ImGui::IsMouseDragging(button, 0.0f)); + ImGui::Text(" w/ large threshold: %d,", ImGui::IsMouseDragging(button, 20.0f)); + } + + ImGui::Button("Drag Me"); + if (ImGui::IsItemActive()) + ImGui::GetForegroundDrawList()->AddLine(io.MouseClickedPos[0], io.MousePos, ImGui::GetColorU32(ImGuiCol_Button), 4.0f); // Draw a line between the button and the mouse cursor + + // Drag operations gets "unlocked" when the mouse has moved past a certain threshold + // (the default threshold is stored in io.MouseDragThreshold). 
You can request a lower or higher + // threshold using the second parameter of IsMouseDragging() and GetMouseDragDelta(). + ImVec2 value_raw = ImGui::GetMouseDragDelta(0, 0.0f); + ImVec2 value_with_lock_threshold = ImGui::GetMouseDragDelta(0); + ImVec2 mouse_delta = io.MouseDelta; + ImGui::Text("GetMouseDragDelta(0):"); + ImGui::Text(" w/ default threshold: (%.1f, %.1f)", value_with_lock_threshold.x, value_with_lock_threshold.y); + ImGui::Text(" w/ zero threshold: (%.1f, %.1f)", value_raw.x, value_raw.y); + ImGui::Text("io.MouseDelta: (%.1f, %.1f)", mouse_delta.x, mouse_delta.y); + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Mouse cursors")) + { + const char* mouse_cursors_names[] = { "Arrow", "TextInput", "ResizeAll", "ResizeNS", "ResizeEW", "ResizeNESW", "ResizeNWSE", "Hand", "NotAllowed" }; + IM_ASSERT(IM_ARRAYSIZE(mouse_cursors_names) == ImGuiMouseCursor_COUNT); + + ImGuiMouseCursor current = ImGui::GetMouseCursor(); + ImGui::Text("Current mouse cursor = %d: %s", current, mouse_cursors_names[current]); + ImGui::Text("Hover to see mouse cursors:"); + ImGui::SameLine(); HelpMarker( + "Your application can render a different mouse cursor based on what ImGui::GetMouseCursor() returns. 
" + "If software cursor rendering (io.MouseDrawCursor) is set ImGui will draw the right cursor for you, " + "otherwise your backend needs to handle it."); + for (int i = 0; i < ImGuiMouseCursor_COUNT; i++) + { + char label[32]; + sprintf(label, "Mouse cursor %d: %s", i, mouse_cursors_names[i]); + ImGui::Bullet(); ImGui::Selectable(label, false); + if (ImGui::IsItemHovered()) + ImGui::SetMouseCursor(i); + } + ImGui::TreePop(); + } + } +} + +//----------------------------------------------------------------------------- +// [SECTION] About Window / ShowAboutWindow() +// Access from Dear ImGui Demo -> Tools -> About +//----------------------------------------------------------------------------- + +void ImGui::ShowAboutWindow(bool* p_open) +{ + if (!ImGui::Begin("About Dear ImGui", p_open, ImGuiWindowFlags_AlwaysAutoResize)) + { + ImGui::End(); + return; + } + ImGui::Text("Dear ImGui %s", ImGui::GetVersion()); + ImGui::Separator(); + ImGui::Text("By Omar Cornut and all Dear ImGui contributors."); + ImGui::Text("Dear ImGui is licensed under the MIT License, see LICENSE for more information."); + + static bool show_config_info = false; + ImGui::Checkbox("Config/Build Information", &show_config_info); + if (show_config_info) + { + ImGuiIO& io = ImGui::GetIO(); + ImGuiStyle& style = ImGui::GetStyle(); + + bool copy_to_clipboard = ImGui::Button("Copy to clipboard"); + ImVec2 child_size = ImVec2(0, ImGui::GetTextLineHeightWithSpacing() * 18); + ImGui::BeginChildFrame(ImGui::GetID("cfg_infos"), child_size, ImGuiWindowFlags_NoMove); + if (copy_to_clipboard) + { + ImGui::LogToClipboard(); + ImGui::LogText("```\n"); // Back quotes will make text appears without formatting when pasting on GitHub + } + + ImGui::Text("Dear ImGui %s (%d)", IMGUI_VERSION, IMGUI_VERSION_NUM); + ImGui::Separator(); + ImGui::Text("sizeof(size_t): %d, sizeof(ImDrawIdx): %d, sizeof(ImDrawVert): %d", (int)sizeof(size_t), (int)sizeof(ImDrawIdx), (int)sizeof(ImDrawVert)); + ImGui::Text("define: 
__cplusplus=%d", (int)__cplusplus); +#ifdef IMGUI_DISABLE_OBSOLETE_FUNCTIONS + ImGui::Text("define: IMGUI_DISABLE_OBSOLETE_FUNCTIONS"); +#endif +#ifdef IMGUI_DISABLE_WIN32_DEFAULT_CLIPBOARD_FUNCTIONS + ImGui::Text("define: IMGUI_DISABLE_WIN32_DEFAULT_CLIPBOARD_FUNCTIONS"); +#endif +#ifdef IMGUI_DISABLE_WIN32_DEFAULT_IME_FUNCTIONS + ImGui::Text("define: IMGUI_DISABLE_WIN32_DEFAULT_IME_FUNCTIONS"); +#endif +#ifdef IMGUI_DISABLE_WIN32_FUNCTIONS + ImGui::Text("define: IMGUI_DISABLE_WIN32_FUNCTIONS"); +#endif +#ifdef IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS + ImGui::Text("define: IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS"); +#endif +#ifdef IMGUI_DISABLE_DEFAULT_MATH_FUNCTIONS + ImGui::Text("define: IMGUI_DISABLE_DEFAULT_MATH_FUNCTIONS"); +#endif +#ifdef IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS + ImGui::Text("define: IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS"); +#endif +#ifdef IMGUI_DISABLE_FILE_FUNCTIONS + ImGui::Text("define: IMGUI_DISABLE_FILE_FUNCTIONS"); +#endif +#ifdef IMGUI_DISABLE_DEFAULT_ALLOCATORS + ImGui::Text("define: IMGUI_DISABLE_DEFAULT_ALLOCATORS"); +#endif +#ifdef IMGUI_USE_BGRA_PACKED_COLOR + ImGui::Text("define: IMGUI_USE_BGRA_PACKED_COLOR"); +#endif +#ifdef _WIN32 + ImGui::Text("define: _WIN32"); +#endif +#ifdef _WIN64 + ImGui::Text("define: _WIN64"); +#endif +#ifdef __linux__ + ImGui::Text("define: __linux__"); +#endif +#ifdef __APPLE__ + ImGui::Text("define: __APPLE__"); +#endif +#ifdef _MSC_VER + ImGui::Text("define: _MSC_VER=%d", _MSC_VER); +#endif +#ifdef _MSVC_LANG + ImGui::Text("define: _MSVC_LANG=%d", (int)_MSVC_LANG); +#endif +#ifdef __MINGW32__ + ImGui::Text("define: __MINGW32__"); +#endif +#ifdef __MINGW64__ + ImGui::Text("define: __MINGW64__"); +#endif +#ifdef __GNUC__ + ImGui::Text("define: __GNUC__=%d", (int)__GNUC__); +#endif +#ifdef __clang_version__ + ImGui::Text("define: __clang_version__=%s", __clang_version__); +#endif + ImGui::Separator(); + ImGui::Text("io.BackendPlatformName: %s", io.BackendPlatformName ? 
io.BackendPlatformName : "NULL"); + ImGui::Text("io.BackendRendererName: %s", io.BackendRendererName ? io.BackendRendererName : "NULL"); + ImGui::Text("io.ConfigFlags: 0x%08X", io.ConfigFlags); + if (io.ConfigFlags & ImGuiConfigFlags_NavEnableKeyboard) ImGui::Text(" NavEnableKeyboard"); + if (io.ConfigFlags & ImGuiConfigFlags_NavEnableGamepad) ImGui::Text(" NavEnableGamepad"); + if (io.ConfigFlags & ImGuiConfigFlags_NavEnableSetMousePos) ImGui::Text(" NavEnableSetMousePos"); + if (io.ConfigFlags & ImGuiConfigFlags_NavNoCaptureKeyboard) ImGui::Text(" NavNoCaptureKeyboard"); + if (io.ConfigFlags & ImGuiConfigFlags_NoMouse) ImGui::Text(" NoMouse"); + if (io.ConfigFlags & ImGuiConfigFlags_NoMouseCursorChange) ImGui::Text(" NoMouseCursorChange"); + if (io.MouseDrawCursor) ImGui::Text("io.MouseDrawCursor"); + if (io.ConfigMacOSXBehaviors) ImGui::Text("io.ConfigMacOSXBehaviors"); + if (io.ConfigInputTextCursorBlink) ImGui::Text("io.ConfigInputTextCursorBlink"); + if (io.ConfigWindowsResizeFromEdges) ImGui::Text("io.ConfigWindowsResizeFromEdges"); + if (io.ConfigWindowsMoveFromTitleBarOnly) ImGui::Text("io.ConfigWindowsMoveFromTitleBarOnly"); + if (io.ConfigMemoryCompactTimer >= 0.0f) ImGui::Text("io.ConfigMemoryCompactTimer = %.1f", io.ConfigMemoryCompactTimer); + ImGui::Text("io.BackendFlags: 0x%08X", io.BackendFlags); + if (io.BackendFlags & ImGuiBackendFlags_HasGamepad) ImGui::Text(" HasGamepad"); + if (io.BackendFlags & ImGuiBackendFlags_HasMouseCursors) ImGui::Text(" HasMouseCursors"); + if (io.BackendFlags & ImGuiBackendFlags_HasSetMousePos) ImGui::Text(" HasSetMousePos"); + if (io.BackendFlags & ImGuiBackendFlags_RendererHasVtxOffset) ImGui::Text(" RendererHasVtxOffset"); + ImGui::Separator(); + ImGui::Text("io.Fonts: %d fonts, Flags: 0x%08X, TexSize: %d,%d", io.Fonts->Fonts.Size, io.Fonts->Flags, io.Fonts->TexWidth, io.Fonts->TexHeight); + ImGui::Text("io.DisplaySize: %.2f,%.2f", io.DisplaySize.x, io.DisplaySize.y); + ImGui::Text("io.DisplayFramebufferScale: 
%.2f,%.2f", io.DisplayFramebufferScale.x, io.DisplayFramebufferScale.y); + ImGui::Separator(); + ImGui::Text("style.WindowPadding: %.2f,%.2f", style.WindowPadding.x, style.WindowPadding.y); + ImGui::Text("style.WindowBorderSize: %.2f", style.WindowBorderSize); + ImGui::Text("style.FramePadding: %.2f,%.2f", style.FramePadding.x, style.FramePadding.y); + ImGui::Text("style.FrameRounding: %.2f", style.FrameRounding); + ImGui::Text("style.FrameBorderSize: %.2f", style.FrameBorderSize); + ImGui::Text("style.ItemSpacing: %.2f,%.2f", style.ItemSpacing.x, style.ItemSpacing.y); + ImGui::Text("style.ItemInnerSpacing: %.2f,%.2f", style.ItemInnerSpacing.x, style.ItemInnerSpacing.y); + + if (copy_to_clipboard) + { + ImGui::LogText("\n```\n"); + ImGui::LogFinish(); + } + ImGui::EndChildFrame(); + } + ImGui::End(); +} + +//----------------------------------------------------------------------------- +// [SECTION] Style Editor / ShowStyleEditor() +//----------------------------------------------------------------------------- +// - ShowStyleSelector() +// - ShowFontSelector() +// - ShowStyleEditor() +//----------------------------------------------------------------------------- + +// Demo helper function to select among default colors. See ShowStyleEditor() for more advanced options. +// Here we use the simplified Combo() api that packs items into a single literal string. +// Useful for quick combo boxes where the choices are known locally. +bool ImGui::ShowStyleSelector(const char* label) +{ + static int style_idx = -1; + if (ImGui::Combo(label, &style_idx, "Dark\0Light\0Classic\0")) + { + switch (style_idx) + { + case 0: ImGui::StyleColorsDark(); break; + case 1: ImGui::StyleColorsLight(); break; + case 2: ImGui::StyleColorsClassic(); break; + } + return true; + } + return false; +} + +// Demo helper function to select among loaded fonts. +// Here we use the regular BeginCombo()/EndCombo() api which is more the more flexible one. 
+void ImGui::ShowFontSelector(const char* label) +{ + ImGuiIO& io = ImGui::GetIO(); + ImFont* font_current = ImGui::GetFont(); + if (ImGui::BeginCombo(label, font_current->GetDebugName())) + { + for (int n = 0; n < io.Fonts->Fonts.Size; n++) + { + ImFont* font = io.Fonts->Fonts[n]; + ImGui::PushID((void*)font); + if (ImGui::Selectable(font->GetDebugName(), font == font_current)) + io.FontDefault = font; + ImGui::PopID(); + } + ImGui::EndCombo(); + } + ImGui::SameLine(); + HelpMarker( + "- Load additional fonts with io.Fonts->AddFontFromFileTTF().\n" + "- The font atlas is built when calling io.Fonts->GetTexDataAsXXXX() or io.Fonts->Build().\n" + "- Read FAQ and docs/FONTS.md for more details.\n" + "- If you need to add/remove fonts at runtime (e.g. for DPI change), do it before calling NewFrame()."); +} + +// [Internal] Display details for a single font, called by ShowStyleEditor(). +static void NodeFont(ImFont* font) +{ + ImGuiIO& io = ImGui::GetIO(); + ImGuiStyle& style = ImGui::GetStyle(); + bool font_details_opened = ImGui::TreeNode(font, "Font: \"%s\"\n%.2f px, %d glyphs, %d file(s)", + font->ConfigData ? font->ConfigData[0].Name : "", font->FontSize, font->Glyphs.Size, font->ConfigDataCount); + ImGui::SameLine(); if (ImGui::SmallButton("Set as default")) { io.FontDefault = font; } + if (!font_details_opened) + return; + + ImGui::PushFont(font); + ImGui::Text("The quick brown fox jumps over the lazy dog"); + ImGui::PopFont(); + ImGui::DragFloat("Font scale", &font->Scale, 0.005f, 0.3f, 2.0f, "%.1f"); // Scale only this font + ImGui::SameLine(); HelpMarker( + "Note than the default embedded font is NOT meant to be scaled.\n\n" + "Font are currently rendered into bitmaps at a given size at the time of building the atlas. " + "You may oversample them to get some flexibility with scaling. 
" + "You can also render at multiple sizes and select which one to use at runtime.\n\n" + "(Glimmer of hope: the atlas system will be rewritten in the future to make scaling more flexible.)"); + ImGui::Text("Ascent: %f, Descent: %f, Height: %f", font->Ascent, font->Descent, font->Ascent - font->Descent); + ImGui::Text("Fallback character: '%c' (U+%04X)", font->FallbackChar, font->FallbackChar); + ImGui::Text("Ellipsis character: '%c' (U+%04X)", font->EllipsisChar, font->EllipsisChar); + const int surface_sqrt = (int)sqrtf((float)font->MetricsTotalSurface); + ImGui::Text("Texture Area: about %d px ~%dx%d px", font->MetricsTotalSurface, surface_sqrt, surface_sqrt); + for (int config_i = 0; config_i < font->ConfigDataCount; config_i++) + if (font->ConfigData) + if (const ImFontConfig* cfg = &font->ConfigData[config_i]) + ImGui::BulletText("Input %d: \'%s\', Oversample: (%d,%d), PixelSnapH: %d, Offset: (%.1f,%.1f)", + config_i, cfg->Name, cfg->OversampleH, cfg->OversampleV, cfg->PixelSnapH, cfg->GlyphOffset.x, cfg->GlyphOffset.y); + if (ImGui::TreeNode("Glyphs", "Glyphs (%d)", font->Glyphs.Size)) + { + // Display all glyphs of the fonts in separate pages of 256 characters + const ImU32 glyph_col = ImGui::GetColorU32(ImGuiCol_Text); + for (unsigned int base = 0; base <= IM_UNICODE_CODEPOINT_MAX; base += 256) + { + // Skip ahead if a large bunch of glyphs are not present in the font (test in chunks of 4k) + // This is only a small optimization to reduce the number of iterations when IM_UNICODE_MAX_CODEPOINT + // is large // (if ImWchar==ImWchar32 we will do at least about 272 queries here) + if (!(base & 4095) && font->IsGlyphRangeUnused(base, base + 4095)) + { + base += 4096 - 256; + continue; + } + + int count = 0; + for (unsigned int n = 0; n < 256; n++) + if (font->FindGlyphNoFallback((ImWchar)(base + n))) + count++; + if (count <= 0) + continue; + if (!ImGui::TreeNode((void*)(intptr_t)base, "U+%04X..U+%04X (%d %s)", base, base + 255, count, count > 1 ? 
"glyphs" : "glyph")) + continue; + float cell_size = font->FontSize * 1; + float cell_spacing = style.ItemSpacing.y; + ImVec2 base_pos = ImGui::GetCursorScreenPos(); + ImDrawList* draw_list = ImGui::GetWindowDrawList(); + for (unsigned int n = 0; n < 256; n++) + { + // We use ImFont::RenderChar as a shortcut because we don't have UTF-8 conversion functions + // available here and thus cannot easily generate a zero-terminated UTF-8 encoded string. + ImVec2 cell_p1(base_pos.x + (n % 16) * (cell_size + cell_spacing), base_pos.y + (n / 16) * (cell_size + cell_spacing)); + ImVec2 cell_p2(cell_p1.x + cell_size, cell_p1.y + cell_size); + const ImFontGlyph* glyph = font->FindGlyphNoFallback((ImWchar)(base + n)); + draw_list->AddRect(cell_p1, cell_p2, glyph ? IM_COL32(255, 255, 255, 100) : IM_COL32(255, 255, 255, 50)); + if (glyph) + font->RenderChar(draw_list, cell_size, cell_p1, glyph_col, (ImWchar)(base + n)); + if (glyph && ImGui::IsMouseHoveringRect(cell_p1, cell_p2)) + { + ImGui::BeginTooltip(); + ImGui::Text("Codepoint: U+%04X", base + n); + ImGui::Separator(); + ImGui::Text("Visible: %d", glyph->Visible); + ImGui::Text("AdvanceX: %.1f", glyph->AdvanceX); + ImGui::Text("Pos: (%.2f,%.2f)->(%.2f,%.2f)", glyph->X0, glyph->Y0, glyph->X1, glyph->Y1); + ImGui::Text("UV: (%.3f,%.3f)->(%.3f,%.3f)", glyph->U0, glyph->V0, glyph->U1, glyph->V1); + ImGui::EndTooltip(); + } + } + ImGui::Dummy(ImVec2((cell_size + cell_spacing) * 16, (cell_size + cell_spacing) * 16)); + ImGui::TreePop(); + } + ImGui::TreePop(); + } + ImGui::TreePop(); +} + +void ImGui::ShowStyleEditor(ImGuiStyle* ref) +{ + // You can pass in a reference ImGuiStyle structure to compare to, revert to and save to + // (without a reference style pointer, we will use one compared locally as a reference) + ImGuiStyle& style = ImGui::GetStyle(); + static ImGuiStyle ref_saved_style; + + // Default to using internal storage as reference + static bool init = true; + if (init && ref == NULL) + ref_saved_style = style; + init 
= false; + if (ref == NULL) + ref = &ref_saved_style; + + ImGui::PushItemWidth(ImGui::GetWindowWidth() * 0.50f); + + if (ImGui::ShowStyleSelector("Colors##Selector")) + ref_saved_style = style; + ImGui::ShowFontSelector("Fonts##Selector"); + + // Simplified Settings (expose floating-pointer border sizes as boolean representing 0.0f or 1.0f) + if (ImGui::SliderFloat("FrameRounding", &style.FrameRounding, 0.0f, 12.0f, "%.0f")) + style.GrabRounding = style.FrameRounding; // Make GrabRounding always the same value as FrameRounding + { bool border = (style.WindowBorderSize > 0.0f); if (ImGui::Checkbox("WindowBorder", &border)) { style.WindowBorderSize = border ? 1.0f : 0.0f; } } + ImGui::SameLine(); + { bool border = (style.FrameBorderSize > 0.0f); if (ImGui::Checkbox("FrameBorder", &border)) { style.FrameBorderSize = border ? 1.0f : 0.0f; } } + ImGui::SameLine(); + { bool border = (style.PopupBorderSize > 0.0f); if (ImGui::Checkbox("PopupBorder", &border)) { style.PopupBorderSize = border ? 1.0f : 0.0f; } } + + // Save/Revert button + if (ImGui::Button("Save Ref")) + *ref = ref_saved_style = style; + ImGui::SameLine(); + if (ImGui::Button("Revert Ref")) + style = *ref; + ImGui::SameLine(); + HelpMarker( + "Save/Revert in local non-persistent storage. Default Colors definition are not affected. 
" + "Use \"Export\" below to save them somewhere."); + + ImGui::Separator(); + + if (ImGui::BeginTabBar("##tabs", ImGuiTabBarFlags_None)) + { + if (ImGui::BeginTabItem("Sizes")) + { + ImGui::Text("Main"); + ImGui::SliderFloat2("WindowPadding", (float*)&style.WindowPadding, 0.0f, 20.0f, "%.0f"); + ImGui::SliderFloat2("FramePadding", (float*)&style.FramePadding, 0.0f, 20.0f, "%.0f"); + ImGui::SliderFloat2("CellPadding", (float*)&style.CellPadding, 0.0f, 20.0f, "%.0f"); + ImGui::SliderFloat2("ItemSpacing", (float*)&style.ItemSpacing, 0.0f, 20.0f, "%.0f"); + ImGui::SliderFloat2("ItemInnerSpacing", (float*)&style.ItemInnerSpacing, 0.0f, 20.0f, "%.0f"); + ImGui::SliderFloat2("TouchExtraPadding", (float*)&style.TouchExtraPadding, 0.0f, 10.0f, "%.0f"); + ImGui::SliderFloat("IndentSpacing", &style.IndentSpacing, 0.0f, 30.0f, "%.0f"); + ImGui::SliderFloat("ScrollbarSize", &style.ScrollbarSize, 1.0f, 20.0f, "%.0f"); + ImGui::SliderFloat("GrabMinSize", &style.GrabMinSize, 1.0f, 20.0f, "%.0f"); + ImGui::Text("Borders"); + ImGui::SliderFloat("WindowBorderSize", &style.WindowBorderSize, 0.0f, 1.0f, "%.0f"); + ImGui::SliderFloat("ChildBorderSize", &style.ChildBorderSize, 0.0f, 1.0f, "%.0f"); + ImGui::SliderFloat("PopupBorderSize", &style.PopupBorderSize, 0.0f, 1.0f, "%.0f"); + ImGui::SliderFloat("FrameBorderSize", &style.FrameBorderSize, 0.0f, 1.0f, "%.0f"); + ImGui::SliderFloat("TabBorderSize", &style.TabBorderSize, 0.0f, 1.0f, "%.0f"); + ImGui::Text("Rounding"); + ImGui::SliderFloat("WindowRounding", &style.WindowRounding, 0.0f, 12.0f, "%.0f"); + ImGui::SliderFloat("ChildRounding", &style.ChildRounding, 0.0f, 12.0f, "%.0f"); + ImGui::SliderFloat("FrameRounding", &style.FrameRounding, 0.0f, 12.0f, "%.0f"); + ImGui::SliderFloat("PopupRounding", &style.PopupRounding, 0.0f, 12.0f, "%.0f"); + ImGui::SliderFloat("ScrollbarRounding", &style.ScrollbarRounding, 0.0f, 12.0f, "%.0f"); + ImGui::SliderFloat("GrabRounding", &style.GrabRounding, 0.0f, 12.0f, "%.0f"); + 
ImGui::SliderFloat("LogSliderDeadzone", &style.LogSliderDeadzone, 0.0f, 12.0f, "%.0f"); + ImGui::SliderFloat("TabRounding", &style.TabRounding, 0.0f, 12.0f, "%.0f"); + ImGui::Text("Alignment"); + ImGui::SliderFloat2("WindowTitleAlign", (float*)&style.WindowTitleAlign, 0.0f, 1.0f, "%.2f"); + int window_menu_button_position = style.WindowMenuButtonPosition + 1; + if (ImGui::Combo("WindowMenuButtonPosition", (int*)&window_menu_button_position, "None\0Left\0Right\0")) + style.WindowMenuButtonPosition = window_menu_button_position - 1; + ImGui::Combo("ColorButtonPosition", (int*)&style.ColorButtonPosition, "Left\0Right\0"); + ImGui::SliderFloat2("ButtonTextAlign", (float*)&style.ButtonTextAlign, 0.0f, 1.0f, "%.2f"); + ImGui::SameLine(); HelpMarker("Alignment applies when a button is larger than its text content."); + ImGui::SliderFloat2("SelectableTextAlign", (float*)&style.SelectableTextAlign, 0.0f, 1.0f, "%.2f"); + ImGui::SameLine(); HelpMarker("Alignment applies when a selectable is larger than its text content."); + ImGui::Text("Safe Area Padding"); + ImGui::SameLine(); HelpMarker("Adjust if you cannot see the edges of your screen (e.g. 
on a TV where scaling has not been configured)."); + ImGui::SliderFloat2("DisplaySafeAreaPadding", (float*)&style.DisplaySafeAreaPadding, 0.0f, 30.0f, "%.0f"); + ImGui::EndTabItem(); + } + + if (ImGui::BeginTabItem("Colors")) + { + static int output_dest = 0; + static bool output_only_modified = true; + if (ImGui::Button("Export")) + { + if (output_dest == 0) + ImGui::LogToClipboard(); + else + ImGui::LogToTTY(); + ImGui::LogText("ImVec4* colors = ImGui::GetStyle().Colors;" IM_NEWLINE); + for (int i = 0; i < ImGuiCol_COUNT; i++) + { + const ImVec4& col = style.Colors[i]; + const char* name = ImGui::GetStyleColorName(i); + if (!output_only_modified || memcmp(&col, &ref->Colors[i], sizeof(ImVec4)) != 0) + ImGui::LogText("colors[ImGuiCol_%s]%*s= ImVec4(%.2ff, %.2ff, %.2ff, %.2ff);" IM_NEWLINE, + name, 23 - (int)strlen(name), "", col.x, col.y, col.z, col.w); + } + ImGui::LogFinish(); + } + ImGui::SameLine(); ImGui::SetNextItemWidth(120); ImGui::Combo("##output_type", &output_dest, "To Clipboard\0To TTY\0"); + ImGui::SameLine(); ImGui::Checkbox("Only Modified Colors", &output_only_modified); + + static ImGuiTextFilter filter; + filter.Draw("Filter colors", ImGui::GetFontSize() * 16); + + static ImGuiColorEditFlags alpha_flags = 0; + if (ImGui::RadioButton("Opaque", alpha_flags == ImGuiColorEditFlags_None)) { alpha_flags = ImGuiColorEditFlags_None; } ImGui::SameLine(); + if (ImGui::RadioButton("Alpha", alpha_flags == ImGuiColorEditFlags_AlphaPreview)) { alpha_flags = ImGuiColorEditFlags_AlphaPreview; } ImGui::SameLine(); + if (ImGui::RadioButton("Both", alpha_flags == ImGuiColorEditFlags_AlphaPreviewHalf)) { alpha_flags = ImGuiColorEditFlags_AlphaPreviewHalf; } ImGui::SameLine(); + HelpMarker( + "In the color list:\n" + "Left-click on color square to open color picker,\n" + "Right-click to open edit options menu."); + + ImGui::BeginChild("##colors", ImVec2(0, 0), true, ImGuiWindowFlags_AlwaysVerticalScrollbar | ImGuiWindowFlags_AlwaysHorizontalScrollbar | 
ImGuiWindowFlags_NavFlattened); + ImGui::PushItemWidth(-160); + for (int i = 0; i < ImGuiCol_COUNT; i++) + { + const char* name = ImGui::GetStyleColorName(i); + if (!filter.PassFilter(name)) + continue; + ImGui::PushID(i); + ImGui::ColorEdit4("##color", (float*)&style.Colors[i], ImGuiColorEditFlags_AlphaBar | alpha_flags); + if (memcmp(&style.Colors[i], &ref->Colors[i], sizeof(ImVec4)) != 0) + { + // Tips: in a real user application, you may want to merge and use an icon font into the main font, + // so instead of "Save"/"Revert" you'd use icons! + // Read the FAQ and docs/FONTS.md about using icon fonts. It's really easy and super convenient! + ImGui::SameLine(0.0f, style.ItemInnerSpacing.x); if (ImGui::Button("Save")) { ref->Colors[i] = style.Colors[i]; } + ImGui::SameLine(0.0f, style.ItemInnerSpacing.x); if (ImGui::Button("Revert")) { style.Colors[i] = ref->Colors[i]; } + } + ImGui::SameLine(0.0f, style.ItemInnerSpacing.x); + ImGui::TextUnformatted(name); + ImGui::PopID(); + } + ImGui::PopItemWidth(); + ImGui::EndChild(); + + ImGui::EndTabItem(); + } + + if (ImGui::BeginTabItem("Fonts")) + { + ImGuiIO& io = ImGui::GetIO(); + ImFontAtlas* atlas = io.Fonts; + HelpMarker("Read FAQ and docs/FONTS.md for details on font loading."); + ImGui::PushItemWidth(120); + for (int i = 0; i < atlas->Fonts.Size; i++) + { + ImFont* font = atlas->Fonts[i]; + ImGui::PushID(font); + NodeFont(font); + ImGui::PopID(); + } + if (ImGui::TreeNode("Atlas texture", "Atlas texture (%dx%d pixels)", atlas->TexWidth, atlas->TexHeight)) + { + ImVec4 tint_col = ImVec4(1.0f, 1.0f, 1.0f, 1.0f); + ImVec4 border_col = ImVec4(1.0f, 1.0f, 1.0f, 0.5f); + ImGui::Image(atlas->TexID, ImVec2((float)atlas->TexWidth, (float)atlas->TexHeight), ImVec2(0, 0), ImVec2(1, 1), tint_col, border_col); + ImGui::TreePop(); + } + + // Post-baking font scaling. Note that this is NOT the nice way of scaling fonts, read below. 
+ // (we enforce hard clamping manually as by default DragFloat/SliderFloat allows CTRL+Click text to get out of bounds). + const float MIN_SCALE = 0.3f; + const float MAX_SCALE = 2.0f; + HelpMarker( + "Those are old settings provided for convenience.\n" + "However, the _correct_ way of scaling your UI is currently to reload your font at the designed size, " + "rebuild the font atlas, and call style.ScaleAllSizes() on a reference ImGuiStyle structure.\n" + "Using those settings here will give you poor quality results."); + static float window_scale = 1.0f; + if (ImGui::DragFloat("window scale", &window_scale, 0.005f, MIN_SCALE, MAX_SCALE, "%.2f", ImGuiSliderFlags_AlwaysClamp)) // Scale only this window + ImGui::SetWindowFontScale(window_scale); + ImGui::DragFloat("global scale", &io.FontGlobalScale, 0.005f, MIN_SCALE, MAX_SCALE, "%.2f", ImGuiSliderFlags_AlwaysClamp); // Scale everything + ImGui::PopItemWidth(); + + ImGui::EndTabItem(); + } + + if (ImGui::BeginTabItem("Rendering")) + { + ImGui::Checkbox("Anti-aliased lines", &style.AntiAliasedLines); + ImGui::SameLine(); + HelpMarker("When disabling anti-aliasing lines, you'll probably want to disable borders in your style as well."); + + ImGui::Checkbox("Anti-aliased lines use texture", &style.AntiAliasedLinesUseTex); + ImGui::SameLine(); + HelpMarker("Faster lines using texture data. Require backend to render with bilinear filtering (not point/nearest filtering)."); + + ImGui::Checkbox("Anti-aliased fill", &style.AntiAliasedFill); + ImGui::PushItemWidth(100); + ImGui::DragFloat("Curve Tessellation Tolerance", &style.CurveTessellationTol, 0.02f, 0.10f, 10.0f, "%.2f"); + if (style.CurveTessellationTol < 0.10f) style.CurveTessellationTol = 0.10f; + + // When editing the "Circle Segment Max Error" value, draw a preview of its effect on auto-tessellated circles. 
+ ImGui::DragFloat("Circle Segment Max Error", &style.CircleSegmentMaxError, 0.01f, 0.10f, 10.0f, "%.2f"); + if (ImGui::IsItemActive()) + { + ImGui::SetNextWindowPos(ImGui::GetCursorScreenPos()); + ImGui::BeginTooltip(); + ImVec2 p = ImGui::GetCursorScreenPos(); + ImDrawList* draw_list = ImGui::GetWindowDrawList(); + float RAD_MIN = 10.0f, RAD_MAX = 80.0f; + float off_x = 10.0f; + for (int n = 0; n < 7; n++) + { + const float rad = RAD_MIN + (RAD_MAX - RAD_MIN) * (float)n / (7.0f - 1.0f); + draw_list->AddCircle(ImVec2(p.x + off_x + rad, p.y + RAD_MAX), rad, ImGui::GetColorU32(ImGuiCol_Text), 0); + off_x += 10.0f + rad * 2.0f; + } + ImGui::Dummy(ImVec2(off_x, RAD_MAX * 2.0f)); + ImGui::EndTooltip(); + } + ImGui::SameLine(); + HelpMarker("When drawing circle primitives with \"num_segments == 0\" tesselation will be calculated automatically."); + + ImGui::DragFloat("Global Alpha", &style.Alpha, 0.005f, 0.20f, 1.0f, "%.2f"); // Not exposing zero here so user doesn't "lose" the UI (zero alpha clips all widgets). But application code could have a toggle to switch between zero and non-zero. + ImGui::PopItemWidth(); + + ImGui::EndTabItem(); + } + + ImGui::EndTabBar(); + } + + ImGui::PopItemWidth(); +} + +//----------------------------------------------------------------------------- +// [SECTION] Example App: Main Menu Bar / ShowExampleAppMainMenuBar() +//----------------------------------------------------------------------------- +// - ShowExampleAppMainMenuBar() +// - ShowExampleMenuFile() +//----------------------------------------------------------------------------- + +// Demonstrate creating a "main" fullscreen menu bar and populating it. +// Note the difference between BeginMainMenuBar() and BeginMenuBar(): +// - BeginMenuBar() = menu-bar inside current window (which needs the ImGuiWindowFlags_MenuBar flag!) +// - BeginMainMenuBar() = helper to create menu-bar-sized window at the top of the main viewport + call BeginMenuBar() into it. 
+static void ShowExampleAppMainMenuBar() +{ + if (ImGui::BeginMainMenuBar()) + { + if (ImGui::BeginMenu("File")) + { + ShowExampleMenuFile(); + ImGui::EndMenu(); + } + if (ImGui::BeginMenu("Edit")) + { + if (ImGui::MenuItem("Undo", "CTRL+Z")) {} + if (ImGui::MenuItem("Redo", "CTRL+Y", false, false)) {} // Disabled item + ImGui::Separator(); + if (ImGui::MenuItem("Cut", "CTRL+X")) {} + if (ImGui::MenuItem("Copy", "CTRL+C")) {} + if (ImGui::MenuItem("Paste", "CTRL+V")) {} + ImGui::EndMenu(); + } + ImGui::EndMainMenuBar(); + } +} + +// Note that shortcuts are currently provided for display only +// (future version will add explicit flags to BeginMenu() to request processing shortcuts) +static void ShowExampleMenuFile() +{ + ImGui::MenuItem("(demo menu)", NULL, false, false); + if (ImGui::MenuItem("New")) {} + if (ImGui::MenuItem("Open", "Ctrl+O")) {} + if (ImGui::BeginMenu("Open Recent")) + { + ImGui::MenuItem("fish_hat.c"); + ImGui::MenuItem("fish_hat.inl"); + ImGui::MenuItem("fish_hat.h"); + if (ImGui::BeginMenu("More..")) + { + ImGui::MenuItem("Hello"); + ImGui::MenuItem("Sailor"); + if (ImGui::BeginMenu("Recurse..")) + { + ShowExampleMenuFile(); + ImGui::EndMenu(); + } + ImGui::EndMenu(); + } + ImGui::EndMenu(); + } + if (ImGui::MenuItem("Save", "Ctrl+S")) {} + if (ImGui::MenuItem("Save As..")) {} + + ImGui::Separator(); + if (ImGui::BeginMenu("Options")) + { + static bool enabled = true; + ImGui::MenuItem("Enabled", "", &enabled); + ImGui::BeginChild("child", ImVec2(0, 60), true); + for (int i = 0; i < 10; i++) + ImGui::Text("Scrolling Text %d", i); + ImGui::EndChild(); + static float f = 0.5f; + static int n = 0; + ImGui::SliderFloat("Value", &f, 0.0f, 1.0f); + ImGui::InputFloat("Input", &f, 0.1f); + ImGui::Combo("Combo", &n, "Yes\0No\0Maybe\0\0"); + ImGui::EndMenu(); + } + + if (ImGui::BeginMenu("Colors")) + { + float sz = ImGui::GetTextLineHeight(); + for (int i = 0; i < ImGuiCol_COUNT; i++) + { + const char* name = ImGui::GetStyleColorName((ImGuiCol)i); + 
ImVec2 p = ImGui::GetCursorScreenPos(); + ImGui::GetWindowDrawList()->AddRectFilled(p, ImVec2(p.x + sz, p.y + sz), ImGui::GetColorU32((ImGuiCol)i)); + ImGui::Dummy(ImVec2(sz, sz)); + ImGui::SameLine(); + ImGui::MenuItem(name); + } + ImGui::EndMenu(); + } + + // Here we demonstrate appending again to the "Options" menu (which we already created above) + // Of course in this demo it is a little bit silly that this function calls BeginMenu("Options") twice. + // In a real code-base using it would make senses to use this feature from very different code locations. + if (ImGui::BeginMenu("Options")) // <-- Append! + { + static bool b = true; + ImGui::Checkbox("SomeOption", &b); + ImGui::EndMenu(); + } + + if (ImGui::BeginMenu("Disabled", false)) // Disabled + { + IM_ASSERT(0); + } + if (ImGui::MenuItem("Checked", NULL, true)) {} + if (ImGui::MenuItem("Quit", "Alt+F4")) {} +} + +//----------------------------------------------------------------------------- +// [SECTION] Example App: Debug Console / ShowExampleAppConsole() +//----------------------------------------------------------------------------- + +// Demonstrate creating a simple console window, with scrolling, filtering, completion and history. +// For the console example, we are using a more C++ like approach of declaring a class to hold both data and functions. +struct ExampleAppConsole +{ + char InputBuf[256]; + ImVector Items; + ImVector Commands; + ImVector History; + int HistoryPos; // -1: new line, 0..History.Size-1 browsing history. + ImGuiTextFilter Filter; + bool AutoScroll; + bool ScrollToBottom; + + ExampleAppConsole() + { + ClearLog(); + memset(InputBuf, 0, sizeof(InputBuf)); + HistoryPos = -1; + + // "CLASSIFY" is here to provide the test case where "C"+[tab] completes to "CL" and display multiple matches. 
+ Commands.push_back("HELP"); + Commands.push_back("HISTORY"); + Commands.push_back("CLEAR"); + Commands.push_back("CLASSIFY"); + AutoScroll = true; + ScrollToBottom = false; + AddLog("Welcome to Dear ImGui!"); + } + ~ExampleAppConsole() + { + ClearLog(); + for (int i = 0; i < History.Size; i++) + free(History[i]); + } + + // Portable helpers + static int Stricmp(const char* s1, const char* s2) { int d; while ((d = toupper(*s2) - toupper(*s1)) == 0 && *s1) { s1++; s2++; } return d; } + static int Strnicmp(const char* s1, const char* s2, int n) { int d = 0; while (n > 0 && (d = toupper(*s2) - toupper(*s1)) == 0 && *s1) { s1++; s2++; n--; } return d; } + static char* Strdup(const char* s) { IM_ASSERT(s); size_t len = strlen(s) + 1; void* buf = malloc(len); IM_ASSERT(buf); return (char*)memcpy(buf, (const void*)s, len); } + static void Strtrim(char* s) { char* str_end = s + strlen(s); while (str_end > s && str_end[-1] == ' ') str_end--; *str_end = 0; } + + void ClearLog() + { + for (int i = 0; i < Items.Size; i++) + free(Items[i]); + Items.clear(); + } + + void AddLog(const char* fmt, ...) IM_FMTARGS(2) + { + // FIXME-OPT + char buf[1024]; + va_list args; + va_start(args, fmt); + vsnprintf(buf, IM_ARRAYSIZE(buf), fmt, args); + buf[IM_ARRAYSIZE(buf)-1] = 0; + va_end(args); + Items.push_back(Strdup(buf)); + } + + void Draw(const char* title, bool* p_open) + { + ImGui::SetNextWindowSize(ImVec2(520, 600), ImGuiCond_FirstUseEver); + if (!ImGui::Begin(title, p_open)) + { + ImGui::End(); + return; + } + + // As a specific feature guaranteed by the library, after calling Begin() the last Item represent the title bar. + // So e.g. IsItemHovered() will return true when hovering the title bar. + // Here we create a context menu only available from the title bar. 
+ if (ImGui::BeginPopupContextItem()) + { + if (ImGui::MenuItem("Close Console")) + *p_open = false; + ImGui::EndPopup(); + } + + ImGui::TextWrapped( + "This example implements a console with basic coloring, completion (TAB key) and history (Up/Down keys). A more elaborate " + "implementation may want to store entries along with extra data such as timestamp, emitter, etc."); + ImGui::TextWrapped("Enter 'HELP' for help."); + + // TODO: display items starting from the bottom + + if (ImGui::SmallButton("Add Debug Text")) { AddLog("%d some text", Items.Size); AddLog("some more text"); AddLog("display very important message here!"); } + ImGui::SameLine(); + if (ImGui::SmallButton("Add Debug Error")) { AddLog("[error] something went wrong"); } + ImGui::SameLine(); + if (ImGui::SmallButton("Clear")) { ClearLog(); } + ImGui::SameLine(); + bool copy_to_clipboard = ImGui::SmallButton("Copy"); + //static float t = 0.0f; if (ImGui::GetTime() - t > 0.02f) { t = ImGui::GetTime(); AddLog("Spam %f", t); } + + ImGui::Separator(); + + // Options menu + if (ImGui::BeginPopup("Options")) + { + ImGui::Checkbox("Auto-scroll", &AutoScroll); + ImGui::EndPopup(); + } + + // Options, Filter + if (ImGui::Button("Options")) + ImGui::OpenPopup("Options"); + ImGui::SameLine(); + Filter.Draw("Filter (\"incl,-excl\") (\"error\")", 180); + ImGui::Separator(); + + // Reserve enough left-over height for 1 separator + 1 input text + const float footer_height_to_reserve = ImGui::GetStyle().ItemSpacing.y + ImGui::GetFrameHeightWithSpacing(); + ImGui::BeginChild("ScrollingRegion", ImVec2(0, -footer_height_to_reserve), false, ImGuiWindowFlags_HorizontalScrollbar); + if (ImGui::BeginPopupContextWindow()) + { + if (ImGui::Selectable("Clear")) ClearLog(); + ImGui::EndPopup(); + } + + // Display every line as a separate entry so we can change their color or add custom widgets. 
+ // If you only want raw text you can use ImGui::TextUnformatted(log.begin(), log.end()); + // NB- if you have thousands of entries this approach may be too inefficient and may require user-side clipping + // to only process visible items. The clipper will automatically measure the height of your first item and then + // "seek" to display only items in the visible area. + // To use the clipper we can replace your standard loop: + // for (int i = 0; i < Items.Size; i++) + // With: + // ImGuiListClipper clipper; + // clipper.Begin(Items.Size); + // while (clipper.Step()) + // for (int i = clipper.DisplayStart; i < clipper.DisplayEnd; i++) + // - That your items are evenly spaced (same height) + // - That you have cheap random access to your elements (you can access them given their index, + // without processing all the ones before) + // You cannot this code as-is if a filter is active because it breaks the 'cheap random-access' property. + // We would need random-access on the post-filtered list. + // A typical application wanting coarse clipping and filtering may want to pre-compute an array of indices + // or offsets of items that passed the filtering test, recomputing this array when user changes the filter, + // and appending newly elements as they are inserted. This is left as a task to the user until we can manage + // to improve this example code! + // If your items are of variable height: + // - Split them into same height items would be simpler and facilitate random-seeking into your list. + // - Consider using manual call to IsRectVisible() and skipping extraneous decoration from your items. + ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(4, 1)); // Tighten spacing + if (copy_to_clipboard) + ImGui::LogToClipboard(); + for (int i = 0; i < Items.Size; i++) + { + const char* item = Items[i]; + if (!Filter.PassFilter(item)) + continue; + + // Normally you would store more information in your item than just a string. + // (e.g. 
make Items[] an array of structure, store color/type etc.) + ImVec4 color; + bool has_color = false; + if (strstr(item, "[error]")) { color = ImVec4(1.0f, 0.4f, 0.4f, 1.0f); has_color = true; } + else if (strncmp(item, "# ", 2) == 0) { color = ImVec4(1.0f, 0.8f, 0.6f, 1.0f); has_color = true; } + if (has_color) + ImGui::PushStyleColor(ImGuiCol_Text, color); + ImGui::TextUnformatted(item); + if (has_color) + ImGui::PopStyleColor(); + } + if (copy_to_clipboard) + ImGui::LogFinish(); + + if (ScrollToBottom || (AutoScroll && ImGui::GetScrollY() >= ImGui::GetScrollMaxY())) + ImGui::SetScrollHereY(1.0f); + ScrollToBottom = false; + + ImGui::PopStyleVar(); + ImGui::EndChild(); + ImGui::Separator(); + + // Command-line + bool reclaim_focus = false; + ImGuiInputTextFlags input_text_flags = ImGuiInputTextFlags_EnterReturnsTrue | ImGuiInputTextFlags_CallbackCompletion | ImGuiInputTextFlags_CallbackHistory; + if (ImGui::InputText("Input", InputBuf, IM_ARRAYSIZE(InputBuf), input_text_flags, &TextEditCallbackStub, (void*)this)) + { + char* s = InputBuf; + Strtrim(s); + if (s[0]) + ExecCommand(s); + strcpy(s, ""); + reclaim_focus = true; + } + + // Auto-focus on window apparition + ImGui::SetItemDefaultFocus(); + if (reclaim_focus) + ImGui::SetKeyboardFocusHere(-1); // Auto focus previous widget + + ImGui::End(); + } + + void ExecCommand(const char* command_line) + { + AddLog("# %s\n", command_line); + + // Insert into history. First find match and delete it so it can be pushed to the back. + // This isn't trying to be smart or optimal. 
+ HistoryPos = -1; + for (int i = History.Size - 1; i >= 0; i--) + if (Stricmp(History[i], command_line) == 0) + { + free(History[i]); + History.erase(History.begin() + i); + break; + } + History.push_back(Strdup(command_line)); + + // Process command + if (Stricmp(command_line, "CLEAR") == 0) + { + ClearLog(); + } + else if (Stricmp(command_line, "HELP") == 0) + { + AddLog("Commands:"); + for (int i = 0; i < Commands.Size; i++) + AddLog("- %s", Commands[i]); + } + else if (Stricmp(command_line, "HISTORY") == 0) + { + int first = History.Size - 10; + for (int i = first > 0 ? first : 0; i < History.Size; i++) + AddLog("%3d: %s\n", i, History[i]); + } + else + { + AddLog("Unknown command: '%s'\n", command_line); + } + + // On command input, we scroll to bottom even if AutoScroll==false + ScrollToBottom = true; + } + + // In C++11 you'd be better off using lambdas for this sort of forwarding callbacks + static int TextEditCallbackStub(ImGuiInputTextCallbackData* data) + { + ExampleAppConsole* console = (ExampleAppConsole*)data->UserData; + return console->TextEditCallback(data); + } + + int TextEditCallback(ImGuiInputTextCallbackData* data) + { + //AddLog("cursor: %d, selection: %d-%d", data->CursorPos, data->SelectionStart, data->SelectionEnd); + switch (data->EventFlag) + { + case ImGuiInputTextFlags_CallbackCompletion: + { + // Example of TEXT COMPLETION + + // Locate beginning of current word + const char* word_end = data->Buf + data->CursorPos; + const char* word_start = word_end; + while (word_start > data->Buf) + { + const char c = word_start[-1]; + if (c == ' ' || c == '\t' || c == ',' || c == ';') + break; + word_start--; + } + + // Build a list of candidates + ImVector candidates; + for (int i = 0; i < Commands.Size; i++) + if (Strnicmp(Commands[i], word_start, (int)(word_end - word_start)) == 0) + candidates.push_back(Commands[i]); + + if (candidates.Size == 0) + { + // No match + AddLog("No match for \"%.*s\"!\n", (int)(word_end - word_start), word_start); 
+ } + else if (candidates.Size == 1) + { + // Single match. Delete the beginning of the word and replace it entirely so we've got nice casing. + data->DeleteChars((int)(word_start - data->Buf), (int)(word_end - word_start)); + data->InsertChars(data->CursorPos, candidates[0]); + data->InsertChars(data->CursorPos, " "); + } + else + { + // Multiple matches. Complete as much as we can.. + // So inputing "C"+Tab will complete to "CL" then display "CLEAR" and "CLASSIFY" as matches. + int match_len = (int)(word_end - word_start); + for (;;) + { + int c = 0; + bool all_candidates_matches = true; + for (int i = 0; i < candidates.Size && all_candidates_matches; i++) + if (i == 0) + c = toupper(candidates[i][match_len]); + else if (c == 0 || c != toupper(candidates[i][match_len])) + all_candidates_matches = false; + if (!all_candidates_matches) + break; + match_len++; + } + + if (match_len > 0) + { + data->DeleteChars((int)(word_start - data->Buf), (int)(word_end - word_start)); + data->InsertChars(data->CursorPos, candidates[0], candidates[0] + match_len); + } + + // List matches + AddLog("Possible matches:\n"); + for (int i = 0; i < candidates.Size; i++) + AddLog("- %s\n", candidates[i]); + } + + break; + } + case ImGuiInputTextFlags_CallbackHistory: + { + // Example of HISTORY + const int prev_history_pos = HistoryPos; + if (data->EventKey == ImGuiKey_UpArrow) + { + if (HistoryPos == -1) + HistoryPos = History.Size - 1; + else if (HistoryPos > 0) + HistoryPos--; + } + else if (data->EventKey == ImGuiKey_DownArrow) + { + if (HistoryPos != -1) + if (++HistoryPos >= History.Size) + HistoryPos = -1; + } + + // A better implementation would preserve the data on the current input line along with cursor position. + if (prev_history_pos != HistoryPos) + { + const char* history_str = (HistoryPos >= 0) ? 
History[HistoryPos] : ""; + data->DeleteChars(0, data->BufTextLen); + data->InsertChars(0, history_str); + } + } + } + return 0; + } +}; + +static void ShowExampleAppConsole(bool* p_open) +{ + static ExampleAppConsole console; + console.Draw("Example: Console", p_open); +} + +//----------------------------------------------------------------------------- +// [SECTION] Example App: Debug Log / ShowExampleAppLog() +//----------------------------------------------------------------------------- + +// Usage: +// static ExampleAppLog my_log; +// my_log.AddLog("Hello %d world\n", 123); +// my_log.Draw("title"); +struct ExampleAppLog +{ + ImGuiTextBuffer Buf; + ImGuiTextFilter Filter; + ImVector LineOffsets; // Index to lines offset. We maintain this with AddLog() calls. + bool AutoScroll; // Keep scrolling if already at the bottom. + + ExampleAppLog() + { + AutoScroll = true; + Clear(); + } + + void Clear() + { + Buf.clear(); + LineOffsets.clear(); + LineOffsets.push_back(0); + } + + void AddLog(const char* fmt, ...) 
IM_FMTARGS(2) + { + int old_size = Buf.size(); + va_list args; + va_start(args, fmt); + Buf.appendfv(fmt, args); + va_end(args); + for (int new_size = Buf.size(); old_size < new_size; old_size++) + if (Buf[old_size] == '\n') + LineOffsets.push_back(old_size + 1); + } + + void Draw(const char* title, bool* p_open = NULL) + { + if (!ImGui::Begin(title, p_open)) + { + ImGui::End(); + return; + } + + // Options menu + if (ImGui::BeginPopup("Options")) + { + ImGui::Checkbox("Auto-scroll", &AutoScroll); + ImGui::EndPopup(); + } + + // Main window + if (ImGui::Button("Options")) + ImGui::OpenPopup("Options"); + ImGui::SameLine(); + bool clear = ImGui::Button("Clear"); + ImGui::SameLine(); + bool copy = ImGui::Button("Copy"); + ImGui::SameLine(); + Filter.Draw("Filter", -100.0f); + + ImGui::Separator(); + ImGui::BeginChild("scrolling", ImVec2(0, 0), false, ImGuiWindowFlags_HorizontalScrollbar); + + if (clear) + Clear(); + if (copy) + ImGui::LogToClipboard(); + + ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(0, 0)); + const char* buf = Buf.begin(); + const char* buf_end = Buf.end(); + if (Filter.IsActive()) + { + // In this example we don't use the clipper when Filter is enabled. + // This is because we don't have a random access on the result on our filter. + // A real application processing logs with ten of thousands of entries may want to store the result of + // search/filter.. especially if the filtering function is not trivial (e.g. reg-exp). + for (int line_no = 0; line_no < LineOffsets.Size; line_no++) + { + const char* line_start = buf + LineOffsets[line_no]; + const char* line_end = (line_no + 1 < LineOffsets.Size) ? (buf + LineOffsets[line_no + 1] - 1) : buf_end; + if (Filter.PassFilter(line_start, line_end)) + ImGui::TextUnformatted(line_start, line_end); + } + } + else + { + // The simplest and easy way to display the entire buffer: + // ImGui::TextUnformatted(buf_begin, buf_end); + // And it'll just work. 
TextUnformatted() has specialization for large blob of text and will fast-forward + // to skip non-visible lines. Here we instead demonstrate using the clipper to only process lines that are + // within the visible area. + // If you have tens of thousands of items and their processing cost is non-negligible, coarse clipping them + // on your side is recommended. Using ImGuiListClipper requires + // - A) random access into your data + // - B) items all being the same height, + // both of which we can handle since we an array pointing to the beginning of each line of text. + // When using the filter (in the block of code above) we don't have random access into the data to display + // anymore, which is why we don't use the clipper. Storing or skimming through the search result would make + // it possible (and would be recommended if you want to search through tens of thousands of entries). + ImGuiListClipper clipper; + clipper.Begin(LineOffsets.Size); + while (clipper.Step()) + { + for (int line_no = clipper.DisplayStart; line_no < clipper.DisplayEnd; line_no++) + { + const char* line_start = buf + LineOffsets[line_no]; + const char* line_end = (line_no + 1 < LineOffsets.Size) ? (buf + LineOffsets[line_no + 1] - 1) : buf_end; + ImGui::TextUnformatted(line_start, line_end); + } + } + clipper.End(); + } + ImGui::PopStyleVar(); + + if (AutoScroll && ImGui::GetScrollY() >= ImGui::GetScrollMaxY()) + ImGui::SetScrollHereY(1.0f); + + ImGui::EndChild(); + ImGui::End(); + } +}; + +// Demonstrate creating a simple log window with basic filtering. +static void ShowExampleAppLog(bool* p_open) +{ + static ExampleAppLog log; + + // For the demo: add a debug button _BEFORE_ the normal log window contents + // We take advantage of a rarely used feature: multiple calls to Begin()/End() are appending to the _same_ window. + // Most of the contents of the window will be added by the log.Draw() call. 
+ ImGui::SetNextWindowSize(ImVec2(500, 400), ImGuiCond_FirstUseEver); + ImGui::Begin("Example: Log", p_open); + if (ImGui::SmallButton("[Debug] Add 5 entries")) + { + static int counter = 0; + const char* categories[3] = { "info", "warn", "error" }; + const char* words[] = { "Bumfuzzled", "Cattywampus", "Snickersnee", "Abibliophobia", "Absquatulate", "Nincompoop", "Pauciloquent" }; + for (int n = 0; n < 5; n++) + { + const char* category = categories[counter % IM_ARRAYSIZE(categories)]; + const char* word = words[counter % IM_ARRAYSIZE(words)]; + log.AddLog("[%05d] [%s] Hello, current time is %.1f, here's a word: '%s'\n", + ImGui::GetFrameCount(), category, ImGui::GetTime(), word); + counter++; + } + } + ImGui::End(); + + // Actually call in the regular Log helper (which will Begin() into the same window as we just did) + log.Draw("Example: Log", p_open); +} + +//----------------------------------------------------------------------------- +// [SECTION] Example App: Simple Layout / ShowExampleAppLayout() +//----------------------------------------------------------------------------- + +// Demonstrate create a window with multiple child windows. 
+static void ShowExampleAppLayout(bool* p_open) +{ + ImGui::SetNextWindowSize(ImVec2(500, 440), ImGuiCond_FirstUseEver); + if (ImGui::Begin("Example: Simple layout", p_open, ImGuiWindowFlags_MenuBar)) + { + if (ImGui::BeginMenuBar()) + { + if (ImGui::BeginMenu("File")) + { + if (ImGui::MenuItem("Close")) *p_open = false; + ImGui::EndMenu(); + } + ImGui::EndMenuBar(); + } + + // Left + static int selected = 0; + { + ImGui::BeginChild("left pane", ImVec2(150, 0), true); + for (int i = 0; i < 100; i++) + { + char label[128]; + sprintf(label, "MyObject %d", i); + if (ImGui::Selectable(label, selected == i)) + selected = i; + } + ImGui::EndChild(); + } + ImGui::SameLine(); + + // Right + { + ImGui::BeginGroup(); + ImGui::BeginChild("item view", ImVec2(0, -ImGui::GetFrameHeightWithSpacing())); // Leave room for 1 line below us + ImGui::Text("MyObject: %d", selected); + ImGui::Separator(); + if (ImGui::BeginTabBar("##Tabs", ImGuiTabBarFlags_None)) + { + if (ImGui::BeginTabItem("Description")) + { + ImGui::TextWrapped("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. "); + ImGui::EndTabItem(); + } + if (ImGui::BeginTabItem("Details")) + { + ImGui::Text("ID: 0123456789"); + ImGui::EndTabItem(); + } + ImGui::EndTabBar(); + } + ImGui::EndChild(); + if (ImGui::Button("Revert")) {} + ImGui::SameLine(); + if (ImGui::Button("Save")) {} + ImGui::EndGroup(); + } + } + ImGui::End(); +} + +//----------------------------------------------------------------------------- +// [SECTION] Example App: Property Editor / ShowExampleAppPropertyEditor() +//----------------------------------------------------------------------------- + +static void ShowPlaceholderObject(const char* prefix, int uid) +{ + // Use object uid as identifier. Most commonly you could also use the object pointer as a base ID. 
+ ImGui::PushID(uid); + + // Text and Tree nodes are less high than framed widgets, using AlignTextToFramePadding() we add vertical spacing to make the tree lines equal high. + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::AlignTextToFramePadding(); + bool node_open = ImGui::TreeNode("Object", "%s_%u", prefix, uid); + ImGui::TableSetColumnIndex(1); + ImGui::Text("my sailor is rich"); + + if (node_open) + { + static float placeholder_members[8] = { 0.0f, 0.0f, 1.0f, 3.1416f, 100.0f, 999.0f }; + for (int i = 0; i < 8; i++) + { + ImGui::PushID(i); // Use field index as identifier. + if (i < 2) + { + ShowPlaceholderObject("Child", 424242); + } + else + { + // Here we use a TreeNode to highlight on hover (we could use e.g. Selectable as well) + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::AlignTextToFramePadding(); + ImGuiTreeNodeFlags flags = ImGuiTreeNodeFlags_Leaf | ImGuiTreeNodeFlags_NoTreePushOnOpen | ImGuiTreeNodeFlags_Bullet; + ImGui::TreeNodeEx("Field", flags, "Field_%d", i); + + ImGui::TableSetColumnIndex(1); + ImGui::SetNextItemWidth(-FLT_MIN); + if (i >= 5) + ImGui::InputFloat("##value", &placeholder_members[i], 1.0f); + else + ImGui::DragFloat("##value", &placeholder_members[i], 0.01f); + ImGui::NextColumn(); + } + ImGui::PopID(); + } + ImGui::TreePop(); + } + ImGui::PopID(); +} + +// Demonstrate create a simple property editor. 
+static void ShowExampleAppPropertyEditor(bool* p_open) +{ + ImGui::SetNextWindowSize(ImVec2(430, 450), ImGuiCond_FirstUseEver); + if (!ImGui::Begin("Example: Property editor", p_open)) + { + ImGui::End(); + return; + } + + HelpMarker( + "This example shows how you may implement a property editor using two columns.\n" + "All objects/fields data are dummies here.\n" + "Remember that in many simple cases, you can use ImGui::SameLine(xxx) to position\n" + "your cursor horizontally instead of using the Columns() API."); + + ImGui::PushStyleVar(ImGuiStyleVar_FramePadding, ImVec2(2, 2)); + if (ImGui::BeginTable("split", 2, ImGuiTableFlags_BordersOuter | ImGuiTableFlags_Resizable)) + { + // Iterate placeholder objects (all the same data) + for (int obj_i = 0; obj_i < 4; obj_i++) + { + ShowPlaceholderObject("Object", obj_i); + //ImGui::Separator(); + } + ImGui::EndTable(); + } + ImGui::PopStyleVar(); + ImGui::End(); +} + +//----------------------------------------------------------------------------- +// [SECTION] Example App: Long Text / ShowExampleAppLongText() +//----------------------------------------------------------------------------- + +// Demonstrate/test rendering huge amount of text, and the incidence of clipping. 
+static void ShowExampleAppLongText(bool* p_open) +{ + ImGui::SetNextWindowSize(ImVec2(520, 600), ImGuiCond_FirstUseEver); + if (!ImGui::Begin("Example: Long text display", p_open)) + { + ImGui::End(); + return; + } + + static int test_type = 0; + static ImGuiTextBuffer log; + static int lines = 0; + ImGui::Text("Printing unusually long amount of text."); + ImGui::Combo("Test type", &test_type, + "Single call to TextUnformatted()\0" + "Multiple calls to Text(), clipped\0" + "Multiple calls to Text(), not clipped (slow)\0"); + ImGui::Text("Buffer contents: %d lines, %d bytes", lines, log.size()); + if (ImGui::Button("Clear")) { log.clear(); lines = 0; } + ImGui::SameLine(); + if (ImGui::Button("Add 1000 lines")) + { + for (int i = 0; i < 1000; i++) + log.appendf("%i The quick brown fox jumps over the lazy dog\n", lines + i); + lines += 1000; + } + ImGui::BeginChild("Log"); + switch (test_type) + { + case 0: + // Single call to TextUnformatted() with a big buffer + ImGui::TextUnformatted(log.begin(), log.end()); + break; + case 1: + { + // Multiple calls to Text(), manually coarsely clipped - demonstrate how to use the ImGuiListClipper helper. 
+ ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(0, 0)); + ImGuiListClipper clipper; + clipper.Begin(lines); + while (clipper.Step()) + for (int i = clipper.DisplayStart; i < clipper.DisplayEnd; i++) + ImGui::Text("%i The quick brown fox jumps over the lazy dog", i); + ImGui::PopStyleVar(); + break; + } + case 2: + // Multiple calls to Text(), not clipped (slow) + ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(0, 0)); + for (int i = 0; i < lines; i++) + ImGui::Text("%i The quick brown fox jumps over the lazy dog", i); + ImGui::PopStyleVar(); + break; + } + ImGui::EndChild(); + ImGui::End(); +} + +//----------------------------------------------------------------------------- +// [SECTION] Example App: Auto Resize / ShowExampleAppAutoResize() +//----------------------------------------------------------------------------- + +// Demonstrate creating a window which gets auto-resized according to its content. +static void ShowExampleAppAutoResize(bool* p_open) +{ + if (!ImGui::Begin("Example: Auto-resizing window", p_open, ImGuiWindowFlags_AlwaysAutoResize)) + { + ImGui::End(); + return; + } + + static int lines = 10; + ImGui::TextUnformatted( + "Window will resize every-frame to the size of its content.\n" + "Note that you probably don't want to query the window size to\n" + "output your content because that would create a feedback loop."); + ImGui::SliderInt("Number of lines", &lines, 1, 20); + for (int i = 0; i < lines; i++) + ImGui::Text("%*sThis is line %d", i * 4, "", i); // Pad with space to extend size horizontally + ImGui::End(); +} + +//----------------------------------------------------------------------------- +// [SECTION] Example App: Constrained Resize / ShowExampleAppConstrainedResize() +//----------------------------------------------------------------------------- + +// Demonstrate creating a window with custom resize constraints. 
+static void ShowExampleAppConstrainedResize(bool* p_open) +{ + struct CustomConstraints + { + // Helper functions to demonstrate programmatic constraints + static void Square(ImGuiSizeCallbackData* data) { data->DesiredSize.x = data->DesiredSize.y = IM_MAX(data->DesiredSize.x, data->DesiredSize.y); } + static void Step(ImGuiSizeCallbackData* data) { float step = (float)(int)(intptr_t)data->UserData; data->DesiredSize = ImVec2((int)(data->DesiredSize.x / step + 0.5f) * step, (int)(data->DesiredSize.y / step + 0.5f) * step); } + }; + + const char* test_desc[] = + { + "Resize vertical only", + "Resize horizontal only", + "Width > 100, Height > 100", + "Width 400-500", + "Height 400-500", + "Custom: Always Square", + "Custom: Fixed Steps (100)", + }; + + static bool auto_resize = false; + static int type = 0; + static int display_lines = 10; + if (type == 0) ImGui::SetNextWindowSizeConstraints(ImVec2(-1, 0), ImVec2(-1, FLT_MAX)); // Vertical only + if (type == 1) ImGui::SetNextWindowSizeConstraints(ImVec2(0, -1), ImVec2(FLT_MAX, -1)); // Horizontal only + if (type == 2) ImGui::SetNextWindowSizeConstraints(ImVec2(100, 100), ImVec2(FLT_MAX, FLT_MAX)); // Width > 100, Height > 100 + if (type == 3) ImGui::SetNextWindowSizeConstraints(ImVec2(400, -1), ImVec2(500, -1)); // Width 400-500 + if (type == 4) ImGui::SetNextWindowSizeConstraints(ImVec2(-1, 400), ImVec2(-1, 500)); // Height 400-500 + if (type == 5) ImGui::SetNextWindowSizeConstraints(ImVec2(0, 0), ImVec2(FLT_MAX, FLT_MAX), CustomConstraints::Square); // Always Square + if (type == 6) ImGui::SetNextWindowSizeConstraints(ImVec2(0, 0), ImVec2(FLT_MAX, FLT_MAX), CustomConstraints::Step, (void*)(intptr_t)100); // Fixed Step + + ImGuiWindowFlags flags = auto_resize ? 
ImGuiWindowFlags_AlwaysAutoResize : 0; + if (ImGui::Begin("Example: Constrained Resize", p_open, flags)) + { + if (ImGui::Button("200x200")) { ImGui::SetWindowSize(ImVec2(200, 200)); } ImGui::SameLine(); + if (ImGui::Button("500x500")) { ImGui::SetWindowSize(ImVec2(500, 500)); } ImGui::SameLine(); + if (ImGui::Button("800x200")) { ImGui::SetWindowSize(ImVec2(800, 200)); } + ImGui::SetNextItemWidth(200); + ImGui::Combo("Constraint", &type, test_desc, IM_ARRAYSIZE(test_desc)); + ImGui::SetNextItemWidth(200); + ImGui::DragInt("Lines", &display_lines, 0.2f, 1, 100); + ImGui::Checkbox("Auto-resize", &auto_resize); + for (int i = 0; i < display_lines; i++) + ImGui::Text("%*sHello, sailor! Making this line long enough for the example.", i * 4, ""); + } + ImGui::End(); +} + +//----------------------------------------------------------------------------- +// [SECTION] Example App: Simple Overlay / ShowExampleAppSimpleOverlay() +//----------------------------------------------------------------------------- + +// Demonstrate creating a simple static window with no decoration +// + a context-menu to choose which corner of the screen to use. +static void ShowExampleAppSimpleOverlay(bool* p_open) +{ + const float DISTANCE = 10.0f; + static int corner = 0; + ImGuiIO& io = ImGui::GetIO(); + ImGuiWindowFlags window_flags = ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoFocusOnAppearing | ImGuiWindowFlags_NoNav; + if (corner != -1) + { + window_flags |= ImGuiWindowFlags_NoMove; + ImVec2 window_pos = ImVec2((corner & 1) ? io.DisplaySize.x - DISTANCE : DISTANCE, (corner & 2) ? io.DisplaySize.y - DISTANCE : DISTANCE); + ImVec2 window_pos_pivot = ImVec2((corner & 1) ? 1.0f : 0.0f, (corner & 2) ? 
1.0f : 0.0f); + ImGui::SetNextWindowPos(window_pos, ImGuiCond_Always, window_pos_pivot); + } + ImGui::SetNextWindowBgAlpha(0.35f); // Transparent background + if (ImGui::Begin("Example: Simple overlay", p_open, window_flags)) + { + ImGui::Text("Simple overlay\n" "in the corner of the screen.\n" "(right-click to change position)"); + ImGui::Separator(); + if (ImGui::IsMousePosValid()) + ImGui::Text("Mouse Position: (%.1f,%.1f)", io.MousePos.x, io.MousePos.y); + else + ImGui::Text("Mouse Position: "); + if (ImGui::BeginPopupContextWindow()) + { + if (ImGui::MenuItem("Custom", NULL, corner == -1)) corner = -1; + if (ImGui::MenuItem("Top-left", NULL, corner == 0)) corner = 0; + if (ImGui::MenuItem("Top-right", NULL, corner == 1)) corner = 1; + if (ImGui::MenuItem("Bottom-left", NULL, corner == 2)) corner = 2; + if (ImGui::MenuItem("Bottom-right", NULL, corner == 3)) corner = 3; + if (p_open && ImGui::MenuItem("Close")) *p_open = false; + ImGui::EndPopup(); + } + } + ImGui::End(); +} + +//----------------------------------------------------------------------------- +// [SECTION] Example App: Manipulating Window Titles / ShowExampleAppWindowTitles() +//----------------------------------------------------------------------------- + +// Demonstrate using "##" and "###" in identifiers to manipulate ID generation. +// This apply to all regular items as well. +// Read FAQ section "How can I have multiple widgets with the same label?" for details. +static void ShowExampleAppWindowTitles(bool*) +{ + // By default, Windows are uniquely identified by their title. + // You can use the "##" and "###" markers to manipulate the display/ID. + + // Using "##" to display same title but have unique identifier. 
+ ImGui::SetNextWindowPos(ImVec2(100, 100), ImGuiCond_FirstUseEver); + ImGui::Begin("Same title as another window##1"); + ImGui::Text("This is window 1.\nMy title is the same as window 2, but my identifier is unique."); + ImGui::End(); + + ImGui::SetNextWindowPos(ImVec2(100, 200), ImGuiCond_FirstUseEver); + ImGui::Begin("Same title as another window##2"); + ImGui::Text("This is window 2.\nMy title is the same as window 1, but my identifier is unique."); + ImGui::End(); + + // Using "###" to display a changing title but keep a static identifier "AnimatedTitle" + char buf[128]; + sprintf(buf, "Animated title %c %d###AnimatedTitle", "|/-\\"[(int)(ImGui::GetTime() / 0.25f) & 3], ImGui::GetFrameCount()); + ImGui::SetNextWindowPos(ImVec2(100, 300), ImGuiCond_FirstUseEver); + ImGui::Begin(buf); + ImGui::Text("This window has a changing title."); + ImGui::End(); +} + +//----------------------------------------------------------------------------- +// [SECTION] Example App: Custom Rendering using ImDrawList API / ShowExampleAppCustomRendering() +//----------------------------------------------------------------------------- + +// Demonstrate using the low-level ImDrawList to draw custom shapes. +static void ShowExampleAppCustomRendering(bool* p_open) +{ + if (!ImGui::Begin("Example: Custom rendering", p_open)) + { + ImGui::End(); + return; + } + + // Tip: If you do a lot of custom rendering, you probably want to use your own geometrical types and benefit of + // overloaded operators, etc. Define IM_VEC2_CLASS_EXTRA in imconfig.h to create implicit conversions between your + // types and ImVec2/ImVec4. Dear ImGui defines overloaded operators but they are internal to imgui.cpp and not + // exposed outside (to avoid messing with your types) In this example we are not using the maths operators! 
+ + if (ImGui::BeginTabBar("##TabBar")) + { + if (ImGui::BeginTabItem("Primitives")) + { + ImGui::PushItemWidth(-ImGui::GetFontSize() * 15); + ImDrawList* draw_list = ImGui::GetWindowDrawList(); + + // Draw gradients + // (note that those are currently exacerbating our sRGB/Linear issues) + // Calling ImGui::GetColorU32() multiplies the given colors by the current Style Alpha, but you may pass the IM_COL32() directly as well.. + ImGui::Text("Gradients"); + ImVec2 gradient_size = ImVec2(ImGui::CalcItemWidth(), ImGui::GetFrameHeight()); + { + ImVec2 p0 = ImGui::GetCursorScreenPos(); + ImVec2 p1 = ImVec2(p0.x + gradient_size.x, p0.y + gradient_size.y); + ImU32 col_a = ImGui::GetColorU32(IM_COL32(0, 0, 0, 255)); + ImU32 col_b = ImGui::GetColorU32(IM_COL32(255, 255, 255, 255)); + draw_list->AddRectFilledMultiColor(p0, p1, col_a, col_b, col_b, col_a); + ImGui::InvisibleButton("##gradient1", gradient_size); + } + { + ImVec2 p0 = ImGui::GetCursorScreenPos(); + ImVec2 p1 = ImVec2(p0.x + gradient_size.x, p0.y + gradient_size.y); + ImU32 col_a = ImGui::GetColorU32(IM_COL32(0, 255, 0, 255)); + ImU32 col_b = ImGui::GetColorU32(IM_COL32(255, 0, 0, 255)); + draw_list->AddRectFilledMultiColor(p0, p1, col_a, col_b, col_b, col_a); + ImGui::InvisibleButton("##gradient2", gradient_size); + } + + // Draw a bunch of primitives + ImGui::Text("All primitives"); + static float sz = 36.0f; + static float thickness = 3.0f; + static int ngon_sides = 6; + static bool circle_segments_override = false; + static int circle_segments_override_v = 12; + static bool curve_segments_override = false; + static int curve_segments_override_v = 8; + static ImVec4 colf = ImVec4(1.0f, 1.0f, 0.4f, 1.0f); + ImGui::DragFloat("Size", &sz, 0.2f, 2.0f, 72.0f, "%.0f"); + ImGui::DragFloat("Thickness", &thickness, 0.05f, 1.0f, 8.0f, "%.02f"); + ImGui::SliderInt("N-gon sides", &ngon_sides, 3, 12); + ImGui::Checkbox("##circlesegmentoverride", &circle_segments_override); + ImGui::SameLine(0.0f, 
ImGui::GetStyle().ItemInnerSpacing.x); + circle_segments_override |= ImGui::SliderInt("Circle segments override", &circle_segments_override_v, 3, 40); + ImGui::Checkbox("##curvessegmentoverride", &curve_segments_override); + ImGui::SameLine(0.0f, ImGui::GetStyle().ItemInnerSpacing.x); + curve_segments_override |= ImGui::SliderInt("Curves segments override", &curve_segments_override_v, 3, 40); + ImGui::ColorEdit4("Color", &colf.x); + + const ImVec2 p = ImGui::GetCursorScreenPos(); + const ImU32 col = ImColor(colf); + const float spacing = 10.0f; + const ImDrawCornerFlags corners_none = 0; + const ImDrawCornerFlags corners_all = ImDrawCornerFlags_All; + const ImDrawCornerFlags corners_tl_br = ImDrawCornerFlags_TopLeft | ImDrawCornerFlags_BotRight; + const int circle_segments = circle_segments_override ? circle_segments_override_v : 0; + const int curve_segments = curve_segments_override ? curve_segments_override_v : 0; + float x = p.x + 4.0f; + float y = p.y + 4.0f; + for (int n = 0; n < 2; n++) + { + // First line uses a thickness of 1.0f, second line uses the configurable thickness + float th = (n == 0) ? 
1.0f : thickness; + draw_list->AddNgon(ImVec2(x + sz*0.5f, y + sz*0.5f), sz*0.5f, col, ngon_sides, th); x += sz + spacing; // N-gon + draw_list->AddCircle(ImVec2(x + sz*0.5f, y + sz*0.5f), sz*0.5f, col, circle_segments, th); x += sz + spacing; // Circle + draw_list->AddRect(ImVec2(x, y), ImVec2(x + sz, y + sz), col, 0.0f, corners_none, th); x += sz + spacing; // Square + draw_list->AddRect(ImVec2(x, y), ImVec2(x + sz, y + sz), col, 10.0f, corners_all, th); x += sz + spacing; // Square with all rounded corners + draw_list->AddRect(ImVec2(x, y), ImVec2(x + sz, y + sz), col, 10.0f, corners_tl_br, th); x += sz + spacing; // Square with two rounded corners + draw_list->AddTriangle(ImVec2(x+sz*0.5f,y), ImVec2(x+sz, y+sz-0.5f), ImVec2(x, y+sz-0.5f), col, th);x += sz + spacing; // Triangle + //draw_list->AddTriangle(ImVec2(x+sz*0.2f,y), ImVec2(x, y+sz-0.5f), ImVec2(x+sz*0.4f, y+sz-0.5f), col, th);x+= sz*0.4f + spacing; // Thin triangle + draw_list->AddLine(ImVec2(x, y), ImVec2(x + sz, y), col, th); x += sz + spacing; // Horizontal line (note: drawing a filled rectangle will be faster!) + draw_list->AddLine(ImVec2(x, y), ImVec2(x, y + sz), col, th); x += spacing; // Vertical line (note: drawing a filled rectangle will be faster!) 
+ draw_list->AddLine(ImVec2(x, y), ImVec2(x + sz, y + sz), col, th); x += sz + spacing; // Diagonal line + + // Quadratic Bezier Curve (3 control points) + ImVec2 cp3[3] = { ImVec2(x, y + sz * 0.6f), ImVec2(x + sz * 0.5f, y - sz * 0.4f), ImVec2(x + sz, y + sz) }; + draw_list->AddBezierQuadratic(cp3[0], cp3[1], cp3[2], col, th, curve_segments); x += sz + spacing; + + // Cubic Bezier Curve (4 control points) + ImVec2 cp4[4] = { ImVec2(x, y), ImVec2(x + sz * 1.3f, y + sz * 0.3f), ImVec2(x + sz - sz * 1.3f, y + sz - sz * 0.3f), ImVec2(x + sz, y + sz) }; + draw_list->AddBezierCubic(cp4[0], cp4[1], cp4[2], cp4[3], col, th, curve_segments); + + x = p.x + 4; + y += sz + spacing; + } + draw_list->AddNgonFilled(ImVec2(x + sz * 0.5f, y + sz * 0.5f), sz*0.5f, col, ngon_sides); x += sz + spacing; // N-gon + draw_list->AddCircleFilled(ImVec2(x + sz*0.5f, y + sz*0.5f), sz*0.5f, col, circle_segments); x += sz + spacing; // Circle + draw_list->AddRectFilled(ImVec2(x, y), ImVec2(x + sz, y + sz), col); x += sz + spacing; // Square + draw_list->AddRectFilled(ImVec2(x, y), ImVec2(x + sz, y + sz), col, 10.0f); x += sz + spacing; // Square with all rounded corners + draw_list->AddRectFilled(ImVec2(x, y), ImVec2(x + sz, y + sz), col, 10.0f, corners_tl_br); x += sz + spacing; // Square with two rounded corners + draw_list->AddTriangleFilled(ImVec2(x+sz*0.5f,y), ImVec2(x+sz, y+sz-0.5f), ImVec2(x, y+sz-0.5f), col); x += sz + spacing; // Triangle + //draw_list->AddTriangleFilled(ImVec2(x+sz*0.2f,y), ImVec2(x, y+sz-0.5f), ImVec2(x+sz*0.4f, y+sz-0.5f), col); x += sz*0.4f + spacing; // Thin triangle + draw_list->AddRectFilled(ImVec2(x, y), ImVec2(x + sz, y + thickness), col); x += sz + spacing; // Horizontal line (faster than AddLine, but only handle integer thickness) + draw_list->AddRectFilled(ImVec2(x, y), ImVec2(x + thickness, y + sz), col); x += spacing * 2.0f;// Vertical line (faster than AddLine, but only handle integer thickness) + draw_list->AddRectFilled(ImVec2(x, y), ImVec2(x + 1, y + 
1), col); x += sz; // Pixel (faster than AddLine) + draw_list->AddRectFilledMultiColor(ImVec2(x, y), ImVec2(x + sz, y + sz), IM_COL32(0, 0, 0, 255), IM_COL32(255, 0, 0, 255), IM_COL32(255, 255, 0, 255), IM_COL32(0, 255, 0, 255)); + + ImGui::Dummy(ImVec2((sz + spacing) * 10.2f, (sz + spacing) * 3.0f)); + ImGui::PopItemWidth(); + ImGui::EndTabItem(); + } + + if (ImGui::BeginTabItem("Canvas")) + { + static ImVector points; + static ImVec2 scrolling(0.0f, 0.0f); + static bool opt_enable_grid = true; + static bool opt_enable_context_menu = true; + static bool adding_line = false; + + ImGui::Checkbox("Enable grid", &opt_enable_grid); + ImGui::Checkbox("Enable context menu", &opt_enable_context_menu); + ImGui::Text("Mouse Left: drag to add lines,\nMouse Right: drag to scroll, click for context menu."); + + // Typically you would use a BeginChild()/EndChild() pair to benefit from a clipping region + own scrolling. + // Here we demonstrate that this can be replaced by simple offsetting + custom drawing + PushClipRect/PopClipRect() calls. + // To use a child window instead we could use, e.g: + // ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0, 0)); // Disable padding + // ImGui::PushStyleColor(ImGuiCol_ChildBg, IM_COL32(50, 50, 50, 255)); // Set a background color + // ImGui::BeginChild("canvas", ImVec2(0.0f, 0.0f), true, ImGuiWindowFlags_NoMove); + // ImGui::PopStyleColor(); + // ImGui::PopStyleVar(); + // [...] + // ImGui::EndChild(); + + // Using InvisibleButton() as a convenience 1) it will advance the layout cursor and 2) allows us to use IsItemHovered()/IsItemActive() + ImVec2 canvas_p0 = ImGui::GetCursorScreenPos(); // ImDrawList API uses screen coordinates! 
+ ImVec2 canvas_sz = ImGui::GetContentRegionAvail(); // Resize canvas to what's available + if (canvas_sz.x < 50.0f) canvas_sz.x = 50.0f; + if (canvas_sz.y < 50.0f) canvas_sz.y = 50.0f; + ImVec2 canvas_p1 = ImVec2(canvas_p0.x + canvas_sz.x, canvas_p0.y + canvas_sz.y); + + // Draw border and background color + ImGuiIO& io = ImGui::GetIO(); + ImDrawList* draw_list = ImGui::GetWindowDrawList(); + draw_list->AddRectFilled(canvas_p0, canvas_p1, IM_COL32(50, 50, 50, 255)); + draw_list->AddRect(canvas_p0, canvas_p1, IM_COL32(255, 255, 255, 255)); + + // This will catch our interactions + ImGui::InvisibleButton("canvas", canvas_sz, ImGuiButtonFlags_MouseButtonLeft | ImGuiButtonFlags_MouseButtonRight); + const bool is_hovered = ImGui::IsItemHovered(); // Hovered + const bool is_active = ImGui::IsItemActive(); // Held + const ImVec2 origin(canvas_p0.x + scrolling.x, canvas_p0.y + scrolling.y); // Lock scrolled origin + const ImVec2 mouse_pos_in_canvas(io.MousePos.x - origin.x, io.MousePos.y - origin.y); + + // Add first and second point + if (is_hovered && !adding_line && ImGui::IsMouseClicked(ImGuiMouseButton_Left)) + { + points.push_back(mouse_pos_in_canvas); + points.push_back(mouse_pos_in_canvas); + adding_line = true; + } + if (adding_line) + { + points.back() = mouse_pos_in_canvas; + if (!ImGui::IsMouseDown(ImGuiMouseButton_Left)) + adding_line = false; + } + + // Pan (we use a zero mouse threshold when there's no context menu) + // You may decide to make that threshold dynamic based on whether the mouse is hovering something etc. + const float mouse_threshold_for_pan = opt_enable_context_menu ? 
-1.0f : 0.0f; + if (is_active && ImGui::IsMouseDragging(ImGuiMouseButton_Right, mouse_threshold_for_pan)) + { + scrolling.x += io.MouseDelta.x; + scrolling.y += io.MouseDelta.y; + } + + // Context menu (under default mouse threshold) + ImVec2 drag_delta = ImGui::GetMouseDragDelta(ImGuiMouseButton_Right); + if (opt_enable_context_menu && ImGui::IsMouseReleased(ImGuiMouseButton_Right) && drag_delta.x == 0.0f && drag_delta.y == 0.0f) + ImGui::OpenPopupOnItemClick("context"); + if (ImGui::BeginPopup("context")) + { + if (adding_line) + points.resize(points.size() - 2); + adding_line = false; + if (ImGui::MenuItem("Remove one", NULL, false, points.Size > 0)) { points.resize(points.size() - 2); } + if (ImGui::MenuItem("Remove all", NULL, false, points.Size > 0)) { points.clear(); } + ImGui::EndPopup(); + } + + // Draw grid + all lines in the canvas + draw_list->PushClipRect(canvas_p0, canvas_p1, true); + if (opt_enable_grid) + { + const float GRID_STEP = 64.0f; + for (float x = fmodf(scrolling.x, GRID_STEP); x < canvas_sz.x; x += GRID_STEP) + draw_list->AddLine(ImVec2(canvas_p0.x + x, canvas_p0.y), ImVec2(canvas_p0.x + x, canvas_p1.y), IM_COL32(200, 200, 200, 40)); + for (float y = fmodf(scrolling.y, GRID_STEP); y < canvas_sz.y; y += GRID_STEP) + draw_list->AddLine(ImVec2(canvas_p0.x, canvas_p0.y + y), ImVec2(canvas_p1.x, canvas_p0.y + y), IM_COL32(200, 200, 200, 40)); + } + for (int n = 0; n < points.Size; n += 2) + draw_list->AddLine(ImVec2(origin.x + points[n].x, origin.y + points[n].y), ImVec2(origin.x + points[n + 1].x, origin.y + points[n + 1].y), IM_COL32(255, 255, 0, 255), 2.0f); + draw_list->PopClipRect(); + + ImGui::EndTabItem(); + } + + if (ImGui::BeginTabItem("BG/FG draw lists")) + { + static bool draw_bg = true; + static bool draw_fg = true; + ImGui::Checkbox("Draw in Background draw list", &draw_bg); + ImGui::SameLine(); HelpMarker("The Background draw list will be rendered below every Dear ImGui windows."); + ImGui::Checkbox("Draw in Foreground draw list", 
&draw_fg); + ImGui::SameLine(); HelpMarker("The Foreground draw list will be rendered over every Dear ImGui windows."); + ImVec2 window_pos = ImGui::GetWindowPos(); + ImVec2 window_size = ImGui::GetWindowSize(); + ImVec2 window_center = ImVec2(window_pos.x + window_size.x * 0.5f, window_pos.y + window_size.y * 0.5f); + if (draw_bg) + ImGui::GetBackgroundDrawList()->AddCircle(window_center, window_size.x * 0.6f, IM_COL32(255, 0, 0, 200), 0, 10 + 4); + if (draw_fg) + ImGui::GetForegroundDrawList()->AddCircle(window_center, window_size.y * 0.6f, IM_COL32(0, 255, 0, 200), 0, 10); + ImGui::EndTabItem(); + } + + ImGui::EndTabBar(); + } + + ImGui::End(); +} + +//----------------------------------------------------------------------------- +// [SECTION] Example App: Documents Handling / ShowExampleAppDocuments() +//----------------------------------------------------------------------------- + +// Simplified structure to mimic a Document model +struct MyDocument +{ + const char* Name; // Document title + bool Open; // Set when open (we keep an array of all available documents to simplify demo code!) + bool OpenPrev; // Copy of Open from last update. 
+ bool Dirty; // Set when the document has been modified + bool WantClose; // Set when the document + ImVec4 Color; // An arbitrary variable associated to the document + + MyDocument(const char* name, bool open = true, const ImVec4& color = ImVec4(1.0f, 1.0f, 1.0f, 1.0f)) + { + Name = name; + Open = OpenPrev = open; + Dirty = false; + WantClose = false; + Color = color; + } + void DoOpen() { Open = true; } + void DoQueueClose() { WantClose = true; } + void DoForceClose() { Open = false; Dirty = false; } + void DoSave() { Dirty = false; } + + // Display placeholder contents for the Document + static void DisplayContents(MyDocument* doc) + { + ImGui::PushID(doc); + ImGui::Text("Document \"%s\"", doc->Name); + ImGui::PushStyleColor(ImGuiCol_Text, doc->Color); + ImGui::TextWrapped("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."); + ImGui::PopStyleColor(); + if (ImGui::Button("Modify", ImVec2(100, 0))) + doc->Dirty = true; + ImGui::SameLine(); + if (ImGui::Button("Save", ImVec2(100, 0))) + doc->DoSave(); + ImGui::ColorEdit3("color", &doc->Color.x); // Useful to test drag and drop and hold-dragged-to-open-tab behavior. 
+ ImGui::PopID(); + } + + // Display context menu for the Document + static void DisplayContextMenu(MyDocument* doc) + { + if (!ImGui::BeginPopupContextItem()) + return; + + char buf[256]; + sprintf(buf, "Save %s", doc->Name); + if (ImGui::MenuItem(buf, "CTRL+S", false, doc->Open)) + doc->DoSave(); + if (ImGui::MenuItem("Close", "CTRL+W", false, doc->Open)) + doc->DoQueueClose(); + ImGui::EndPopup(); + } +}; + +struct ExampleAppDocuments +{ + ImVector Documents; + + ExampleAppDocuments() + { + Documents.push_back(MyDocument("Lettuce", true, ImVec4(0.4f, 0.8f, 0.4f, 1.0f))); + Documents.push_back(MyDocument("Eggplant", true, ImVec4(0.8f, 0.5f, 1.0f, 1.0f))); + Documents.push_back(MyDocument("Carrot", true, ImVec4(1.0f, 0.8f, 0.5f, 1.0f))); + Documents.push_back(MyDocument("Tomato", false, ImVec4(1.0f, 0.3f, 0.4f, 1.0f))); + Documents.push_back(MyDocument("A Rather Long Title", false)); + Documents.push_back(MyDocument("Some Document", false)); + } +}; + +// [Optional] Notify the system of Tabs/Windows closure that happened outside the regular tab interface. +// If a tab has been closed programmatically (aka closed from another source such as the Checkbox() in the demo, +// as opposed to clicking on the regular tab closing button) and stops being submitted, it will take a frame for +// the tab bar to notice its absence. During this frame there will be a gap in the tab bar, and if the tab that has +// disappeared was the selected one, the tab bar will report no selected tab during the frame. This will effectively +// give the impression of a flicker for one frame. +// We call SetTabItemClosed() to manually notify the Tab Bar or Docking system of removed tabs to avoid this glitch. +// Note that this completely optional, and only affect tab bars with the ImGuiTabBarFlags_Reorderable flag. 
+static void NotifyOfDocumentsClosedElsewhere(ExampleAppDocuments& app) +{ + for (int doc_n = 0; doc_n < app.Documents.Size; doc_n++) + { + MyDocument* doc = &app.Documents[doc_n]; + if (!doc->Open && doc->OpenPrev) + ImGui::SetTabItemClosed(doc->Name); + doc->OpenPrev = doc->Open; + } +} + +void ShowExampleAppDocuments(bool* p_open) +{ + static ExampleAppDocuments app; + + // Options + static bool opt_reorderable = true; + static ImGuiTabBarFlags opt_fitting_flags = ImGuiTabBarFlags_FittingPolicyDefault_; + + bool window_contents_visible = ImGui::Begin("Example: Documents", p_open, ImGuiWindowFlags_MenuBar); + if (!window_contents_visible) + { + ImGui::End(); + return; + } + + // Menu + if (ImGui::BeginMenuBar()) + { + if (ImGui::BeginMenu("File")) + { + int open_count = 0; + for (int doc_n = 0; doc_n < app.Documents.Size; doc_n++) + open_count += app.Documents[doc_n].Open ? 1 : 0; + + if (ImGui::BeginMenu("Open", open_count < app.Documents.Size)) + { + for (int doc_n = 0; doc_n < app.Documents.Size; doc_n++) + { + MyDocument* doc = &app.Documents[doc_n]; + if (!doc->Open) + if (ImGui::MenuItem(doc->Name)) + doc->DoOpen(); + } + ImGui::EndMenu(); + } + if (ImGui::MenuItem("Close All Documents", NULL, false, open_count > 0)) + for (int doc_n = 0; doc_n < app.Documents.Size; doc_n++) + app.Documents[doc_n].DoQueueClose(); + if (ImGui::MenuItem("Exit", "Alt+F4")) {} + ImGui::EndMenu(); + } + ImGui::EndMenuBar(); + } + + // [Debug] List documents with one checkbox for each + for (int doc_n = 0; doc_n < app.Documents.Size; doc_n++) + { + MyDocument* doc = &app.Documents[doc_n]; + if (doc_n > 0) + ImGui::SameLine(); + ImGui::PushID(doc); + if (ImGui::Checkbox(doc->Name, &doc->Open)) + if (!doc->Open) + doc->DoForceClose(); + ImGui::PopID(); + } + + ImGui::Separator(); + + // Submit Tab Bar and Tabs + { + ImGuiTabBarFlags tab_bar_flags = (opt_fitting_flags) | (opt_reorderable ? 
ImGuiTabBarFlags_Reorderable : 0); + if (ImGui::BeginTabBar("##tabs", tab_bar_flags)) + { + if (opt_reorderable) + NotifyOfDocumentsClosedElsewhere(app); + + // [DEBUG] Stress tests + //if ((ImGui::GetFrameCount() % 30) == 0) docs[1].Open ^= 1; // [DEBUG] Automatically show/hide a tab. Test various interactions e.g. dragging with this on. + //if (ImGui::GetIO().KeyCtrl) ImGui::SetTabItemSelected(docs[1].Name); // [DEBUG] Test SetTabItemSelected(), probably not very useful as-is anyway.. + + // Submit Tabs + for (int doc_n = 0; doc_n < app.Documents.Size; doc_n++) + { + MyDocument* doc = &app.Documents[doc_n]; + if (!doc->Open) + continue; + + ImGuiTabItemFlags tab_flags = (doc->Dirty ? ImGuiTabItemFlags_UnsavedDocument : 0); + bool visible = ImGui::BeginTabItem(doc->Name, &doc->Open, tab_flags); + + // Cancel attempt to close when unsaved add to save queue so we can display a popup. + if (!doc->Open && doc->Dirty) + { + doc->Open = true; + doc->DoQueueClose(); + } + + MyDocument::DisplayContextMenu(doc); + if (visible) + { + MyDocument::DisplayContents(doc); + ImGui::EndTabItem(); + } + } + + ImGui::EndTabBar(); + } + } + + // Update closing queue + static ImVector close_queue; + if (close_queue.empty()) + { + // Close queue is locked once we started a popup + for (int doc_n = 0; doc_n < app.Documents.Size; doc_n++) + { + MyDocument* doc = &app.Documents[doc_n]; + if (doc->WantClose) + { + doc->WantClose = false; + close_queue.push_back(doc); + } + } + } + + // Display closing confirmation UI + if (!close_queue.empty()) + { + int close_queue_unsaved_documents = 0; + for (int n = 0; n < close_queue.Size; n++) + if (close_queue[n]->Dirty) + close_queue_unsaved_documents++; + + if (close_queue_unsaved_documents == 0) + { + // Close documents when all are unsaved + for (int n = 0; n < close_queue.Size; n++) + close_queue[n]->DoForceClose(); + close_queue.clear(); + } + else + { + if (!ImGui::IsPopupOpen("Save?")) + ImGui::OpenPopup("Save?"); + if 
(ImGui::BeginPopupModal("Save?")) + { + ImGui::Text("Save change to the following items?"); + ImGui::SetNextItemWidth(-1.0f); + if (ImGui::ListBoxHeader("##", close_queue_unsaved_documents, 6)) + { + for (int n = 0; n < close_queue.Size; n++) + if (close_queue[n]->Dirty) + ImGui::Text("%s", close_queue[n]->Name); + ImGui::ListBoxFooter(); + } + + if (ImGui::Button("Yes", ImVec2(80, 0))) + { + for (int n = 0; n < close_queue.Size; n++) + { + if (close_queue[n]->Dirty) + close_queue[n]->DoSave(); + close_queue[n]->DoForceClose(); + } + close_queue.clear(); + ImGui::CloseCurrentPopup(); + } + ImGui::SameLine(); + if (ImGui::Button("No", ImVec2(80, 0))) + { + for (int n = 0; n < close_queue.Size; n++) + close_queue[n]->DoForceClose(); + close_queue.clear(); + ImGui::CloseCurrentPopup(); + } + ImGui::SameLine(); + if (ImGui::Button("Cancel", ImVec2(80, 0))) + { + close_queue.clear(); + ImGui::CloseCurrentPopup(); + } + ImGui::EndPopup(); + } + } + } + + ImGui::End(); +} + +// End of Demo code +#else + +void ImGui::ShowAboutWindow(bool*) {} +void ImGui::ShowDemoWindow(bool*) {} +void ImGui::ShowUserGuide() {} +void ImGui::ShowStyleEditor(ImGuiStyle*) {} + +#endif + +#endif // #ifndef IMGUI_DISABLE diff --git a/cpp-projects/3d-engine/imgui/imgui_draw.cpp b/cpp-projects/3d-engine/imgui/imgui_draw.cpp new file mode 100644 index 0000000..ad106c3 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/imgui_draw.cpp @@ -0,0 +1,3913 @@ +// dear imgui, v1.81 WIP +// (drawing and font code) + +/* + +Index of this file: + +// [SECTION] STB libraries implementation +// [SECTION] Style functions +// [SECTION] ImDrawList +// [SECTION] ImDrawListSplitter +// [SECTION] ImDrawData +// [SECTION] Helpers ShadeVertsXXX functions +// [SECTION] ImFontConfig +// [SECTION] ImFontAtlas +// [SECTION] ImFontAtlas glyph ranges helpers +// [SECTION] ImFontGlyphRangesBuilder +// [SECTION] ImFont +// [SECTION] ImGui Internal Render Helpers +// [SECTION] Decompression code +// [SECTION] Default font data 
(ProggyClean.ttf) + +*/ + +#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS) +#define _CRT_SECURE_NO_WARNINGS +#endif + +#include "imgui.h" +#ifndef IMGUI_DISABLE + +#ifndef IMGUI_DEFINE_MATH_OPERATORS +#define IMGUI_DEFINE_MATH_OPERATORS +#endif + +#include "imgui_internal.h" +#ifdef IMGUI_ENABLE_FREETYPE +#include "misc/freetype/imgui_freetype.h" +#endif + +#include <stdio.h> // vsnprintf, sscanf, printf +#if !defined(alloca) +#if defined(__GLIBC__) || defined(__sun) || defined(__APPLE__) || defined(__NEWLIB__) +#include <alloca.h> // alloca (glibc uses <alloca.h>. Note that Cygwin may have _WIN32 defined, so the order matters here) +#elif defined(_WIN32) +#include <malloc.h> // alloca +#if !defined(alloca) +#define alloca _alloca // for clang with MS Codegen +#endif +#else +#include <stdlib.h> // alloca +#endif +#endif + +// Visual Studio warnings +#ifdef _MSC_VER +#pragma warning (disable: 4127) // condition expression is constant +#pragma warning (disable: 4505) // unreferenced local function has been removed (stb stuff) +#pragma warning (disable: 4996) // 'This function or variable may be unsafe': strcpy, strdup, sprintf, vsnprintf, sscanf, fopen +#endif + +// Clang/GCC warnings with -Weverything +#if defined(__clang__) +#if __has_warning("-Wunknown-warning-option") +#pragma clang diagnostic ignored "-Wunknown-warning-option" // warning: unknown warning group 'xxx' // not all warnings are known by all Clang versions and they tend to be rename-happy.. so ignoring warnings triggers new warnings on some configuration. Great! +#endif +#if __has_warning("-Walloca") +#pragma clang diagnostic ignored "-Walloca" // warning: use of function '__builtin_alloca' is discouraged +#endif +#pragma clang diagnostic ignored "-Wunknown-pragmas" // warning: unknown warning group 'xxx' +#pragma clang diagnostic ignored "-Wold-style-cast" // warning: use of old-style cast // yes, they are more terse.
+#pragma clang diagnostic ignored "-Wfloat-equal" // warning: comparing floating point with == or != is unsafe // storing and comparing against same constants ok. +#pragma clang diagnostic ignored "-Wglobal-constructors" // warning: declaration requires a global destructor // similar to above, not sure what the exact difference is. +#pragma clang diagnostic ignored "-Wsign-conversion" // warning: implicit conversion changes signedness +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" // warning: zero as null pointer constant // some standard header variations use #define NULL 0 +#pragma clang diagnostic ignored "-Wcomma" // warning: possible misuse of comma operator here +#pragma clang diagnostic ignored "-Wreserved-id-macro" // warning: macro name is a reserved identifier +#pragma clang diagnostic ignored "-Wdouble-promotion" // warning: implicit conversion from 'float' to 'double' when passing argument to function // using printf() is a misery with this as C++ va_arg ellipsis changes float to double. 
#pragma clang diagnostic ignored "-Wimplicit-int-float-conversion" // warning: implicit conversion from 'xxx' to 'float' may lose precision
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wpragmas" // warning: unknown option after '#pragma GCC diagnostic' kind
#pragma GCC diagnostic ignored "-Wunused-function" // warning: 'xxxx' defined but not used
#pragma GCC diagnostic ignored "-Wdouble-promotion" // warning: implicit conversion from 'float' to 'double' when passing argument to function
#pragma GCC diagnostic ignored "-Wconversion" // warning: conversion to 'xxxx' from 'xxxx' may alter its value
#pragma GCC diagnostic ignored "-Wstack-protector" // warning: stack protector not protecting local variables: variable length buffer
#pragma GCC diagnostic ignored "-Wclass-memaccess" // [__GNUC__ >= 8] warning: 'memset/memcpy' clearing/writing an object of type 'xxxx' with no trivial copy-assignment; use assignment or value-initialization instead
#endif

//-------------------------------------------------------------------------
// [SECTION] STB libraries implementation
//-------------------------------------------------------------------------

// Compile time options:
//#define IMGUI_STB_NAMESPACE ImStb
//#define IMGUI_STB_TRUETYPE_FILENAME "my_folder/stb_truetype.h"
//#define IMGUI_STB_RECT_PACK_FILENAME "my_folder/stb_rect_pack.h"
//#define IMGUI_DISABLE_STB_TRUETYPE_IMPLEMENTATION
//#define IMGUI_DISABLE_STB_RECT_PACK_IMPLEMENTATION

// When IMGUI_STB_NAMESPACE is defined, the stb headers included below are wrapped in that namespace
// (it is closed, and pulled back in with a using-directive, at the end of this section).
#ifdef IMGUI_STB_NAMESPACE
namespace IMGUI_STB_NAMESPACE
{
#endif

// The warning state pushed here is popped once the stb headers have been included.
#ifdef _MSC_VER
#pragma warning (push)
#pragma warning (disable: 4456) // declaration of 'xx' hides previous local declaration
#endif

#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-function"
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wimplicit-fallthrough"
#pragma clang diagnostic ignored "-Wcast-qual" // warning: cast from 'const xxxx *' to 'xxx *' drops const qualifier
#endif

#if defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtype-limits" // warning: comparison is always true due to limited range of data type [-Wtype-limits]
#pragma GCC diagnostic ignored "-Wcast-qual" // warning: cast from type 'const xxxx *' to type 'xxxx *' casts away qualifiers
#endif

#ifndef STB_RECT_PACK_IMPLEMENTATION // in case the user already have an implementation in the _same_ compilation unit (e.g. unity builds)
#ifndef IMGUI_DISABLE_STB_RECT_PACK_IMPLEMENTATION // in case the user already have an implementation in another compilation unit
#define STBRP_STATIC
#define STBRP_ASSERT(x) do { IM_ASSERT(x); } while (0)
#define STBRP_SORT ImQsort
#define STB_RECT_PACK_IMPLEMENTATION
#endif
#ifdef IMGUI_STB_RECT_PACK_FILENAME
#include IMGUI_STB_RECT_PACK_FILENAME
#else
#include "imstb_rectpack.h"
#endif
#endif

#ifdef IMGUI_ENABLE_STB_TRUETYPE
#ifndef STB_TRUETYPE_IMPLEMENTATION // in case the user already have an implementation in the _same_ compilation unit (e.g. unity builds)
#ifndef IMGUI_DISABLE_STB_TRUETYPE_IMPLEMENTATION // in case the user already have an implementation in another compilation unit
// Route stb_truetype's allocation, assertion and math hooks through the ImGui equivalents.
#define STBTT_malloc(x,u) ((void)(u), IM_ALLOC(x))
#define STBTT_free(x,u) ((void)(u), IM_FREE(x))
#define STBTT_assert(x) do { IM_ASSERT(x); } while(0)
#define STBTT_fmod(x,y) ImFmod(x,y)
#define STBTT_sqrt(x) ImSqrt(x)
#define STBTT_pow(x,y) ImPow(x,y)
#define STBTT_fabs(x) ImFabs(x)
#define STBTT_ifloor(x) ((int)ImFloorStd(x))
#define STBTT_iceil(x) ((int)ImCeil(x))
#define STBTT_STATIC
#define STB_TRUETYPE_IMPLEMENTATION
#else
#define STBTT_DEF extern
#endif
#ifdef IMGUI_STB_TRUETYPE_FILENAME
#include IMGUI_STB_TRUETYPE_FILENAME
#else
#include "imstb_truetype.h"
#endif
#endif
#endif // IMGUI_ENABLE_STB_TRUETYPE

#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif

#if defined(__clang__)
#pragma clang diagnostic pop
#endif

#if defined(_MSC_VER)
#pragma warning (pop)
#endif

#ifdef IMGUI_STB_NAMESPACE
} // namespace ImStb
using namespace IMGUI_STB_NAMESPACE;
#endif

//-----------------------------------------------------------------------------
// [SECTION] Style functions
//-----------------------------------------------------------------------------

// Write the "dark" palette into the Colors array of a style.
// - dst: style to modify; when NULL the style returned by ImGui::GetStyle() is modified instead.
// Only the Colors entries are written. Statement order matters: Separator, Tab*, and TabUnfocused* are
// derived from entries assigned earlier in this function.
void ImGui::StyleColorsDark(ImGuiStyle* dst)
{
    ImGuiStyle* style = dst ? dst : &ImGui::GetStyle();
    ImVec4* colors = style->Colors;

    colors[ImGuiCol_Text] = ImVec4(1.00f, 1.00f, 1.00f, 1.00f);
    colors[ImGuiCol_TextDisabled] = ImVec4(0.50f, 0.50f, 0.50f, 1.00f);
    colors[ImGuiCol_WindowBg] = ImVec4(0.06f, 0.06f, 0.06f, 0.94f);
    colors[ImGuiCol_ChildBg] = ImVec4(0.00f, 0.00f, 0.00f, 0.00f);
    colors[ImGuiCol_PopupBg] = ImVec4(0.08f, 0.08f, 0.08f, 0.94f);
    colors[ImGuiCol_Border] = ImVec4(0.43f, 0.43f, 0.50f, 0.50f);
    colors[ImGuiCol_BorderShadow] = ImVec4(0.00f, 0.00f, 0.00f, 0.00f);
    colors[ImGuiCol_FrameBg] = ImVec4(0.16f, 0.29f, 0.48f, 0.54f);
    colors[ImGuiCol_FrameBgHovered] = ImVec4(0.26f, 0.59f, 0.98f, 0.40f);
    colors[ImGuiCol_FrameBgActive] = ImVec4(0.26f, 0.59f, 0.98f, 0.67f);
    colors[ImGuiCol_TitleBg] = ImVec4(0.04f, 0.04f, 0.04f, 1.00f);
    colors[ImGuiCol_TitleBgActive] = ImVec4(0.16f, 0.29f, 0.48f, 1.00f);
    colors[ImGuiCol_TitleBgCollapsed] = ImVec4(0.00f, 0.00f, 0.00f, 0.51f);
    colors[ImGuiCol_MenuBarBg] = ImVec4(0.14f, 0.14f, 0.14f, 1.00f);
    colors[ImGuiCol_ScrollbarBg] = ImVec4(0.02f, 0.02f, 0.02f, 0.53f);
    colors[ImGuiCol_ScrollbarGrab] = ImVec4(0.31f, 0.31f, 0.31f, 1.00f);
    colors[ImGuiCol_ScrollbarGrabHovered] = ImVec4(0.41f, 0.41f, 0.41f, 1.00f);
    colors[ImGuiCol_ScrollbarGrabActive] = ImVec4(0.51f, 0.51f, 0.51f, 1.00f);
    colors[ImGuiCol_CheckMark] = ImVec4(0.26f, 0.59f, 0.98f, 1.00f);
    colors[ImGuiCol_SliderGrab] = ImVec4(0.24f, 0.52f, 0.88f, 1.00f);
    colors[ImGuiCol_SliderGrabActive] = ImVec4(0.26f, 0.59f, 0.98f, 1.00f);
    colors[ImGuiCol_Button] = ImVec4(0.26f, 0.59f, 0.98f, 0.40f);
    colors[ImGuiCol_ButtonHovered] = ImVec4(0.26f, 0.59f, 0.98f, 1.00f);
    colors[ImGuiCol_ButtonActive] = ImVec4(0.06f, 0.53f, 0.98f, 1.00f);
    colors[ImGuiCol_Header] = ImVec4(0.26f, 0.59f, 0.98f, 0.31f);
    colors[ImGuiCol_HeaderHovered] = ImVec4(0.26f, 0.59f, 0.98f, 0.80f);
    colors[ImGuiCol_HeaderActive] = ImVec4(0.26f, 0.59f, 0.98f, 1.00f);
    colors[ImGuiCol_Separator] = colors[ImGuiCol_Border];
    colors[ImGuiCol_SeparatorHovered] = ImVec4(0.10f, 0.40f, 0.75f, 0.78f);
    colors[ImGuiCol_SeparatorActive] = ImVec4(0.10f, 0.40f, 0.75f, 1.00f);
    colors[ImGuiCol_ResizeGrip] = ImVec4(0.26f, 0.59f, 0.98f, 0.20f);
    colors[ImGuiCol_ResizeGripHovered] = ImVec4(0.26f, 0.59f, 0.98f, 0.67f);
    colors[ImGuiCol_ResizeGripActive] = ImVec4(0.26f, 0.59f, 0.98f, 0.95f);
    colors[ImGuiCol_Tab] = ImLerp(colors[ImGuiCol_Header], colors[ImGuiCol_TitleBgActive], 0.80f);
    colors[ImGuiCol_TabHovered] = colors[ImGuiCol_HeaderHovered];
    colors[ImGuiCol_TabActive] = ImLerp(colors[ImGuiCol_HeaderActive], colors[ImGuiCol_TitleBgActive], 0.60f);
    colors[ImGuiCol_TabUnfocused] = ImLerp(colors[ImGuiCol_Tab], colors[ImGuiCol_TitleBg], 0.80f);
    colors[ImGuiCol_TabUnfocusedActive] = ImLerp(colors[ImGuiCol_TabActive], colors[ImGuiCol_TitleBg], 0.40f);
    colors[ImGuiCol_PlotLines] = ImVec4(0.61f, 0.61f, 0.61f, 1.00f);
    colors[ImGuiCol_PlotLinesHovered] = ImVec4(1.00f, 0.43f, 0.35f, 1.00f);
    colors[ImGuiCol_PlotHistogram] = ImVec4(0.90f, 0.70f, 0.00f, 1.00f);
    colors[ImGuiCol_PlotHistogramHovered] = ImVec4(1.00f, 0.60f, 0.00f, 1.00f);
    colors[ImGuiCol_TableHeaderBg] = ImVec4(0.19f, 0.19f, 0.20f, 1.00f);
    colors[ImGuiCol_TableBorderStrong] = ImVec4(0.31f, 0.31f, 0.35f, 1.00f); // Prefer using Alpha=1.0 here
    colors[ImGuiCol_TableBorderLight] = ImVec4(0.23f, 0.23f, 0.25f, 1.00f); // Prefer using Alpha=1.0 here
    colors[ImGuiCol_TableRowBg] = ImVec4(0.00f, 0.00f, 0.00f, 0.00f);
    colors[ImGuiCol_TableRowBgAlt] = ImVec4(1.00f, 1.00f, 1.00f, 0.06f);
    colors[ImGuiCol_TextSelectedBg] = ImVec4(0.26f, 0.59f, 0.98f, 0.35f);
    colors[ImGuiCol_DragDropTarget] = ImVec4(1.00f, 1.00f, 0.00f, 0.90f);
    colors[ImGuiCol_NavHighlight] = ImVec4(0.26f, 0.59f, 0.98f, 1.00f);
    colors[ImGuiCol_NavWindowingHighlight] = ImVec4(1.00f, 1.00f, 1.00f, 0.70f);
    colors[ImGuiCol_NavWindowingDimBg] = ImVec4(0.80f, 0.80f, 0.80f, 0.20f);
    colors[ImGuiCol_ModalWindowDimBg] = ImVec4(0.80f, 0.80f, 0.80f, 0.35f);
}

// Write the "classic" palette into the Colors array of a style.
// - dst: style to modify; when NULL the style returned by ImGui::GetStyle() is modified instead.
// Statement order matters: Tab*, TabUnfocused* and NavHighlight are derived from entries assigned earlier.
void ImGui::StyleColorsClassic(ImGuiStyle* dst)
{
    ImGuiStyle* style = dst ? dst : &ImGui::GetStyle();
    ImVec4* colors = style->Colors;

    colors[ImGuiCol_Text] = ImVec4(0.90f, 0.90f, 0.90f, 1.00f);
    colors[ImGuiCol_TextDisabled] = ImVec4(0.60f, 0.60f, 0.60f, 1.00f);
    colors[ImGuiCol_WindowBg] = ImVec4(0.00f, 0.00f, 0.00f, 0.85f);
    colors[ImGuiCol_ChildBg] = ImVec4(0.00f, 0.00f, 0.00f, 0.00f);
    colors[ImGuiCol_PopupBg] = ImVec4(0.11f, 0.11f, 0.14f, 0.92f);
    colors[ImGuiCol_Border] = ImVec4(0.50f, 0.50f, 0.50f, 0.50f);
    colors[ImGuiCol_BorderShadow] = ImVec4(0.00f, 0.00f, 0.00f, 0.00f);
    colors[ImGuiCol_FrameBg] = ImVec4(0.43f, 0.43f, 0.43f, 0.39f);
    colors[ImGuiCol_FrameBgHovered] = ImVec4(0.47f, 0.47f, 0.69f, 0.40f);
    colors[ImGuiCol_FrameBgActive] = ImVec4(0.42f, 0.41f, 0.64f, 0.69f);
    colors[ImGuiCol_TitleBg] = ImVec4(0.27f, 0.27f, 0.54f, 0.83f);
    colors[ImGuiCol_TitleBgActive] = ImVec4(0.32f, 0.32f, 0.63f, 0.87f);
    colors[ImGuiCol_TitleBgCollapsed] = ImVec4(0.40f, 0.40f, 0.80f, 0.20f);
    colors[ImGuiCol_MenuBarBg] = ImVec4(0.40f, 0.40f, 0.55f, 0.80f);
    colors[ImGuiCol_ScrollbarBg] = ImVec4(0.20f, 0.25f, 0.30f, 0.60f);
    colors[ImGuiCol_ScrollbarGrab] = ImVec4(0.40f, 0.40f, 0.80f, 0.30f);
    colors[ImGuiCol_ScrollbarGrabHovered] = ImVec4(0.40f, 0.40f, 0.80f, 0.40f);
    colors[ImGuiCol_ScrollbarGrabActive] = ImVec4(0.41f, 0.39f, 0.80f, 0.60f);
    colors[ImGuiCol_CheckMark] = ImVec4(0.90f, 0.90f, 0.90f, 0.50f);
    colors[ImGuiCol_SliderGrab] = ImVec4(1.00f, 1.00f, 1.00f, 0.30f);
    colors[ImGuiCol_SliderGrabActive] = ImVec4(0.41f, 0.39f, 0.80f, 0.60f);
    colors[ImGuiCol_Button] = ImVec4(0.35f, 0.40f, 0.61f, 0.62f);
    colors[ImGuiCol_ButtonHovered] = ImVec4(0.40f, 0.48f, 0.71f, 0.79f);
    colors[ImGuiCol_ButtonActive] = ImVec4(0.46f, 0.54f, 0.80f, 1.00f);
    colors[ImGuiCol_Header] = ImVec4(0.40f, 0.40f, 0.90f, 0.45f);
    colors[ImGuiCol_HeaderHovered] = ImVec4(0.45f, 0.45f, 0.90f, 0.80f);
    colors[ImGuiCol_HeaderActive] = ImVec4(0.53f, 0.53f, 0.87f, 0.80f);
    colors[ImGuiCol_Separator] = ImVec4(0.50f, 0.50f, 0.50f, 0.60f);
    colors[ImGuiCol_SeparatorHovered] = ImVec4(0.60f, 0.60f, 0.70f, 1.00f);
    colors[ImGuiCol_SeparatorActive] = ImVec4(0.70f, 0.70f, 0.90f, 1.00f);
    colors[ImGuiCol_ResizeGrip] = ImVec4(1.00f, 1.00f, 1.00f, 0.10f);
    colors[ImGuiCol_ResizeGripHovered] = ImVec4(0.78f, 0.82f, 1.00f, 0.60f);
    colors[ImGuiCol_ResizeGripActive] = ImVec4(0.78f, 0.82f, 1.00f, 0.90f);
    colors[ImGuiCol_Tab] = ImLerp(colors[ImGuiCol_Header], colors[ImGuiCol_TitleBgActive], 0.80f);
    colors[ImGuiCol_TabHovered] = colors[ImGuiCol_HeaderHovered];
    colors[ImGuiCol_TabActive] = ImLerp(colors[ImGuiCol_HeaderActive], colors[ImGuiCol_TitleBgActive], 0.60f);
    colors[ImGuiCol_TabUnfocused] = ImLerp(colors[ImGuiCol_Tab], colors[ImGuiCol_TitleBg], 0.80f);
    colors[ImGuiCol_TabUnfocusedActive] = ImLerp(colors[ImGuiCol_TabActive], colors[ImGuiCol_TitleBg], 0.40f);
    colors[ImGuiCol_PlotLines] = ImVec4(1.00f, 1.00f, 1.00f, 1.00f);
    colors[ImGuiCol_PlotLinesHovered] = ImVec4(0.90f, 0.70f, 0.00f, 1.00f);
    colors[ImGuiCol_PlotHistogram] = ImVec4(0.90f, 0.70f, 0.00f, 1.00f);
    colors[ImGuiCol_PlotHistogramHovered] = ImVec4(1.00f, 0.60f, 0.00f, 1.00f);
    colors[ImGuiCol_TableHeaderBg] = ImVec4(0.27f, 0.27f, 0.38f, 1.00f);
    colors[ImGuiCol_TableBorderStrong] = ImVec4(0.31f, 0.31f, 0.45f, 1.00f); // Prefer using Alpha=1.0 here
    colors[ImGuiCol_TableBorderLight] = ImVec4(0.26f, 0.26f, 0.28f, 1.00f); // Prefer using Alpha=1.0 here
    colors[ImGuiCol_TableRowBg] = ImVec4(0.00f, 0.00f, 0.00f, 0.00f);
    colors[ImGuiCol_TableRowBgAlt] = ImVec4(1.00f, 1.00f, 1.00f, 0.07f);
    colors[ImGuiCol_TextSelectedBg] = ImVec4(0.00f, 0.00f, 1.00f, 0.35f);
    colors[ImGuiCol_DragDropTarget] = ImVec4(1.00f, 1.00f, 0.00f, 0.90f);
    colors[ImGuiCol_NavHighlight] = colors[ImGuiCol_HeaderHovered];
    colors[ImGuiCol_NavWindowingHighlight] = ImVec4(1.00f, 1.00f, 1.00f, 0.70f);
    colors[ImGuiCol_NavWindowingDimBg] = ImVec4(0.80f, 0.80f, 0.80f, 0.20f);
    colors[ImGuiCol_ModalWindowDimBg] = ImVec4(0.20f, 0.20f, 0.20f, 0.35f);
}

// Write the "light" palette into the Colors array of a style.
// - dst: style to modify; when NULL the style returned by ImGui::GetStyle() is modified instead.
// Statement order matters: Tab*, TabUnfocused* and NavHighlight are derived from entries assigned earlier.
// Those light colors are better suited with a thicker font than the default one + FrameBorder
void ImGui::StyleColorsLight(ImGuiStyle* dst)
{
    ImGuiStyle* style = dst ? dst : &ImGui::GetStyle();
    ImVec4* colors = style->Colors;

    colors[ImGuiCol_Text] = ImVec4(0.00f, 0.00f, 0.00f, 1.00f);
    colors[ImGuiCol_TextDisabled] = ImVec4(0.60f, 0.60f, 0.60f, 1.00f);
    colors[ImGuiCol_WindowBg] = ImVec4(0.94f, 0.94f, 0.94f, 1.00f);
    colors[ImGuiCol_ChildBg] = ImVec4(0.00f, 0.00f, 0.00f, 0.00f);
    colors[ImGuiCol_PopupBg] = ImVec4(1.00f, 1.00f, 1.00f, 0.98f);
    colors[ImGuiCol_Border] = ImVec4(0.00f, 0.00f, 0.00f, 0.30f);
    colors[ImGuiCol_BorderShadow] = ImVec4(0.00f, 0.00f, 0.00f, 0.00f);
    colors[ImGuiCol_FrameBg] = ImVec4(1.00f, 1.00f, 1.00f, 1.00f);
    colors[ImGuiCol_FrameBgHovered] = ImVec4(0.26f, 0.59f, 0.98f, 0.40f);
    colors[ImGuiCol_FrameBgActive] = ImVec4(0.26f, 0.59f, 0.98f, 0.67f);
    colors[ImGuiCol_TitleBg] = ImVec4(0.96f, 0.96f, 0.96f, 1.00f);
    colors[ImGuiCol_TitleBgActive] = ImVec4(0.82f, 0.82f, 0.82f, 1.00f);
    colors[ImGuiCol_TitleBgCollapsed] = ImVec4(1.00f, 1.00f, 1.00f, 0.51f);
    colors[ImGuiCol_MenuBarBg] = ImVec4(0.86f, 0.86f, 0.86f, 1.00f);
    colors[ImGuiCol_ScrollbarBg] = ImVec4(0.98f, 0.98f, 0.98f, 0.53f);
    colors[ImGuiCol_ScrollbarGrab] = ImVec4(0.69f, 0.69f, 0.69f, 0.80f);
    colors[ImGuiCol_ScrollbarGrabHovered] = ImVec4(0.49f, 0.49f, 0.49f, 0.80f);
    colors[ImGuiCol_ScrollbarGrabActive] = ImVec4(0.49f, 0.49f, 0.49f, 1.00f);
    colors[ImGuiCol_CheckMark] = ImVec4(0.26f, 0.59f, 0.98f, 1.00f);
    colors[ImGuiCol_SliderGrab] = ImVec4(0.26f, 0.59f, 0.98f, 0.78f);
    colors[ImGuiCol_SliderGrabActive] = ImVec4(0.46f, 0.54f, 0.80f, 0.60f);
    colors[ImGuiCol_Button] = ImVec4(0.26f, 0.59f, 0.98f, 0.40f);
    colors[ImGuiCol_ButtonHovered] = ImVec4(0.26f, 0.59f, 0.98f, 1.00f);
    colors[ImGuiCol_ButtonActive] = ImVec4(0.06f, 0.53f, 0.98f, 1.00f);
    colors[ImGuiCol_Header] = ImVec4(0.26f, 0.59f, 0.98f, 0.31f);
    colors[ImGuiCol_HeaderHovered] = ImVec4(0.26f, 0.59f, 0.98f, 0.80f);
    colors[ImGuiCol_HeaderActive] = ImVec4(0.26f, 0.59f, 0.98f, 1.00f);
    colors[ImGuiCol_Separator] = ImVec4(0.39f, 0.39f, 0.39f, 0.62f);
    colors[ImGuiCol_SeparatorHovered] = ImVec4(0.14f, 0.44f, 0.80f, 0.78f);
    colors[ImGuiCol_SeparatorActive] = ImVec4(0.14f, 0.44f, 0.80f, 1.00f);
    colors[ImGuiCol_ResizeGrip] = ImVec4(0.35f, 0.35f, 0.35f, 0.17f);
    colors[ImGuiCol_ResizeGripHovered] = ImVec4(0.26f, 0.59f, 0.98f, 0.67f);
    colors[ImGuiCol_ResizeGripActive] = ImVec4(0.26f, 0.59f, 0.98f, 0.95f);
    colors[ImGuiCol_Tab] = ImLerp(colors[ImGuiCol_Header], colors[ImGuiCol_TitleBgActive], 0.90f);
    colors[ImGuiCol_TabHovered] = colors[ImGuiCol_HeaderHovered];
    colors[ImGuiCol_TabActive] = ImLerp(colors[ImGuiCol_HeaderActive], colors[ImGuiCol_TitleBgActive], 0.60f);
    colors[ImGuiCol_TabUnfocused] = ImLerp(colors[ImGuiCol_Tab], colors[ImGuiCol_TitleBg], 0.80f);
    colors[ImGuiCol_TabUnfocusedActive] = ImLerp(colors[ImGuiCol_TabActive], colors[ImGuiCol_TitleBg], 0.40f);
    colors[ImGuiCol_PlotLines] = ImVec4(0.39f, 0.39f, 0.39f, 1.00f);
    colors[ImGuiCol_PlotLinesHovered] = ImVec4(1.00f, 0.43f, 0.35f, 1.00f);
    colors[ImGuiCol_PlotHistogram] = ImVec4(0.90f, 0.70f, 0.00f, 1.00f);
    colors[ImGuiCol_PlotHistogramHovered] = ImVec4(1.00f, 0.45f, 0.00f, 1.00f);
    colors[ImGuiCol_TableHeaderBg] = ImVec4(0.78f, 0.87f, 0.98f, 1.00f);
    colors[ImGuiCol_TableBorderStrong] = ImVec4(0.57f, 0.57f, 0.64f, 1.00f); // Prefer using Alpha=1.0 here
    colors[ImGuiCol_TableBorderLight] = ImVec4(0.68f, 0.68f, 0.74f, 1.00f); // Prefer using Alpha=1.0 here
    colors[ImGuiCol_TableRowBg] = ImVec4(0.00f, 0.00f, 0.00f, 0.00f);
    colors[ImGuiCol_TableRowBgAlt] = ImVec4(0.30f, 0.30f, 0.30f, 0.09f);
    colors[ImGuiCol_TextSelectedBg] = ImVec4(0.26f, 0.59f, 0.98f, 0.35f);
    colors[ImGuiCol_DragDropTarget] = ImVec4(0.26f, 0.59f, 0.98f, 0.95f);
    colors[ImGuiCol_NavHighlight] = colors[ImGuiCol_HeaderHovered];
    colors[ImGuiCol_NavWindowingHighlight] = ImVec4(0.70f, 0.70f, 0.70f, 0.70f);
    colors[ImGuiCol_NavWindowingDimBg] = ImVec4(0.20f, 0.20f, 0.20f, 0.20f);
    colors[ImGuiCol_ModalWindowDimBg] = ImVec4(0.20f, 0.20f, 0.20f, 0.35f);
}

//-----------------------------------------------------------------------------
// [SECTION] ImDrawList
//-----------------------------------------------------------------------------

// Zero-fill the whole object, then precompute ArcFastVtx: entry i holds (cos a, sin a) for the angle
// a = i * 2*PI / IM_ARRAYSIZE(ArcFastVtx), i.e. evenly spaced points on the unit circle.
ImDrawListSharedData::ImDrawListSharedData()
{
    memset(this, 0, sizeof(*this));
    for (int i = 0; i < IM_ARRAYSIZE(ArcFastVtx); i++)
    {
        const float a = ((float)i * 2 * IM_PI) / (float)IM_ARRAYSIZE(ArcFastVtx);
        ArcFastVtx[i] = ImVec2(ImCos(a), ImSin(a));
    }
}

// Store a new max error and rebuild the CircleSegmentCounts lookup table.
// Does nothing when max_error equals the stored value (exact float comparison).
void ImDrawListSharedData::SetCircleSegmentMaxError(float max_error)
{
    if (CircleSegmentMaxError == max_error)
        return;
    CircleSegmentMaxError = max_error;
    for (int i = 0; i < IM_ARRAYSIZE(CircleSegmentCounts); i++)
    {
        // The table index is used as the radius.
        const float radius = (float)i;
        const int segment_count = IM_DRAWLIST_CIRCLE_AUTO_SEGMENT_CALC(radius, CircleSegmentMaxError);
        CircleSegmentCounts[i] = (ImU8)ImMin(segment_count, 255); // Capped at 255 before the narrowing cast to ImU8
    }
}

// Initialize before use in a new frame. We always have a command ready in the buffer.
void ImDrawList::_ResetForNewFrame()
{
    // Verify that the ImDrawCmd fields we want to memcmp() are contiguous in memory.
    // (those should be IM_STATIC_ASSERT() in theory but with our pre C++11 setup the whole check doesn't compile with GCC)
    IM_ASSERT(IM_OFFSETOF(ImDrawCmd, ClipRect) == 0);
    IM_ASSERT(IM_OFFSETOF(ImDrawCmd, TextureId) == sizeof(ImVec4));
    IM_ASSERT(IM_OFFSETOF(ImDrawCmd, VtxOffset) == sizeof(ImVec4) + sizeof(ImTextureID));

    CmdBuffer.resize(0);
    IdxBuffer.resize(0);
    VtxBuffer.resize(0);
    Flags = _Data->InitialFlags;
    memset(&_CmdHeader, 0, sizeof(_CmdHeader));
    _VtxCurrentIdx = 0;
    _VtxWritePtr = NULL;
    _IdxWritePtr = NULL;
    _ClipRectStack.resize(0);
    _TextureIdStack.resize(0);
    _Path.resize(0);
    _Splitter.Clear();
    CmdBuffer.push_back(ImDrawCmd()); // The always-ready command mentioned above
    _FringeScale = 1.0f;
}

// Clear every buffer and stack with clear() (where _ResetForNewFrame() uses resize(0)) and reset the write
// cursors and Flags. Unlike _ResetForNewFrame(), no draw command is pushed afterwards, and _CmdHeader and
// _FringeScale are left untouched.
// NOTE(review): presumably clear() also releases the underlying allocations, as the function name suggests - confirm against ImVector.
void ImDrawList::_ClearFreeMemory()
{
    CmdBuffer.clear();
    IdxBuffer.clear();
    VtxBuffer.clear();
    Flags = ImDrawListFlags_None;
    _VtxCurrentIdx = 0;
    _VtxWritePtr = NULL;
    _IdxWritePtr = NULL;
    _ClipRectStack.clear();
    _TextureIdStack.clear();
    _Path.clear();
    _Splitter.ClearFreeMemory();
}

// Allocate a new ImDrawList (IM_NEW) bound to the same _Data and copy the output state into it:
// CmdBuffer, IdxBuffer, VtxBuffer and Flags. No other member is copied.
// NOTE(review): the caller presumably owns the returned list and must release it - confirm the matching deallocation.
ImDrawList* ImDrawList::CloneOutput() const
{
    ImDrawList* dst = IM_NEW(ImDrawList(_Data));
    dst->CmdBuffer = CmdBuffer;
    dst->IdxBuffer = IdxBuffer;
    dst->VtxBuffer = VtxBuffer;
    dst->Flags = Flags;
    return dst;
}

// Append a new draw command initialized from the current header state (_CmdHeader), starting at the
// current end of the index buffer.
void ImDrawList::AddDrawCmd()
{
    ImDrawCmd draw_cmd;
    draw_cmd.ClipRect = _CmdHeader.ClipRect; // Same as calling ImDrawCmd_HeaderCopy()
    draw_cmd.TextureId = _CmdHeader.TextureId;
    draw_cmd.VtxOffset = _CmdHeader.VtxOffset;
    draw_cmd.IdxOffset = IdxBuffer.Size;

    // The clip rectangle must not be inverted (min <= max on both axes).
    IM_ASSERT(draw_cmd.ClipRect.x <= draw_cmd.ClipRect.z && draw_cmd.ClipRect.y <= draw_cmd.ClipRect.w);
    CmdBuffer.push_back(draw_cmd);
}

// Pop trailing draw command (used before merging or presenting to user)
// Note that this leaves the ImDrawList in a state unfit for further commands, as most code assume that CmdBuffer.Size > 0 && CmdBuffer.back().UserCallback == NULL
void ImDrawList::_PopUnusedDrawCmd()
{
    if (CmdBuffer.Size == 0)
        return;
    ImDrawCmd* curr_cmd = &CmdBuffer.Data[CmdBuffer.Size - 1];
    // Only a command with no elements and no callback is considered unused.
    if (curr_cmd->ElemCount == 0 && curr_cmd->UserCallback == NULL)
        CmdBuffer.pop_back();
}

// Record a user callback as a draw command of its own.
// The callback is stored in the current command when it has no elements yet, otherwise in a freshly added
// command; a new empty command is then appended so later primitives do not land in the callback command.
void ImDrawList::AddCallback(ImDrawCallback callback, void* callback_data)
{
    ImDrawCmd* curr_cmd = &CmdBuffer.Data[CmdBuffer.Size - 1];
    IM_ASSERT(curr_cmd->UserCallback == NULL);
    if (curr_cmd->ElemCount != 0)
    {
        AddDrawCmd();
        curr_cmd = &CmdBuffer.Data[CmdBuffer.Size - 1]; // Re-fetch: AddDrawCmd() appended to CmdBuffer
    }
    curr_cmd->UserCallback = callback;
    curr_cmd->UserCallbackData = callback_data;

    AddDrawCmd(); // Force a new command after us (see comment below)
}

// Compare ClipRect, TextureId and VtxOffset with a single memcmp()
#define ImDrawCmd_HeaderSize (IM_OFFSETOF(ImDrawCmd, VtxOffset) + sizeof(unsigned int))
#define ImDrawCmd_HeaderCompare(CMD_LHS, CMD_RHS) (memcmp(CMD_LHS, CMD_RHS, ImDrawCmd_HeaderSize)) // Compare ClipRect, TextureId, VtxOffset
#define ImDrawCmd_HeaderCopy(CMD_DST, CMD_SRC) (memcpy(CMD_DST, CMD_SRC, ImDrawCmd_HeaderSize)) // Copy ClipRect, TextureId, VtxOffset

// Our scheme may appear a bit unusual, basically we want the most-common calls AddLine AddRect etc. to not have to perform any check so we always have a command ready in the stack.
// The cost of figuring out if a new command has to be added or if we can merge is paid in those Update** functions only.
+void ImDrawList::_OnChangedClipRect() +{ + // If current command is used with different settings we need to add a new command + ImDrawCmd* curr_cmd = &CmdBuffer.Data[CmdBuffer.Size - 1]; + if (curr_cmd->ElemCount != 0 && memcmp(&curr_cmd->ClipRect, &_CmdHeader.ClipRect, sizeof(ImVec4)) != 0) + { + AddDrawCmd(); + return; + } + IM_ASSERT(curr_cmd->UserCallback == NULL); + + // Try to merge with previous command if it matches, else use current command + ImDrawCmd* prev_cmd = curr_cmd - 1; + if (curr_cmd->ElemCount == 0 && CmdBuffer.Size > 1 && ImDrawCmd_HeaderCompare(&_CmdHeader, prev_cmd) == 0 && prev_cmd->UserCallback == NULL) + { + CmdBuffer.pop_back(); + return; + } + + curr_cmd->ClipRect = _CmdHeader.ClipRect; +} + +void ImDrawList::_OnChangedTextureID() +{ + // If current command is used with different settings we need to add a new command + ImDrawCmd* curr_cmd = &CmdBuffer.Data[CmdBuffer.Size - 1]; + if (curr_cmd->ElemCount != 0 && curr_cmd->TextureId != _CmdHeader.TextureId) + { + AddDrawCmd(); + return; + } + IM_ASSERT(curr_cmd->UserCallback == NULL); + + // Try to merge with previous command if it matches, else use current command + ImDrawCmd* prev_cmd = curr_cmd - 1; + if (curr_cmd->ElemCount == 0 && CmdBuffer.Size > 1 && ImDrawCmd_HeaderCompare(&_CmdHeader, prev_cmd) == 0 && prev_cmd->UserCallback == NULL) + { + CmdBuffer.pop_back(); + return; + } + + curr_cmd->TextureId = _CmdHeader.TextureId; +} + +void ImDrawList::_OnChangedVtxOffset() +{ + // We don't need to compare curr_cmd->VtxOffset != _CmdHeader.VtxOffset because we know it'll be different at the time we call this. + _VtxCurrentIdx = 0; + ImDrawCmd* curr_cmd = &CmdBuffer.Data[CmdBuffer.Size - 1]; + //IM_ASSERT(curr_cmd->VtxOffset != _CmdHeader.VtxOffset); // See #3349 + if (curr_cmd->ElemCount != 0) + { + AddDrawCmd(); + return; + } + IM_ASSERT(curr_cmd->UserCallback == NULL); + curr_cmd->VtxOffset = _CmdHeader.VtxOffset; +} + +// Render-level scissoring. 
This is passed down to your render function but not used for CPU-side coarse clipping. Prefer using higher-level ImGui::PushClipRect() to affect logic (hit-testing and widget culling) +void ImDrawList::PushClipRect(ImVec2 cr_min, ImVec2 cr_max, bool intersect_with_current_clip_rect) +{ + ImVec4 cr(cr_min.x, cr_min.y, cr_max.x, cr_max.y); + if (intersect_with_current_clip_rect) + { + ImVec4 current = _CmdHeader.ClipRect; + if (cr.x < current.x) cr.x = current.x; + if (cr.y < current.y) cr.y = current.y; + if (cr.z > current.z) cr.z = current.z; + if (cr.w > current.w) cr.w = current.w; + } + cr.z = ImMax(cr.x, cr.z); + cr.w = ImMax(cr.y, cr.w); + + _ClipRectStack.push_back(cr); + _CmdHeader.ClipRect = cr; + _OnChangedClipRect(); +} + +void ImDrawList::PushClipRectFullScreen() +{ + PushClipRect(ImVec2(_Data->ClipRectFullscreen.x, _Data->ClipRectFullscreen.y), ImVec2(_Data->ClipRectFullscreen.z, _Data->ClipRectFullscreen.w)); +} + +void ImDrawList::PopClipRect() +{ + _ClipRectStack.pop_back(); + _CmdHeader.ClipRect = (_ClipRectStack.Size == 0) ? _Data->ClipRectFullscreen : _ClipRectStack.Data[_ClipRectStack.Size - 1]; + _OnChangedClipRect(); +} + +void ImDrawList::PushTextureID(ImTextureID texture_id) +{ + _TextureIdStack.push_back(texture_id); + _CmdHeader.TextureId = texture_id; + _OnChangedTextureID(); +} + +void ImDrawList::PopTextureID() +{ + _TextureIdStack.pop_back(); + _CmdHeader.TextureId = (_TextureIdStack.Size == 0) ? (ImTextureID)NULL : _TextureIdStack.Data[_TextureIdStack.Size - 1]; + _OnChangedTextureID(); +} + +// Reserve space for a number of vertices and indices. +// You must finish filling your reserved data before calling PrimReserve() again, as it may reallocate or +// submit the intermediate results. PrimUnreserve() can be used to release unused allocations. 
+void ImDrawList::PrimReserve(int idx_count, int vtx_count) +{ + // Large mesh support (when enabled) + IM_ASSERT_PARANOID(idx_count >= 0 && vtx_count >= 0); + if (sizeof(ImDrawIdx) == 2 && (_VtxCurrentIdx + vtx_count >= (1 << 16)) && (Flags & ImDrawListFlags_AllowVtxOffset)) + { + // FIXME: In theory we should be testing that vtx_count <64k here. + // In practice, RenderText() relies on reserving ahead for a worst case scenario so it is currently useful for us + // to not make that check until we rework the text functions to handle clipping and large horizontal lines better. + _CmdHeader.VtxOffset = VtxBuffer.Size; + _OnChangedVtxOffset(); + } + + ImDrawCmd* draw_cmd = &CmdBuffer.Data[CmdBuffer.Size - 1]; + draw_cmd->ElemCount += idx_count; + + int vtx_buffer_old_size = VtxBuffer.Size; + VtxBuffer.resize(vtx_buffer_old_size + vtx_count); + _VtxWritePtr = VtxBuffer.Data + vtx_buffer_old_size; + + int idx_buffer_old_size = IdxBuffer.Size; + IdxBuffer.resize(idx_buffer_old_size + idx_count); + _IdxWritePtr = IdxBuffer.Data + idx_buffer_old_size; +} + +// Release the a number of reserved vertices/indices from the end of the last reservation made with PrimReserve(). +void ImDrawList::PrimUnreserve(int idx_count, int vtx_count) +{ + IM_ASSERT_PARANOID(idx_count >= 0 && vtx_count >= 0); + + ImDrawCmd* draw_cmd = &CmdBuffer.Data[CmdBuffer.Size - 1]; + draw_cmd->ElemCount -= idx_count; + VtxBuffer.shrink(VtxBuffer.Size - vtx_count); + IdxBuffer.shrink(IdxBuffer.Size - idx_count); +} + +// Fully unrolled with inline call to keep our debug builds decently fast. 
+void ImDrawList::PrimRect(const ImVec2& a, const ImVec2& c, ImU32 col) +{ + ImVec2 b(c.x, a.y), d(a.x, c.y), uv(_Data->TexUvWhitePixel); + ImDrawIdx idx = (ImDrawIdx)_VtxCurrentIdx; + _IdxWritePtr[0] = idx; _IdxWritePtr[1] = (ImDrawIdx)(idx+1); _IdxWritePtr[2] = (ImDrawIdx)(idx+2); + _IdxWritePtr[3] = idx; _IdxWritePtr[4] = (ImDrawIdx)(idx+2); _IdxWritePtr[5] = (ImDrawIdx)(idx+3); + _VtxWritePtr[0].pos = a; _VtxWritePtr[0].uv = uv; _VtxWritePtr[0].col = col; + _VtxWritePtr[1].pos = b; _VtxWritePtr[1].uv = uv; _VtxWritePtr[1].col = col; + _VtxWritePtr[2].pos = c; _VtxWritePtr[2].uv = uv; _VtxWritePtr[2].col = col; + _VtxWritePtr[3].pos = d; _VtxWritePtr[3].uv = uv; _VtxWritePtr[3].col = col; + _VtxWritePtr += 4; + _VtxCurrentIdx += 4; + _IdxWritePtr += 6; +} + +void ImDrawList::PrimRectUV(const ImVec2& a, const ImVec2& c, const ImVec2& uv_a, const ImVec2& uv_c, ImU32 col) +{ + ImVec2 b(c.x, a.y), d(a.x, c.y), uv_b(uv_c.x, uv_a.y), uv_d(uv_a.x, uv_c.y); + ImDrawIdx idx = (ImDrawIdx)_VtxCurrentIdx; + _IdxWritePtr[0] = idx; _IdxWritePtr[1] = (ImDrawIdx)(idx+1); _IdxWritePtr[2] = (ImDrawIdx)(idx+2); + _IdxWritePtr[3] = idx; _IdxWritePtr[4] = (ImDrawIdx)(idx+2); _IdxWritePtr[5] = (ImDrawIdx)(idx+3); + _VtxWritePtr[0].pos = a; _VtxWritePtr[0].uv = uv_a; _VtxWritePtr[0].col = col; + _VtxWritePtr[1].pos = b; _VtxWritePtr[1].uv = uv_b; _VtxWritePtr[1].col = col; + _VtxWritePtr[2].pos = c; _VtxWritePtr[2].uv = uv_c; _VtxWritePtr[2].col = col; + _VtxWritePtr[3].pos = d; _VtxWritePtr[3].uv = uv_d; _VtxWritePtr[3].col = col; + _VtxWritePtr += 4; + _VtxCurrentIdx += 4; + _IdxWritePtr += 6; +} + +void ImDrawList::PrimQuadUV(const ImVec2& a, const ImVec2& b, const ImVec2& c, const ImVec2& d, const ImVec2& uv_a, const ImVec2& uv_b, const ImVec2& uv_c, const ImVec2& uv_d, ImU32 col) +{ + ImDrawIdx idx = (ImDrawIdx)_VtxCurrentIdx; + _IdxWritePtr[0] = idx; _IdxWritePtr[1] = (ImDrawIdx)(idx+1); _IdxWritePtr[2] = (ImDrawIdx)(idx+2); + _IdxWritePtr[3] = idx; _IdxWritePtr[4] = 
(ImDrawIdx)(idx+2); _IdxWritePtr[5] = (ImDrawIdx)(idx+3); + _VtxWritePtr[0].pos = a; _VtxWritePtr[0].uv = uv_a; _VtxWritePtr[0].col = col; + _VtxWritePtr[1].pos = b; _VtxWritePtr[1].uv = uv_b; _VtxWritePtr[1].col = col; + _VtxWritePtr[2].pos = c; _VtxWritePtr[2].uv = uv_c; _VtxWritePtr[2].col = col; + _VtxWritePtr[3].pos = d; _VtxWritePtr[3].uv = uv_d; _VtxWritePtr[3].col = col; + _VtxWritePtr += 4; + _VtxCurrentIdx += 4; + _IdxWritePtr += 6; +} + +// On AddPolyline() and AddConvexPolyFilled() we intentionally avoid using ImVec2 and superfluous function calls to optimize debug/non-inlined builds. +// Those macros expects l-values. +#define IM_NORMALIZE2F_OVER_ZERO(VX,VY) do { float d2 = VX*VX + VY*VY; if (d2 > 0.0f) { float inv_len = 1.0f / ImSqrt(d2); VX *= inv_len; VY *= inv_len; } } while (0) +#define IM_FIXNORMAL2F(VX,VY) do { float d2 = VX*VX + VY*VY; if (d2 < 0.5f) d2 = 0.5f; float inv_lensq = 1.0f / d2; VX *= inv_lensq; VY *= inv_lensq; } while (0) + +// TODO: Thickness anti-aliased lines cap are missing their AA fringe. +// We avoid using the ImVec2 math operators here to reduce cost to a minimum for debug/non-inlined builds. +void ImDrawList::AddPolyline(const ImVec2* points, const int points_count, ImU32 col, bool closed, float thickness) +{ + if (points_count < 2) + return; + + const ImVec2 opaque_uv = _Data->TexUvWhitePixel; + const int count = closed ? points_count : points_count - 1; // The number of line segments we need to draw + const bool thick_line = (thickness > _FringeScale); + + if (Flags & ImDrawListFlags_AntiAliasedLines) + { + // Anti-aliased stroke + const float AA_SIZE = _FringeScale; + const ImU32 col_trans = col & ~IM_COL32_A_MASK; + + // Thicknesses <1.0 should behave like thickness 1.0 + thickness = ImMax(thickness, 1.0f); + const int integer_thickness = (int)thickness; + const float fractional_thickness = thickness - integer_thickness; + + // Do we want to draw this line using a texture? 
+ // - For now, only draw integer-width lines using textures to avoid issues with the way scaling occurs, could be improved. + // - If AA_SIZE is not 1.0f we cannot use the texture path. + const bool use_texture = (Flags & ImDrawListFlags_AntiAliasedLinesUseTex) && (integer_thickness < IM_DRAWLIST_TEX_LINES_WIDTH_MAX) && (fractional_thickness <= 0.00001f) && (AA_SIZE == 1.0f); + + // We should never hit this, because NewFrame() doesn't set ImDrawListFlags_AntiAliasedLinesUseTex unless ImFontAtlasFlags_NoBakedLines is off + IM_ASSERT_PARANOID(!use_texture || !(_Data->Font->ContainerAtlas->Flags & ImFontAtlasFlags_NoBakedLines)); + + const int idx_count = use_texture ? (count * 6) : (thick_line ? count * 18 : count * 12); + const int vtx_count = use_texture ? (points_count * 2) : (thick_line ? points_count * 4 : points_count * 3); + PrimReserve(idx_count, vtx_count); + + // Temporary buffer + // The first items are normals at each line point, then after that there are either 2 or 4 temp points for each line point + ImVec2* temp_normals = (ImVec2*)alloca(points_count * ((use_texture || !thick_line) ? 3 : 5) * sizeof(ImVec2)); //-V630 + ImVec2* temp_points = temp_normals + points_count; + + // Calculate normals (tangents) for each line segment + for (int i1 = 0; i1 < count; i1++) + { + const int i2 = (i1 + 1) == points_count ? 
0 : i1 + 1; + float dx = points[i2].x - points[i1].x; + float dy = points[i2].y - points[i1].y; + IM_NORMALIZE2F_OVER_ZERO(dx, dy); + temp_normals[i1].x = dy; + temp_normals[i1].y = -dx; + } + if (!closed) + temp_normals[points_count - 1] = temp_normals[points_count - 2]; + + // If we are drawing a one-pixel-wide line without a texture, or a textured line of any width, we only need 2 or 3 vertices per point + if (use_texture || !thick_line) + { + // [PATH 1] Texture-based lines (thick or non-thick) + // [PATH 2] Non texture-based lines (non-thick) + + // The width of the geometry we need to draw - this is essentially pixels for the line itself, plus "one pixel" for AA. + // - In the texture-based path, we don't use AA_SIZE here because the +1 is tied to the generated texture + // (see ImFontAtlasBuildRenderLinesTexData() function), and so alternate values won't work without changes to that code. + // - In the non texture-based paths, we would allow AA_SIZE to potentially be != 1.0f with a patch (e.g. fringe_scale patch to + // allow scaling geometry while preserving one-screen-pixel AA fringe). + const float half_draw_size = use_texture ? 
((thickness * 0.5f) + 1) : AA_SIZE; + + // If line is not closed, the first and last points need to be generated differently as there are no normals to blend + if (!closed) + { + temp_points[0] = points[0] + temp_normals[0] * half_draw_size; + temp_points[1] = points[0] - temp_normals[0] * half_draw_size; + temp_points[(points_count-1)*2+0] = points[points_count-1] + temp_normals[points_count-1] * half_draw_size; + temp_points[(points_count-1)*2+1] = points[points_count-1] - temp_normals[points_count-1] * half_draw_size; + } + + // Generate the indices to form a number of triangles for each line segment, and the vertices for the line edges + // This takes points n and n+1 and writes into n+1, with the first point in a closed line being generated from the final one (as n+1 wraps) + // FIXME-OPT: Merge the different loops, possibly remove the temporary buffer. + unsigned int idx1 = _VtxCurrentIdx; // Vertex index for start of line segment + for (int i1 = 0; i1 < count; i1++) // i1 is the first point of the line segment + { + const int i2 = (i1 + 1) == points_count ? 0 : i1 + 1; // i2 is the second point of the line segment + const unsigned int idx2 = ((i1 + 1) == points_count) ? _VtxCurrentIdx : (idx1 + (use_texture ? 
2 : 3)); // Vertex index for end of segment + + // Average normals + float dm_x = (temp_normals[i1].x + temp_normals[i2].x) * 0.5f; + float dm_y = (temp_normals[i1].y + temp_normals[i2].y) * 0.5f; + IM_FIXNORMAL2F(dm_x, dm_y); + dm_x *= half_draw_size; // dm_x, dm_y are offset to the outer edge of the AA area + dm_y *= half_draw_size; + + // Add temporary vertexes for the outer edges + ImVec2* out_vtx = &temp_points[i2 * 2]; + out_vtx[0].x = points[i2].x + dm_x; + out_vtx[0].y = points[i2].y + dm_y; + out_vtx[1].x = points[i2].x - dm_x; + out_vtx[1].y = points[i2].y - dm_y; + + if (use_texture) + { + // Add indices for two triangles + _IdxWritePtr[0] = (ImDrawIdx)(idx2 + 0); _IdxWritePtr[1] = (ImDrawIdx)(idx1 + 0); _IdxWritePtr[2] = (ImDrawIdx)(idx1 + 1); // Right tri + _IdxWritePtr[3] = (ImDrawIdx)(idx2 + 1); _IdxWritePtr[4] = (ImDrawIdx)(idx1 + 1); _IdxWritePtr[5] = (ImDrawIdx)(idx2 + 0); // Left tri + _IdxWritePtr += 6; + } + else + { + // Add indexes for four triangles + _IdxWritePtr[0] = (ImDrawIdx)(idx2 + 0); _IdxWritePtr[1] = (ImDrawIdx)(idx1 + 0); _IdxWritePtr[2] = (ImDrawIdx)(idx1 + 2); // Right tri 1 + _IdxWritePtr[3] = (ImDrawIdx)(idx1 + 2); _IdxWritePtr[4] = (ImDrawIdx)(idx2 + 2); _IdxWritePtr[5] = (ImDrawIdx)(idx2 + 0); // Right tri 2 + _IdxWritePtr[6] = (ImDrawIdx)(idx2 + 1); _IdxWritePtr[7] = (ImDrawIdx)(idx1 + 1); _IdxWritePtr[8] = (ImDrawIdx)(idx1 + 0); // Left tri 1 + _IdxWritePtr[9] = (ImDrawIdx)(idx1 + 0); _IdxWritePtr[10] = (ImDrawIdx)(idx2 + 0); _IdxWritePtr[11] = (ImDrawIdx)(idx2 + 1); // Left tri 2 + _IdxWritePtr += 12; + } + + idx1 = idx2; + } + + // Add vertexes for each point on the line + if (use_texture) + { + // If we're using textures we only need to emit the left/right edge vertices + ImVec4 tex_uvs = _Data->TexUvLines[integer_thickness]; + /*if (fractional_thickness != 0.0f) // Currently always zero when use_texture==false! 
+ { + const ImVec4 tex_uvs_1 = _Data->TexUvLines[integer_thickness + 1]; + tex_uvs.x = tex_uvs.x + (tex_uvs_1.x - tex_uvs.x) * fractional_thickness; // inlined ImLerp() + tex_uvs.y = tex_uvs.y + (tex_uvs_1.y - tex_uvs.y) * fractional_thickness; + tex_uvs.z = tex_uvs.z + (tex_uvs_1.z - tex_uvs.z) * fractional_thickness; + tex_uvs.w = tex_uvs.w + (tex_uvs_1.w - tex_uvs.w) * fractional_thickness; + }*/ + ImVec2 tex_uv0(tex_uvs.x, tex_uvs.y); + ImVec2 tex_uv1(tex_uvs.z, tex_uvs.w); + for (int i = 0; i < points_count; i++) + { + _VtxWritePtr[0].pos = temp_points[i * 2 + 0]; _VtxWritePtr[0].uv = tex_uv0; _VtxWritePtr[0].col = col; // Left-side outer edge + _VtxWritePtr[1].pos = temp_points[i * 2 + 1]; _VtxWritePtr[1].uv = tex_uv1; _VtxWritePtr[1].col = col; // Right-side outer edge + _VtxWritePtr += 2; + } + } + else + { + // If we're not using a texture, we need the center vertex as well + for (int i = 0; i < points_count; i++) + { + _VtxWritePtr[0].pos = points[i]; _VtxWritePtr[0].uv = opaque_uv; _VtxWritePtr[0].col = col; // Center of line + _VtxWritePtr[1].pos = temp_points[i * 2 + 0]; _VtxWritePtr[1].uv = opaque_uv; _VtxWritePtr[1].col = col_trans; // Left-side outer edge + _VtxWritePtr[2].pos = temp_points[i * 2 + 1]; _VtxWritePtr[2].uv = opaque_uv; _VtxWritePtr[2].col = col_trans; // Right-side outer edge + _VtxWritePtr += 3; + } + } + } + else + { + // [PATH 2] Non texture-based lines (thick): we need to draw the solid line core and thus require four vertices per point + const float half_inner_thickness = (thickness - AA_SIZE) * 0.5f; + + // If line is not closed, the first and last points need to be generated differently as there are no normals to blend + if (!closed) + { + const int points_last = points_count - 1; + temp_points[0] = points[0] + temp_normals[0] * (half_inner_thickness + AA_SIZE); + temp_points[1] = points[0] + temp_normals[0] * (half_inner_thickness); + temp_points[2] = points[0] - temp_normals[0] * (half_inner_thickness); + temp_points[3] = 
points[0] - temp_normals[0] * (half_inner_thickness + AA_SIZE); + temp_points[points_last * 4 + 0] = points[points_last] + temp_normals[points_last] * (half_inner_thickness + AA_SIZE); + temp_points[points_last * 4 + 1] = points[points_last] + temp_normals[points_last] * (half_inner_thickness); + temp_points[points_last * 4 + 2] = points[points_last] - temp_normals[points_last] * (half_inner_thickness); + temp_points[points_last * 4 + 3] = points[points_last] - temp_normals[points_last] * (half_inner_thickness + AA_SIZE); + } + + // Generate the indices to form a number of triangles for each line segment, and the vertices for the line edges + // This takes points n and n+1 and writes into n+1, with the first point in a closed line being generated from the final one (as n+1 wraps) + // FIXME-OPT: Merge the different loops, possibly remove the temporary buffer. + unsigned int idx1 = _VtxCurrentIdx; // Vertex index for start of line segment + for (int i1 = 0; i1 < count; i1++) // i1 is the first point of the line segment + { + const int i2 = (i1 + 1) == points_count ? 0 : (i1 + 1); // i2 is the second point of the line segment + const unsigned int idx2 = (i1 + 1) == points_count ? 
_VtxCurrentIdx : (idx1 + 4); // Vertex index for end of segment + + // Average normals + float dm_x = (temp_normals[i1].x + temp_normals[i2].x) * 0.5f; + float dm_y = (temp_normals[i1].y + temp_normals[i2].y) * 0.5f; + IM_FIXNORMAL2F(dm_x, dm_y); + float dm_out_x = dm_x * (half_inner_thickness + AA_SIZE); + float dm_out_y = dm_y * (half_inner_thickness + AA_SIZE); + float dm_in_x = dm_x * half_inner_thickness; + float dm_in_y = dm_y * half_inner_thickness; + + // Add temporary vertices + ImVec2* out_vtx = &temp_points[i2 * 4]; + out_vtx[0].x = points[i2].x + dm_out_x; + out_vtx[0].y = points[i2].y + dm_out_y; + out_vtx[1].x = points[i2].x + dm_in_x; + out_vtx[1].y = points[i2].y + dm_in_y; + out_vtx[2].x = points[i2].x - dm_in_x; + out_vtx[2].y = points[i2].y - dm_in_y; + out_vtx[3].x = points[i2].x - dm_out_x; + out_vtx[3].y = points[i2].y - dm_out_y; + + // Add indexes + _IdxWritePtr[0] = (ImDrawIdx)(idx2 + 1); _IdxWritePtr[1] = (ImDrawIdx)(idx1 + 1); _IdxWritePtr[2] = (ImDrawIdx)(idx1 + 2); + _IdxWritePtr[3] = (ImDrawIdx)(idx1 + 2); _IdxWritePtr[4] = (ImDrawIdx)(idx2 + 2); _IdxWritePtr[5] = (ImDrawIdx)(idx2 + 1); + _IdxWritePtr[6] = (ImDrawIdx)(idx2 + 1); _IdxWritePtr[7] = (ImDrawIdx)(idx1 + 1); _IdxWritePtr[8] = (ImDrawIdx)(idx1 + 0); + _IdxWritePtr[9] = (ImDrawIdx)(idx1 + 0); _IdxWritePtr[10] = (ImDrawIdx)(idx2 + 0); _IdxWritePtr[11] = (ImDrawIdx)(idx2 + 1); + _IdxWritePtr[12] = (ImDrawIdx)(idx2 + 2); _IdxWritePtr[13] = (ImDrawIdx)(idx1 + 2); _IdxWritePtr[14] = (ImDrawIdx)(idx1 + 3); + _IdxWritePtr[15] = (ImDrawIdx)(idx1 + 3); _IdxWritePtr[16] = (ImDrawIdx)(idx2 + 3); _IdxWritePtr[17] = (ImDrawIdx)(idx2 + 2); + _IdxWritePtr += 18; + + idx1 = idx2; + } + + // Add vertices + for (int i = 0; i < points_count; i++) + { + _VtxWritePtr[0].pos = temp_points[i * 4 + 0]; _VtxWritePtr[0].uv = opaque_uv; _VtxWritePtr[0].col = col_trans; + _VtxWritePtr[1].pos = temp_points[i * 4 + 1]; _VtxWritePtr[1].uv = opaque_uv; _VtxWritePtr[1].col = col; + _VtxWritePtr[2].pos = 
temp_points[i * 4 + 2]; _VtxWritePtr[2].uv = opaque_uv; _VtxWritePtr[2].col = col; + _VtxWritePtr[3].pos = temp_points[i * 4 + 3]; _VtxWritePtr[3].uv = opaque_uv; _VtxWritePtr[3].col = col_trans; + _VtxWritePtr += 4; + } + } + _VtxCurrentIdx += (ImDrawIdx)vtx_count; + } + else + { + // [PATH 4] Non texture-based, Non anti-aliased lines + const int idx_count = count * 6; + const int vtx_count = count * 4; // FIXME-OPT: Not sharing edges + PrimReserve(idx_count, vtx_count); + + for (int i1 = 0; i1 < count; i1++) + { + const int i2 = (i1 + 1) == points_count ? 0 : i1 + 1; + const ImVec2& p1 = points[i1]; + const ImVec2& p2 = points[i2]; + + float dx = p2.x - p1.x; + float dy = p2.y - p1.y; + IM_NORMALIZE2F_OVER_ZERO(dx, dy); + dx *= (thickness * 0.5f); + dy *= (thickness * 0.5f); + + _VtxWritePtr[0].pos.x = p1.x + dy; _VtxWritePtr[0].pos.y = p1.y - dx; _VtxWritePtr[0].uv = opaque_uv; _VtxWritePtr[0].col = col; + _VtxWritePtr[1].pos.x = p2.x + dy; _VtxWritePtr[1].pos.y = p2.y - dx; _VtxWritePtr[1].uv = opaque_uv; _VtxWritePtr[1].col = col; + _VtxWritePtr[2].pos.x = p2.x - dy; _VtxWritePtr[2].pos.y = p2.y + dx; _VtxWritePtr[2].uv = opaque_uv; _VtxWritePtr[2].col = col; + _VtxWritePtr[3].pos.x = p1.x - dy; _VtxWritePtr[3].pos.y = p1.y + dx; _VtxWritePtr[3].uv = opaque_uv; _VtxWritePtr[3].col = col; + _VtxWritePtr += 4; + + _IdxWritePtr[0] = (ImDrawIdx)(_VtxCurrentIdx); _IdxWritePtr[1] = (ImDrawIdx)(_VtxCurrentIdx + 1); _IdxWritePtr[2] = (ImDrawIdx)(_VtxCurrentIdx + 2); + _IdxWritePtr[3] = (ImDrawIdx)(_VtxCurrentIdx); _IdxWritePtr[4] = (ImDrawIdx)(_VtxCurrentIdx + 2); _IdxWritePtr[5] = (ImDrawIdx)(_VtxCurrentIdx + 3); + _IdxWritePtr += 6; + _VtxCurrentIdx += 4; + } + } +} + +// We intentionally avoid using ImVec2 and its math operators here to reduce cost to a minimum for debug/non-inlined builds. 
+void ImDrawList::AddConvexPolyFilled(const ImVec2* points, const int points_count, ImU32 col) +{ + if (points_count < 3) + return; + + const ImVec2 uv = _Data->TexUvWhitePixel; + + if (Flags & ImDrawListFlags_AntiAliasedFill) + { + // Anti-aliased Fill + const float AA_SIZE = _FringeScale; + const ImU32 col_trans = col & ~IM_COL32_A_MASK; + const int idx_count = (points_count - 2)*3 + points_count * 6; + const int vtx_count = (points_count * 2); + PrimReserve(idx_count, vtx_count); + + // Add indexes for fill + unsigned int vtx_inner_idx = _VtxCurrentIdx; + unsigned int vtx_outer_idx = _VtxCurrentIdx + 1; + for (int i = 2; i < points_count; i++) + { + _IdxWritePtr[0] = (ImDrawIdx)(vtx_inner_idx); _IdxWritePtr[1] = (ImDrawIdx)(vtx_inner_idx + ((i - 1) << 1)); _IdxWritePtr[2] = (ImDrawIdx)(vtx_inner_idx + (i << 1)); + _IdxWritePtr += 3; + } + + // Compute normals + ImVec2* temp_normals = (ImVec2*)alloca(points_count * sizeof(ImVec2)); //-V630 + for (int i0 = points_count - 1, i1 = 0; i1 < points_count; i0 = i1++) + { + const ImVec2& p0 = points[i0]; + const ImVec2& p1 = points[i1]; + float dx = p1.x - p0.x; + float dy = p1.y - p0.y; + IM_NORMALIZE2F_OVER_ZERO(dx, dy); + temp_normals[i0].x = dy; + temp_normals[i0].y = -dx; + } + + for (int i0 = points_count - 1, i1 = 0; i1 < points_count; i0 = i1++) + { + // Average normals + const ImVec2& n0 = temp_normals[i0]; + const ImVec2& n1 = temp_normals[i1]; + float dm_x = (n0.x + n1.x) * 0.5f; + float dm_y = (n0.y + n1.y) * 0.5f; + IM_FIXNORMAL2F(dm_x, dm_y); + dm_x *= AA_SIZE * 0.5f; + dm_y *= AA_SIZE * 0.5f; + + // Add vertices + _VtxWritePtr[0].pos.x = (points[i1].x - dm_x); _VtxWritePtr[0].pos.y = (points[i1].y - dm_y); _VtxWritePtr[0].uv = uv; _VtxWritePtr[0].col = col; // Inner + _VtxWritePtr[1].pos.x = (points[i1].x + dm_x); _VtxWritePtr[1].pos.y = (points[i1].y + dm_y); _VtxWritePtr[1].uv = uv; _VtxWritePtr[1].col = col_trans; // Outer + _VtxWritePtr += 2; + + // Add indexes for fringes + _IdxWritePtr[0] = 
(ImDrawIdx)(vtx_inner_idx + (i1 << 1)); _IdxWritePtr[1] = (ImDrawIdx)(vtx_inner_idx + (i0 << 1)); _IdxWritePtr[2] = (ImDrawIdx)(vtx_outer_idx + (i0 << 1)); + _IdxWritePtr[3] = (ImDrawIdx)(vtx_outer_idx + (i0 << 1)); _IdxWritePtr[4] = (ImDrawIdx)(vtx_outer_idx + (i1 << 1)); _IdxWritePtr[5] = (ImDrawIdx)(vtx_inner_idx + (i1 << 1)); + _IdxWritePtr += 6; + } + _VtxCurrentIdx += (ImDrawIdx)vtx_count; + } + else + { + // Non Anti-aliased Fill + const int idx_count = (points_count - 2)*3; + const int vtx_count = points_count; + PrimReserve(idx_count, vtx_count); + for (int i = 0; i < vtx_count; i++) + { + _VtxWritePtr[0].pos = points[i]; _VtxWritePtr[0].uv = uv; _VtxWritePtr[0].col = col; + _VtxWritePtr++; + } + for (int i = 2; i < points_count; i++) + { + _IdxWritePtr[0] = (ImDrawIdx)(_VtxCurrentIdx); _IdxWritePtr[1] = (ImDrawIdx)(_VtxCurrentIdx + i - 1); _IdxWritePtr[2] = (ImDrawIdx)(_VtxCurrentIdx + i); + _IdxWritePtr += 3; + } + _VtxCurrentIdx += (ImDrawIdx)vtx_count; + } +} + +void ImDrawList::PathArcToFast(const ImVec2& center, float radius, int a_min_of_12, int a_max_of_12) +{ + if (radius == 0.0f || a_min_of_12 > a_max_of_12) + { + _Path.push_back(center); + return; + } + + // For legacy reason the PathArcToFast() always takes angles where 2*PI is represented by 12, + // but it is possible to set IM_DRAWLIST_ARCFAST_TESSELATION_MULTIPLIER to a higher value. This should compile to a no-op otherwise. 
+#if IM_DRAWLIST_ARCFAST_TESSELLATION_MULTIPLIER != 1 + a_min_of_12 *= IM_DRAWLIST_ARCFAST_TESSELLATION_MULTIPLIER; + a_max_of_12 *= IM_DRAWLIST_ARCFAST_TESSELLATION_MULTIPLIER; +#endif + + _Path.reserve(_Path.Size + (a_max_of_12 - a_min_of_12 + 1)); + for (int a = a_min_of_12; a <= a_max_of_12; a++) + { + const ImVec2& c = _Data->ArcFastVtx[a % IM_ARRAYSIZE(_Data->ArcFastVtx)]; + _Path.push_back(ImVec2(center.x + c.x * radius, center.y + c.y * radius)); + } +} + +void ImDrawList::PathArcTo(const ImVec2& center, float radius, float a_min, float a_max, int num_segments) +{ + if (radius == 0.0f) + { + _Path.push_back(center); + return; + } + + // Note that we are adding a point at both a_min and a_max. + // If you are trying to draw a full closed circle you don't want the overlapping points! + _Path.reserve(_Path.Size + (num_segments + 1)); + for (int i = 0; i <= num_segments; i++) + { + const float a = a_min + ((float)i / (float)num_segments) * (a_max - a_min); + _Path.push_back(ImVec2(center.x + ImCos(a) * radius, center.y + ImSin(a) * radius)); + } +} + +ImVec2 ImBezierCubicCalc(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, float t) +{ + float u = 1.0f - t; + float w1 = u * u * u; + float w2 = 3 * u * u * t; + float w3 = 3 * u * t * t; + float w4 = t * t * t; + return ImVec2(w1 * p1.x + w2 * p2.x + w3 * p3.x + w4 * p4.x, w1 * p1.y + w2 * p2.y + w3 * p3.y + w4 * p4.y); +} + +ImVec2 ImBezierQuadraticCalc(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, float t) +{ + float u = 1.0f - t; + float w1 = u * u; + float w2 = 2 * u * t; + float w3 = t * t; + return ImVec2(w1 * p1.x + w2 * p2.x + w3 * p3.x, w1 * p1.y + w2 * p2.y + w3 * p3.y); +} + +// Closely mimics ImBezierCubicClosestPointCasteljau() in imgui.cpp +static void PathBezierCubicCurveToCasteljau(ImVector* path, float x1, float y1, float x2, float y2, float x3, float y3, float x4, float y4, float tess_tol, int level) +{ + float dx = x4 - x1; + float dy = y4 - y1; + float d2 = (x2 - 
x4) * dy - (y2 - y4) * dx; + float d3 = (x3 - x4) * dy - (y3 - y4) * dx; + d2 = (d2 >= 0) ? d2 : -d2; + d3 = (d3 >= 0) ? d3 : -d3; + if ((d2 + d3) * (d2 + d3) < tess_tol * (dx * dx + dy * dy)) + { + path->push_back(ImVec2(x4, y4)); + } + else if (level < 10) + { + float x12 = (x1 + x2) * 0.5f, y12 = (y1 + y2) * 0.5f; + float x23 = (x2 + x3) * 0.5f, y23 = (y2 + y3) * 0.5f; + float x34 = (x3 + x4) * 0.5f, y34 = (y3 + y4) * 0.5f; + float x123 = (x12 + x23) * 0.5f, y123 = (y12 + y23) * 0.5f; + float x234 = (x23 + x34) * 0.5f, y234 = (y23 + y34) * 0.5f; + float x1234 = (x123 + x234) * 0.5f, y1234 = (y123 + y234) * 0.5f; + PathBezierCubicCurveToCasteljau(path, x1, y1, x12, y12, x123, y123, x1234, y1234, tess_tol, level + 1); + PathBezierCubicCurveToCasteljau(path, x1234, y1234, x234, y234, x34, y34, x4, y4, tess_tol, level + 1); + } +} + +static void PathBezierQuadraticCurveToCasteljau(ImVector* path, float x1, float y1, float x2, float y2, float x3, float y3, float tess_tol, int level) +{ + float dx = x3 - x1, dy = y3 - y1; + float det = (x2 - x3) * dy - (y2 - y3) * dx; + if (det * det * 4.0f < tess_tol * (dx * dx + dy * dy)) + { + path->push_back(ImVec2(x3, y3)); + } + else if (level < 10) + { + float x12 = (x1 + x2) * 0.5f, y12 = (y1 + y2) * 0.5f; + float x23 = (x2 + x3) * 0.5f, y23 = (y2 + y3) * 0.5f; + float x123 = (x12 + x23) * 0.5f, y123 = (y12 + y23) * 0.5f; + PathBezierQuadraticCurveToCasteljau(path, x1, y1, x12, y12, x123, y123, tess_tol, level + 1); + PathBezierQuadraticCurveToCasteljau(path, x123, y123, x23, y23, x3, y3, tess_tol, level + 1); + } +} + +void ImDrawList::PathBezierCubicCurveTo(const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, int num_segments) +{ + ImVec2 p1 = _Path.back(); + if (num_segments == 0) + { + PathBezierCubicCurveToCasteljau(&_Path, p1.x, p1.y, p2.x, p2.y, p3.x, p3.y, p4.x, p4.y, _Data->CurveTessellationTol, 0); // Auto-tessellated + } + else + { + float t_step = 1.0f / (float)num_segments; + for (int i_step = 1; i_step <= 
num_segments; i_step++) + _Path.push_back(ImBezierCubicCalc(p1, p2, p3, p4, t_step * i_step)); + } +} + +void ImDrawList::PathBezierQuadraticCurveTo(const ImVec2& p2, const ImVec2& p3, int num_segments) +{ + ImVec2 p1 = _Path.back(); + if (num_segments == 0) + { + PathBezierQuadraticCurveToCasteljau(&_Path, p1.x, p1.y, p2.x, p2.y, p3.x, p3.y, _Data->CurveTessellationTol, 0);// Auto-tessellated + } + else + { + float t_step = 1.0f / (float)num_segments; + for (int i_step = 1; i_step <= num_segments; i_step++) + _Path.push_back(ImBezierQuadraticCalc(p1, p2, p3, t_step * i_step)); + } +} + +void ImDrawList::PathRect(const ImVec2& a, const ImVec2& b, float rounding, ImDrawCornerFlags rounding_corners) +{ + rounding = ImMin(rounding, ImFabs(b.x - a.x) * ( ((rounding_corners & ImDrawCornerFlags_Top) == ImDrawCornerFlags_Top) || ((rounding_corners & ImDrawCornerFlags_Bot) == ImDrawCornerFlags_Bot) ? 0.5f : 1.0f ) - 1.0f); + rounding = ImMin(rounding, ImFabs(b.y - a.y) * ( ((rounding_corners & ImDrawCornerFlags_Left) == ImDrawCornerFlags_Left) || ((rounding_corners & ImDrawCornerFlags_Right) == ImDrawCornerFlags_Right) ? 0.5f : 1.0f ) - 1.0f); + + if (rounding <= 0.0f || rounding_corners == 0) + { + PathLineTo(a); + PathLineTo(ImVec2(b.x, a.y)); + PathLineTo(b); + PathLineTo(ImVec2(a.x, b.y)); + } + else + { + const float rounding_tl = (rounding_corners & ImDrawCornerFlags_TopLeft) ? rounding : 0.0f; + const float rounding_tr = (rounding_corners & ImDrawCornerFlags_TopRight) ? rounding : 0.0f; + const float rounding_br = (rounding_corners & ImDrawCornerFlags_BotRight) ? rounding : 0.0f; + const float rounding_bl = (rounding_corners & ImDrawCornerFlags_BotLeft) ? 
rounding : 0.0f; + PathArcToFast(ImVec2(a.x + rounding_tl, a.y + rounding_tl), rounding_tl, 6, 9); + PathArcToFast(ImVec2(b.x - rounding_tr, a.y + rounding_tr), rounding_tr, 9, 12); + PathArcToFast(ImVec2(b.x - rounding_br, b.y - rounding_br), rounding_br, 0, 3); + PathArcToFast(ImVec2(a.x + rounding_bl, b.y - rounding_bl), rounding_bl, 3, 6); + } +} + +void ImDrawList::AddLine(const ImVec2& p1, const ImVec2& p2, ImU32 col, float thickness) +{ + if ((col & IM_COL32_A_MASK) == 0) + return; + PathLineTo(p1 + ImVec2(0.5f, 0.5f)); + PathLineTo(p2 + ImVec2(0.5f, 0.5f)); + PathStroke(col, false, thickness); +} + +// p_min = upper-left, p_max = lower-right +// Note we don't render 1 pixels sized rectangles properly. +void ImDrawList::AddRect(const ImVec2& p_min, const ImVec2& p_max, ImU32 col, float rounding, ImDrawCornerFlags rounding_corners, float thickness) +{ + if ((col & IM_COL32_A_MASK) == 0) + return; + if (Flags & ImDrawListFlags_AntiAliasedLines) + PathRect(p_min + ImVec2(0.50f, 0.50f), p_max - ImVec2(0.50f, 0.50f), rounding, rounding_corners); + else + PathRect(p_min + ImVec2(0.50f, 0.50f), p_max - ImVec2(0.49f, 0.49f), rounding, rounding_corners); // Better looking lower-right corner and rounded non-AA shapes. 
+ PathStroke(col, true, thickness); +} + +void ImDrawList::AddRectFilled(const ImVec2& p_min, const ImVec2& p_max, ImU32 col, float rounding, ImDrawCornerFlags rounding_corners) +{ + if ((col & IM_COL32_A_MASK) == 0) + return; + if (rounding > 0.0f) + { + PathRect(p_min, p_max, rounding, rounding_corners); + PathFillConvex(col); + } + else + { + PrimReserve(6, 4); + PrimRect(p_min, p_max, col); + } +} + +// p_min = upper-left, p_max = lower-right +void ImDrawList::AddRectFilledMultiColor(const ImVec2& p_min, const ImVec2& p_max, ImU32 col_upr_left, ImU32 col_upr_right, ImU32 col_bot_right, ImU32 col_bot_left) +{ + if (((col_upr_left | col_upr_right | col_bot_right | col_bot_left) & IM_COL32_A_MASK) == 0) + return; + + const ImVec2 uv = _Data->TexUvWhitePixel; + PrimReserve(6, 4); + PrimWriteIdx((ImDrawIdx)(_VtxCurrentIdx)); PrimWriteIdx((ImDrawIdx)(_VtxCurrentIdx + 1)); PrimWriteIdx((ImDrawIdx)(_VtxCurrentIdx + 2)); + PrimWriteIdx((ImDrawIdx)(_VtxCurrentIdx)); PrimWriteIdx((ImDrawIdx)(_VtxCurrentIdx + 2)); PrimWriteIdx((ImDrawIdx)(_VtxCurrentIdx + 3)); + PrimWriteVtx(p_min, uv, col_upr_left); + PrimWriteVtx(ImVec2(p_max.x, p_min.y), uv, col_upr_right); + PrimWriteVtx(p_max, uv, col_bot_right); + PrimWriteVtx(ImVec2(p_min.x, p_max.y), uv, col_bot_left); +} + +void ImDrawList::AddQuad(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, ImU32 col, float thickness) +{ + if ((col & IM_COL32_A_MASK) == 0) + return; + + PathLineTo(p1); + PathLineTo(p2); + PathLineTo(p3); + PathLineTo(p4); + PathStroke(col, true, thickness); +} + +void ImDrawList::AddQuadFilled(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, ImU32 col) +{ + if ((col & IM_COL32_A_MASK) == 0) + return; + + PathLineTo(p1); + PathLineTo(p2); + PathLineTo(p3); + PathLineTo(p4); + PathFillConvex(col); +} + +void ImDrawList::AddTriangle(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, ImU32 col, float thickness) +{ + if ((col & IM_COL32_A_MASK) == 0) + return; + + 
PathLineTo(p1); + PathLineTo(p2); + PathLineTo(p3); + PathStroke(col, true, thickness); +} + +void ImDrawList::AddTriangleFilled(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, ImU32 col) +{ + if ((col & IM_COL32_A_MASK) == 0) + return; + + PathLineTo(p1); + PathLineTo(p2); + PathLineTo(p3); + PathFillConvex(col); +} + +void ImDrawList::AddCircle(const ImVec2& center, float radius, ImU32 col, int num_segments, float thickness) +{ + if ((col & IM_COL32_A_MASK) == 0 || radius <= 0.0f) + return; + + // Obtain segment count + if (num_segments <= 0) + { + // Automatic segment count + const int radius_idx = (int)radius; + if (radius_idx < IM_ARRAYSIZE(_Data->CircleSegmentCounts)) + num_segments = _Data->CircleSegmentCounts[radius_idx]; // Use cached value + else + num_segments = IM_DRAWLIST_CIRCLE_AUTO_SEGMENT_CALC(radius, _Data->CircleSegmentMaxError); + } + else + { + // Explicit segment count (still clamp to avoid drawing insanely tessellated shapes) + num_segments = ImClamp(num_segments, 3, IM_DRAWLIST_CIRCLE_AUTO_SEGMENT_MAX); + } + + // Because we are filling a closed shape we remove 1 from the count of segments/points + const float a_max = (IM_PI * 2.0f) * ((float)num_segments - 1.0f) / (float)num_segments; + if (num_segments == 12) + PathArcToFast(center, radius - 0.5f, 0, 12 - 1); + else + PathArcTo(center, radius - 0.5f, 0.0f, a_max, num_segments - 1); + PathStroke(col, true, thickness); +} + +void ImDrawList::AddCircleFilled(const ImVec2& center, float radius, ImU32 col, int num_segments) +{ + if ((col & IM_COL32_A_MASK) == 0 || radius <= 0.0f) + return; + + // Obtain segment count + if (num_segments <= 0) + { + // Automatic segment count + const int radius_idx = (int)radius; + if (radius_idx < IM_ARRAYSIZE(_Data->CircleSegmentCounts)) + num_segments = _Data->CircleSegmentCounts[radius_idx]; // Use cached value + else + num_segments = IM_DRAWLIST_CIRCLE_AUTO_SEGMENT_CALC(radius, _Data->CircleSegmentMaxError); + } + else + { + // Explicit segment count 
(still clamp to avoid drawing insanely tessellated shapes) + num_segments = ImClamp(num_segments, 3, IM_DRAWLIST_CIRCLE_AUTO_SEGMENT_MAX); + } + + // Because we are filling a closed shape we remove 1 from the count of segments/points + const float a_max = (IM_PI * 2.0f) * ((float)num_segments - 1.0f) / (float)num_segments; + if (num_segments == 12) + PathArcToFast(center, radius, 0, 12 - 1); + else + PathArcTo(center, radius, 0.0f, a_max, num_segments - 1); + PathFillConvex(col); +} + +// Guaranteed to honor 'num_segments' +void ImDrawList::AddNgon(const ImVec2& center, float radius, ImU32 col, int num_segments, float thickness) +{ + if ((col & IM_COL32_A_MASK) == 0 || num_segments <= 2) + return; + + // Because we are filling a closed shape we remove 1 from the count of segments/points + const float a_max = (IM_PI * 2.0f) * ((float)num_segments - 1.0f) / (float)num_segments; + PathArcTo(center, radius - 0.5f, 0.0f, a_max, num_segments - 1); + PathStroke(col, true, thickness); +} + +// Guaranteed to honor 'num_segments' +void ImDrawList::AddNgonFilled(const ImVec2& center, float radius, ImU32 col, int num_segments) +{ + if ((col & IM_COL32_A_MASK) == 0 || num_segments <= 2) + return; + + // Because we are filling a closed shape we remove 1 from the count of segments/points + const float a_max = (IM_PI * 2.0f) * ((float)num_segments - 1.0f) / (float)num_segments; + PathArcTo(center, radius, 0.0f, a_max, num_segments - 1); + PathFillConvex(col); +} + +// Cubic Bezier takes 4 controls points +void ImDrawList::AddBezierCubic(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, ImU32 col, float thickness, int num_segments) +{ + if ((col & IM_COL32_A_MASK) == 0) + return; + + PathLineTo(p1); + PathBezierCubicCurveTo(p2, p3, p4, num_segments); + PathStroke(col, false, thickness); +} + +// Quadratic Bezier takes 3 controls points +void ImDrawList::AddBezierQuadratic(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, ImU32 col, float thickness, int 
num_segments) +{ + if ((col & IM_COL32_A_MASK) == 0) + return; + + PathLineTo(p1); + PathBezierQuadraticCurveTo(p2, p3, num_segments); + PathStroke(col, false, thickness); +} + +void ImDrawList::AddText(const ImFont* font, float font_size, const ImVec2& pos, ImU32 col, const char* text_begin, const char* text_end, float wrap_width, const ImVec4* cpu_fine_clip_rect) +{ + if ((col & IM_COL32_A_MASK) == 0) + return; + + if (text_end == NULL) + text_end = text_begin + strlen(text_begin); + if (text_begin == text_end) + return; + + // Pull default font/size from the shared ImDrawListSharedData instance + if (font == NULL) + font = _Data->Font; + if (font_size == 0.0f) + font_size = _Data->FontSize; + + IM_ASSERT(font->ContainerAtlas->TexID == _CmdHeader.TextureId); // Use high-level ImGui::PushFont() or low-level ImDrawList::PushTextureId() to change font. + + ImVec4 clip_rect = _CmdHeader.ClipRect; + if (cpu_fine_clip_rect) + { + clip_rect.x = ImMax(clip_rect.x, cpu_fine_clip_rect->x); + clip_rect.y = ImMax(clip_rect.y, cpu_fine_clip_rect->y); + clip_rect.z = ImMin(clip_rect.z, cpu_fine_clip_rect->z); + clip_rect.w = ImMin(clip_rect.w, cpu_fine_clip_rect->w); + } + font->RenderText(this, font_size, pos, col, clip_rect, text_begin, text_end, wrap_width, cpu_fine_clip_rect != NULL); +} + +void ImDrawList::AddText(const ImVec2& pos, ImU32 col, const char* text_begin, const char* text_end) +{ + AddText(NULL, 0.0f, pos, col, text_begin, text_end); +} + +void ImDrawList::AddImage(ImTextureID user_texture_id, const ImVec2& p_min, const ImVec2& p_max, const ImVec2& uv_min, const ImVec2& uv_max, ImU32 col) +{ + if ((col & IM_COL32_A_MASK) == 0) + return; + + const bool push_texture_id = user_texture_id != _CmdHeader.TextureId; + if (push_texture_id) + PushTextureID(user_texture_id); + + PrimReserve(6, 4); + PrimRectUV(p_min, p_max, uv_min, uv_max, col); + + if (push_texture_id) + PopTextureID(); +} + +void ImDrawList::AddImageQuad(ImTextureID user_texture_id, const ImVec2& p1, 
const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, const ImVec2& uv1, const ImVec2& uv2, const ImVec2& uv3, const ImVec2& uv4, ImU32 col) +{ + if ((col & IM_COL32_A_MASK) == 0) + return; + + const bool push_texture_id = user_texture_id != _CmdHeader.TextureId; + if (push_texture_id) + PushTextureID(user_texture_id); + + PrimReserve(6, 4); + PrimQuadUV(p1, p2, p3, p4, uv1, uv2, uv3, uv4, col); + + if (push_texture_id) + PopTextureID(); +} + +void ImDrawList::AddImageRounded(ImTextureID user_texture_id, const ImVec2& p_min, const ImVec2& p_max, const ImVec2& uv_min, const ImVec2& uv_max, ImU32 col, float rounding, ImDrawCornerFlags rounding_corners) +{ + if ((col & IM_COL32_A_MASK) == 0) + return; + + if (rounding <= 0.0f || (rounding_corners & ImDrawCornerFlags_All) == 0) + { + AddImage(user_texture_id, p_min, p_max, uv_min, uv_max, col); + return; + } + + const bool push_texture_id = _TextureIdStack.empty() || user_texture_id != _TextureIdStack.back(); + if (push_texture_id) + PushTextureID(user_texture_id); + + int vert_start_idx = VtxBuffer.Size; + PathRect(p_min, p_max, rounding, rounding_corners); + PathFillConvex(col); + int vert_end_idx = VtxBuffer.Size; + ImGui::ShadeVertsLinearUV(this, vert_start_idx, vert_end_idx, p_min, p_max, uv_min, uv_max, true); + + if (push_texture_id) + PopTextureID(); +} + + +//----------------------------------------------------------------------------- +// [SECTION] ImDrawListSplitter +//----------------------------------------------------------------------------- +// FIXME: This may be a little confusing, trying to be a little too low-level/optimal instead of just doing vector swap.. 
+//----------------------------------------------------------------------------- + +void ImDrawListSplitter::ClearFreeMemory() +{ + for (int i = 0; i < _Channels.Size; i++) + { + if (i == _Current) + memset(&_Channels[i], 0, sizeof(_Channels[i])); // Current channel is a copy of CmdBuffer/IdxBuffer, don't destruct again + _Channels[i]._CmdBuffer.clear(); + _Channels[i]._IdxBuffer.clear(); + } + _Current = 0; + _Count = 1; + _Channels.clear(); +} + +void ImDrawListSplitter::Split(ImDrawList* draw_list, int channels_count) +{ + IM_UNUSED(draw_list); + IM_ASSERT(_Current == 0 && _Count <= 1 && "Nested channel splitting is not supported. Please use separate instances of ImDrawListSplitter."); + int old_channels_count = _Channels.Size; + if (old_channels_count < channels_count) + { + _Channels.reserve(channels_count); // Avoid over reserving since this is likely to stay stable + _Channels.resize(channels_count); + } + _Count = channels_count; + + // Channels[] (24/32 bytes each) hold storage that we'll swap with draw_list->_CmdBuffer/_IdxBuffer + // The content of Channels[0] at this point doesn't matter. We clear it to make state tidy in a debugger but we don't strictly need to. + // When we switch to the next channel, we'll copy draw_list->_CmdBuffer/_IdxBuffer into Channels[0] and then Channels[1] into draw_list->CmdBuffer/_IdxBuffer + memset(&_Channels[0], 0, sizeof(ImDrawChannel)); + for (int i = 1; i < channels_count; i++) + { + if (i >= old_channels_count) + { + IM_PLACEMENT_NEW(&_Channels[i]) ImDrawChannel(); + } + else + { + _Channels[i]._CmdBuffer.resize(0); + _Channels[i]._IdxBuffer.resize(0); + } + } +} + +void ImDrawListSplitter::Merge(ImDrawList* draw_list) +{ + // Note that we never use or rely on _Channels.Size because it is merely a buffer that we never shrink back to 0 to keep all sub-buffers ready for use. + if (_Count <= 1) + return; + + SetCurrentChannel(draw_list, 0); + draw_list->_PopUnusedDrawCmd(); + + // Calculate our final buffer sizes. 
Also fix the incorrect IdxOffset values in each command. + int new_cmd_buffer_count = 0; + int new_idx_buffer_count = 0; + ImDrawCmd* last_cmd = (_Count > 0 && draw_list->CmdBuffer.Size > 0) ? &draw_list->CmdBuffer.back() : NULL; + int idx_offset = last_cmd ? last_cmd->IdxOffset + last_cmd->ElemCount : 0; + for (int i = 1; i < _Count; i++) + { + ImDrawChannel& ch = _Channels[i]; + + // Equivalent of PopUnusedDrawCmd() for this channel's cmdbuffer and except we don't need to test for UserCallback. + if (ch._CmdBuffer.Size > 0 && ch._CmdBuffer.back().ElemCount == 0) + ch._CmdBuffer.pop_back(); + + if (ch._CmdBuffer.Size > 0 && last_cmd != NULL) + { + ImDrawCmd* next_cmd = &ch._CmdBuffer[0]; + if (ImDrawCmd_HeaderCompare(last_cmd, next_cmd) == 0 && last_cmd->UserCallback == NULL && next_cmd->UserCallback == NULL) + { + // Merge previous channel last draw command with current channel first draw command if matching. + last_cmd->ElemCount += next_cmd->ElemCount; + idx_offset += next_cmd->ElemCount; + ch._CmdBuffer.erase(ch._CmdBuffer.Data); // FIXME-OPT: Improve for multiple merges. 
+ } + } + if (ch._CmdBuffer.Size > 0) + last_cmd = &ch._CmdBuffer.back(); + new_cmd_buffer_count += ch._CmdBuffer.Size; + new_idx_buffer_count += ch._IdxBuffer.Size; + for (int cmd_n = 0; cmd_n < ch._CmdBuffer.Size; cmd_n++) + { + ch._CmdBuffer.Data[cmd_n].IdxOffset = idx_offset; + idx_offset += ch._CmdBuffer.Data[cmd_n].ElemCount; + } + } + draw_list->CmdBuffer.resize(draw_list->CmdBuffer.Size + new_cmd_buffer_count); + draw_list->IdxBuffer.resize(draw_list->IdxBuffer.Size + new_idx_buffer_count); + + // Write commands and indices in order (they are fairly small structures, we don't copy vertices only indices) + ImDrawCmd* cmd_write = draw_list->CmdBuffer.Data + draw_list->CmdBuffer.Size - new_cmd_buffer_count; + ImDrawIdx* idx_write = draw_list->IdxBuffer.Data + draw_list->IdxBuffer.Size - new_idx_buffer_count; + for (int i = 1; i < _Count; i++) + { + ImDrawChannel& ch = _Channels[i]; + if (int sz = ch._CmdBuffer.Size) { memcpy(cmd_write, ch._CmdBuffer.Data, sz * sizeof(ImDrawCmd)); cmd_write += sz; } + if (int sz = ch._IdxBuffer.Size) { memcpy(idx_write, ch._IdxBuffer.Data, sz * sizeof(ImDrawIdx)); idx_write += sz; } + } + draw_list->_IdxWritePtr = idx_write; + + // Ensure there's always a non-callback draw command trailing the command-buffer + if (draw_list->CmdBuffer.Size == 0 || draw_list->CmdBuffer.back().UserCallback != NULL) + draw_list->AddDrawCmd(); + + // If current command is used with different settings we need to add a new command + ImDrawCmd* curr_cmd = &draw_list->CmdBuffer.Data[draw_list->CmdBuffer.Size - 1]; + if (curr_cmd->ElemCount == 0) + ImDrawCmd_HeaderCopy(curr_cmd, &draw_list->_CmdHeader); // Copy ClipRect, TextureId, VtxOffset + else if (ImDrawCmd_HeaderCompare(curr_cmd, &draw_list->_CmdHeader) != 0) + draw_list->AddDrawCmd(); + + _Count = 1; +} + +void ImDrawListSplitter::SetCurrentChannel(ImDrawList* draw_list, int idx) +{ + IM_ASSERT(idx >= 0 && idx < _Count); + if (_Current == idx) + return; + + // Overwrite ImVector (12/16 bytes), 
four times. This is merely a silly optimization instead of doing .swap() + memcpy(&_Channels.Data[_Current]._CmdBuffer, &draw_list->CmdBuffer, sizeof(draw_list->CmdBuffer)); + memcpy(&_Channels.Data[_Current]._IdxBuffer, &draw_list->IdxBuffer, sizeof(draw_list->IdxBuffer)); + _Current = idx; + memcpy(&draw_list->CmdBuffer, &_Channels.Data[idx]._CmdBuffer, sizeof(draw_list->CmdBuffer)); + memcpy(&draw_list->IdxBuffer, &_Channels.Data[idx]._IdxBuffer, sizeof(draw_list->IdxBuffer)); + draw_list->_IdxWritePtr = draw_list->IdxBuffer.Data + draw_list->IdxBuffer.Size; + + // If current command is used with different settings we need to add a new command + ImDrawCmd* curr_cmd = (draw_list->CmdBuffer.Size == 0) ? NULL : &draw_list->CmdBuffer.Data[draw_list->CmdBuffer.Size - 1]; + if (curr_cmd == NULL) + draw_list->AddDrawCmd(); + else if (curr_cmd->ElemCount == 0) + ImDrawCmd_HeaderCopy(curr_cmd, &draw_list->_CmdHeader); // Copy ClipRect, TextureId, VtxOffset + else if (ImDrawCmd_HeaderCompare(curr_cmd, &draw_list->_CmdHeader) != 0) + draw_list->AddDrawCmd(); +} + +//----------------------------------------------------------------------------- +// [SECTION] ImDrawData +//----------------------------------------------------------------------------- + +// For backward compatibility: convert all buffers from indexed to de-indexed, in case you cannot render indexed. Note: this is slow and most likely a waste of resources. Always prefer indexed rendering! 
+void ImDrawData::DeIndexAllBuffers() +{ + ImVector new_vtx_buffer; + TotalVtxCount = TotalIdxCount = 0; + for (int i = 0; i < CmdListsCount; i++) + { + ImDrawList* cmd_list = CmdLists[i]; + if (cmd_list->IdxBuffer.empty()) + continue; + new_vtx_buffer.resize(cmd_list->IdxBuffer.Size); + for (int j = 0; j < cmd_list->IdxBuffer.Size; j++) + new_vtx_buffer[j] = cmd_list->VtxBuffer[cmd_list->IdxBuffer[j]]; + cmd_list->VtxBuffer.swap(new_vtx_buffer); + cmd_list->IdxBuffer.resize(0); + TotalVtxCount += cmd_list->VtxBuffer.Size; + } +} + +// Helper to scale the ClipRect field of each ImDrawCmd. +// Use if your final output buffer is at a different scale than draw_data->DisplaySize, +// or if there is a difference between your window resolution and framebuffer resolution. +void ImDrawData::ScaleClipRects(const ImVec2& fb_scale) +{ + for (int i = 0; i < CmdListsCount; i++) + { + ImDrawList* cmd_list = CmdLists[i]; + for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) + { + ImDrawCmd* cmd = &cmd_list->CmdBuffer[cmd_i]; + cmd->ClipRect = ImVec4(cmd->ClipRect.x * fb_scale.x, cmd->ClipRect.y * fb_scale.y, cmd->ClipRect.z * fb_scale.x, cmd->ClipRect.w * fb_scale.y); + } + } +} + +//----------------------------------------------------------------------------- +// [SECTION] Helpers ShadeVertsXXX functions +//----------------------------------------------------------------------------- + +// Generic linear color gradient, write to RGB fields, leave A untouched. 
+void ImGui::ShadeVertsLinearColorGradientKeepAlpha(ImDrawList* draw_list, int vert_start_idx, int vert_end_idx, ImVec2 gradient_p0, ImVec2 gradient_p1, ImU32 col0, ImU32 col1) +{ + ImVec2 gradient_extent = gradient_p1 - gradient_p0; + float gradient_inv_length2 = 1.0f / ImLengthSqr(gradient_extent); + ImDrawVert* vert_start = draw_list->VtxBuffer.Data + vert_start_idx; + ImDrawVert* vert_end = draw_list->VtxBuffer.Data + vert_end_idx; + const int col0_r = (int)(col0 >> IM_COL32_R_SHIFT) & 0xFF; + const int col0_g = (int)(col0 >> IM_COL32_G_SHIFT) & 0xFF; + const int col0_b = (int)(col0 >> IM_COL32_B_SHIFT) & 0xFF; + const int col_delta_r = ((int)(col1 >> IM_COL32_R_SHIFT) & 0xFF) - col0_r; + const int col_delta_g = ((int)(col1 >> IM_COL32_G_SHIFT) & 0xFF) - col0_g; + const int col_delta_b = ((int)(col1 >> IM_COL32_B_SHIFT) & 0xFF) - col0_b; + for (ImDrawVert* vert = vert_start; vert < vert_end; vert++) + { + float d = ImDot(vert->pos - gradient_p0, gradient_extent); + float t = ImClamp(d * gradient_inv_length2, 0.0f, 1.0f); + int r = (int)(col0_r + col_delta_r * t); + int g = (int)(col0_g + col_delta_g * t); + int b = (int)(col0_b + col_delta_b * t); + vert->col = (r << IM_COL32_R_SHIFT) | (g << IM_COL32_G_SHIFT) | (b << IM_COL32_B_SHIFT) | (vert->col & IM_COL32_A_MASK); + } +} + +// Distribute UV over (a, b) rectangle +void ImGui::ShadeVertsLinearUV(ImDrawList* draw_list, int vert_start_idx, int vert_end_idx, const ImVec2& a, const ImVec2& b, const ImVec2& uv_a, const ImVec2& uv_b, bool clamp) +{ + const ImVec2 size = b - a; + const ImVec2 uv_size = uv_b - uv_a; + const ImVec2 scale = ImVec2( + size.x != 0.0f ? (uv_size.x / size.x) : 0.0f, + size.y != 0.0f ? 
(uv_size.y / size.y) : 0.0f); + + ImDrawVert* vert_start = draw_list->VtxBuffer.Data + vert_start_idx; + ImDrawVert* vert_end = draw_list->VtxBuffer.Data + vert_end_idx; + if (clamp) + { + const ImVec2 min = ImMin(uv_a, uv_b); + const ImVec2 max = ImMax(uv_a, uv_b); + for (ImDrawVert* vertex = vert_start; vertex < vert_end; ++vertex) + vertex->uv = ImClamp(uv_a + ImMul(ImVec2(vertex->pos.x, vertex->pos.y) - a, scale), min, max); + } + else + { + for (ImDrawVert* vertex = vert_start; vertex < vert_end; ++vertex) + vertex->uv = uv_a + ImMul(ImVec2(vertex->pos.x, vertex->pos.y) - a, scale); + } +} + +//----------------------------------------------------------------------------- +// [SECTION] ImFontConfig +//----------------------------------------------------------------------------- + +ImFontConfig::ImFontConfig() +{ + memset(this, 0, sizeof(*this)); + FontDataOwnedByAtlas = true; + OversampleH = 3; // FIXME: 2 may be a better default? + OversampleV = 1; + GlyphMaxAdvanceX = FLT_MAX; + RasterizerMultiply = 1.0f; + EllipsisChar = (ImWchar)-1; +} + +//----------------------------------------------------------------------------- +// [SECTION] ImFontAtlas +//----------------------------------------------------------------------------- + +// A work of art lies ahead! (. = white layer, X = black layer, others are blank) +// The 2x2 white texels on the top left are the ones we'll use everywhere in Dear ImGui to render filled shapes. +const int FONT_ATLAS_DEFAULT_TEX_DATA_W = 108; // Actual texture will be 2 times that + 1 spacing. 
+const int FONT_ATLAS_DEFAULT_TEX_DATA_H = 27; +static const char FONT_ATLAS_DEFAULT_TEX_DATA_PIXELS[FONT_ATLAS_DEFAULT_TEX_DATA_W * FONT_ATLAS_DEFAULT_TEX_DATA_H + 1] = +{ + "..- -XXXXXXX- X - X -XXXXXXX - XXXXXXX- XX " + "..- -X.....X- X.X - X.X -X.....X - X.....X- X..X " + "--- -XXX.XXX- X...X - X...X -X....X - X....X- X..X " + "X - X.X - X.....X - X.....X -X...X - X...X- X..X " + "XX - X.X -X.......X- X.......X -X..X.X - X.X..X- X..X " + "X.X - X.X -XXXX.XXXX- XXXX.XXXX -X.X X.X - X.X X.X- X..XXX " + "X..X - X.X - X.X - X.X -XX X.X - X.X XX- X..X..XXX " + "X...X - X.X - X.X - XX X.X XX - X.X - X.X - X..X..X..XX " + "X....X - X.X - X.X - X.X X.X X.X - X.X - X.X - X..X..X..X.X " + "X.....X - X.X - X.X - X..X X.X X..X - X.X - X.X -XXX X..X..X..X..X" + "X......X - X.X - X.X - X...XXXXXX.XXXXXX...X - X.X XX-XX X.X -X..XX........X..X" + "X.......X - X.X - X.X -X.....................X- X.X X.X-X.X X.X -X...X...........X" + "X........X - X.X - X.X - X...XXXXXX.XXXXXX...X - X.X..X-X..X.X - X..............X" + "X.........X -XXX.XXX- X.X - X..X X.X X..X - X...X-X...X - X.............X" + "X..........X-X.....X- X.X - X.X X.X X.X - X....X-X....X - X.............X" + "X......XXXXX-XXXXXXX- X.X - XX X.X XX - X.....X-X.....X - X............X" + "X...X..X --------- X.X - X.X - XXXXXXX-XXXXXXX - X...........X " + "X..X X..X - -XXXX.XXXX- XXXX.XXXX ------------------------------------- X..........X " + "X.X X..X - -X.......X- X.......X - XX XX - - X..........X " + "XX X..X - - X.....X - X.....X - X.X X.X - - X........X " + " X..X - X...X - X...X - X..X X..X - - X........X " + " XX - X.X - X.X - X...XXXXXXXXXXXXX...X - - XXXXXXXXXX " + "------------ - X - X -X.....................X- ------------------" + " ----------------------------------- X...XXXXXXXXXXXXX...X - " + " - X..X X..X - " + " - X.X X.X - " + " - XX XX - " +}; + +static const ImVec2 FONT_ATLAS_DEFAULT_TEX_CURSOR_DATA[ImGuiMouseCursor_COUNT][3] = +{ + // Pos ........ Size ......... Offset ...... 
+ { ImVec2( 0,3), ImVec2(12,19), ImVec2( 0, 0) }, // ImGuiMouseCursor_Arrow + { ImVec2(13,0), ImVec2( 7,16), ImVec2( 1, 8) }, // ImGuiMouseCursor_TextInput + { ImVec2(31,0), ImVec2(23,23), ImVec2(11,11) }, // ImGuiMouseCursor_ResizeAll + { ImVec2(21,0), ImVec2( 9,23), ImVec2( 4,11) }, // ImGuiMouseCursor_ResizeNS + { ImVec2(55,18),ImVec2(23, 9), ImVec2(11, 4) }, // ImGuiMouseCursor_ResizeEW + { ImVec2(73,0), ImVec2(17,17), ImVec2( 8, 8) }, // ImGuiMouseCursor_ResizeNESW + { ImVec2(55,0), ImVec2(17,17), ImVec2( 8, 8) }, // ImGuiMouseCursor_ResizeNWSE + { ImVec2(91,0), ImVec2(17,22), ImVec2( 5, 0) }, // ImGuiMouseCursor_Hand +}; + +ImFontAtlas::ImFontAtlas() +{ + memset(this, 0, sizeof(*this)); + TexGlyphPadding = 1; + PackIdMouseCursors = PackIdLines = -1; +} + +ImFontAtlas::~ImFontAtlas() +{ + IM_ASSERT(!Locked && "Cannot modify a locked ImFontAtlas between NewFrame() and EndFrame/Render()!"); + Clear(); +} + +void ImFontAtlas::ClearInputData() +{ + IM_ASSERT(!Locked && "Cannot modify a locked ImFontAtlas between NewFrame() and EndFrame/Render()!"); + for (int i = 0; i < ConfigData.Size; i++) + if (ConfigData[i].FontData && ConfigData[i].FontDataOwnedByAtlas) + { + IM_FREE(ConfigData[i].FontData); + ConfigData[i].FontData = NULL; + } + + // When clearing this we lose access to the font name and other information used to build the font. 
+ for (int i = 0; i < Fonts.Size; i++) + if (Fonts[i]->ConfigData >= ConfigData.Data && Fonts[i]->ConfigData < ConfigData.Data + ConfigData.Size) + { + Fonts[i]->ConfigData = NULL; + Fonts[i]->ConfigDataCount = 0; + } + ConfigData.clear(); + CustomRects.clear(); + PackIdMouseCursors = PackIdLines = -1; +} + +void ImFontAtlas::ClearTexData() +{ + IM_ASSERT(!Locked && "Cannot modify a locked ImFontAtlas between NewFrame() and EndFrame/Render()!"); + if (TexPixelsAlpha8) + IM_FREE(TexPixelsAlpha8); + if (TexPixelsRGBA32) + IM_FREE(TexPixelsRGBA32); + TexPixelsAlpha8 = NULL; + TexPixelsRGBA32 = NULL; +} + +void ImFontAtlas::ClearFonts() +{ + IM_ASSERT(!Locked && "Cannot modify a locked ImFontAtlas between NewFrame() and EndFrame/Render()!"); + for (int i = 0; i < Fonts.Size; i++) + IM_DELETE(Fonts[i]); + Fonts.clear(); +} + +void ImFontAtlas::Clear() +{ + ClearInputData(); + ClearTexData(); + ClearFonts(); +} + +void ImFontAtlas::GetTexDataAsAlpha8(unsigned char** out_pixels, int* out_width, int* out_height, int* out_bytes_per_pixel) +{ + // Build atlas on demand + if (TexPixelsAlpha8 == NULL) + { + if (ConfigData.empty()) + AddFontDefault(); + Build(); + } + + *out_pixels = TexPixelsAlpha8; + if (out_width) *out_width = TexWidth; + if (out_height) *out_height = TexHeight; + if (out_bytes_per_pixel) *out_bytes_per_pixel = 1; +} + +void ImFontAtlas::GetTexDataAsRGBA32(unsigned char** out_pixels, int* out_width, int* out_height, int* out_bytes_per_pixel) +{ + // Convert to RGBA32 format on demand + // Although it is likely to be the most commonly used format, our font rendering is 1 channel / 8 bpp + if (!TexPixelsRGBA32) + { + unsigned char* pixels = NULL; + GetTexDataAsAlpha8(&pixels, NULL, NULL); + if (pixels) + { + TexPixelsRGBA32 = (unsigned int*)IM_ALLOC((size_t)TexWidth * (size_t)TexHeight * 4); + const unsigned char* src = pixels; + unsigned int* dst = TexPixelsRGBA32; + for (int n = TexWidth * TexHeight; n > 0; n--) + *dst++ = IM_COL32(255, 255, 255, (unsigned 
int)(*src++)); + } + } + + *out_pixels = (unsigned char*)TexPixelsRGBA32; + if (out_width) *out_width = TexWidth; + if (out_height) *out_height = TexHeight; + if (out_bytes_per_pixel) *out_bytes_per_pixel = 4; +} + +ImFont* ImFontAtlas::AddFont(const ImFontConfig* font_cfg) +{ + IM_ASSERT(!Locked && "Cannot modify a locked ImFontAtlas between NewFrame() and EndFrame/Render()!"); + IM_ASSERT(font_cfg->FontData != NULL && font_cfg->FontDataSize > 0); + IM_ASSERT(font_cfg->SizePixels > 0.0f); + + // Create new font + if (!font_cfg->MergeMode) + Fonts.push_back(IM_NEW(ImFont)); + else + IM_ASSERT(!Fonts.empty() && "Cannot use MergeMode for the first font"); // When using MergeMode make sure that a font has already been added before. You can use ImGui::GetIO().Fonts->AddFontDefault() to add the default imgui font. + + ConfigData.push_back(*font_cfg); + ImFontConfig& new_font_cfg = ConfigData.back(); + if (new_font_cfg.DstFont == NULL) + new_font_cfg.DstFont = Fonts.back(); + if (!new_font_cfg.FontDataOwnedByAtlas) + { + new_font_cfg.FontData = IM_ALLOC(new_font_cfg.FontDataSize); + new_font_cfg.FontDataOwnedByAtlas = true; + memcpy(new_font_cfg.FontData, font_cfg->FontData, (size_t)new_font_cfg.FontDataSize); + } + + if (new_font_cfg.DstFont->EllipsisChar == (ImWchar)-1) + new_font_cfg.DstFont->EllipsisChar = font_cfg->EllipsisChar; + + // Invalidate texture + ClearTexData(); + return new_font_cfg.DstFont; +} + +// Default font TTF is compressed with stb_compress then base85 encoded (see misc/fonts/binary_to_compressed_c.cpp for encoder) +static unsigned int stb_decompress_length(const unsigned char* input); +static unsigned int stb_decompress(unsigned char* output, const unsigned char* input, unsigned int length); +static const char* GetDefaultCompressedFontDataTTFBase85(); +static unsigned int Decode85Byte(char c) { return c >= '\\' ? 
c-36 : c-35; } +static void Decode85(const unsigned char* src, unsigned char* dst) +{ + while (*src) + { + unsigned int tmp = Decode85Byte(src[0]) + 85 * (Decode85Byte(src[1]) + 85 * (Decode85Byte(src[2]) + 85 * (Decode85Byte(src[3]) + 85 * Decode85Byte(src[4])))); + dst[0] = ((tmp >> 0) & 0xFF); dst[1] = ((tmp >> 8) & 0xFF); dst[2] = ((tmp >> 16) & 0xFF); dst[3] = ((tmp >> 24) & 0xFF); // We can't assume little-endianness. + src += 5; + dst += 4; + } +} + +// Load embedded ProggyClean.ttf at size 13, disable oversampling +ImFont* ImFontAtlas::AddFontDefault(const ImFontConfig* font_cfg_template) +{ + ImFontConfig font_cfg = font_cfg_template ? *font_cfg_template : ImFontConfig(); + if (!font_cfg_template) + { + font_cfg.OversampleH = font_cfg.OversampleV = 1; + font_cfg.PixelSnapH = true; + } + if (font_cfg.SizePixels <= 0.0f) + font_cfg.SizePixels = 13.0f * 1.0f; + if (font_cfg.Name[0] == '\0') + ImFormatString(font_cfg.Name, IM_ARRAYSIZE(font_cfg.Name), "ProggyClean.ttf, %dpx", (int)font_cfg.SizePixels); + font_cfg.EllipsisChar = (ImWchar)0x0085; + font_cfg.GlyphOffset.y = 1.0f * IM_FLOOR(font_cfg.SizePixels / 13.0f); // Add +1 offset per 13 units + + const char* ttf_compressed_base85 = GetDefaultCompressedFontDataTTFBase85(); + const ImWchar* glyph_ranges = font_cfg.GlyphRanges != NULL ? font_cfg.GlyphRanges : GetGlyphRangesDefault(); + ImFont* font = AddFontFromMemoryCompressedBase85TTF(ttf_compressed_base85, font_cfg.SizePixels, &font_cfg, glyph_ranges); + return font; +} + +ImFont* ImFontAtlas::AddFontFromFileTTF(const char* filename, float size_pixels, const ImFontConfig* font_cfg_template, const ImWchar* glyph_ranges) +{ + IM_ASSERT(!Locked && "Cannot modify a locked ImFontAtlas between NewFrame() and EndFrame/Render()!"); + size_t data_size = 0; + void* data = ImFileLoadToMemory(filename, "rb", &data_size, 0); + if (!data) + { + IM_ASSERT_USER_ERROR(0, "Could not load font file!"); + return NULL; + } + ImFontConfig font_cfg = font_cfg_template ? 
*font_cfg_template : ImFontConfig(); + if (font_cfg.Name[0] == '\0') + { + // Store a short copy of filename into into the font name for convenience + const char* p; + for (p = filename + strlen(filename); p > filename && p[-1] != '/' && p[-1] != '\\'; p--) {} + ImFormatString(font_cfg.Name, IM_ARRAYSIZE(font_cfg.Name), "%s, %.0fpx", p, size_pixels); + } + return AddFontFromMemoryTTF(data, (int)data_size, size_pixels, &font_cfg, glyph_ranges); +} + +// NB: Transfer ownership of 'ttf_data' to ImFontAtlas, unless font_cfg_template->FontDataOwnedByAtlas == false. Owned TTF buffer will be deleted after Build(). +ImFont* ImFontAtlas::AddFontFromMemoryTTF(void* ttf_data, int ttf_size, float size_pixels, const ImFontConfig* font_cfg_template, const ImWchar* glyph_ranges) +{ + IM_ASSERT(!Locked && "Cannot modify a locked ImFontAtlas between NewFrame() and EndFrame/Render()!"); + ImFontConfig font_cfg = font_cfg_template ? *font_cfg_template : ImFontConfig(); + IM_ASSERT(font_cfg.FontData == NULL); + font_cfg.FontData = ttf_data; + font_cfg.FontDataSize = ttf_size; + font_cfg.SizePixels = size_pixels; + if (glyph_ranges) + font_cfg.GlyphRanges = glyph_ranges; + return AddFont(&font_cfg); +} + +ImFont* ImFontAtlas::AddFontFromMemoryCompressedTTF(const void* compressed_ttf_data, int compressed_ttf_size, float size_pixels, const ImFontConfig* font_cfg_template, const ImWchar* glyph_ranges) +{ + const unsigned int buf_decompressed_size = stb_decompress_length((const unsigned char*)compressed_ttf_data); + unsigned char* buf_decompressed_data = (unsigned char*)IM_ALLOC(buf_decompressed_size); + stb_decompress(buf_decompressed_data, (const unsigned char*)compressed_ttf_data, (unsigned int)compressed_ttf_size); + + ImFontConfig font_cfg = font_cfg_template ? 
*font_cfg_template : ImFontConfig(); + IM_ASSERT(font_cfg.FontData == NULL); + font_cfg.FontDataOwnedByAtlas = true; + return AddFontFromMemoryTTF(buf_decompressed_data, (int)buf_decompressed_size, size_pixels, &font_cfg, glyph_ranges); +} + +ImFont* ImFontAtlas::AddFontFromMemoryCompressedBase85TTF(const char* compressed_ttf_data_base85, float size_pixels, const ImFontConfig* font_cfg, const ImWchar* glyph_ranges) +{ + int compressed_ttf_size = (((int)strlen(compressed_ttf_data_base85) + 4) / 5) * 4; + void* compressed_ttf = IM_ALLOC((size_t)compressed_ttf_size); + Decode85((const unsigned char*)compressed_ttf_data_base85, (unsigned char*)compressed_ttf); + ImFont* font = AddFontFromMemoryCompressedTTF(compressed_ttf, compressed_ttf_size, size_pixels, font_cfg, glyph_ranges); + IM_FREE(compressed_ttf); + return font; +} + +int ImFontAtlas::AddCustomRectRegular(int width, int height) +{ + IM_ASSERT(width > 0 && width <= 0xFFFF); + IM_ASSERT(height > 0 && height <= 0xFFFF); + ImFontAtlasCustomRect r; + r.Width = (unsigned short)width; + r.Height = (unsigned short)height; + CustomRects.push_back(r); + return CustomRects.Size - 1; // Return index +} + +int ImFontAtlas::AddCustomRectFontGlyph(ImFont* font, ImWchar id, int width, int height, float advance_x, const ImVec2& offset) +{ +#ifdef IMGUI_USE_WCHAR32 + IM_ASSERT(id <= IM_UNICODE_CODEPOINT_MAX); +#endif + IM_ASSERT(font != NULL); + IM_ASSERT(width > 0 && width <= 0xFFFF); + IM_ASSERT(height > 0 && height <= 0xFFFF); + ImFontAtlasCustomRect r; + r.Width = (unsigned short)width; + r.Height = (unsigned short)height; + r.GlyphID = id; + r.GlyphAdvanceX = advance_x; + r.GlyphOffset = offset; + r.Font = font; + CustomRects.push_back(r); + return CustomRects.Size - 1; // Return index +} + +void ImFontAtlas::CalcCustomRectUV(const ImFontAtlasCustomRect* rect, ImVec2* out_uv_min, ImVec2* out_uv_max) const +{ + IM_ASSERT(TexWidth > 0 && TexHeight > 0); // Font atlas needs to be built before we can calculate UV coordinates 
+ IM_ASSERT(rect->IsPacked()); // Make sure the rectangle has been packed + *out_uv_min = ImVec2((float)rect->X * TexUvScale.x, (float)rect->Y * TexUvScale.y); + *out_uv_max = ImVec2((float)(rect->X + rect->Width) * TexUvScale.x, (float)(rect->Y + rect->Height) * TexUvScale.y); +} + +bool ImFontAtlas::GetMouseCursorTexData(ImGuiMouseCursor cursor_type, ImVec2* out_offset, ImVec2* out_size, ImVec2 out_uv_border[2], ImVec2 out_uv_fill[2]) +{ + if (cursor_type <= ImGuiMouseCursor_None || cursor_type >= ImGuiMouseCursor_COUNT) + return false; + if (Flags & ImFontAtlasFlags_NoMouseCursors) + return false; + + IM_ASSERT(PackIdMouseCursors != -1); + ImFontAtlasCustomRect* r = GetCustomRectByIndex(PackIdMouseCursors); + ImVec2 pos = FONT_ATLAS_DEFAULT_TEX_CURSOR_DATA[cursor_type][0] + ImVec2((float)r->X, (float)r->Y); + ImVec2 size = FONT_ATLAS_DEFAULT_TEX_CURSOR_DATA[cursor_type][1]; + *out_size = size; + *out_offset = FONT_ATLAS_DEFAULT_TEX_CURSOR_DATA[cursor_type][2]; + out_uv_border[0] = (pos) * TexUvScale; + out_uv_border[1] = (pos + size) * TexUvScale; + pos.x += FONT_ATLAS_DEFAULT_TEX_DATA_W + 1; + out_uv_fill[0] = (pos) * TexUvScale; + out_uv_fill[1] = (pos + size) * TexUvScale; + return true; +} + +bool ImFontAtlas::Build() +{ + IM_ASSERT(!Locked && "Cannot modify a locked ImFontAtlas between NewFrame() and EndFrame/Render()!"); + + // Select builder + // - Note that we do not reassign to atlas->FontBuilderIO, since it is likely to point to static data which + // may mess with some hot-reloading schemes. If you need to assign to this (for dynamic selection) AND are + // using a hot-reloading scheme that messes up static data, store your own instance of ImFontBuilderIO somewhere + // and point to it instead of pointing directly to return value of the GetBuilderXXX functions. 
+ const ImFontBuilderIO* builder_io = FontBuilderIO; + if (builder_io == NULL) + { +#ifdef IMGUI_ENABLE_FREETYPE + builder_io = ImGuiFreeType::GetBuilderForFreeType(); +#elif defined(IMGUI_ENABLE_STB_TRUETYPE) + builder_io = ImFontAtlasGetBuilderForStbTruetype(); +#else + IM_ASSERT(0); // Invalid Build function +#endif + } + + // Build + return builder_io->FontBuilder_Build(this); +} + +void ImFontAtlasBuildMultiplyCalcLookupTable(unsigned char out_table[256], float in_brighten_factor) +{ + for (unsigned int i = 0; i < 256; i++) + { + unsigned int value = (unsigned int)(i * in_brighten_factor); + out_table[i] = value > 255 ? 255 : (value & 0xFF); + } +} + +void ImFontAtlasBuildMultiplyRectAlpha8(const unsigned char table[256], unsigned char* pixels, int x, int y, int w, int h, int stride) +{ + unsigned char* data = pixels + x + y * stride; + for (int j = h; j > 0; j--, data += stride) + for (int i = 0; i < w; i++) + data[i] = table[data[i]]; +} + +#ifdef IMGUI_ENABLE_STB_TRUETYPE +// Temporary data for one source font (multiple source fonts can be merged into one destination ImFont) +// (C++03 doesn't allow instancing ImVector<> with function-local types so we declare the type here.) +struct ImFontBuildSrcData +{ + stbtt_fontinfo FontInfo; + stbtt_pack_range PackRange; // Hold the list of codepoints to pack (essentially points to Codepoints.Data) + stbrp_rect* Rects; // Rectangle to pack. We first fill in their size and the packer will give us their position. + stbtt_packedchar* PackedChars; // Output glyphs + const ImWchar* SrcRanges; // Ranges as requested by user (user is allowed to request too much, e.g. 0x0020..0xFFFF) + int DstIndex; // Index into atlas->Fonts[] and dst_tmp_array[] + int GlyphsHighest; // Highest requested codepoint + int GlyphsCount; // Glyph count (excluding missing glyphs and glyphs already set by an earlier source font) + ImBitVector GlyphsSet; // Glyph bit map (random access, 1-bit per codepoint. 
This will be a maximum of 8KB) + ImVector GlyphsList; // Glyph codepoints list (flattened version of GlyphsMap) +}; + +// Temporary data for one destination ImFont* (multiple source fonts can be merged into one destination ImFont) +struct ImFontBuildDstData +{ + int SrcCount; // Number of source fonts targeting this destination font. + int GlyphsHighest; + int GlyphsCount; + ImBitVector GlyphsSet; // This is used to resolve collision when multiple sources are merged into a same destination font. +}; + +static void UnpackBitVectorToFlatIndexList(const ImBitVector* in, ImVector* out) +{ + IM_ASSERT(sizeof(in->Storage.Data[0]) == sizeof(int)); + const ImU32* it_begin = in->Storage.begin(); + const ImU32* it_end = in->Storage.end(); + for (const ImU32* it = it_begin; it < it_end; it++) + if (ImU32 entries_32 = *it) + for (ImU32 bit_n = 0; bit_n < 32; bit_n++) + if (entries_32 & ((ImU32)1 << bit_n)) + out->push_back((int)(((it - it_begin) << 5) + bit_n)); +} + +static bool ImFontAtlasBuildWithStbTruetype(ImFontAtlas* atlas) +{ + IM_ASSERT(atlas->ConfigData.Size > 0); + + ImFontAtlasBuildInit(atlas); + + // Clear atlas + atlas->TexID = (ImTextureID)NULL; + atlas->TexWidth = atlas->TexHeight = 0; + atlas->TexUvScale = ImVec2(0.0f, 0.0f); + atlas->TexUvWhitePixel = ImVec2(0.0f, 0.0f); + atlas->ClearTexData(); + + // Temporary storage for building + ImVector src_tmp_array; + ImVector dst_tmp_array; + src_tmp_array.resize(atlas->ConfigData.Size); + dst_tmp_array.resize(atlas->Fonts.Size); + memset(src_tmp_array.Data, 0, (size_t)src_tmp_array.size_in_bytes()); + memset(dst_tmp_array.Data, 0, (size_t)dst_tmp_array.size_in_bytes()); + + // 1. 
Initialize font loading structure, check font data validity + for (int src_i = 0; src_i < atlas->ConfigData.Size; src_i++) + { + ImFontBuildSrcData& src_tmp = src_tmp_array[src_i]; + ImFontConfig& cfg = atlas->ConfigData[src_i]; + IM_ASSERT(cfg.DstFont && (!cfg.DstFont->IsLoaded() || cfg.DstFont->ContainerAtlas == atlas)); + + // Find index from cfg.DstFont (we allow the user to set cfg.DstFont. Also it makes casual debugging nicer than when storing indices) + src_tmp.DstIndex = -1; + for (int output_i = 0; output_i < atlas->Fonts.Size && src_tmp.DstIndex == -1; output_i++) + if (cfg.DstFont == atlas->Fonts[output_i]) + src_tmp.DstIndex = output_i; + if (src_tmp.DstIndex == -1) + { + IM_ASSERT(src_tmp.DstIndex != -1); // cfg.DstFont not pointing within atlas->Fonts[] array? + return false; + } + // Initialize helper structure for font loading and verify that the TTF/OTF data is correct + const int font_offset = stbtt_GetFontOffsetForIndex((unsigned char*)cfg.FontData, cfg.FontNo); + IM_ASSERT(font_offset >= 0 && "FontData is incorrect, or FontNo cannot be found."); + if (!stbtt_InitFont(&src_tmp.FontInfo, (unsigned char*)cfg.FontData, font_offset)) + return false; + + // Measure highest codepoints + ImFontBuildDstData& dst_tmp = dst_tmp_array[src_tmp.DstIndex]; + src_tmp.SrcRanges = cfg.GlyphRanges ? cfg.GlyphRanges : atlas->GetGlyphRangesDefault(); + for (const ImWchar* src_range = src_tmp.SrcRanges; src_range[0] && src_range[1]; src_range += 2) + src_tmp.GlyphsHighest = ImMax(src_tmp.GlyphsHighest, (int)src_range[1]); + dst_tmp.SrcCount++; + dst_tmp.GlyphsHighest = ImMax(dst_tmp.GlyphsHighest, src_tmp.GlyphsHighest); + } + + // 2. For every requested codepoint, check for their presence in the font data, and handle redundancy or overlaps between source fonts to avoid unused glyphs. 
+ int total_glyphs_count = 0; + for (int src_i = 0; src_i < src_tmp_array.Size; src_i++) + { + ImFontBuildSrcData& src_tmp = src_tmp_array[src_i]; + ImFontBuildDstData& dst_tmp = dst_tmp_array[src_tmp.DstIndex]; + src_tmp.GlyphsSet.Create(src_tmp.GlyphsHighest + 1); + if (dst_tmp.GlyphsSet.Storage.empty()) + dst_tmp.GlyphsSet.Create(dst_tmp.GlyphsHighest + 1); + + for (const ImWchar* src_range = src_tmp.SrcRanges; src_range[0] && src_range[1]; src_range += 2) + for (unsigned int codepoint = src_range[0]; codepoint <= src_range[1]; codepoint++) + { + if (dst_tmp.GlyphsSet.TestBit(codepoint)) // Don't overwrite existing glyphs. We could make this an option for MergeMode (e.g. MergeOverwrite==true) + continue; + if (!stbtt_FindGlyphIndex(&src_tmp.FontInfo, codepoint)) // It is actually in the font? + continue; + + // Add to avail set/counters + src_tmp.GlyphsCount++; + dst_tmp.GlyphsCount++; + src_tmp.GlyphsSet.SetBit(codepoint); + dst_tmp.GlyphsSet.SetBit(codepoint); + total_glyphs_count++; + } + } + + // 3. 
Unpack our bit map into a flat list (we now have all the Unicode points that we know are requested _and_ available _and_ not overlapping another) + for (int src_i = 0; src_i < src_tmp_array.Size; src_i++) + { + ImFontBuildSrcData& src_tmp = src_tmp_array[src_i]; + src_tmp.GlyphsList.reserve(src_tmp.GlyphsCount); + UnpackBitVectorToFlatIndexList(&src_tmp.GlyphsSet, &src_tmp.GlyphsList); + src_tmp.GlyphsSet.Clear(); + IM_ASSERT(src_tmp.GlyphsList.Size == src_tmp.GlyphsCount); + } + for (int dst_i = 0; dst_i < dst_tmp_array.Size; dst_i++) + dst_tmp_array[dst_i].GlyphsSet.Clear(); + dst_tmp_array.clear(); + + // Allocate packing character data and flag packed characters buffer as non-packed (x0=y0=x1=y1=0) + // (We technically don't need to zero-clear buf_rects, but let's do it for the sake of sanity) + ImVector buf_rects; + ImVector buf_packedchars; + buf_rects.resize(total_glyphs_count); + buf_packedchars.resize(total_glyphs_count); + memset(buf_rects.Data, 0, (size_t)buf_rects.size_in_bytes()); + memset(buf_packedchars.Data, 0, (size_t)buf_packedchars.size_in_bytes()); + + // 4. Gather glyphs sizes so we can pack them in our virtual canvas. 
+ int total_surface = 0; + int buf_rects_out_n = 0; + int buf_packedchars_out_n = 0; + for (int src_i = 0; src_i < src_tmp_array.Size; src_i++) + { + ImFontBuildSrcData& src_tmp = src_tmp_array[src_i]; + if (src_tmp.GlyphsCount == 0) + continue; + + src_tmp.Rects = &buf_rects[buf_rects_out_n]; + src_tmp.PackedChars = &buf_packedchars[buf_packedchars_out_n]; + buf_rects_out_n += src_tmp.GlyphsCount; + buf_packedchars_out_n += src_tmp.GlyphsCount; + + // Convert our ranges in the format stb_truetype wants + ImFontConfig& cfg = atlas->ConfigData[src_i]; + src_tmp.PackRange.font_size = cfg.SizePixels; + src_tmp.PackRange.first_unicode_codepoint_in_range = 0; + src_tmp.PackRange.array_of_unicode_codepoints = src_tmp.GlyphsList.Data; + src_tmp.PackRange.num_chars = src_tmp.GlyphsList.Size; + src_tmp.PackRange.chardata_for_range = src_tmp.PackedChars; + src_tmp.PackRange.h_oversample = (unsigned char)cfg.OversampleH; + src_tmp.PackRange.v_oversample = (unsigned char)cfg.OversampleV; + + // Gather the sizes of all rectangles we will need to pack (this loop is based on stbtt_PackFontRangesGatherRects) + const float scale = (cfg.SizePixels > 0) ? 
stbtt_ScaleForPixelHeight(&src_tmp.FontInfo, cfg.SizePixels) : stbtt_ScaleForMappingEmToPixels(&src_tmp.FontInfo, -cfg.SizePixels); + const int padding = atlas->TexGlyphPadding; + for (int glyph_i = 0; glyph_i < src_tmp.GlyphsList.Size; glyph_i++) + { + int x0, y0, x1, y1; + const int glyph_index_in_font = stbtt_FindGlyphIndex(&src_tmp.FontInfo, src_tmp.GlyphsList[glyph_i]); + IM_ASSERT(glyph_index_in_font != 0); + stbtt_GetGlyphBitmapBoxSubpixel(&src_tmp.FontInfo, glyph_index_in_font, scale * cfg.OversampleH, scale * cfg.OversampleV, 0, 0, &x0, &y0, &x1, &y1); + src_tmp.Rects[glyph_i].w = (stbrp_coord)(x1 - x0 + padding + cfg.OversampleH - 1); + src_tmp.Rects[glyph_i].h = (stbrp_coord)(y1 - y0 + padding + cfg.OversampleV - 1); + total_surface += src_tmp.Rects[glyph_i].w * src_tmp.Rects[glyph_i].h; + } + } + + // We need a width for the skyline algorithm, any width! + // The exact width doesn't really matter much, but some API/GPU have texture size limitations and increasing width can decrease height. + // User can override TexDesiredWidth and TexGlyphPadding if they wish, otherwise we use a simple heuristic to select the width based on expected surface. + const int surface_sqrt = (int)ImSqrt((float)total_surface) + 1; + atlas->TexHeight = 0; + if (atlas->TexDesiredWidth > 0) + atlas->TexWidth = atlas->TexDesiredWidth; + else + atlas->TexWidth = (surface_sqrt >= 4096 * 0.7f) ? 4096 : (surface_sqrt >= 2048 * 0.7f) ? 2048 : (surface_sqrt >= 1024 * 0.7f) ? 1024 : 512; + + // 5. Start packing + // Pack our extra data rectangles first, so it will be on the upper-left corner of our texture (UV will have small values). + const int TEX_HEIGHT_MAX = 1024 * 32; + stbtt_pack_context spc = {}; + stbtt_PackBegin(&spc, NULL, atlas->TexWidth, TEX_HEIGHT_MAX, 0, atlas->TexGlyphPadding, NULL); + ImFontAtlasBuildPackCustomRects(atlas, spc.pack_info); + + // 6. Pack each source font. No rendering yet, we are working with rectangles in an infinitely tall texture at this point. 
+ for (int src_i = 0; src_i < src_tmp_array.Size; src_i++) + { + ImFontBuildSrcData& src_tmp = src_tmp_array[src_i]; + if (src_tmp.GlyphsCount == 0) + continue; + + stbrp_pack_rects((stbrp_context*)spc.pack_info, src_tmp.Rects, src_tmp.GlyphsCount); + + // Extend texture height and mark missing glyphs as non-packed so we won't render them. + // FIXME: We are not handling packing failure here (would happen if we got off TEX_HEIGHT_MAX or if a single if larger than TexWidth?) + for (int glyph_i = 0; glyph_i < src_tmp.GlyphsCount; glyph_i++) + if (src_tmp.Rects[glyph_i].was_packed) + atlas->TexHeight = ImMax(atlas->TexHeight, src_tmp.Rects[glyph_i].y + src_tmp.Rects[glyph_i].h); + } + + // 7. Allocate texture + atlas->TexHeight = (atlas->Flags & ImFontAtlasFlags_NoPowerOfTwoHeight) ? (atlas->TexHeight + 1) : ImUpperPowerOfTwo(atlas->TexHeight); + atlas->TexUvScale = ImVec2(1.0f / atlas->TexWidth, 1.0f / atlas->TexHeight); + atlas->TexPixelsAlpha8 = (unsigned char*)IM_ALLOC(atlas->TexWidth * atlas->TexHeight); + memset(atlas->TexPixelsAlpha8, 0, atlas->TexWidth * atlas->TexHeight); + spc.pixels = atlas->TexPixelsAlpha8; + spc.height = atlas->TexHeight; + + // 8. 
Render/rasterize font characters into the texture + for (int src_i = 0; src_i < src_tmp_array.Size; src_i++) + { + ImFontConfig& cfg = atlas->ConfigData[src_i]; + ImFontBuildSrcData& src_tmp = src_tmp_array[src_i]; + if (src_tmp.GlyphsCount == 0) + continue; + + stbtt_PackFontRangesRenderIntoRects(&spc, &src_tmp.FontInfo, &src_tmp.PackRange, 1, src_tmp.Rects); + + // Apply multiply operator + if (cfg.RasterizerMultiply != 1.0f) + { + unsigned char multiply_table[256]; + ImFontAtlasBuildMultiplyCalcLookupTable(multiply_table, cfg.RasterizerMultiply); + stbrp_rect* r = &src_tmp.Rects[0]; + for (int glyph_i = 0; glyph_i < src_tmp.GlyphsCount; glyph_i++, r++) + if (r->was_packed) + ImFontAtlasBuildMultiplyRectAlpha8(multiply_table, atlas->TexPixelsAlpha8, r->x, r->y, r->w, r->h, atlas->TexWidth * 1); + } + src_tmp.Rects = NULL; + } + + // End packing + stbtt_PackEnd(&spc); + buf_rects.clear(); + + // 9. Setup ImFont and glyphs for runtime + for (int src_i = 0; src_i < src_tmp_array.Size; src_i++) + { + ImFontBuildSrcData& src_tmp = src_tmp_array[src_i]; + if (src_tmp.GlyphsCount == 0) + continue; + + // When merging fonts with MergeMode=true: + // - We can have multiple input fonts writing into a same destination font. + // - dst_font->ConfigData is != from cfg which is our source configuration. + ImFontConfig& cfg = atlas->ConfigData[src_i]; + ImFont* dst_font = cfg.DstFont; + + const float font_scale = stbtt_ScaleForPixelHeight(&src_tmp.FontInfo, cfg.SizePixels); + int unscaled_ascent, unscaled_descent, unscaled_line_gap; + stbtt_GetFontVMetrics(&src_tmp.FontInfo, &unscaled_ascent, &unscaled_descent, &unscaled_line_gap); + + const float ascent = ImFloor(unscaled_ascent * font_scale + ((unscaled_ascent > 0.0f) ? +1 : -1)); + const float descent = ImFloor(unscaled_descent * font_scale + ((unscaled_descent > 0.0f) ? 
+1 : -1)); + ImFontAtlasBuildSetupFont(atlas, dst_font, &cfg, ascent, descent); + const float font_off_x = cfg.GlyphOffset.x; + const float font_off_y = cfg.GlyphOffset.y + IM_ROUND(dst_font->Ascent); + + for (int glyph_i = 0; glyph_i < src_tmp.GlyphsCount; glyph_i++) + { + // Register glyph + const int codepoint = src_tmp.GlyphsList[glyph_i]; + const stbtt_packedchar& pc = src_tmp.PackedChars[glyph_i]; + stbtt_aligned_quad q; + float unused_x = 0.0f, unused_y = 0.0f; + stbtt_GetPackedQuad(src_tmp.PackedChars, atlas->TexWidth, atlas->TexHeight, glyph_i, &unused_x, &unused_y, &q, 0); + dst_font->AddGlyph(&cfg, (ImWchar)codepoint, q.x0 + font_off_x, q.y0 + font_off_y, q.x1 + font_off_x, q.y1 + font_off_y, q.s0, q.t0, q.s1, q.t1, pc.xadvance); + } + } + + // Cleanup temporary (ImVector doesn't honor destructor) + for (int src_i = 0; src_i < src_tmp_array.Size; src_i++) + src_tmp_array[src_i].~ImFontBuildSrcData(); + + ImFontAtlasBuildFinish(atlas); + return true; +} + +const ImFontBuilderIO* ImFontAtlasGetBuilderForStbTruetype() +{ + static ImFontBuilderIO io; + io.FontBuilder_Build = ImFontAtlasBuildWithStbTruetype; + return &io; +} + +#endif // IMGUI_ENABLE_STB_TRUETYPE + +void ImFontAtlasBuildSetupFont(ImFontAtlas* atlas, ImFont* font, ImFontConfig* font_config, float ascent, float descent) +{ + if (!font_config->MergeMode) + { + font->ClearOutputData(); + font->FontSize = font_config->SizePixels; + font->ConfigData = font_config; + font->ConfigDataCount = 0; + font->ContainerAtlas = atlas; + font->Ascent = ascent; + font->Descent = descent; + } + font->ConfigDataCount++; +} + +void ImFontAtlasBuildPackCustomRects(ImFontAtlas* atlas, void* stbrp_context_opaque) +{ + stbrp_context* pack_context = (stbrp_context*)stbrp_context_opaque; + IM_ASSERT(pack_context != NULL); + + ImVector& user_rects = atlas->CustomRects; + IM_ASSERT(user_rects.Size >= 1); // We expect at least the default custom rects to be registered, else something went wrong. 
+ + ImVector pack_rects; + pack_rects.resize(user_rects.Size); + memset(pack_rects.Data, 0, (size_t)pack_rects.size_in_bytes()); + for (int i = 0; i < user_rects.Size; i++) + { + pack_rects[i].w = user_rects[i].Width; + pack_rects[i].h = user_rects[i].Height; + } + stbrp_pack_rects(pack_context, &pack_rects[0], pack_rects.Size); + for (int i = 0; i < pack_rects.Size; i++) + if (pack_rects[i].was_packed) + { + user_rects[i].X = pack_rects[i].x; + user_rects[i].Y = pack_rects[i].y; + IM_ASSERT(pack_rects[i].w == user_rects[i].Width && pack_rects[i].h == user_rects[i].Height); + atlas->TexHeight = ImMax(atlas->TexHeight, pack_rects[i].y + pack_rects[i].h); + } +} + +void ImFontAtlasBuildRender8bppRectFromString(ImFontAtlas* atlas, int x, int y, int w, int h, const char* in_str, char in_marker_char, unsigned char in_marker_pixel_value) +{ + IM_ASSERT(x >= 0 && x + w <= atlas->TexWidth); + IM_ASSERT(y >= 0 && y + h <= atlas->TexHeight); + unsigned char* out_pixel = atlas->TexPixelsAlpha8 + x + (y * atlas->TexWidth); + for (int off_y = 0; off_y < h; off_y++, out_pixel += atlas->TexWidth, in_str += w) + for (int off_x = 0; off_x < w; off_x++) + out_pixel[off_x] = (in_str[off_x] == in_marker_char) ? 
in_marker_pixel_value : 0x00; +} + +static void ImFontAtlasBuildRenderDefaultTexData(ImFontAtlas* atlas) +{ + ImFontAtlasCustomRect* r = atlas->GetCustomRectByIndex(atlas->PackIdMouseCursors); + IM_ASSERT(r->IsPacked()); + + const int w = atlas->TexWidth; + if (!(atlas->Flags & ImFontAtlasFlags_NoMouseCursors)) + { + // Render/copy pixels + IM_ASSERT(r->Width == FONT_ATLAS_DEFAULT_TEX_DATA_W * 2 + 1 && r->Height == FONT_ATLAS_DEFAULT_TEX_DATA_H); + const int x_for_white = r->X; + const int x_for_black = r->X + FONT_ATLAS_DEFAULT_TEX_DATA_W + 1; + ImFontAtlasBuildRender8bppRectFromString(atlas, x_for_white, r->Y, FONT_ATLAS_DEFAULT_TEX_DATA_W, FONT_ATLAS_DEFAULT_TEX_DATA_H, FONT_ATLAS_DEFAULT_TEX_DATA_PIXELS, '.', 0xFF); + ImFontAtlasBuildRender8bppRectFromString(atlas, x_for_black, r->Y, FONT_ATLAS_DEFAULT_TEX_DATA_W, FONT_ATLAS_DEFAULT_TEX_DATA_H, FONT_ATLAS_DEFAULT_TEX_DATA_PIXELS, 'X', 0xFF); + } + else + { + // Render 4 white pixels + IM_ASSERT(r->Width == 2 && r->Height == 2); + const int offset = (int)r->X + (int)r->Y * w; + atlas->TexPixelsAlpha8[offset] = atlas->TexPixelsAlpha8[offset + 1] = atlas->TexPixelsAlpha8[offset + w] = atlas->TexPixelsAlpha8[offset + w + 1] = 0xFF; + } + atlas->TexUvWhitePixel = ImVec2((r->X + 0.5f) * atlas->TexUvScale.x, (r->Y + 0.5f) * atlas->TexUvScale.y); +} + +static void ImFontAtlasBuildRenderLinesTexData(ImFontAtlas* atlas) +{ + if (atlas->Flags & ImFontAtlasFlags_NoBakedLines) + return; + + // This generates a triangular shape in the texture, with the various line widths stacked on top of each other to allow interpolation between them + ImFontAtlasCustomRect* r = atlas->GetCustomRectByIndex(atlas->PackIdLines); + IM_ASSERT(r->IsPacked()); + for (unsigned int n = 0; n < IM_DRAWLIST_TEX_LINES_WIDTH_MAX + 1; n++) // +1 because of the zero-width row + { + // Each line consists of at least two empty pixels at the ends, with a line of solid pixels in the middle + unsigned int y = n; + unsigned int line_width = n; + unsigned int 
pad_left = (r->Width - line_width) / 2; + unsigned int pad_right = r->Width - (pad_left + line_width); + + // Write each slice + IM_ASSERT(pad_left + line_width + pad_right == r->Width && y < r->Height); // Make sure we're inside the texture bounds before we start writing pixels + unsigned char* write_ptr = &atlas->TexPixelsAlpha8[r->X + ((r->Y + y) * atlas->TexWidth)]; + memset(write_ptr, 0x00, pad_left); + memset(write_ptr + pad_left, 0xFF, line_width); + memset(write_ptr + pad_left + line_width, 0x00, pad_right); + + // Calculate UVs for this line + ImVec2 uv0 = ImVec2((float)(r->X + pad_left - 1), (float)(r->Y + y)) * atlas->TexUvScale; + ImVec2 uv1 = ImVec2((float)(r->X + pad_left + line_width + 1), (float)(r->Y + y + 1)) * atlas->TexUvScale; + float half_v = (uv0.y + uv1.y) * 0.5f; // Calculate a constant V in the middle of the row to avoid sampling artifacts + atlas->TexUvLines[n] = ImVec4(uv0.x, half_v, uv1.x, half_v); + } +} + +// Note: this is called / shared by both the stb_truetype and the FreeType builder +void ImFontAtlasBuildInit(ImFontAtlas* atlas) +{ + // Register texture region for mouse cursors or standard white pixels + if (atlas->PackIdMouseCursors < 0) + { + if (!(atlas->Flags & ImFontAtlasFlags_NoMouseCursors)) + atlas->PackIdMouseCursors = atlas->AddCustomRectRegular(FONT_ATLAS_DEFAULT_TEX_DATA_W * 2 + 1, FONT_ATLAS_DEFAULT_TEX_DATA_H); + else + atlas->PackIdMouseCursors = atlas->AddCustomRectRegular(2, 2); + } + + // Register texture region for thick lines + // The +2 here is to give space for the end caps, whilst height +1 is to accommodate the fact we have a zero-width row + if (atlas->PackIdLines < 0) + { + if (!(atlas->Flags & ImFontAtlasFlags_NoBakedLines)) + atlas->PackIdLines = atlas->AddCustomRectRegular(IM_DRAWLIST_TEX_LINES_WIDTH_MAX + 2, IM_DRAWLIST_TEX_LINES_WIDTH_MAX + 1); + } +} + +// This is called/shared by both the stb_truetype and the FreeType builder. 
+void ImFontAtlasBuildFinish(ImFontAtlas* atlas) +{ + // Render into our custom data blocks + IM_ASSERT(atlas->TexPixelsAlpha8 != NULL); + ImFontAtlasBuildRenderDefaultTexData(atlas); + ImFontAtlasBuildRenderLinesTexData(atlas); + + // Register custom rectangle glyphs + for (int i = 0; i < atlas->CustomRects.Size; i++) + { + const ImFontAtlasCustomRect* r = &atlas->CustomRects[i]; + if (r->Font == NULL || r->GlyphID == 0) + continue; + + // Will ignore ImFontConfig settings: GlyphMinAdvanceX, GlyphMinAdvanceY, GlyphExtraSpacing, PixelSnapH + IM_ASSERT(r->Font->ContainerAtlas == atlas); + ImVec2 uv0, uv1; + atlas->CalcCustomRectUV(r, &uv0, &uv1); + r->Font->AddGlyph(NULL, (ImWchar)r->GlyphID, r->GlyphOffset.x, r->GlyphOffset.y, r->GlyphOffset.x + r->Width, r->GlyphOffset.y + r->Height, uv0.x, uv0.y, uv1.x, uv1.y, r->GlyphAdvanceX); + } + + // Build all fonts lookup tables + for (int i = 0; i < atlas->Fonts.Size; i++) + if (atlas->Fonts[i]->DirtyLookupTables) + atlas->Fonts[i]->BuildLookupTable(); + + // Ellipsis character is required for rendering elided text. We prefer using U+2026 (horizontal ellipsis). + // However some old fonts may contain ellipsis at U+0085. Here we auto-detect most suitable ellipsis character. + // FIXME: Also note that 0x2026 is currently seldom included in our font ranges. Because of this we are more likely to use three individual dots. 
+ for (int i = 0; i < atlas->Fonts.size(); i++) + { + ImFont* font = atlas->Fonts[i]; + if (font->EllipsisChar != (ImWchar)-1) + continue; + const ImWchar ellipsis_variants[] = { (ImWchar)0x2026, (ImWchar)0x0085 }; + for (int j = 0; j < IM_ARRAYSIZE(ellipsis_variants); j++) + if (font->FindGlyphNoFallback(ellipsis_variants[j]) != NULL) // Verify glyph exists + { + font->EllipsisChar = ellipsis_variants[j]; + break; + } + } +} + +// Retrieve list of range (2 int per range, values are inclusive) +const ImWchar* ImFontAtlas::GetGlyphRangesDefault() +{ + static const ImWchar ranges[] = + { + 0x0020, 0x00FF, // Basic Latin + Latin Supplement + 0, + }; + return &ranges[0]; +} + +const ImWchar* ImFontAtlas::GetGlyphRangesKorean() +{ + static const ImWchar ranges[] = + { + 0x0020, 0x00FF, // Basic Latin + Latin Supplement + 0x3131, 0x3163, // Korean alphabets + 0xAC00, 0xD7A3, // Korean characters + 0, + }; + return &ranges[0]; +} + +const ImWchar* ImFontAtlas::GetGlyphRangesChineseFull() +{ + static const ImWchar ranges[] = + { + 0x0020, 0x00FF, // Basic Latin + Latin Supplement + 0x2000, 0x206F, // General Punctuation + 0x3000, 0x30FF, // CJK Symbols and Punctuations, Hiragana, Katakana + 0x31F0, 0x31FF, // Katakana Phonetic Extensions + 0xFF00, 0xFFEF, // Half-width characters + 0x4e00, 0x9FAF, // CJK Ideograms + 0, + }; + return &ranges[0]; +} + +static void UnpackAccumulativeOffsetsIntoRanges(int base_codepoint, const short* accumulative_offsets, int accumulative_offsets_count, ImWchar* out_ranges) +{ + for (int n = 0; n < accumulative_offsets_count; n++, out_ranges += 2) + { + out_ranges[0] = out_ranges[1] = (ImWchar)(base_codepoint + accumulative_offsets[n]); + base_codepoint += accumulative_offsets[n]; + } + out_ranges[0] = 0; +} + +//------------------------------------------------------------------------- +// [SECTION] ImFontAtlas glyph ranges helpers +//------------------------------------------------------------------------- + +const ImWchar* 
ImFontAtlas::GetGlyphRangesChineseSimplifiedCommon() +{ + // Store 2500 regularly used characters for Simplified Chinese. + // Sourced from https://zh.wiktionary.org/wiki/%E9%99%84%E5%BD%95:%E7%8E%B0%E4%BB%A3%E6%B1%89%E8%AF%AD%E5%B8%B8%E7%94%A8%E5%AD%97%E8%A1%A8 + // This table covers 97.97% of all characters used during the month in July, 1987. + // You can use ImFontGlyphRangesBuilder to create your own ranges derived from this, by merging existing ranges or adding new characters. + // (Stored as accumulative offsets from the initial unicode codepoint 0x4E00. This encoding is designed to helps us compact the source code size.) + static const short accumulative_offsets_from_0x4E00[] = + { + 0,1,2,4,1,1,1,1,2,1,3,2,1,2,2,1,1,1,1,1,5,2,1,2,3,3,3,2,2,4,1,1,1,2,1,5,2,3,1,2,1,2,1,1,2,1,1,2,2,1,4,1,1,1,1,5,10,1,2,19,2,1,2,1,2,1,2,1,2, + 1,5,1,6,3,2,1,2,2,1,1,1,4,8,5,1,1,4,1,1,3,1,2,1,5,1,2,1,1,1,10,1,1,5,2,4,6,1,4,2,2,2,12,2,1,1,6,1,1,1,4,1,1,4,6,5,1,4,2,2,4,10,7,1,1,4,2,4, + 2,1,4,3,6,10,12,5,7,2,14,2,9,1,1,6,7,10,4,7,13,1,5,4,8,4,1,1,2,28,5,6,1,1,5,2,5,20,2,2,9,8,11,2,9,17,1,8,6,8,27,4,6,9,20,11,27,6,68,2,2,1,1, + 1,2,1,2,2,7,6,11,3,3,1,1,3,1,2,1,1,1,1,1,3,1,1,8,3,4,1,5,7,2,1,4,4,8,4,2,1,2,1,1,4,5,6,3,6,2,12,3,1,3,9,2,4,3,4,1,5,3,3,1,3,7,1,5,1,1,1,1,2, + 3,4,5,2,3,2,6,1,1,2,1,7,1,7,3,4,5,15,2,2,1,5,3,22,19,2,1,1,1,1,2,5,1,1,1,6,1,1,12,8,2,9,18,22,4,1,1,5,1,16,1,2,7,10,15,1,1,6,2,4,1,2,4,1,6, + 1,1,3,2,4,1,6,4,5,1,2,1,1,2,1,10,3,1,3,2,1,9,3,2,5,7,2,19,4,3,6,1,1,1,1,1,4,3,2,1,1,1,2,5,3,1,1,1,2,2,1,1,2,1,1,2,1,3,1,1,1,3,7,1,4,1,1,2,1, + 1,2,1,2,4,4,3,8,1,1,1,2,1,3,5,1,3,1,3,4,6,2,2,14,4,6,6,11,9,1,15,3,1,28,5,2,5,5,3,1,3,4,5,4,6,14,3,2,3,5,21,2,7,20,10,1,2,19,2,4,28,28,2,3, + 2,1,14,4,1,26,28,42,12,40,3,52,79,5,14,17,3,2,2,11,3,4,6,3,1,8,2,23,4,5,8,10,4,2,7,3,5,1,1,6,3,1,2,2,2,5,28,1,1,7,7,20,5,3,29,3,17,26,1,8,4, + 27,3,6,11,23,5,3,4,6,13,24,16,6,5,10,25,35,7,3,2,3,3,14,3,6,2,6,1,4,2,3,8,2,1,1,3,3,3,4,1,1,13,2,2,4,5,2,1,14,14,1,2,2,1,4,5,2,3,1,14,3,12, + 
3,17,2,16,5,1,2,1,8,9,3,19,4,2,2,4,17,25,21,20,28,75,1,10,29,103,4,1,2,1,1,4,2,4,1,2,3,24,2,2,2,1,1,2,1,3,8,1,1,1,2,1,1,3,1,1,1,6,1,5,3,1,1, + 1,3,4,1,1,5,2,1,5,6,13,9,16,1,1,1,1,3,2,3,2,4,5,2,5,2,2,3,7,13,7,2,2,1,1,1,1,2,3,3,2,1,6,4,9,2,1,14,2,14,2,1,18,3,4,14,4,11,41,15,23,15,23, + 176,1,3,4,1,1,1,1,5,3,1,2,3,7,3,1,1,2,1,2,4,4,6,2,4,1,9,7,1,10,5,8,16,29,1,1,2,2,3,1,3,5,2,4,5,4,1,1,2,2,3,3,7,1,6,10,1,17,1,44,4,6,2,1,1,6, + 5,4,2,10,1,6,9,2,8,1,24,1,2,13,7,8,8,2,1,4,1,3,1,3,3,5,2,5,10,9,4,9,12,2,1,6,1,10,1,1,7,7,4,10,8,3,1,13,4,3,1,6,1,3,5,2,1,2,17,16,5,2,16,6, + 1,4,2,1,3,3,6,8,5,11,11,1,3,3,2,4,6,10,9,5,7,4,7,4,7,1,1,4,2,1,3,6,8,7,1,6,11,5,5,3,24,9,4,2,7,13,5,1,8,82,16,61,1,1,1,4,2,2,16,10,3,8,1,1, + 6,4,2,1,3,1,1,1,4,3,8,4,2,2,1,1,1,1,1,6,3,5,1,1,4,6,9,2,1,1,1,2,1,7,2,1,6,1,5,4,4,3,1,8,1,3,3,1,3,2,2,2,2,3,1,6,1,2,1,2,1,3,7,1,8,2,1,2,1,5, + 2,5,3,5,10,1,2,1,1,3,2,5,11,3,9,3,5,1,1,5,9,1,2,1,5,7,9,9,8,1,3,3,3,6,8,2,3,2,1,1,32,6,1,2,15,9,3,7,13,1,3,10,13,2,14,1,13,10,2,1,3,10,4,15, + 2,15,15,10,1,3,9,6,9,32,25,26,47,7,3,2,3,1,6,3,4,3,2,8,5,4,1,9,4,2,2,19,10,6,2,3,8,1,2,2,4,2,1,9,4,4,4,6,4,8,9,2,3,1,1,1,1,3,5,5,1,3,8,4,6, + 2,1,4,12,1,5,3,7,13,2,5,8,1,6,1,2,5,14,6,1,5,2,4,8,15,5,1,23,6,62,2,10,1,1,8,1,2,2,10,4,2,2,9,2,1,1,3,2,3,1,5,3,3,2,1,3,8,1,1,1,11,3,1,1,4, + 3,7,1,14,1,2,3,12,5,2,5,1,6,7,5,7,14,11,1,3,1,8,9,12,2,1,11,8,4,4,2,6,10,9,13,1,1,3,1,5,1,3,2,4,4,1,18,2,3,14,11,4,29,4,2,7,1,3,13,9,2,2,5, + 3,5,20,7,16,8,5,72,34,6,4,22,12,12,28,45,36,9,7,39,9,191,1,1,1,4,11,8,4,9,2,3,22,1,1,1,1,4,17,1,7,7,1,11,31,10,2,4,8,2,3,2,1,4,2,16,4,32,2, + 3,19,13,4,9,1,5,2,14,8,1,1,3,6,19,6,5,1,16,6,2,10,8,5,1,2,3,1,5,5,1,11,6,6,1,3,3,2,6,3,8,1,1,4,10,7,5,7,7,5,8,9,2,1,3,4,1,1,3,1,3,3,2,6,16, + 1,4,6,3,1,10,6,1,3,15,2,9,2,10,25,13,9,16,6,2,2,10,11,4,3,9,1,2,6,6,5,4,30,40,1,10,7,12,14,33,6,3,6,7,3,1,3,1,11,14,4,9,5,12,11,49,18,51,31, + 140,31,2,2,1,5,1,8,1,10,1,4,4,3,24,1,10,1,3,6,6,16,3,4,5,2,1,4,2,57,10,6,22,2,22,3,7,22,6,10,11,36,18,16,33,36,2,5,5,1,1,1,4,10,1,4,13,2,7, + 
5,2,9,3,4,1,7,43,3,7,3,9,14,7,9,1,11,1,1,3,7,4,18,13,1,14,1,3,6,10,73,2,2,30,6,1,11,18,19,13,22,3,46,42,37,89,7,3,16,34,2,2,3,9,1,7,1,1,1,2, + 2,4,10,7,3,10,3,9,5,28,9,2,6,13,7,3,1,3,10,2,7,2,11,3,6,21,54,85,2,1,4,2,2,1,39,3,21,2,2,5,1,1,1,4,1,1,3,4,15,1,3,2,4,4,2,3,8,2,20,1,8,7,13, + 4,1,26,6,2,9,34,4,21,52,10,4,4,1,5,12,2,11,1,7,2,30,12,44,2,30,1,1,3,6,16,9,17,39,82,2,2,24,7,1,7,3,16,9,14,44,2,1,2,1,2,3,5,2,4,1,6,7,5,3, + 2,6,1,11,5,11,2,1,18,19,8,1,3,24,29,2,1,3,5,2,2,1,13,6,5,1,46,11,3,5,1,1,5,8,2,10,6,12,6,3,7,11,2,4,16,13,2,5,1,1,2,2,5,2,28,5,2,23,10,8,4, + 4,22,39,95,38,8,14,9,5,1,13,5,4,3,13,12,11,1,9,1,27,37,2,5,4,4,63,211,95,2,2,2,1,3,5,2,1,1,2,2,1,1,1,3,2,4,1,2,1,1,5,2,2,1,1,2,3,1,3,1,1,1, + 3,1,4,2,1,3,6,1,1,3,7,15,5,3,2,5,3,9,11,4,2,22,1,6,3,8,7,1,4,28,4,16,3,3,25,4,4,27,27,1,4,1,2,2,7,1,3,5,2,28,8,2,14,1,8,6,16,25,3,3,3,14,3, + 3,1,1,2,1,4,6,3,8,4,1,1,1,2,3,6,10,6,2,3,18,3,2,5,5,4,3,1,5,2,5,4,23,7,6,12,6,4,17,11,9,5,1,1,10,5,12,1,1,11,26,33,7,3,6,1,17,7,1,5,12,1,11, + 2,4,1,8,14,17,23,1,2,1,7,8,16,11,9,6,5,2,6,4,16,2,8,14,1,11,8,9,1,1,1,9,25,4,11,19,7,2,15,2,12,8,52,7,5,19,2,16,4,36,8,1,16,8,24,26,4,6,2,9, + 5,4,36,3,28,12,25,15,37,27,17,12,59,38,5,32,127,1,2,9,17,14,4,1,2,1,1,8,11,50,4,14,2,19,16,4,17,5,4,5,26,12,45,2,23,45,104,30,12,8,3,10,2,2, + 3,3,1,4,20,7,2,9,6,15,2,20,1,3,16,4,11,15,6,134,2,5,59,1,2,2,2,1,9,17,3,26,137,10,211,59,1,2,4,1,4,1,1,1,2,6,2,3,1,1,2,3,2,3,1,3,4,4,2,3,3, + 1,4,3,1,7,2,2,3,1,2,1,3,3,3,2,2,3,2,1,3,14,6,1,3,2,9,6,15,27,9,34,145,1,1,2,1,1,1,1,2,1,1,1,1,2,2,2,3,1,2,1,1,1,2,3,5,8,3,5,2,4,1,3,2,2,2,12, + 4,1,1,1,10,4,5,1,20,4,16,1,15,9,5,12,2,9,2,5,4,2,26,19,7,1,26,4,30,12,15,42,1,6,8,172,1,1,4,2,1,1,11,2,2,4,2,1,2,1,10,8,1,2,1,4,5,1,2,5,1,8, + 4,1,3,4,2,1,6,2,1,3,4,1,2,1,1,1,1,12,5,7,2,4,3,1,1,1,3,3,6,1,2,2,3,3,3,2,1,2,12,14,11,6,6,4,12,2,8,1,7,10,1,35,7,4,13,15,4,3,23,21,28,52,5, + 26,5,6,1,7,10,2,7,53,3,2,1,1,1,2,163,532,1,10,11,1,3,3,4,8,2,8,6,2,2,23,22,4,2,2,4,2,1,3,1,3,3,5,9,8,2,1,2,8,1,10,2,12,21,20,15,105,2,3,1,1, + 
3,2,3,1,1,2,5,1,4,15,11,19,1,1,1,1,5,4,5,1,1,2,5,3,5,12,1,2,5,1,11,1,1,15,9,1,4,5,3,26,8,2,1,3,1,1,15,19,2,12,1,2,5,2,7,2,19,2,20,6,26,7,5, + 2,2,7,34,21,13,70,2,128,1,1,2,1,1,2,1,1,3,2,2,2,15,1,4,1,3,4,42,10,6,1,49,85,8,1,2,1,1,4,4,2,3,6,1,5,7,4,3,211,4,1,2,1,2,5,1,2,4,2,2,6,5,6, + 10,3,4,48,100,6,2,16,296,5,27,387,2,2,3,7,16,8,5,38,15,39,21,9,10,3,7,59,13,27,21,47,5,21,6 + }; + static ImWchar base_ranges[] = // not zero-terminated + { + 0x0020, 0x00FF, // Basic Latin + Latin Supplement + 0x2000, 0x206F, // General Punctuation + 0x3000, 0x30FF, // CJK Symbols and Punctuations, Hiragana, Katakana + 0x31F0, 0x31FF, // Katakana Phonetic Extensions + 0xFF00, 0xFFEF // Half-width characters + }; + static ImWchar full_ranges[IM_ARRAYSIZE(base_ranges) + IM_ARRAYSIZE(accumulative_offsets_from_0x4E00) * 2 + 1] = { 0 }; + if (!full_ranges[0]) + { + memcpy(full_ranges, base_ranges, sizeof(base_ranges)); + UnpackAccumulativeOffsetsIntoRanges(0x4E00, accumulative_offsets_from_0x4E00, IM_ARRAYSIZE(accumulative_offsets_from_0x4E00), full_ranges + IM_ARRAYSIZE(base_ranges)); + } + return &full_ranges[0]; +} + +const ImWchar* ImFontAtlas::GetGlyphRangesJapanese() +{ + // 2999 ideograms code points for Japanese + // - 2136 Joyo (meaning "for regular use" or "for common use") Kanji code points + // - 863 Jinmeiyo (meaning "for personal name") Kanji code points + // - Sourced from the character information database of the Information-technology Promotion Agency, Japan + // - https://mojikiban.ipa.go.jp/mji/ + // - Available under the terms of the Creative Commons Attribution-ShareAlike 2.1 Japan (CC BY-SA 2.1 JP). 
+ // - https://creativecommons.org/licenses/by-sa/2.1/jp/deed.en + // - https://creativecommons.org/licenses/by-sa/2.1/jp/legalcode + // - You can generate this code by the script at: + // - https://github.com/vaiorabbit/everyday_use_kanji + // - References: + // - List of Joyo Kanji + // - (Official list by the Agency for Cultural Affairs) https://www.bunka.go.jp/kokugo_nihongo/sisaku/joho/joho/kakuki/14/tosin02/index.html + // - (Wikipedia) https://en.wikipedia.org/wiki/List_of_j%C5%8Dy%C5%8D_kanji + // - List of Jinmeiyo Kanji + // - (Official list by the Ministry of Justice) http://www.moj.go.jp/MINJI/minji86.html + // - (Wikipedia) https://en.wikipedia.org/wiki/Jinmeiy%C5%8D_kanji + // - Missing 1 Joyo Kanji: U+20B9F (Kun'yomi: Shikaru, On'yomi: Shitsu,shichi), see https://github.com/ocornut/imgui/pull/3627 for details. + // You can use ImFontGlyphRangesBuilder to create your own ranges derived from this, by merging existing ranges or adding new characters. + // (Stored as accumulative offsets from the initial unicode codepoint 0x4E00. This encoding is designed to helps us compact the source code size.) 
+ static const short accumulative_offsets_from_0x4E00[] = + { + 0,1,2,4,1,1,1,1,2,1,3,3,2,2,1,5,3,5,7,5,6,1,2,1,7,2,6,3,1,8,1,1,4,1,1,18,2,11,2,6,2,1,2,1,5,1,2,1,3,1,2,1,2,3,3,1,1,2,3,1,1,1,12,7,9,1,4,5,1, + 1,2,1,10,1,1,9,2,2,4,5,6,9,3,1,1,1,1,9,3,18,5,2,2,2,2,1,6,3,7,1,1,1,1,2,2,4,2,1,23,2,10,4,3,5,2,4,10,2,4,13,1,6,1,9,3,1,1,6,6,7,6,3,1,2,11,3, + 2,2,3,2,15,2,2,5,4,3,6,4,1,2,5,2,12,16,6,13,9,13,2,1,1,7,16,4,7,1,19,1,5,1,2,2,7,7,8,2,6,5,4,9,18,7,4,5,9,13,11,8,15,2,1,1,1,2,1,2,2,1,2,2,8, + 2,9,3,3,1,1,4,4,1,1,1,4,9,1,4,3,5,5,2,7,5,3,4,8,2,1,13,2,3,3,1,14,1,1,4,5,1,3,6,1,5,2,1,1,3,3,3,3,1,1,2,7,6,6,7,1,4,7,6,1,1,1,1,1,12,3,3,9,5, + 2,6,1,5,6,1,2,3,18,2,4,14,4,1,3,6,1,1,6,3,5,5,3,2,2,2,2,12,3,1,4,2,3,2,3,11,1,7,4,1,2,1,3,17,1,9,1,24,1,1,4,2,2,4,1,2,7,1,1,1,3,1,2,2,4,15,1, + 1,2,1,1,2,1,5,2,5,20,2,5,9,1,10,8,7,6,1,1,1,1,1,1,6,2,1,2,8,1,1,1,1,5,1,1,3,1,1,1,1,3,1,1,12,4,1,3,1,1,1,1,1,10,3,1,7,5,13,1,2,3,4,6,1,1,30, + 2,9,9,1,15,38,11,3,1,8,24,7,1,9,8,10,2,1,9,31,2,13,6,2,9,4,49,5,2,15,2,1,10,2,1,1,1,2,2,6,15,30,35,3,14,18,8,1,16,10,28,12,19,45,38,1,3,2,3, + 13,2,1,7,3,6,5,3,4,3,1,5,7,8,1,5,3,18,5,3,6,1,21,4,24,9,24,40,3,14,3,21,3,2,1,2,4,2,3,1,15,15,6,5,1,1,3,1,5,6,1,9,7,3,3,2,1,4,3,8,21,5,16,4, + 5,2,10,11,11,3,6,3,2,9,3,6,13,1,2,1,1,1,1,11,12,6,6,1,4,2,6,5,2,1,1,3,3,6,13,3,1,1,5,1,2,3,3,14,2,1,2,2,2,5,1,9,5,1,1,6,12,3,12,3,4,13,2,14, + 2,8,1,17,5,1,16,4,2,2,21,8,9,6,23,20,12,25,19,9,38,8,3,21,40,25,33,13,4,3,1,4,1,2,4,1,2,5,26,2,1,1,2,1,3,6,2,1,1,1,1,1,1,2,3,1,1,1,9,2,3,1,1, + 1,3,6,3,2,1,1,6,6,1,8,2,2,2,1,4,1,2,3,2,7,3,2,4,1,2,1,2,2,1,1,1,1,1,3,1,2,5,4,10,9,4,9,1,1,1,1,1,1,5,3,2,1,6,4,9,6,1,10,2,31,17,8,3,7,5,40,1, + 7,7,1,6,5,2,10,7,8,4,15,39,25,6,28,47,18,10,7,1,3,1,1,2,1,1,1,3,3,3,1,1,1,3,4,2,1,4,1,3,6,10,7,8,6,2,2,1,3,3,2,5,8,7,9,12,2,15,1,1,4,1,2,1,1, + 1,3,2,1,3,3,5,6,2,3,2,10,1,4,2,8,1,1,1,11,6,1,21,4,16,3,1,3,1,4,2,3,6,5,1,3,1,1,3,3,4,6,1,1,10,4,2,7,10,4,7,4,2,9,4,3,1,1,1,4,1,8,3,4,1,3,1, + 
6,1,4,2,1,4,7,2,1,8,1,4,5,1,1,2,2,4,6,2,7,1,10,1,1,3,4,11,10,8,21,4,6,1,3,5,2,1,2,28,5,5,2,3,13,1,2,3,1,4,2,1,5,20,3,8,11,1,3,3,3,1,8,10,9,2, + 10,9,2,3,1,1,2,4,1,8,3,6,1,7,8,6,11,1,4,29,8,4,3,1,2,7,13,1,4,1,6,2,6,12,12,2,20,3,2,3,6,4,8,9,2,7,34,5,1,18,6,1,1,4,4,5,7,9,1,2,2,4,3,4,1,7, + 2,2,2,6,2,3,25,5,3,6,1,4,6,7,4,2,1,4,2,13,6,4,4,3,1,5,3,4,4,3,2,1,1,4,1,2,1,1,3,1,11,1,6,3,1,7,3,6,2,8,8,6,9,3,4,11,3,2,10,12,2,5,11,1,6,4,5, + 3,1,8,5,4,6,6,3,5,1,1,3,2,1,2,2,6,17,12,1,10,1,6,12,1,6,6,19,9,6,16,1,13,4,4,15,7,17,6,11,9,15,12,6,7,2,1,2,2,15,9,3,21,4,6,49,18,7,3,2,3,1, + 6,8,2,2,6,2,9,1,3,6,4,4,1,2,16,2,5,2,1,6,2,3,5,3,1,2,5,1,2,1,9,3,1,8,6,4,8,11,3,1,1,1,1,3,1,13,8,4,1,3,2,2,1,4,1,11,1,5,2,1,5,2,5,8,6,1,1,7, + 4,3,8,3,2,7,2,1,5,1,5,2,4,7,6,2,8,5,1,11,4,5,3,6,18,1,2,13,3,3,1,21,1,1,4,1,4,1,1,1,8,1,2,2,7,1,2,4,2,2,9,2,1,1,1,4,3,6,3,12,5,1,1,1,5,6,3,2, + 4,8,2,2,4,2,7,1,8,9,5,2,3,2,1,3,2,13,7,14,6,5,1,1,2,1,4,2,23,2,1,1,6,3,1,4,1,15,3,1,7,3,9,14,1,3,1,4,1,1,5,8,1,3,8,3,8,15,11,4,14,4,4,2,5,5, + 1,7,1,6,14,7,7,8,5,15,4,8,6,5,6,2,1,13,1,20,15,11,9,2,5,6,2,11,2,6,2,5,1,5,8,4,13,19,25,4,1,1,11,1,34,2,5,9,14,6,2,2,6,1,1,14,1,3,14,13,1,6, + 12,21,14,14,6,32,17,8,32,9,28,1,2,4,11,8,3,1,14,2,5,15,1,1,1,1,3,6,4,1,3,4,11,3,1,1,11,30,1,5,1,4,1,5,8,1,1,3,2,4,3,17,35,2,6,12,17,3,1,6,2, + 1,1,12,2,7,3,3,2,1,16,2,8,3,6,5,4,7,3,3,8,1,9,8,5,1,2,1,3,2,8,1,2,9,12,1,1,2,3,8,3,24,12,4,3,7,5,8,3,3,3,3,3,3,1,23,10,3,1,2,2,6,3,1,16,1,16, + 22,3,10,4,11,6,9,7,7,3,6,2,2,2,4,10,2,1,1,2,8,7,1,6,4,1,3,3,3,5,10,12,12,2,3,12,8,15,1,1,16,6,6,1,5,9,11,4,11,4,2,6,12,1,17,5,13,1,4,9,5,1,11, + 2,1,8,1,5,7,28,8,3,5,10,2,17,3,38,22,1,2,18,12,10,4,38,18,1,4,44,19,4,1,8,4,1,12,1,4,31,12,1,14,7,75,7,5,10,6,6,13,3,2,11,11,3,2,5,28,15,6,18, + 18,5,6,4,3,16,1,7,18,7,36,3,5,3,1,7,1,9,1,10,7,2,4,2,6,2,9,7,4,3,32,12,3,7,10,2,23,16,3,1,12,3,31,4,11,1,3,8,9,5,1,30,15,6,12,3,2,2,11,19,9, + 
14,2,6,2,3,19,13,17,5,3,3,25,3,14,1,1,1,36,1,3,2,19,3,13,36,9,13,31,6,4,16,34,2,5,4,2,3,3,5,1,1,1,4,3,1,17,3,2,3,5,3,1,3,2,3,5,6,3,12,11,1,3, + 1,2,26,7,12,7,2,14,3,3,7,7,11,25,25,28,16,4,36,1,2,1,6,2,1,9,3,27,17,4,3,4,13,4,1,3,2,2,1,10,4,2,4,6,3,8,2,1,18,1,1,24,2,2,4,33,2,3,63,7,1,6, + 40,7,3,4,4,2,4,15,18,1,16,1,1,11,2,41,14,1,3,18,13,3,2,4,16,2,17,7,15,24,7,18,13,44,2,2,3,6,1,1,7,5,1,7,1,4,3,3,5,10,8,2,3,1,8,1,1,27,4,2,1, + 12,1,2,1,10,6,1,6,7,5,2,3,7,11,5,11,3,6,6,2,3,15,4,9,1,1,2,1,2,11,2,8,12,8,5,4,2,3,1,5,2,2,1,14,1,12,11,4,1,11,17,17,4,3,2,5,5,7,3,1,5,9,9,8, + 2,5,6,6,13,13,2,1,2,6,1,2,2,49,4,9,1,2,10,16,7,8,4,3,2,23,4,58,3,29,1,14,19,19,11,11,2,7,5,1,3,4,6,2,18,5,12,12,17,17,3,3,2,4,1,6,2,3,4,3,1, + 1,1,1,5,1,1,9,1,3,1,3,6,1,8,1,1,2,6,4,14,3,1,4,11,4,1,3,32,1,2,4,13,4,1,2,4,2,1,3,1,11,1,4,2,1,4,4,6,3,5,1,6,5,7,6,3,23,3,5,3,5,3,3,13,3,9,10, + 1,12,10,2,3,18,13,7,160,52,4,2,2,3,2,14,5,4,12,4,6,4,1,20,4,11,6,2,12,27,1,4,1,2,2,7,4,5,2,28,3,7,25,8,3,19,3,6,10,2,2,1,10,2,5,4,1,3,4,1,5, + 3,2,6,9,3,6,2,16,3,3,16,4,5,5,3,2,1,2,16,15,8,2,6,21,2,4,1,22,5,8,1,1,21,11,2,1,11,11,19,13,12,4,2,3,2,3,6,1,8,11,1,4,2,9,5,2,1,11,2,9,1,1,2, + 14,31,9,3,4,21,14,4,8,1,7,2,2,2,5,1,4,20,3,3,4,10,1,11,9,8,2,1,4,5,14,12,14,2,17,9,6,31,4,14,1,20,13,26,5,2,7,3,6,13,2,4,2,19,6,2,2,18,9,3,5, + 12,12,14,4,6,2,3,6,9,5,22,4,5,25,6,4,8,5,2,6,27,2,35,2,16,3,7,8,8,6,6,5,9,17,2,20,6,19,2,13,3,1,1,1,4,17,12,2,14,7,1,4,18,12,38,33,2,10,1,1, + 2,13,14,17,11,50,6,33,20,26,74,16,23,45,50,13,38,33,6,6,7,4,4,2,1,3,2,5,8,7,8,9,3,11,21,9,13,1,3,10,6,7,1,2,2,18,5,5,1,9,9,2,68,9,19,13,2,5, + 1,4,4,7,4,13,3,9,10,21,17,3,26,2,1,5,2,4,5,4,1,7,4,7,3,4,2,1,6,1,1,20,4,1,9,2,2,1,3,3,2,3,2,1,1,1,20,2,3,1,6,2,3,6,2,4,8,1,3,2,10,3,5,3,4,4, + 3,4,16,1,6,1,10,2,4,2,1,1,2,10,11,2,2,3,1,24,31,4,10,10,2,5,12,16,164,15,4,16,7,9,15,19,17,1,2,1,1,5,1,1,1,1,1,3,1,4,3,1,3,1,3,1,2,1,1,3,3,7, + 
2,8,1,2,2,2,1,3,4,3,7,8,12,92,2,10,3,1,3,14,5,25,16,42,4,7,7,4,2,21,5,27,26,27,21,25,30,31,2,1,5,13,3,22,5,6,6,11,9,12,1,5,9,7,5,5,22,60,3,5, + 13,1,1,8,1,1,3,3,2,1,9,3,3,18,4,1,2,3,7,6,3,1,2,3,9,1,3,1,3,2,1,3,1,1,1,2,1,11,3,1,6,9,1,3,2,3,1,2,1,5,1,1,4,3,4,1,2,2,4,4,1,7,2,1,2,2,3,5,13, + 18,3,4,14,9,9,4,16,3,7,5,8,2,6,48,28,3,1,1,4,2,14,8,2,9,2,1,15,2,4,3,2,10,16,12,8,7,1,1,3,1,1,1,2,7,4,1,6,4,38,39,16,23,7,15,15,3,2,12,7,21, + 37,27,6,5,4,8,2,10,8,8,6,5,1,2,1,3,24,1,16,17,9,23,10,17,6,1,51,55,44,13,294,9,3,6,2,4,2,2,15,1,1,1,13,21,17,68,14,8,9,4,1,4,9,3,11,7,1,1,1, + 5,6,3,2,1,1,1,2,3,8,1,2,2,4,1,5,5,2,1,4,3,7,13,4,1,4,1,3,1,1,1,5,5,10,1,6,1,5,2,1,5,2,4,1,4,5,7,3,18,2,9,11,32,4,3,3,2,4,7,11,16,9,11,8,13,38, + 32,8,4,2,1,1,2,1,2,4,4,1,1,1,4,1,21,3,11,1,16,1,1,6,1,3,2,4,9,8,57,7,44,1,3,3,13,3,10,1,1,7,5,2,7,21,47,63,3,15,4,7,1,16,1,1,2,8,2,3,42,15,4, + 1,29,7,22,10,3,78,16,12,20,18,4,67,11,5,1,3,15,6,21,31,32,27,18,13,71,35,5,142,4,10,1,2,50,19,33,16,35,37,16,19,27,7,1,133,19,1,4,8,7,20,1,4, + 4,1,10,3,1,6,1,2,51,5,40,15,24,43,22928,11,1,13,154,70,3,1,1,7,4,10,1,2,1,1,2,1,2,1,2,2,1,1,2,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1, + 3,2,1,1,1,1,2,1,1, + }; + static ImWchar base_ranges[] = // not zero-terminated + { + 0x0020, 0x00FF, // Basic Latin + Latin Supplement + 0x3000, 0x30FF, // CJK Symbols and Punctuations, Hiragana, Katakana + 0x31F0, 0x31FF, // Katakana Phonetic Extensions + 0xFF00, 0xFFEF // Half-width characters + }; + static ImWchar full_ranges[IM_ARRAYSIZE(base_ranges) + IM_ARRAYSIZE(accumulative_offsets_from_0x4E00)*2 + 1] = { 0 }; + if (!full_ranges[0]) + { + memcpy(full_ranges, base_ranges, sizeof(base_ranges)); + UnpackAccumulativeOffsetsIntoRanges(0x4E00, accumulative_offsets_from_0x4E00, IM_ARRAYSIZE(accumulative_offsets_from_0x4E00), full_ranges + IM_ARRAYSIZE(base_ranges)); + } + return &full_ranges[0]; +} + +const ImWchar* ImFontAtlas::GetGlyphRangesCyrillic() +{ + static const ImWchar ranges[] = + { + 0x0020, 0x00FF, // Basic Latin + Latin 
Supplement + 0x0400, 0x052F, // Cyrillic + Cyrillic Supplement + 0x2DE0, 0x2DFF, // Cyrillic Extended-A + 0xA640, 0xA69F, // Cyrillic Extended-B + 0, + }; + return &ranges[0]; +} + +const ImWchar* ImFontAtlas::GetGlyphRangesThai() +{ + static const ImWchar ranges[] = + { + 0x0020, 0x00FF, // Basic Latin + 0x2010, 0x205E, // Punctuations + 0x0E00, 0x0E7F, // Thai + 0, + }; + return &ranges[0]; +} + +const ImWchar* ImFontAtlas::GetGlyphRangesVietnamese() +{ + static const ImWchar ranges[] = + { + 0x0020, 0x00FF, // Basic Latin + 0x0102, 0x0103, + 0x0110, 0x0111, + 0x0128, 0x0129, + 0x0168, 0x0169, + 0x01A0, 0x01A1, + 0x01AF, 0x01B0, + 0x1EA0, 0x1EF9, + 0, + }; + return &ranges[0]; +} + +//----------------------------------------------------------------------------- +// [SECTION] ImFontGlyphRangesBuilder +//----------------------------------------------------------------------------- + +void ImFontGlyphRangesBuilder::AddText(const char* text, const char* text_end) +{ + while (text_end ? (text < text_end) : *text) + { + unsigned int c = 0; + int c_len = ImTextCharFromUtf8(&c, text, text_end); + text += c_len; + if (c_len == 0) + break; + AddChar((ImWchar)c); + } +} + +void ImFontGlyphRangesBuilder::AddRanges(const ImWchar* ranges) +{ + for (; ranges[0]; ranges += 2) + for (ImWchar c = ranges[0]; c <= ranges[1]; c++) + AddChar(c); +} + +void ImFontGlyphRangesBuilder::BuildRanges(ImVector* out_ranges) +{ + const int max_codepoint = IM_UNICODE_CODEPOINT_MAX; + for (int n = 0; n <= max_codepoint; n++) + if (GetBit(n)) + { + out_ranges->push_back((ImWchar)n); + while (n < max_codepoint && GetBit(n + 1)) + n++; + out_ranges->push_back((ImWchar)n); + } + out_ranges->push_back(0); +} + +//----------------------------------------------------------------------------- +// [SECTION] ImFont +//----------------------------------------------------------------------------- + +ImFont::ImFont() +{ + FontSize = 0.0f; + FallbackAdvanceX = 0.0f; + FallbackChar = (ImWchar)'?'; + 
EllipsisChar = (ImWchar)-1; + FallbackGlyph = NULL; + ContainerAtlas = NULL; + ConfigData = NULL; + ConfigDataCount = 0; + DirtyLookupTables = false; + Scale = 1.0f; + Ascent = Descent = 0.0f; + MetricsTotalSurface = 0; + memset(Used4kPagesMap, 0, sizeof(Used4kPagesMap)); +} + +ImFont::~ImFont() +{ + ClearOutputData(); +} + +void ImFont::ClearOutputData() +{ + FontSize = 0.0f; + FallbackAdvanceX = 0.0f; + Glyphs.clear(); + IndexAdvanceX.clear(); + IndexLookup.clear(); + FallbackGlyph = NULL; + ContainerAtlas = NULL; + DirtyLookupTables = true; + Ascent = Descent = 0.0f; + MetricsTotalSurface = 0; +} + +void ImFont::BuildLookupTable() +{ + int max_codepoint = 0; + for (int i = 0; i != Glyphs.Size; i++) + max_codepoint = ImMax(max_codepoint, (int)Glyphs[i].Codepoint); + + // Build lookup table + IM_ASSERT(Glyphs.Size < 0xFFFF); // -1 is reserved + IndexAdvanceX.clear(); + IndexLookup.clear(); + DirtyLookupTables = false; + memset(Used4kPagesMap, 0, sizeof(Used4kPagesMap)); + GrowIndex(max_codepoint + 1); + for (int i = 0; i < Glyphs.Size; i++) + { + int codepoint = (int)Glyphs[i].Codepoint; + IndexAdvanceX[codepoint] = Glyphs[i].AdvanceX; + IndexLookup[codepoint] = (ImWchar)i; + + // Mark 4K page as used + const int page_n = codepoint / 4096; + Used4kPagesMap[page_n >> 3] |= 1 << (page_n & 7); + } + + // Create a glyph to handle TAB + // FIXME: Needs proper TAB handling but it needs to be contextualized (or we could arbitrary say that each string starts at "column 0" ?) 
+ if (FindGlyph((ImWchar)' ')) + { + if (Glyphs.back().Codepoint != '\t') // So we can call this function multiple times (FIXME: Flaky) + Glyphs.resize(Glyphs.Size + 1); + ImFontGlyph& tab_glyph = Glyphs.back(); + tab_glyph = *FindGlyph((ImWchar)' '); + tab_glyph.Codepoint = '\t'; + tab_glyph.AdvanceX *= IM_TABSIZE; + IndexAdvanceX[(int)tab_glyph.Codepoint] = (float)tab_glyph.AdvanceX; + IndexLookup[(int)tab_glyph.Codepoint] = (ImWchar)(Glyphs.Size - 1); + } + + // Mark special glyphs as not visible (note that AddGlyph already mark as non-visible glyphs with zero-size polygons) + SetGlyphVisible((ImWchar)' ', false); + SetGlyphVisible((ImWchar)'\t', false); + + // Setup fall-backs + FallbackGlyph = FindGlyphNoFallback(FallbackChar); + FallbackAdvanceX = FallbackGlyph ? FallbackGlyph->AdvanceX : 0.0f; + for (int i = 0; i < max_codepoint + 1; i++) + if (IndexAdvanceX[i] < 0.0f) + IndexAdvanceX[i] = FallbackAdvanceX; +} + +// API is designed this way to avoid exposing the 4K page size +// e.g. use with IsGlyphRangeUnused(0, 255) +bool ImFont::IsGlyphRangeUnused(unsigned int c_begin, unsigned int c_last) +{ + unsigned int page_begin = (c_begin / 4096); + unsigned int page_last = (c_last / 4096); + for (unsigned int page_n = page_begin; page_n <= page_last; page_n++) + if ((page_n >> 3) < sizeof(Used4kPagesMap)) + if (Used4kPagesMap[page_n >> 3] & (1 << (page_n & 7))) + return false; + return true; +} + +void ImFont::SetGlyphVisible(ImWchar c, bool visible) +{ + if (ImFontGlyph* glyph = (ImFontGlyph*)(void*)FindGlyph((ImWchar)c)) + glyph->Visible = visible ? 1 : 0; +} + +void ImFont::SetFallbackChar(ImWchar c) +{ + FallbackChar = c; + BuildLookupTable(); +} + +void ImFont::GrowIndex(int new_size) +{ + IM_ASSERT(IndexAdvanceX.Size == IndexLookup.Size); + if (new_size <= IndexLookup.Size) + return; + IndexAdvanceX.resize(new_size, -1.0f); + IndexLookup.resize(new_size, (ImWchar)-1); +} + +// x0/y0/x1/y1 are offset from the character upper-left layout position, in pixels. 
Therefore x0/y0 are often fairly close to zero. +// Not to be mistaken with texture coordinates, which are held by u0/v0/u1/v1 in normalized format (0.0..1.0 on each texture axis). +// 'cfg' is not necessarily == 'this->ConfigData' because multiple source fonts+configs can be used to build one target font. +void ImFont::AddGlyph(const ImFontConfig* cfg, ImWchar codepoint, float x0, float y0, float x1, float y1, float u0, float v0, float u1, float v1, float advance_x) +{ + if (cfg != NULL) + { + // Clamp & recenter if needed + const float advance_x_original = advance_x; + advance_x = ImClamp(advance_x, cfg->GlyphMinAdvanceX, cfg->GlyphMaxAdvanceX); + if (advance_x != advance_x_original) + { + float char_off_x = cfg->PixelSnapH ? ImFloor((advance_x - advance_x_original) * 0.5f) : (advance_x - advance_x_original) * 0.5f; + x0 += char_off_x; + x1 += char_off_x; + } + + // Snap to pixel + if (cfg->PixelSnapH) + advance_x = IM_ROUND(advance_x); + + // Bake spacing + advance_x += cfg->GlyphExtraSpacing.x; + } + + Glyphs.resize(Glyphs.Size + 1); + ImFontGlyph& glyph = Glyphs.back(); + glyph.Codepoint = (unsigned int)codepoint; + glyph.Visible = (x0 != x1) && (y0 != y1); + glyph.X0 = x0; + glyph.Y0 = y0; + glyph.X1 = x1; + glyph.Y1 = y1; + glyph.U0 = u0; + glyph.V0 = v0; + glyph.U1 = u1; + glyph.V1 = v1; + glyph.AdvanceX = advance_x; + + // Compute rough surface usage metrics (+1 to account for average padding, +0.99 to round) + // We use (U1-U0)*TexWidth instead of X1-X0 to account for oversampling. + float pad = ContainerAtlas->TexGlyphPadding + 0.99f; + DirtyLookupTables = true; + MetricsTotalSurface += (int)((glyph.U1 - glyph.U0) * ContainerAtlas->TexWidth + pad) * (int)((glyph.V1 - glyph.V0) * ContainerAtlas->TexHeight + pad); +} + +void ImFont::AddRemapChar(ImWchar dst, ImWchar src, bool overwrite_dst) +{ + IM_ASSERT(IndexLookup.Size > 0); // Currently this can only be called AFTER the font has been built, aka after calling ImFontAtlas::GetTexDataAs*() function. 
+ unsigned int index_size = (unsigned int)IndexLookup.Size; + + if (dst < index_size && IndexLookup.Data[dst] == (ImWchar)-1 && !overwrite_dst) // 'dst' already exists + return; + if (src >= index_size && dst >= index_size) // both 'dst' and 'src' don't exist -> no-op + return; + + GrowIndex(dst + 1); + IndexLookup[dst] = (src < index_size) ? IndexLookup.Data[src] : (ImWchar)-1; + IndexAdvanceX[dst] = (src < index_size) ? IndexAdvanceX.Data[src] : 1.0f; +} + +const ImFontGlyph* ImFont::FindGlyph(ImWchar c) const +{ + if (c >= (size_t)IndexLookup.Size) + return FallbackGlyph; + const ImWchar i = IndexLookup.Data[c]; + if (i == (ImWchar)-1) + return FallbackGlyph; + return &Glyphs.Data[i]; +} + +const ImFontGlyph* ImFont::FindGlyphNoFallback(ImWchar c) const +{ + if (c >= (size_t)IndexLookup.Size) + return NULL; + const ImWchar i = IndexLookup.Data[c]; + if (i == (ImWchar)-1) + return NULL; + return &Glyphs.Data[i]; +} + +const char* ImFont::CalcWordWrapPositionA(float scale, const char* text, const char* text_end, float wrap_width) const +{ + // Simple word-wrapping for English, not full-featured. Please submit failing cases! + // FIXME: Much possible improvements (don't cut things like "word !", "word!!!" but cut within "word,,,,", more sensible support for punctuations, support for Unicode punctuations, etc.) + + // For references, possible wrap point marked with ^ + // "aaa bbb, ccc,ddd. eee fff. ggg!" + // ^ ^ ^ ^ ^__ ^ ^ + + // List of hardcoded separators: .,;!?'" + + // Skip extra blanks after a line returns (that includes not counting them in width computation) + // e.g. "Hello world" --> "Hello" "World" + + // Cut words that cannot possibly fit within one line. 
+ // e.g.: "The tropical fish" with ~5 characters worth of width --> "The tr" "opical" "fish" + + float line_width = 0.0f; + float word_width = 0.0f; + float blank_width = 0.0f; + wrap_width /= scale; // We work with unscaled widths to avoid scaling every characters + + const char* word_end = text; + const char* prev_word_end = NULL; + bool inside_word = true; + + const char* s = text; + while (s < text_end) + { + unsigned int c = (unsigned int)*s; + const char* next_s; + if (c < 0x80) + next_s = s + 1; + else + next_s = s + ImTextCharFromUtf8(&c, s, text_end); + if (c == 0) + break; + + if (c < 32) + { + if (c == '\n') + { + line_width = word_width = blank_width = 0.0f; + inside_word = true; + s = next_s; + continue; + } + if (c == '\r') + { + s = next_s; + continue; + } + } + + const float char_width = ((int)c < IndexAdvanceX.Size ? IndexAdvanceX.Data[c] : FallbackAdvanceX); + if (ImCharIsBlankW(c)) + { + if (inside_word) + { + line_width += blank_width; + blank_width = 0.0f; + word_end = s; + } + blank_width += char_width; + inside_word = false; + } + else + { + word_width += char_width; + if (inside_word) + { + word_end = next_s; + } + else + { + prev_word_end = word_end; + line_width += word_width + blank_width; + word_width = blank_width = 0.0f; + } + + // Allow wrapping after punctuation. + inside_word = (c != '.' && c != ',' && c != ';' && c != '!' && c != '?' && c != '\"'); + } + + // We ignore blank width at the end of the line (they can be skipped) + if (line_width + word_width > wrap_width) + { + // Words that cannot possibly fit within an entire line will be cut anywhere. + if (word_width < wrap_width) + s = prev_word_end ? prev_word_end : word_end; + break; + } + + s = next_s; + } + + return s; +} + +ImVec2 ImFont::CalcTextSizeA(float size, float max_width, float wrap_width, const char* text_begin, const char* text_end, const char** remaining) const +{ + if (!text_end) + text_end = text_begin + strlen(text_begin); // FIXME-OPT: Need to avoid this. 
+ + const float line_height = size; + const float scale = size / FontSize; + + ImVec2 text_size = ImVec2(0, 0); + float line_width = 0.0f; + + const bool word_wrap_enabled = (wrap_width > 0.0f); + const char* word_wrap_eol = NULL; + + const char* s = text_begin; + while (s < text_end) + { + if (word_wrap_enabled) + { + // Calculate how far we can render. Requires two passes on the string data but keeps the code simple and not intrusive for what's essentially an uncommon feature. + if (!word_wrap_eol) + { + word_wrap_eol = CalcWordWrapPositionA(scale, s, text_end, wrap_width - line_width); + if (word_wrap_eol == s) // Wrap_width is too small to fit anything. Force displaying 1 character to minimize the height discontinuity. + word_wrap_eol++; // +1 may not be a character start point in UTF-8 but it's ok because we use s >= word_wrap_eol below + } + + if (s >= word_wrap_eol) + { + if (text_size.x < line_width) + text_size.x = line_width; + text_size.y += line_height; + line_width = 0.0f; + word_wrap_eol = NULL; + + // Wrapping skips upcoming blanks + while (s < text_end) + { + const char c = *s; + if (ImCharIsBlankA(c)) { s++; } else if (c == '\n') { s++; break; } else { break; } + } + continue; + } + } + + // Decode and advance source + const char* prev_s = s; + unsigned int c = (unsigned int)*s; + if (c < 0x80) + { + s += 1; + } + else + { + s += ImTextCharFromUtf8(&c, s, text_end); + if (c == 0) // Malformed UTF-8? + break; + } + + if (c < 32) + { + if (c == '\n') + { + text_size.x = ImMax(text_size.x, line_width); + text_size.y += line_height; + line_width = 0.0f; + continue; + } + if (c == '\r') + continue; + } + + const float char_width = ((int)c < IndexAdvanceX.Size ? 
IndexAdvanceX.Data[c] : FallbackAdvanceX) * scale; + if (line_width + char_width >= max_width) + { + s = prev_s; + break; + } + + line_width += char_width; + } + + if (text_size.x < line_width) + text_size.x = line_width; + + if (line_width > 0 || text_size.y == 0.0f) + text_size.y += line_height; + + if (remaining) + *remaining = s; + + return text_size; +} + +void ImFont::RenderChar(ImDrawList* draw_list, float size, ImVec2 pos, ImU32 col, ImWchar c) const +{ + const ImFontGlyph* glyph = FindGlyph(c); + if (!glyph || !glyph->Visible) + return; + float scale = (size >= 0.0f) ? (size / FontSize) : 1.0f; + pos.x = IM_FLOOR(pos.x); + pos.y = IM_FLOOR(pos.y); + draw_list->PrimReserve(6, 4); + draw_list->PrimRectUV(ImVec2(pos.x + glyph->X0 * scale, pos.y + glyph->Y0 * scale), ImVec2(pos.x + glyph->X1 * scale, pos.y + glyph->Y1 * scale), ImVec2(glyph->U0, glyph->V0), ImVec2(glyph->U1, glyph->V1), col); +} + +void ImFont::RenderText(ImDrawList* draw_list, float size, ImVec2 pos, ImU32 col, const ImVec4& clip_rect, const char* text_begin, const char* text_end, float wrap_width, bool cpu_fine_clip) const +{ + if (!text_end) + text_end = text_begin + strlen(text_begin); // ImGui:: functions generally already provides a valid text_end, so this is merely to handle direct calls. + + // Align to be pixel perfect + pos.x = IM_FLOOR(pos.x); + pos.y = IM_FLOOR(pos.y); + float x = pos.x; + float y = pos.y; + if (y > clip_rect.w) + return; + + const float scale = size / FontSize; + const float line_height = FontSize * scale; + const bool word_wrap_enabled = (wrap_width > 0.0f); + const char* word_wrap_eol = NULL; + + // Fast-forward to first visible line + const char* s = text_begin; + if (y + line_height < clip_rect.y && !word_wrap_enabled) + while (y + line_height < clip_rect.y && s < text_end) + { + s = (const char*)memchr(s, '\n', text_end - s); + s = s ? 
s + 1 : text_end; + y += line_height; + } + + // For large text, scan for the last visible line in order to avoid over-reserving in the call to PrimReserve() + // Note that very large horizontal line will still be affected by the issue (e.g. a one megabyte string buffer without a newline will likely crash atm) + if (text_end - s > 10000 && !word_wrap_enabled) + { + const char* s_end = s; + float y_end = y; + while (y_end < clip_rect.w && s_end < text_end) + { + s_end = (const char*)memchr(s_end, '\n', text_end - s_end); + s_end = s_end ? s_end + 1 : text_end; + y_end += line_height; + } + text_end = s_end; + } + if (s == text_end) + return; + + // Reserve vertices for remaining worse case (over-reserving is useful and easily amortized) + const int vtx_count_max = (int)(text_end - s) * 4; + const int idx_count_max = (int)(text_end - s) * 6; + const int idx_expected_size = draw_list->IdxBuffer.Size + idx_count_max; + draw_list->PrimReserve(idx_count_max, vtx_count_max); + + ImDrawVert* vtx_write = draw_list->_VtxWritePtr; + ImDrawIdx* idx_write = draw_list->_IdxWritePtr; + unsigned int vtx_current_idx = draw_list->_VtxCurrentIdx; + + while (s < text_end) + { + if (word_wrap_enabled) + { + // Calculate how far we can render. Requires two passes on the string data but keeps the code simple and not intrusive for what's essentially an uncommon feature. + if (!word_wrap_eol) + { + word_wrap_eol = CalcWordWrapPositionA(scale, s, text_end, wrap_width - (x - pos.x)); + if (word_wrap_eol == s) // Wrap_width is too small to fit anything. Force displaying 1 character to minimize the height discontinuity. 
+ word_wrap_eol++; // +1 may not be a character start point in UTF-8 but it's ok because we use s >= word_wrap_eol below + } + + if (s >= word_wrap_eol) + { + x = pos.x; + y += line_height; + word_wrap_eol = NULL; + + // Wrapping skips upcoming blanks + while (s < text_end) + { + const char c = *s; + if (ImCharIsBlankA(c)) { s++; } else if (c == '\n') { s++; break; } else { break; } + } + continue; + } + } + + // Decode and advance source + unsigned int c = (unsigned int)*s; + if (c < 0x80) + { + s += 1; + } + else + { + s += ImTextCharFromUtf8(&c, s, text_end); + if (c == 0) // Malformed UTF-8? + break; + } + + if (c < 32) + { + if (c == '\n') + { + x = pos.x; + y += line_height; + if (y > clip_rect.w) + break; // break out of main loop + continue; + } + if (c == '\r') + continue; + } + + const ImFontGlyph* glyph = FindGlyph((ImWchar)c); + if (glyph == NULL) + continue; + + float char_width = glyph->AdvanceX * scale; + if (glyph->Visible) + { + // We don't do a second finer clipping test on the Y axis as we've already skipped anything before clip_rect.y and exit once we pass clip_rect.w + float x1 = x + glyph->X0 * scale; + float x2 = x + glyph->X1 * scale; + float y1 = y + glyph->Y0 * scale; + float y2 = y + glyph->Y1 * scale; + if (x1 <= clip_rect.z && x2 >= clip_rect.x) + { + // Render a character + float u1 = glyph->U0; + float v1 = glyph->V0; + float u2 = glyph->U1; + float v2 = glyph->V1; + + // CPU side clipping used to fit text in their frame when the frame is too small. Only does clipping for axis aligned quads. 
+ if (cpu_fine_clip) + { + if (x1 < clip_rect.x) + { + u1 = u1 + (1.0f - (x2 - clip_rect.x) / (x2 - x1)) * (u2 - u1); + x1 = clip_rect.x; + } + if (y1 < clip_rect.y) + { + v1 = v1 + (1.0f - (y2 - clip_rect.y) / (y2 - y1)) * (v2 - v1); + y1 = clip_rect.y; + } + if (x2 > clip_rect.z) + { + u2 = u1 + ((clip_rect.z - x1) / (x2 - x1)) * (u2 - u1); + x2 = clip_rect.z; + } + if (y2 > clip_rect.w) + { + v2 = v1 + ((clip_rect.w - y1) / (y2 - y1)) * (v2 - v1); + y2 = clip_rect.w; + } + if (y1 >= y2) + { + x += char_width; + continue; + } + } + + // We are NOT calling PrimRectUV() here because non-inlined causes too much overhead in a debug builds. Inlined here: + { + idx_write[0] = (ImDrawIdx)(vtx_current_idx); idx_write[1] = (ImDrawIdx)(vtx_current_idx+1); idx_write[2] = (ImDrawIdx)(vtx_current_idx+2); + idx_write[3] = (ImDrawIdx)(vtx_current_idx); idx_write[4] = (ImDrawIdx)(vtx_current_idx+2); idx_write[5] = (ImDrawIdx)(vtx_current_idx+3); + vtx_write[0].pos.x = x1; vtx_write[0].pos.y = y1; vtx_write[0].col = col; vtx_write[0].uv.x = u1; vtx_write[0].uv.y = v1; + vtx_write[1].pos.x = x2; vtx_write[1].pos.y = y1; vtx_write[1].col = col; vtx_write[1].uv.x = u2; vtx_write[1].uv.y = v1; + vtx_write[2].pos.x = x2; vtx_write[2].pos.y = y2; vtx_write[2].col = col; vtx_write[2].uv.x = u2; vtx_write[2].uv.y = v2; + vtx_write[3].pos.x = x1; vtx_write[3].pos.y = y2; vtx_write[3].col = col; vtx_write[3].uv.x = u1; vtx_write[3].uv.y = v2; + vtx_write += 4; + vtx_current_idx += 4; + idx_write += 6; + } + } + } + x += char_width; + } + + // Give back unused vertices (clipped ones, blanks) ~ this is essentially a PrimUnreserve() action. 
+ draw_list->VtxBuffer.Size = (int)(vtx_write - draw_list->VtxBuffer.Data); // Same as calling shrink() + draw_list->IdxBuffer.Size = (int)(idx_write - draw_list->IdxBuffer.Data); + draw_list->CmdBuffer[draw_list->CmdBuffer.Size - 1].ElemCount -= (idx_expected_size - draw_list->IdxBuffer.Size); + draw_list->_VtxWritePtr = vtx_write; + draw_list->_IdxWritePtr = idx_write; + draw_list->_VtxCurrentIdx = vtx_current_idx; +} + +//----------------------------------------------------------------------------- +// [SECTION] ImGui Internal Render Helpers +//----------------------------------------------------------------------------- +// Vaguely redesigned to stop accessing ImGui global state: +// - RenderArrow() +// - RenderBullet() +// - RenderCheckMark() +// - RenderMouseCursor() +// - RenderArrowPointingAt() +// - RenderRectFilledRangeH() +//----------------------------------------------------------------------------- +// Function in need of a redesign (legacy mess) +// - RenderColorRectWithAlphaCheckerboard() +//----------------------------------------------------------------------------- + +// Render an arrow aimed to be aligned with text (p_min is a position in the same space text would be positioned). To e.g. 
denote expanded/collapsed state +void ImGui::RenderArrow(ImDrawList* draw_list, ImVec2 pos, ImU32 col, ImGuiDir dir, float scale) +{ + const float h = draw_list->_Data->FontSize * 1.00f; + float r = h * 0.40f * scale; + ImVec2 center = pos + ImVec2(h * 0.50f, h * 0.50f * scale); + + ImVec2 a, b, c; + switch (dir) + { + case ImGuiDir_Up: + case ImGuiDir_Down: + if (dir == ImGuiDir_Up) r = -r; + a = ImVec2(+0.000f, +0.750f) * r; + b = ImVec2(-0.866f, -0.750f) * r; + c = ImVec2(+0.866f, -0.750f) * r; + break; + case ImGuiDir_Left: + case ImGuiDir_Right: + if (dir == ImGuiDir_Left) r = -r; + a = ImVec2(+0.750f, +0.000f) * r; + b = ImVec2(-0.750f, +0.866f) * r; + c = ImVec2(-0.750f, -0.866f) * r; + break; + case ImGuiDir_None: + case ImGuiDir_COUNT: + IM_ASSERT(0); + break; + } + draw_list->AddTriangleFilled(center + a, center + b, center + c, col); +} + +void ImGui::RenderBullet(ImDrawList* draw_list, ImVec2 pos, ImU32 col) +{ + draw_list->AddCircleFilled(pos, draw_list->_Data->FontSize * 0.20f, col, 8); +} + +void ImGui::RenderCheckMark(ImDrawList* draw_list, ImVec2 pos, ImU32 col, float sz) +{ + float thickness = ImMax(sz / 5.0f, 1.0f); + sz -= thickness * 0.5f; + pos += ImVec2(thickness * 0.25f, thickness * 0.25f); + + float third = sz / 3.0f; + float bx = pos.x + third; + float by = pos.y + sz - third * 0.5f; + draw_list->PathLineTo(ImVec2(bx - third, by - third)); + draw_list->PathLineTo(ImVec2(bx, by)); + draw_list->PathLineTo(ImVec2(bx + third * 2.0f, by - third * 2.0f)); + draw_list->PathStroke(col, false, thickness); +} + +void ImGui::RenderMouseCursor(ImDrawList* draw_list, ImVec2 pos, float scale, ImGuiMouseCursor mouse_cursor, ImU32 col_fill, ImU32 col_border, ImU32 col_shadow) +{ + if (mouse_cursor == ImGuiMouseCursor_None) + return; + IM_ASSERT(mouse_cursor > ImGuiMouseCursor_None && mouse_cursor < ImGuiMouseCursor_COUNT); + + ImFontAtlas* font_atlas = draw_list->_Data->Font->ContainerAtlas; + ImVec2 offset, size, uv[4]; + if 
(font_atlas->GetMouseCursorTexData(mouse_cursor, &offset, &size, &uv[0], &uv[2])) + { + pos -= offset; + const ImTextureID tex_id = font_atlas->TexID; + draw_list->PushTextureID(tex_id); + draw_list->AddImage(tex_id, pos + ImVec2(1, 0) * scale, pos + (ImVec2(1, 0) + size) * scale, uv[2], uv[3], col_shadow); + draw_list->AddImage(tex_id, pos + ImVec2(2, 0) * scale, pos + (ImVec2(2, 0) + size) * scale, uv[2], uv[3], col_shadow); + draw_list->AddImage(tex_id, pos, pos + size * scale, uv[2], uv[3], col_border); + draw_list->AddImage(tex_id, pos, pos + size * scale, uv[0], uv[1], col_fill); + draw_list->PopTextureID(); + } +} + +// Render an arrow. 'pos' is position of the arrow tip. half_sz.x is length from base to tip. half_sz.y is length on each side. +void ImGui::RenderArrowPointingAt(ImDrawList* draw_list, ImVec2 pos, ImVec2 half_sz, ImGuiDir direction, ImU32 col) +{ + switch (direction) + { + case ImGuiDir_Left: draw_list->AddTriangleFilled(ImVec2(pos.x + half_sz.x, pos.y - half_sz.y), ImVec2(pos.x + half_sz.x, pos.y + half_sz.y), pos, col); return; + case ImGuiDir_Right: draw_list->AddTriangleFilled(ImVec2(pos.x - half_sz.x, pos.y + half_sz.y), ImVec2(pos.x - half_sz.x, pos.y - half_sz.y), pos, col); return; + case ImGuiDir_Up: draw_list->AddTriangleFilled(ImVec2(pos.x + half_sz.x, pos.y + half_sz.y), ImVec2(pos.x - half_sz.x, pos.y + half_sz.y), pos, col); return; + case ImGuiDir_Down: draw_list->AddTriangleFilled(ImVec2(pos.x - half_sz.x, pos.y - half_sz.y), ImVec2(pos.x + half_sz.x, pos.y - half_sz.y), pos, col); return; + case ImGuiDir_None: case ImGuiDir_COUNT: break; // Fix warnings + } +} + +static inline float ImAcos01(float x) +{ + if (x <= 0.0f) return IM_PI * 0.5f; + if (x >= 1.0f) return 0.0f; + return ImAcos(x); + //return (-0.69813170079773212f * x * x - 0.87266462599716477f) * x + 1.5707963267948966f; // Cheap approximation, may be enough for what we do. +} + +// FIXME: Cleanup and move code to ImDrawList. 
+void ImGui::RenderRectFilledRangeH(ImDrawList* draw_list, const ImRect& rect, ImU32 col, float x_start_norm, float x_end_norm, float rounding) +{ + if (x_end_norm == x_start_norm) + return; + if (x_start_norm > x_end_norm) + ImSwap(x_start_norm, x_end_norm); + + ImVec2 p0 = ImVec2(ImLerp(rect.Min.x, rect.Max.x, x_start_norm), rect.Min.y); + ImVec2 p1 = ImVec2(ImLerp(rect.Min.x, rect.Max.x, x_end_norm), rect.Max.y); + if (rounding == 0.0f) + { + draw_list->AddRectFilled(p0, p1, col, 0.0f); + return; + } + + rounding = ImClamp(ImMin((rect.Max.x - rect.Min.x) * 0.5f, (rect.Max.y - rect.Min.y) * 0.5f) - 1.0f, 0.0f, rounding); + const float inv_rounding = 1.0f / rounding; + const float arc0_b = ImAcos01(1.0f - (p0.x - rect.Min.x) * inv_rounding); + const float arc0_e = ImAcos01(1.0f - (p1.x - rect.Min.x) * inv_rounding); + const float half_pi = IM_PI * 0.5f; // We will == compare to this because we know this is the exact value ImAcos01 can return. + const float x0 = ImMax(p0.x, rect.Min.x + rounding); + if (arc0_b == arc0_e) + { + draw_list->PathLineTo(ImVec2(x0, p1.y)); + draw_list->PathLineTo(ImVec2(x0, p0.y)); + } + else if (arc0_b == 0.0f && arc0_e == half_pi) + { + draw_list->PathArcToFast(ImVec2(x0, p1.y - rounding), rounding, 3, 6); // BL + draw_list->PathArcToFast(ImVec2(x0, p0.y + rounding), rounding, 6, 9); // TR + } + else + { + draw_list->PathArcTo(ImVec2(x0, p1.y - rounding), rounding, IM_PI - arc0_e, IM_PI - arc0_b, 3); // BL + draw_list->PathArcTo(ImVec2(x0, p0.y + rounding), rounding, IM_PI + arc0_b, IM_PI + arc0_e, 3); // TR + } + if (p1.x > rect.Min.x + rounding) + { + const float arc1_b = ImAcos01(1.0f - (rect.Max.x - p1.x) * inv_rounding); + const float arc1_e = ImAcos01(1.0f - (rect.Max.x - p0.x) * inv_rounding); + const float x1 = ImMin(p1.x, rect.Max.x - rounding); + if (arc1_b == arc1_e) + { + draw_list->PathLineTo(ImVec2(x1, p0.y)); + draw_list->PathLineTo(ImVec2(x1, p1.y)); + } + else if (arc1_b == 0.0f && arc1_e == half_pi) + { + 
draw_list->PathArcToFast(ImVec2(x1, p0.y + rounding), rounding, 9, 12); // TR + draw_list->PathArcToFast(ImVec2(x1, p1.y - rounding), rounding, 0, 3); // BR + } + else + { + draw_list->PathArcTo(ImVec2(x1, p0.y + rounding), rounding, -arc1_e, -arc1_b, 3); // TR + draw_list->PathArcTo(ImVec2(x1, p1.y - rounding), rounding, +arc1_b, +arc1_e, 3); // BR + } + } + draw_list->PathFillConvex(col); +} + +void ImGui::RenderRectFilledWithHole(ImDrawList* draw_list, ImRect outer, ImRect inner, ImU32 col, float rounding) +{ + const bool fill_L = (inner.Min.x > outer.Min.x); + const bool fill_R = (inner.Max.x < outer.Max.x); + const bool fill_U = (inner.Min.y > outer.Min.y); + const bool fill_D = (inner.Max.y < outer.Max.y); + if (fill_L) draw_list->AddRectFilled(ImVec2(outer.Min.x, inner.Min.y), ImVec2(inner.Min.x, inner.Max.y), col, rounding, (fill_U ? 0 : ImDrawCornerFlags_TopLeft) | (fill_D ? 0 : ImDrawCornerFlags_BotLeft)); + if (fill_R) draw_list->AddRectFilled(ImVec2(inner.Max.x, inner.Min.y), ImVec2(outer.Max.x, inner.Max.y), col, rounding, (fill_U ? 0 : ImDrawCornerFlags_TopRight) | (fill_D ? 0 : ImDrawCornerFlags_BotRight)); + if (fill_U) draw_list->AddRectFilled(ImVec2(inner.Min.x, outer.Min.y), ImVec2(inner.Max.x, inner.Min.y), col, rounding, (fill_L ? 0 : ImDrawCornerFlags_TopLeft) | (fill_R ? 0 : ImDrawCornerFlags_TopRight)); + if (fill_D) draw_list->AddRectFilled(ImVec2(inner.Min.x, inner.Max.y), ImVec2(inner.Max.x, outer.Max.y), col, rounding, (fill_L ? 0 : ImDrawCornerFlags_BotLeft) | (fill_R ? 
0 : ImDrawCornerFlags_BotRight)); + if (fill_L && fill_U) draw_list->AddRectFilled(ImVec2(outer.Min.x, outer.Min.y), ImVec2(inner.Min.x, inner.Min.y), col, rounding, ImDrawCornerFlags_TopLeft); + if (fill_R && fill_U) draw_list->AddRectFilled(ImVec2(inner.Max.x, outer.Min.y), ImVec2(outer.Max.x, inner.Min.y), col, rounding, ImDrawCornerFlags_TopRight); + if (fill_L && fill_D) draw_list->AddRectFilled(ImVec2(outer.Min.x, inner.Max.y), ImVec2(inner.Min.x, outer.Max.y), col, rounding, ImDrawCornerFlags_BotLeft); + if (fill_R && fill_D) draw_list->AddRectFilled(ImVec2(inner.Max.x, inner.Max.y), ImVec2(outer.Max.x, outer.Max.y), col, rounding, ImDrawCornerFlags_BotRight); +} + +// Helper for ColorPicker4() +// NB: This is rather brittle and will show artifact when rounding this enabled if rounded corners overlap multiple cells. Caller currently responsible for avoiding that. +// Spent a non reasonable amount of time trying to getting this right for ColorButton with rounding+anti-aliasing+ImGuiColorEditFlags_HalfAlphaPreview flag + various grid sizes and offsets, and eventually gave up... probably more reasonable to disable rounding altogether. 
+// FIXME: uses ImGui::GetColorU32 +void ImGui::RenderColorRectWithAlphaCheckerboard(ImDrawList* draw_list, ImVec2 p_min, ImVec2 p_max, ImU32 col, float grid_step, ImVec2 grid_off, float rounding, int rounding_corners_flags) +{ + if (((col & IM_COL32_A_MASK) >> IM_COL32_A_SHIFT) < 0xFF) + { + ImU32 col_bg1 = ImGui::GetColorU32(ImAlphaBlendColors(IM_COL32(204, 204, 204, 255), col)); + ImU32 col_bg2 = ImGui::GetColorU32(ImAlphaBlendColors(IM_COL32(128, 128, 128, 255), col)); + draw_list->AddRectFilled(p_min, p_max, col_bg1, rounding, rounding_corners_flags); + + int yi = 0; + for (float y = p_min.y + grid_off.y; y < p_max.y; y += grid_step, yi++) + { + float y1 = ImClamp(y, p_min.y, p_max.y), y2 = ImMin(y + grid_step, p_max.y); + if (y2 <= y1) + continue; + for (float x = p_min.x + grid_off.x + (yi & 1) * grid_step; x < p_max.x; x += grid_step * 2.0f) + { + float x1 = ImClamp(x, p_min.x, p_max.x), x2 = ImMin(x + grid_step, p_max.x); + if (x2 <= x1) + continue; + int rounding_corners_flags_cell = 0; + if (y1 <= p_min.y) { if (x1 <= p_min.x) rounding_corners_flags_cell |= ImDrawCornerFlags_TopLeft; if (x2 >= p_max.x) rounding_corners_flags_cell |= ImDrawCornerFlags_TopRight; } + if (y2 >= p_max.y) { if (x1 <= p_min.x) rounding_corners_flags_cell |= ImDrawCornerFlags_BotLeft; if (x2 >= p_max.x) rounding_corners_flags_cell |= ImDrawCornerFlags_BotRight; } + rounding_corners_flags_cell &= rounding_corners_flags; + draw_list->AddRectFilled(ImVec2(x1, y1), ImVec2(x2, y2), col_bg2, rounding_corners_flags_cell ? rounding : 0.0f, rounding_corners_flags_cell); + } + } + } + else + { + draw_list->AddRectFilled(p_min, p_max, col, rounding, rounding_corners_flags); + } +} + +//----------------------------------------------------------------------------- +// [SECTION] Decompression code +//----------------------------------------------------------------------------- +// Compressed with stb_compress() then converted to a C array and encoded as base85. 
+// Use the program in misc/fonts/binary_to_compressed_c.cpp to create the array from a TTF file. +// The purpose of encoding as base85 instead of "0x00,0x01,..." style is only save on _source code_ size. +// Decompression from stb.h (public domain) by Sean Barrett https://github.com/nothings/stb/blob/master/stb.h +//----------------------------------------------------------------------------- + +static unsigned int stb_decompress_length(const unsigned char *input) +{ + return (input[8] << 24) + (input[9] << 16) + (input[10] << 8) + input[11]; +} + +static unsigned char *stb__barrier_out_e, *stb__barrier_out_b; +static const unsigned char *stb__barrier_in_b; +static unsigned char *stb__dout; +static void stb__match(const unsigned char *data, unsigned int length) +{ + // INVERSE of memmove... write each byte before copying the next... + IM_ASSERT(stb__dout + length <= stb__barrier_out_e); + if (stb__dout + length > stb__barrier_out_e) { stb__dout += length; return; } + if (data < stb__barrier_out_b) { stb__dout = stb__barrier_out_e+1; return; } + while (length--) *stb__dout++ = *data++; +} + +static void stb__lit(const unsigned char *data, unsigned int length) +{ + IM_ASSERT(stb__dout + length <= stb__barrier_out_e); + if (stb__dout + length > stb__barrier_out_e) { stb__dout += length; return; } + if (data < stb__barrier_in_b) { stb__dout = stb__barrier_out_e+1; return; } + memcpy(stb__dout, data, length); + stb__dout += length; +} + +#define stb__in2(x) ((i[x] << 8) + i[(x)+1]) +#define stb__in3(x) ((i[x] << 16) + stb__in2((x)+1)) +#define stb__in4(x) ((i[x] << 24) + stb__in3((x)+1)) + +static const unsigned char *stb_decompress_token(const unsigned char *i) +{ + if (*i >= 0x20) { // use fewer if's for cases that expand small + if (*i >= 0x80) stb__match(stb__dout-i[1]-1, i[0] - 0x80 + 1), i += 2; + else if (*i >= 0x40) stb__match(stb__dout-(stb__in2(0) - 0x4000 + 1), i[2]+1), i += 3; + else /* *i >= 0x20 */ stb__lit(i+1, i[0] - 0x20 + 1), i += 1 + (i[0] - 0x20 + 
1); + } else { // more ifs for cases that expand large, since overhead is amortized + if (*i >= 0x18) stb__match(stb__dout-(stb__in3(0) - 0x180000 + 1), i[3]+1), i += 4; + else if (*i >= 0x10) stb__match(stb__dout-(stb__in3(0) - 0x100000 + 1), stb__in2(3)+1), i += 5; + else if (*i >= 0x08) stb__lit(i+2, stb__in2(0) - 0x0800 + 1), i += 2 + (stb__in2(0) - 0x0800 + 1); + else if (*i == 0x07) stb__lit(i+3, stb__in2(1) + 1), i += 3 + (stb__in2(1) + 1); + else if (*i == 0x06) stb__match(stb__dout-(stb__in3(1)+1), i[4]+1), i += 5; + else if (*i == 0x04) stb__match(stb__dout-(stb__in3(1)+1), stb__in2(4)+1), i += 6; + } + return i; +} + +static unsigned int stb_adler32(unsigned int adler32, unsigned char *buffer, unsigned int buflen) +{ + const unsigned long ADLER_MOD = 65521; + unsigned long s1 = adler32 & 0xffff, s2 = adler32 >> 16; + unsigned long blocklen = buflen % 5552; + + unsigned long i; + while (buflen) { + for (i=0; i + 7 < blocklen; i += 8) { + s1 += buffer[0], s2 += s1; + s1 += buffer[1], s2 += s1; + s1 += buffer[2], s2 += s1; + s1 += buffer[3], s2 += s1; + s1 += buffer[4], s2 += s1; + s1 += buffer[5], s2 += s1; + s1 += buffer[6], s2 += s1; + s1 += buffer[7], s2 += s1; + + buffer += 8; + } + + for (; i < blocklen; ++i) + s1 += *buffer++, s2 += s1; + + s1 %= ADLER_MOD, s2 %= ADLER_MOD; + buflen -= blocklen; + blocklen = 5552; + } + return (unsigned int)(s2 << 16) + (unsigned int)s1; +} + +static unsigned int stb_decompress(unsigned char *output, const unsigned char *i, unsigned int /*length*/) +{ + if (stb__in4(0) != 0x57bC0000) return 0; + if (stb__in4(4) != 0) return 0; // error! 
stream is > 4GB + const unsigned int olen = stb_decompress_length(i); + stb__barrier_in_b = i; + stb__barrier_out_e = output + olen; + stb__barrier_out_b = output; + i += 16; + + stb__dout = output; + for (;;) { + const unsigned char *old_i = i; + i = stb_decompress_token(i); + if (i == old_i) { + if (*i == 0x05 && i[1] == 0xfa) { + IM_ASSERT(stb__dout == output + olen); + if (stb__dout != output + olen) return 0; + if (stb_adler32(1, output, olen) != (unsigned int) stb__in4(2)) + return 0; + return olen; + } else { + IM_ASSERT(0); /* NOTREACHED */ + return 0; + } + } + IM_ASSERT(stb__dout <= output + olen); + if (stb__dout > output + olen) + return 0; + } +} + +//----------------------------------------------------------------------------- +// [SECTION] Default font data (ProggyClean.ttf) +//----------------------------------------------------------------------------- +// ProggyClean.ttf +// Copyright (c) 2004, 2005 Tristan Grimmer +// MIT license (see License.txt in http://www.upperbounds.net/download/ProggyClean.ttf.zip) +// Download and more information at http://upperbounds.net +//----------------------------------------------------------------------------- +// File: 'ProggyClean.ttf' (41208 bytes) +// Exported using misc/fonts/binary_to_compressed_c.cpp (with compression + base85 string encoding). +// The purpose of encoding as base85 instead of "0x00,0x01,..." style is only save on _source code_ size. 
+//----------------------------------------------------------------------------- +static const char proggy_clean_ttf_compressed_data_base85[11980 + 1] = + "7])#######hV0qs'/###[),##/l:$#Q6>##5[n42>c-TH`->>#/e>11NNV=Bv(*:.F?uu#(gRU.o0XGH`$vhLG1hxt9?W`#,5LsCp#-i>.r$<$6pD>Lb';9Crc6tgXmKVeU2cD4Eo3R/" + "2*>]b(MC;$jPfY.;h^`IWM9Qo#t'X#(v#Y9w0#1D$CIf;W'#pWUPXOuxXuU(H9M(1=Ke$$'5F%)]0^#0X@U.a$FBjVQTSDgEKnIS7EM9>ZY9w0#L;>>#Mx&4Mvt//L[MkA#W@lK.N'[0#7RL_&#w+F%HtG9M#XL`N&.,GM4Pg;--VsM.M0rJfLH2eTM`*oJMHRC`N" + "kfimM2J,W-jXS:)r0wK#@Fge$U>`w'N7G#$#fB#$E^$#:9:hk+eOe--6x)F7*E%?76%^GMHePW-Z5l'&GiF#$956:rS?dA#fiK:)Yr+`�j@'DbG&#^$PG.Ll+DNa&VZ>1i%h1S9u5o@YaaW$e+bROPOpxTO7Stwi1::iB1q)C_=dV26J;2,]7op$]uQr@_V7$q^%lQwtuHY]=DX,n3L#0PHDO4f9>dC@O>HBuKPpP*E,N+b3L#lpR/MrTEH.IAQk.a>D[.e;mc." + "x]Ip.PH^'/aqUO/$1WxLoW0[iLAw=4h(9.`G" + "CRUxHPeR`5Mjol(dUWxZa(>STrPkrJiWx`5U7F#.g*jrohGg`cg:lSTvEY/EV_7H4Q9[Z%cnv;JQYZ5q.l7Zeas:HOIZOB?Ggv:[7MI2k).'2($5FNP&EQ(,)" + "U]W]+fh18.vsai00);D3@4ku5P?DP8aJt+;qUM]=+b'8@;mViBKx0DE[-auGl8:PJ&Dj+M6OC]O^((##]`0i)drT;-7X`=-H3[igUnPG-NZlo.#k@h#=Ork$m>a>$-?Tm$UV(?#P6YY#" + "'/###xe7q.73rI3*pP/$1>s9)W,JrM7SN]'/4C#v$U`0#V.[0>xQsH$fEmPMgY2u7Kh(G%siIfLSoS+MK2eTM$=5,M8p`A.;_R%#u[K#$x4AG8.kK/HSB==-'Ie/QTtG?-.*^N-4B/ZM" + "_3YlQC7(p7q)&](`6_c)$/*JL(L-^(]$wIM`dPtOdGA,U3:w2M-0+WomX2u7lqM2iEumMTcsF?-aT=Z-97UEnXglEn1K-bnEO`gu" + "Ft(c%=;Am_Qs@jLooI&NX;]0#j4#F14;gl8-GQpgwhrq8'=l_f-b49'UOqkLu7-##oDY2L(te+Mch&gLYtJ,MEtJfLh'x'M=$CS-ZZ%P]8bZ>#S?YY#%Q&q'3^Fw&?D)UDNrocM3A76/" + "/oL?#h7gl85[qW/NDOk%16ij;+:1a'iNIdb-ou8.P*w,v5#EI$TWS>Pot-R*H'-SEpA:g)f+O$%%`kA#G=8RMmG1&O`>to8bC]T&$,n.LoO>29sp3dt-52U%VM#q7'DHpg+#Z9%H[Ket`e;)f#Km8&+DC$I46>#Kr]]u-[=99tts1.qb#q72g1WJO81q+eN'03'eM>&1XxY-caEnO" + "j%2n8)),?ILR5^.Ibn<-X-Mq7[a82Lq:F&#ce+S9wsCK*x`569E8ew'He]h:sI[2LM$[guka3ZRd6:t%IG:;$%YiJ:Nq=?eAw;/:nnDq0(CYcMpG)qLN4$##&J-XTt,%OVU4)S1+R-#dg0/Nn?Ku1^0f$B*P:Rowwm-`0PKjYDDM'3]d39VZHEl4,.j']Pk-M.h^&:0FACm$maq-&sgw0t7/6(^xtk%" + 
"LuH88Fj-ekm>GA#_>568x6(OFRl-IZp`&b,_P'$MhLbxfc$mj`,O;&%W2m`Zh:/)Uetw:aJ%]K9h:TcF]u_-Sj9,VK3M.*'&0D[Ca]J9gp8,kAW]" + "%(?A%R$f<->Zts'^kn=-^@c4%-pY6qI%J%1IGxfLU9CP8cbPlXv);C=b),<2mOvP8up,UVf3839acAWAW-W?#ao/^#%KYo8fRULNd2.>%m]UK:n%r$'sw]J;5pAoO_#2mO3n,'=H5(et" + "Hg*`+RLgv>=4U8guD$I%D:W>-r5V*%j*W:Kvej.Lp$'?;++O'>()jLR-^u68PHm8ZFWe+ej8h:9r6L*0//c&iH&R8pRbA#Kjm%upV1g:" + "a_#Ur7FuA#(tRh#.Y5K+@?3<-8m0$PEn;J:rh6?I6uG<-`wMU'ircp0LaE_OtlMb&1#6T.#FDKu#1Lw%u%+GM+X'e?YLfjM[VO0MbuFp7;>Q&#WIo)0@F%q7c#4XAXN-U&VBpqB>0ie&jhZ[?iLR@@_AvA-iQC(=ksRZRVp7`.=+NpBC%rh&3]R:8XDmE5^V8O(x<-+k?'(^](H.aREZSi,#1:[IXaZFOm<-ui#qUq2$##Ri;u75OK#(RtaW-K-F`S+cF]uN`-KMQ%rP/Xri.LRcB##=YL3BgM/3M" + "D?@f&1'BW-)Ju#bmmWCMkk&#TR`C,5d>g)F;t,4:@_l8G/5h4vUd%&%950:VXD'QdWoY-F$BtUwmfe$YqL'8(PWX(" + "P?^@Po3$##`MSs?DWBZ/S>+4%>fX,VWv/w'KD`LP5IbH;rTV>n3cEK8U#bX]l-/V+^lj3;vlMb&[5YQ8#pekX9JP3XUC72L,,?+Ni&co7ApnO*5NK,((W-i:$,kp'UDAO(G0Sq7MVjJs" + "bIu)'Z,*[>br5fX^:FPAWr-m2KgLQ_nN6'8uTGT5g)uLv:873UpTLgH+#FgpH'_o1780Ph8KmxQJ8#H72L4@768@Tm&Q" + "h4CB/5OvmA&,Q&QbUoi$a_%3M01H)4x7I^&KQVgtFnV+;[Pc>[m4k//,]1?#`VY[Jr*3&&slRfLiVZJ:]?=K3Sw=[$=uRB?3xk48@aege0jT6'N#(q%.O=?2S]u*(m<-" + "V8J'(1)G][68hW$5'q[GC&5j`TE?m'esFGNRM)j,ffZ?-qx8;->g4t*:CIP/[Qap7/9'#(1sao7w-.qNUdkJ)tCF&#B^;xGvn2r9FEPFFFcL@.iFNkTve$m%#QvQS8U@)2Z+3K:AKM5i" + "sZ88+dKQ)W6>J%CL`.d*(B`-n8D9oK-XV1q['-5k'cAZ69e;D_?$ZPP&s^+7])$*$#@QYi9,5P r+$%CE=68>K8r0=dSC%%(@p7" + ".m7jilQ02'0-VWAgTlGW'b)Tq7VT9q^*^$$.:&N@@" + "$&)WHtPm*5_rO0&e%K&#-30j(E4#'Zb.o/(Tpm$>K'f@[PvFl,hfINTNU6u'0pao7%XUp9]5.>%h`8_=VYbxuel.NTSsJfLacFu3B'lQSu/m6-Oqem8T+oE--$0a/k]uj9EwsG>%veR*" + "hv^BFpQj:K'#SJ,sB-'#](j.Lg92rTw-*n%@/;39rrJF,l#qV%OrtBeC6/,;qB3ebNW[?,Hqj2L.1NP&GjUR=1D8QaS3Up&@*9wP?+lo7b?@%'k4`p0Z$22%K3+iCZj?XJN4Nm&+YF]u" + "@-W$U%VEQ/,,>>#)D#%8cY#YZ?=,`Wdxu/ae&#" + "w6)R89tI#6@s'(6Bf7a&?S=^ZI_kS&ai`&=tE72L_D,;^R)7[$so8lKN%5/$(vdfq7+ebA#" + 
"u1p]ovUKW&Y%q]'>$1@-[xfn$7ZTp7mM,G,Ko7a&Gu%G[RMxJs[0MM%wci.LFDK)(%:_i2B5CsR8&9Z&#=mPEnm0f`<&c)QL5uJ#%u%lJj+D-r;BoFDoS97h5g)E#o:&S4weDF,9^Hoe`h*L+_a*NrLW-1pG_&2UdB8" + "6e%B/:=>)N4xeW.*wft-;$'58-ESqr#U`'6AQ]m&6/`Z>#S?YY#Vc;r7U2&326d=w&H####?TZ`*4?&.MK?LP8Vxg>$[QXc%QJv92.(Db*B)gb*BM9dM*hJMAo*c&#" + "b0v=Pjer]$gG&JXDf->'StvU7505l9$AFvgYRI^&<^b68?j#q9QX4SM'RO#&sL1IM.rJfLUAj221]d##DW=m83u5;'bYx,*Sl0hL(W;;$doB&O/TQ:(Z^xBdLjLV#*8U_72Lh+2Q8Cj0i:6hp&$C/:p(HK>T8Y[gHQ4`4)'$Ab(Nof%V'8hL&#SfD07&6D@M.*J:;$-rv29'M]8qMv-tLp,'886iaC=Hb*YJoKJ,(j%K=H`K.v9HggqBIiZu'QvBT.#=)0ukruV&.)3=(^1`o*Pj4<-#MJ+gLq9-##@HuZPN0]u:h7.T..G:;$/Usj(T7`Q8tT72LnYl<-qx8;-HV7Q-&Xdx%1a,hC=0u+HlsV>nuIQL-5" + "_>@kXQtMacfD.m-VAb8;IReM3$wf0''hra*so568'Ip&vRs849'MRYSp%:t:h5qSgwpEr$B>Q,;s(C#$)`svQuF$##-D,##,g68@2[T;.XSdN9Qe)rpt._K-#5wF)sP'##p#C0c%-Gb%" + "hd+<-j'Ai*x&&HMkT]C'OSl##5RG[JXaHN;d'uA#x._U;.`PU@(Z3dt4r152@:v,'R.Sj'w#0<-;kPI)FfJ&#AYJ&#//)>-k=m=*XnK$>=)72L]0I%>.G690a:$##<,);?;72#?x9+d;" + "^V'9;jY@;)br#q^YQpx:X#Te$Z^'=-=bGhLf:D6&bNwZ9-ZD#n^9HhLMr5G;']d&6'wYmTFmLq9wI>P(9mI[>kC-ekLC/R&CH+s'B;K-M6$EB%is00:" + "+A4[7xks.LrNk0&E)wILYF@2L'0Nb$+pv<(2.768/FrY&h$^3i&@+G%JT'<-,v`3;_)I9M^AE]CN?Cl2AZg+%4iTpT3$U4O]GKx'm9)b@p7YsvK3w^YR-" + "CdQ*:Ir<($u&)#(&?L9Rg3H)4fiEp^iI9O8KnTj,]H?D*r7'M;PwZ9K0E^k&-cpI;.p/6_vwoFMV<->#%Xi.LxVnrU(4&8/P+:hLSKj$#U%]49t'I:rgMi'FL@a:0Y-uA[39',(vbma*" + "hU%<-SRF`Tt:542R_VV$p@[p8DV[A,?1839FWdFTi1O*H&#(AL8[_P%.M>v^-))qOT*F5Cq0`Ye%+$B6i:7@0IXSsDiWP,##P`%/L-" + "S(qw%sf/@%#B6;/U7K]uZbi^Oc^2n%t<)'mEVE''n`WnJra$^TKvX5B>;_aSEK',(hwa0:i4G?.Bci.(X[?b*($,=-n<.Q%`(X=?+@Am*Js0&=3bh8K]mL69=Lb,OcZV/);TTm8VI;?%OtJ<(b4mq7M6:u?KRdFl*:xP?Yb.5)%w_I?7uk5JC+FS(m#i'k.'a0i)9<7b'fs'59hq$*5Uhv##pi^8+hIEBF`nvo`;'l0.^S1<-wUK2/Coh58KKhLj" + "M=SO*rfO`+qC`W-On.=AJ56>>i2@2LH6A:&5q`?9I3@@'04&p2/LVa*T-4<-i3;M9UvZd+N7>b*eIwg:CC)c<>nO&#$(>.Z-I&J(Q0Hd5Q%7Co-b`-cP)hI;*_F]u`Rb[.j8_Q/<&>uu+VsH$sM9TA%?)(vmJ80),P7E>)tjD%2L=-t#fK[%`v=Q8WlA2);Sa" + ">gXm8YB`1d@K#n]76-a$U,mF%Ul:#/'xoFM9QX-$.QN'>" + 
"[%$Z$uF6pA6Ki2O5:8w*vP1<-1`[G,)-m#>0`P&#eb#.3i)rtB61(o'$?X3B2Qft^ae_5tKL9MUe9b*sLEQ95C&`=G?@Mj=wh*'3E>=-<)Gt*Iw)'QG:`@I" + "wOf7&]1i'S01B+Ev/Nac#9S;=;YQpg_6U`*kVY39xK,[/6Aj7:'1Bm-_1EYfa1+o&o4hp7KN_Q(OlIo@S%;jVdn0'1h19w,WQhLI)3S#f$2(eb,jr*b;3Vw]*7NH%$c4Vs,eD9>XW8?N]o+(*pgC%/72LV-uW%iewS8W6m2rtCpo'RS1R84=@paTKt)>=%&1[)*vp'u+x,VrwN;&]kuO9JDbg=pO$J*.jVe;u'm0dr9l,<*wMK*Oe=g8lV_KEBFkO'oU]^=[-792#ok,)" + "i]lR8qQ2oA8wcRCZ^7w/Njh;?.stX?Q1>S1q4Bn$)K1<-rGdO'$Wr.Lc.CG)$/*JL4tNR/,SVO3,aUw'DJN:)Ss;wGn9A32ijw%FL+Z0Fn.U9;reSq)bmI32U==5ALuG&#Vf1398/pVo" + "1*c-(aY168o<`JsSbk-,1N;$>0:OUas(3:8Z972LSfF8eb=c-;>SPw7.6hn3m`9^Xkn(r.qS[0;T%&Qc=+STRxX'q1BNk3&*eu2;&8q$&x>Q#Q7^Tf+6<(d%ZVmj2bDi%.3L2n+4W'$P" + "iDDG)g,r%+?,$@?uou5tSe2aN_AQU*'IAO" + "URQ##V^Fv-XFbGM7Fl(N<3DhLGF%q.1rC$#:T__&Pi68%0xi_&[qFJ(77j_&JWoF.V735&T,[R*:xFR*K5>>#`bW-?4Ne_&6Ne_&6Ne_&n`kr-#GJcM6X;uM6X;uM(.a..^2TkL%oR(#" + ";u.T%fAr%4tJ8&><1=GHZ_+m9/#H1F^R#SC#*N=BA9(D?v[UiFY>>^8p,KKF.W]L29uLkLlu/+4T" + "w$)F./^n3+rlo+DB;5sIYGNk+i1t-69Jg--0pao7Sm#K)pdHW&;LuDNH@H>#/X-TI(;P>#,Gc>#0Su>#4`1?#8lC?#xL$#B.`$#F:r$#JF.%#NR@%#R_R%#Vke%#Zww%#_-4^Rh%Sflr-k'MS.o?.5/sWel/wpEM0%3'/1)K^f1-d>G21&v(35>V`39V7A4=onx4" + "A1OY5EI0;6Ibgr6M$HS7Q<)58C5w,;WoA*#[%T*#`1g*#d=#+#hI5+#lUG+#pbY+#tnl+#x$),#&1;,#*=M,#.I`,#2Ur,#6b.-#;w[H#iQtA#m^0B#qjBB#uvTB##-hB#'9$C#+E6C#" + "/QHC#3^ZC#7jmC#;v)D#?,)4kMYD4lVu`4m`:&5niUA5@(A5BA1]PBB:xlBCC=2CDLXMCEUtiCf&0g2'tN?PGT4CPGT4CPGT4CPGT4CPGT4CPGT4CPGT4CP" + "GT4CPGT4CPGT4CPGT4CPGT4CPGT4CP-qekC`.9kEg^+F$kwViFJTB&5KTB&5KTB&5KTB&5KTB&5KTB&5KTB&5KTB&5KTB&5KTB&5KTB&5KTB&5KTB&5KTB&5KTB&5o,^<-28ZI'O?;xp" + "O?;xpO?;xpO?;xpO?;xpO?;xpO?;xpO?;xpO?;xpO?;xpO?;xpO?;xpO?;xpO?;xp;7q-#lLYI:xvD=#"; + +static const char* GetDefaultCompressedFontDataTTFBase85() +{ + return proggy_clean_ttf_compressed_data_base85; +} + +#endif // #ifndef IMGUI_DISABLE diff --git a/cpp-projects/3d-engine/imgui/imgui_internal.h b/cpp-projects/3d-engine/imgui/imgui_internal.h new file mode 100644 index 0000000..1c96161 --- /dev/null +++ 
b/cpp-projects/3d-engine/imgui/imgui_internal.h @@ -0,0 +1,2551 @@ +// dear imgui, v1.81 WIP +// (internal structures/api) + +// You may use this file to debug, understand or extend ImGui features but we don't provide any guarantee of forward compatibility! +// Set: +// #define IMGUI_DEFINE_MATH_OPERATORS +// To implement maths operators for ImVec2 (disabled by default to not collide with using IM_VEC2_CLASS_EXTRA along with your own math types+operators) + +/* + +Index of this file: + +// [SECTION] Header mess +// [SECTION] Forward declarations +// [SECTION] Context pointer +// [SECTION] STB libraries includes +// [SECTION] Macros +// [SECTION] Generic helpers +// [SECTION] ImDrawList support +// [SECTION] Widgets support: flags, enums, data structures +// [SECTION] Columns support +// [SECTION] Multi-select support +// [SECTION] Docking support +// [SECTION] Viewport support +// [SECTION] Settings support +// [SECTION] Metrics, Debug +// [SECTION] Generic context hooks +// [SECTION] ImGuiContext (main imgui context) +// [SECTION] ImGuiWindowTempData, ImGuiWindow +// [SECTION] Tab bar, Tab item support +// [SECTION] Table support +// [SECTION] ImGui internal API +// [SECTION] ImFontAtlas internal API +// [SECTION] Test Engine specific hooks (imgui_test_engine) + +*/ + +#pragma once +#ifndef IMGUI_DISABLE + +//----------------------------------------------------------------------------- +// [SECTION] Header mess +//----------------------------------------------------------------------------- + +#ifndef IMGUI_VERSION +#error Must include imgui.h before imgui_internal.h +#endif + +#include // FILE*, sscanf +#include // NULL, malloc, free, qsort, atoi, atof +#include // sqrtf, fabsf, fmodf, powf, floorf, ceilf, cosf, sinf +#include // INT_MIN, INT_MAX + +// Visual Studio warnings +#ifdef _MSC_VER +#pragma warning (push) +#pragma warning (disable: 4251) // class 'xxx' needs to have dll-interface to be used by clients of struct 'xxx' // when IMGUI_API is set 
to__declspec(dllexport) +#endif + +// Clang/GCC warnings with -Weverything +#if defined(__clang__) +#pragma clang diagnostic push +#if __has_warning("-Wunknown-warning-option") +#pragma clang diagnostic ignored "-Wunknown-warning-option" // warning: unknown warning group 'xxx' +#endif +#pragma clang diagnostic ignored "-Wunknown-pragmas" // warning: unknown warning group 'xxx' +#pragma clang diagnostic ignored "-Wunused-function" // for stb_textedit.h +#pragma clang diagnostic ignored "-Wmissing-prototypes" // for stb_textedit.h +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#pragma clang diagnostic ignored "-Wdouble-promotion" +#pragma clang diagnostic ignored "-Wimplicit-int-float-conversion" // warning: implicit conversion from 'xxx' to 'float' may lose precision +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" // warning: unknown option after '#pragma GCC diagnostic' kind +#pragma GCC diagnostic ignored "-Wclass-memaccess" // [__GNUC__ >= 8] warning: 'memset/memcpy' clearing/writing an object of type 'xxxx' with no trivial copy-assignment; use assignment or value-initialization instead +#endif + +// Legacy defines +#ifdef IMGUI_DISABLE_FORMAT_STRING_FUNCTIONS // Renamed in 1.74 +#error Use IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS +#endif +#ifdef IMGUI_DISABLE_MATH_FUNCTIONS // Renamed in 1.74 +#error Use IMGUI_DISABLE_DEFAULT_MATH_FUNCTIONS +#endif + +// Enable stb_truetype by default unless FreeType is enabled. +// You can compile with both by defining both IMGUI_ENABLE_FREETYPE and IMGUI_ENABLE_STB_TRUETYPE together. 
+#ifndef IMGUI_ENABLE_FREETYPE +#define IMGUI_ENABLE_STB_TRUETYPE +#endif + +//----------------------------------------------------------------------------- +// [SECTION] Forward declarations +//----------------------------------------------------------------------------- + +struct ImBitVector; // Store 1-bit per value +struct ImRect; // An axis-aligned rectangle (2 points) +struct ImDrawDataBuilder; // Helper to build a ImDrawData instance +struct ImDrawListSharedData; // Data shared between all ImDrawList instances +struct ImGuiColorMod; // Stacked color modifier, backup of modified data so we can restore it +struct ImGuiContext; // Main Dear ImGui context +struct ImGuiContextHook; // Hook for extensions like ImGuiTestEngine +struct ImGuiDataTypeInfo; // Type information associated to a ImGuiDataType enum +struct ImGuiGroupData; // Stacked storage data for BeginGroup()/EndGroup() +struct ImGuiInputTextState; // Internal state of the currently focused/edited text input box +struct ImGuiLastItemDataBackup; // Backup and restore IsItemHovered() internal data +struct ImGuiMenuColumns; // Simple column measurement, currently used for MenuItem() only +struct ImGuiNavMoveResult; // Result of a gamepad/keyboard directional navigation move query result +struct ImGuiMetricsConfig; // Storage for ShowMetricsWindow() and DebugNodeXXX() functions +struct ImGuiNextWindowData; // Storage for SetNextWindow** functions +struct ImGuiNextItemData; // Storage for SetNextItem** functions +struct ImGuiOldColumnData; // Storage data for a single column for legacy Columns() api +struct ImGuiOldColumns; // Storage data for a columns set for legacy Columns() api +struct ImGuiPopupData; // Storage for current popup stack +struct ImGuiSettingsHandler; // Storage for one type registered in the .ini file +struct ImGuiStackSizes; // Storage of stack sizes for debugging/asserting +struct ImGuiStyleMod; // Stacked style modifier, backup of modified data so we can restore it +struct ImGuiTabBar; 
// Storage for a tab bar +struct ImGuiTabItem; // Storage for a tab item (within a tab bar) +struct ImGuiTable; // Storage for a table +struct ImGuiTableColumn; // Storage for one column of a table +struct ImGuiTableSettings; // Storage for a table .ini settings +struct ImGuiTableColumnsSettings; // Storage for a column .ini settings +struct ImGuiWindow; // Storage for one window +struct ImGuiWindowTempData; // Temporary storage for one window (that's the data which in theory we could ditch at the end of the frame) +struct ImGuiWindowSettings; // Storage for a window .ini settings (we keep one of those even if the actual window wasn't instanced during this session) + +// Use your programming IDE "Go to definition" facility on the names of the center columns to find the actual flags/enum lists. +typedef int ImGuiLayoutType; // -> enum ImGuiLayoutType_ // Enum: Horizontal or vertical +typedef int ImGuiItemFlags; // -> enum ImGuiItemFlags_ // Flags: for PushItemFlag() +typedef int ImGuiItemStatusFlags; // -> enum ImGuiItemStatusFlags_ // Flags: for DC.LastItemStatusFlags +typedef int ImGuiOldColumnFlags; // -> enum ImGuiOldColumnFlags_ // Flags: for BeginColumns() +typedef int ImGuiNavHighlightFlags; // -> enum ImGuiNavHighlightFlags_ // Flags: for RenderNavHighlight() +typedef int ImGuiNavDirSourceFlags; // -> enum ImGuiNavDirSourceFlags_ // Flags: for GetNavInputAmount2d() +typedef int ImGuiNavMoveFlags; // -> enum ImGuiNavMoveFlags_ // Flags: for navigation requests +typedef int ImGuiNextItemDataFlags; // -> enum ImGuiNextItemDataFlags_ // Flags: for SetNextItemXXX() functions +typedef int ImGuiNextWindowDataFlags; // -> enum ImGuiNextWindowDataFlags_// Flags: for SetNextWindowXXX() functions +typedef int ImGuiSeparatorFlags; // -> enum ImGuiSeparatorFlags_ // Flags: for SeparatorEx() +typedef int ImGuiTextFlags; // -> enum ImGuiTextFlags_ // Flags: for TextEx() +typedef int ImGuiTooltipFlags; // -> enum ImGuiTooltipFlags_ // Flags: for BeginTooltipEx() + +typedef 
void (*ImGuiErrorLogCallback)(void* user_data, const char* fmt, ...); + +//----------------------------------------------------------------------------- +// [SECTION] Context pointer +// See implementation of this variable in imgui.cpp for comments and details. +//----------------------------------------------------------------------------- + +#ifndef GImGui +extern IMGUI_API ImGuiContext* GImGui; // Current implicit context pointer +#endif + +//------------------------------------------------------------------------- +// [SECTION] STB libraries includes +//------------------------------------------------------------------------- + +namespace ImStb +{ + +#undef STB_TEXTEDIT_STRING +#undef STB_TEXTEDIT_CHARTYPE +#define STB_TEXTEDIT_STRING ImGuiInputTextState +#define STB_TEXTEDIT_CHARTYPE ImWchar +#define STB_TEXTEDIT_GETWIDTH_NEWLINE (-1.0f) +#define STB_TEXTEDIT_UNDOSTATECOUNT 99 +#define STB_TEXTEDIT_UNDOCHARCOUNT 999 +#include "imstb_textedit.h" + +} // namespace ImStb + +//----------------------------------------------------------------------------- +// [SECTION] Macros +//----------------------------------------------------------------------------- + +// Debug Logging +#ifndef IMGUI_DEBUG_LOG +#define IMGUI_DEBUG_LOG(_FMT,...) printf("[%05d] " _FMT, GImGui->FrameCount, __VA_ARGS__) +#endif + +// Debug Logging for selected systems. Remove the '((void)0) //' to enable. +//#define IMGUI_DEBUG_LOG_POPUP IMGUI_DEBUG_LOG // Enable log +//#define IMGUI_DEBUG_LOG_NAV IMGUI_DEBUG_LOG // Enable log +#define IMGUI_DEBUG_LOG_POPUP(...) ((void)0) // Disable log +#define IMGUI_DEBUG_LOG_NAV(...) 
((void)0) // Disable log + +// Static Asserts +#if (__cplusplus >= 201100) || (defined(_MSVC_LANG) && _MSVC_LANG >= 201100) +#define IM_STATIC_ASSERT(_COND) static_assert(_COND, "") +#else +#define IM_STATIC_ASSERT(_COND) typedef char static_assertion_##__line__[(_COND)?1:-1] +#endif + +// "Paranoid" Debug Asserts are meant to only be enabled during specific debugging/work, otherwise would slow down the code too much. +// We currently don't have many of those so the effect is currently negligible, but onward intent to add more aggressive ones in the code. +//#define IMGUI_DEBUG_PARANOID +#ifdef IMGUI_DEBUG_PARANOID +#define IM_ASSERT_PARANOID(_EXPR) IM_ASSERT(_EXPR) +#else +#define IM_ASSERT_PARANOID(_EXPR) +#endif + +// Error handling +// Down the line in some frameworks/languages we would like to have a way to redirect those to the programmer and recover from more faults. +#ifndef IM_ASSERT_USER_ERROR +#define IM_ASSERT_USER_ERROR(_EXP,_MSG) IM_ASSERT((_EXP) && _MSG) // Recoverable User Error +#endif + +// Misc Macros +#define IM_PI 3.14159265358979323846f +#ifdef _WIN32 +#define IM_NEWLINE "\r\n" // Play it nice with Windows users (Update: since 2018-05, Notepad finally appears to support Unix-style carriage returns!) +#else +#define IM_NEWLINE "\n" +#endif +#define IM_TABSIZE (4) +#define IM_F32_TO_INT8_UNBOUND(_VAL) ((int)((_VAL) * 255.0f + ((_VAL)>=0 ? 0.5f : -0.5f))) // Unsaturated, for display purpose +#define IM_F32_TO_INT8_SAT(_VAL) ((int)(ImSaturate(_VAL) * 255.0f + 0.5f)) // Saturated, always output 0..255 +#define IM_FLOOR(_VAL) ((float)(int)(_VAL)) // ImFloor() is not inlined in MSVC debug builds +#define IM_ROUND(_VAL) ((float)(int)((_VAL) + 0.5f)) // + +// Enforce cdecl calling convention for functions called by the standard library, in case compilation settings changed the default to e.g. 
__vectorcall +#ifdef _MSC_VER +#define IMGUI_CDECL __cdecl +#else +#define IMGUI_CDECL +#endif + +// Debug Tools +// Use 'Metrics->Tools->Item Picker' to break into the call-stack of a specific item. +#ifndef IM_DEBUG_BREAK +#if defined(__clang__) +#define IM_DEBUG_BREAK() __builtin_debugtrap() +#elif defined (_MSC_VER) +#define IM_DEBUG_BREAK() __debugbreak() +#else +#define IM_DEBUG_BREAK() IM_ASSERT(0) // It is expected that you define IM_DEBUG_BREAK() into something that will break nicely in a debugger! +#endif +#endif // #ifndef IM_DEBUG_BREAK + +//----------------------------------------------------------------------------- +// [SECTION] Generic helpers +// Note that the ImXXX helpers functions are lower-level than ImGui functions. +// ImGui functions or the ImGui context are never called/used from other ImXXX functions. +//----------------------------------------------------------------------------- +// - Helpers: Hashing +// - Helpers: Sorting +// - Helpers: Bit manipulation +// - Helpers: String, Formatting +// - Helpers: UTF-8 <> wchar conversions +// - Helpers: ImVec2/ImVec4 operators +// - Helpers: Maths +// - Helpers: Geometry +// - Helper: ImVec1 +// - Helper: ImVec2ih +// - Helper: ImRect +// - Helper: ImBitArray +// - Helper: ImBitVector +// - Helper: ImSpan<>, ImSpanAllocator<> +// - Helper: ImPool<> +// - Helper: ImChunkStream<> +//----------------------------------------------------------------------------- + +// Helpers: Hashing +IMGUI_API ImGuiID ImHashData(const void* data, size_t data_size, ImU32 seed = 0); +IMGUI_API ImGuiID ImHashStr(const char* data, size_t data_size = 0, ImU32 seed = 0); +#ifndef IMGUI_DISABLE_OBSOLETE_FUNCTIONS +static inline ImGuiID ImHash(const void* data, int size, ImU32 seed = 0) { return size ? 
ImHashData(data, (size_t)size, seed) : ImHashStr((const char*)data, 0, seed); } // [moved to ImHashStr/ImHashData in 1.68] +#endif + +// Helpers: Sorting +#define ImQsort qsort + +// Helpers: Color Blending +IMGUI_API ImU32 ImAlphaBlendColors(ImU32 col_a, ImU32 col_b); + +// Helpers: Bit manipulation +static inline bool ImIsPowerOfTwo(int v) { return v != 0 && (v & (v - 1)) == 0; } +static inline bool ImIsPowerOfTwo(ImU64 v) { return v != 0 && (v & (v - 1)) == 0; } +static inline int ImUpperPowerOfTwo(int v) { v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v++; return v; } + +// Helpers: String, Formatting +IMGUI_API int ImStricmp(const char* str1, const char* str2); +IMGUI_API int ImStrnicmp(const char* str1, const char* str2, size_t count); +IMGUI_API void ImStrncpy(char* dst, const char* src, size_t count); +IMGUI_API char* ImStrdup(const char* str); +IMGUI_API char* ImStrdupcpy(char* dst, size_t* p_dst_size, const char* str); +IMGUI_API const char* ImStrchrRange(const char* str_begin, const char* str_end, char c); +IMGUI_API int ImStrlenW(const ImWchar* str); +IMGUI_API const char* ImStreolRange(const char* str, const char* str_end); // End end-of-line +IMGUI_API const ImWchar*ImStrbolW(const ImWchar* buf_mid_line, const ImWchar* buf_begin); // Find beginning-of-line +IMGUI_API const char* ImStristr(const char* haystack, const char* haystack_end, const char* needle, const char* needle_end); +IMGUI_API void ImStrTrimBlanks(char* str); +IMGUI_API const char* ImStrSkipBlank(const char* str); +IMGUI_API int ImFormatString(char* buf, size_t buf_size, const char* fmt, ...) 
IM_FMTARGS(3); +IMGUI_API int ImFormatStringV(char* buf, size_t buf_size, const char* fmt, va_list args) IM_FMTLIST(3); +IMGUI_API const char* ImParseFormatFindStart(const char* format); +IMGUI_API const char* ImParseFormatFindEnd(const char* format); +IMGUI_API const char* ImParseFormatTrimDecorations(const char* format, char* buf, size_t buf_size); +IMGUI_API int ImParseFormatPrecision(const char* format, int default_value); +static inline bool ImCharIsBlankA(char c) { return c == ' ' || c == '\t'; } +static inline bool ImCharIsBlankW(unsigned int c) { return c == ' ' || c == '\t' || c == 0x3000; } + +// Helpers: UTF-8 <> wchar conversions +IMGUI_API int ImTextStrToUtf8(char* buf, int buf_size, const ImWchar* in_text, const ImWchar* in_text_end); // return output UTF-8 bytes count +IMGUI_API int ImTextCharFromUtf8(unsigned int* out_char, const char* in_text, const char* in_text_end); // read one character. return input UTF-8 bytes count +IMGUI_API int ImTextStrFromUtf8(ImWchar* buf, int buf_size, const char* in_text, const char* in_text_end, const char** in_remaining = NULL); // return input UTF-8 bytes count +IMGUI_API int ImTextCountCharsFromUtf8(const char* in_text, const char* in_text_end); // return number of UTF-8 code-points (NOT bytes count) +IMGUI_API int ImTextCountUtf8BytesFromChar(const char* in_text, const char* in_text_end); // return number of bytes to express one char in UTF-8 +IMGUI_API int ImTextCountUtf8BytesFromStr(const ImWchar* in_text, const ImWchar* in_text_end); // return number of bytes to express string in UTF-8 + +// Helpers: ImVec2/ImVec4 operators +// We are keeping those disabled by default so they don't leak in user space, to allow user enabling implicit cast operators between ImVec2 and their own types (using IM_VEC2_CLASS_EXTRA etc.) +// We unfortunately don't have a unary- operator for ImVec2 because this would needs to be defined inside the class itself. 
+#ifdef IMGUI_DEFINE_MATH_OPERATORS +static inline ImVec2 operator*(const ImVec2& lhs, const float rhs) { return ImVec2(lhs.x * rhs, lhs.y * rhs); } +static inline ImVec2 operator/(const ImVec2& lhs, const float rhs) { return ImVec2(lhs.x / rhs, lhs.y / rhs); } +static inline ImVec2 operator+(const ImVec2& lhs, const ImVec2& rhs) { return ImVec2(lhs.x + rhs.x, lhs.y + rhs.y); } +static inline ImVec2 operator-(const ImVec2& lhs, const ImVec2& rhs) { return ImVec2(lhs.x - rhs.x, lhs.y - rhs.y); } +static inline ImVec2 operator*(const ImVec2& lhs, const ImVec2& rhs) { return ImVec2(lhs.x * rhs.x, lhs.y * rhs.y); } +static inline ImVec2 operator/(const ImVec2& lhs, const ImVec2& rhs) { return ImVec2(lhs.x / rhs.x, lhs.y / rhs.y); } +static inline ImVec2& operator*=(ImVec2& lhs, const float rhs) { lhs.x *= rhs; lhs.y *= rhs; return lhs; } +static inline ImVec2& operator/=(ImVec2& lhs, const float rhs) { lhs.x /= rhs; lhs.y /= rhs; return lhs; } +static inline ImVec2& operator+=(ImVec2& lhs, const ImVec2& rhs) { lhs.x += rhs.x; lhs.y += rhs.y; return lhs; } +static inline ImVec2& operator-=(ImVec2& lhs, const ImVec2& rhs) { lhs.x -= rhs.x; lhs.y -= rhs.y; return lhs; } +static inline ImVec2& operator*=(ImVec2& lhs, const ImVec2& rhs) { lhs.x *= rhs.x; lhs.y *= rhs.y; return lhs; } +static inline ImVec2& operator/=(ImVec2& lhs, const ImVec2& rhs) { lhs.x /= rhs.x; lhs.y /= rhs.y; return lhs; } +static inline ImVec4 operator+(const ImVec4& lhs, const ImVec4& rhs) { return ImVec4(lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z, lhs.w + rhs.w); } +static inline ImVec4 operator-(const ImVec4& lhs, const ImVec4& rhs) { return ImVec4(lhs.x - rhs.x, lhs.y - rhs.y, lhs.z - rhs.z, lhs.w - rhs.w); } +static inline ImVec4 operator*(const ImVec4& lhs, const ImVec4& rhs) { return ImVec4(lhs.x * rhs.x, lhs.y * rhs.y, lhs.z * rhs.z, lhs.w * rhs.w); } +#endif + +// Helpers: File System +#ifdef IMGUI_DISABLE_FILE_FUNCTIONS +#define IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS +typedef void* 
ImFileHandle; +static inline ImFileHandle ImFileOpen(const char*, const char*) { return NULL; } +static inline bool ImFileClose(ImFileHandle) { return false; } +static inline ImU64 ImFileGetSize(ImFileHandle) { return (ImU64)-1; } +static inline ImU64 ImFileRead(void*, ImU64, ImU64, ImFileHandle) { return 0; } +static inline ImU64 ImFileWrite(const void*, ImU64, ImU64, ImFileHandle) { return 0; } +#endif +#ifndef IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS +typedef FILE* ImFileHandle; +IMGUI_API ImFileHandle ImFileOpen(const char* filename, const char* mode); +IMGUI_API bool ImFileClose(ImFileHandle file); +IMGUI_API ImU64 ImFileGetSize(ImFileHandle file); +IMGUI_API ImU64 ImFileRead(void* data, ImU64 size, ImU64 count, ImFileHandle file); +IMGUI_API ImU64 ImFileWrite(const void* data, ImU64 size, ImU64 count, ImFileHandle file); +#else +#define IMGUI_DISABLE_TTY_FUNCTIONS // Can't use stdout, fflush if we are not using default file functions +#endif +IMGUI_API void* ImFileLoadToMemory(const char* filename, const char* mode, size_t* out_file_size = NULL, int padding_bytes = 0); + +// Helpers: Maths +// - Wrapper for standard libs functions. (Note that imgui_demo.cpp does _not_ use them to keep the code easy to copy) +#ifndef IMGUI_DISABLE_DEFAULT_MATH_FUNCTIONS +#define ImFabs(X) fabsf(X) +#define ImSqrt(X) sqrtf(X) +#define ImFmod(X, Y) fmodf((X), (Y)) +#define ImCos(X) cosf(X) +#define ImSin(X) sinf(X) +#define ImAcos(X) acosf(X) +#define ImAtan2(Y, X) atan2f((Y), (X)) +#define ImAtof(STR) atof(STR) +#define ImFloorStd(X) floorf(X) // We already uses our own ImFloor() { return (float)(int)v } internally so the standard one wrapper is named differently (it's used by e.g. 
stb_truetype) +#define ImCeil(X) ceilf(X) +static inline float ImPow(float x, float y) { return powf(x, y); } // DragBehaviorT/SliderBehaviorT uses ImPow with either float/double and need the precision +static inline double ImPow(double x, double y) { return pow(x, y); } +static inline float ImLog(float x) { return logf(x); } // DragBehaviorT/SliderBehaviorT uses ImLog with either float/double and need the precision +static inline double ImLog(double x) { return log(x); } +static inline float ImAbs(float x) { return fabsf(x); } +static inline double ImAbs(double x) { return fabs(x); } +static inline float ImSign(float x) { return (x < 0.0f) ? -1.0f : ((x > 0.0f) ? 1.0f : 0.0f); } // Sign operator - returns -1, 0 or 1 based on sign of argument +static inline double ImSign(double x) { return (x < 0.0) ? -1.0 : ((x > 0.0) ? 1.0 : 0.0); } +#endif +// - ImMin/ImMax/ImClamp/ImLerp/ImSwap are used by widgets which support variety of types: signed/unsigned int/long long float/double +// (Exceptionally using templates here but we could also redefine them for those types) +template static inline T ImMin(T lhs, T rhs) { return lhs < rhs ? lhs : rhs; } +template static inline T ImMax(T lhs, T rhs) { return lhs >= rhs ? lhs : rhs; } +template static inline T ImClamp(T v, T mn, T mx) { return (v < mn) ? mn : (v > mx) ? mx : v; } +template static inline T ImLerp(T a, T b, float t) { return (T)(a + (b - a) * t); } +template static inline void ImSwap(T& a, T& b) { T tmp = a; a = b; b = tmp; } +template static inline T ImAddClampOverflow(T a, T b, T mn, T mx) { if (b < 0 && (a < mn - b)) return mn; if (b > 0 && (a > mx - b)) return mx; return a + b; } +template static inline T ImSubClampOverflow(T a, T b, T mn, T mx) { if (b > 0 && (a < mn + b)) return mn; if (b < 0 && (a > mx + b)) return mx; return a - b; } +// - Misc maths helpers +static inline ImVec2 ImMin(const ImVec2& lhs, const ImVec2& rhs) { return ImVec2(lhs.x < rhs.x ? lhs.x : rhs.x, lhs.y < rhs.y ? 
lhs.y : rhs.y); } +static inline ImVec2 ImMax(const ImVec2& lhs, const ImVec2& rhs) { return ImVec2(lhs.x >= rhs.x ? lhs.x : rhs.x, lhs.y >= rhs.y ? lhs.y : rhs.y); } +static inline ImVec2 ImClamp(const ImVec2& v, const ImVec2& mn, ImVec2 mx) { return ImVec2((v.x < mn.x) ? mn.x : (v.x > mx.x) ? mx.x : v.x, (v.y < mn.y) ? mn.y : (v.y > mx.y) ? mx.y : v.y); } +static inline ImVec2 ImLerp(const ImVec2& a, const ImVec2& b, float t) { return ImVec2(a.x + (b.x - a.x) * t, a.y + (b.y - a.y) * t); } +static inline ImVec2 ImLerp(const ImVec2& a, const ImVec2& b, const ImVec2& t) { return ImVec2(a.x + (b.x - a.x) * t.x, a.y + (b.y - a.y) * t.y); } +static inline ImVec4 ImLerp(const ImVec4& a, const ImVec4& b, float t) { return ImVec4(a.x + (b.x - a.x) * t, a.y + (b.y - a.y) * t, a.z + (b.z - a.z) * t, a.w + (b.w - a.w) * t); } +static inline float ImSaturate(float f) { return (f < 0.0f) ? 0.0f : (f > 1.0f) ? 1.0f : f; } +static inline float ImLengthSqr(const ImVec2& lhs) { return (lhs.x * lhs.x) + (lhs.y * lhs.y); } +static inline float ImLengthSqr(const ImVec4& lhs) { return (lhs.x * lhs.x) + (lhs.y * lhs.y) + (lhs.z * lhs.z) + (lhs.w * lhs.w); } +static inline float ImInvLength(const ImVec2& lhs, float fail_value) { float d = (lhs.x * lhs.x) + (lhs.y * lhs.y); if (d > 0.0f) return 1.0f / ImSqrt(d); return fail_value; } +static inline float ImFloor(float f) { return (float)(int)(f); } +static inline ImVec2 ImFloor(const ImVec2& v) { return ImVec2((float)(int)(v.x), (float)(int)(v.y)); } +static inline int ImModPositive(int a, int b) { return (a + b) % b; } +static inline float ImDot(const ImVec2& a, const ImVec2& b) { return a.x * b.x + a.y * b.y; } +static inline ImVec2 ImRotate(const ImVec2& v, float cos_a, float sin_a) { return ImVec2(v.x * cos_a - v.y * sin_a, v.x * sin_a + v.y * cos_a); } +static inline float ImLinearSweep(float current, float target, float speed) { if (current < target) return ImMin(current + speed, target); if (current > target) return ImMax(current 
- speed, target); return current; } +static inline ImVec2 ImMul(const ImVec2& lhs, const ImVec2& rhs) { return ImVec2(lhs.x * rhs.x, lhs.y * rhs.y); } + +// Helpers: Geometry +IMGUI_API ImVec2 ImBezierCubicCalc(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, float t); +IMGUI_API ImVec2 ImBezierCubicClosestPoint(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, const ImVec2& p, int num_segments); // For curves with explicit number of segments +IMGUI_API ImVec2 ImBezierCubicClosestPointCasteljau(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, const ImVec2& p4, const ImVec2& p, float tess_tol);// For auto-tessellated curves you can use tess_tol = style.CurveTessellationTol +IMGUI_API ImVec2 ImBezierQuadraticCalc(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, float t); +IMGUI_API ImVec2 ImLineClosestPoint(const ImVec2& a, const ImVec2& b, const ImVec2& p); +IMGUI_API bool ImTriangleContainsPoint(const ImVec2& a, const ImVec2& b, const ImVec2& c, const ImVec2& p); +IMGUI_API ImVec2 ImTriangleClosestPoint(const ImVec2& a, const ImVec2& b, const ImVec2& c, const ImVec2& p); +IMGUI_API void ImTriangleBarycentricCoords(const ImVec2& a, const ImVec2& b, const ImVec2& c, const ImVec2& p, float& out_u, float& out_v, float& out_w); +inline float ImTriangleArea(const ImVec2& a, const ImVec2& b, const ImVec2& c) { return ImFabs((a.x * (b.y - c.y)) + (b.x * (c.y - a.y)) + (c.x * (a.y - b.y))) * 0.5f; } +IMGUI_API ImGuiDir ImGetDirQuadrantFromDelta(float dx, float dy); + +// Helper: ImVec1 (1D vector) +// (this odd construct is used to facilitate the transition between 1D and 2D, and the maintenance of some branches/patches) +struct ImVec1 +{ + float x; + ImVec1() { x = 0.0f; } + ImVec1(float _x) { x = _x; } +}; + +// Helper: ImVec2ih (2D vector, half-size integer, for long-term packed storage) +struct ImVec2ih +{ + short x, y; + ImVec2ih() { x = y = 0; } + ImVec2ih(short _x, short _y) { x = _x; y = _y; } + explicit 
ImVec2ih(const ImVec2& rhs) { x = (short)rhs.x; y = (short)rhs.y; } +}; + +// Helper: ImRect (2D axis aligned bounding-box) +// NB: we can't rely on ImVec2 math operators being available here! +struct IMGUI_API ImRect +{ + ImVec2 Min; // Upper-left + ImVec2 Max; // Lower-right + + ImRect() : Min(0.0f, 0.0f), Max(0.0f, 0.0f) {} + ImRect(const ImVec2& min, const ImVec2& max) : Min(min), Max(max) {} + ImRect(const ImVec4& v) : Min(v.x, v.y), Max(v.z, v.w) {} + ImRect(float x1, float y1, float x2, float y2) : Min(x1, y1), Max(x2, y2) {} + + ImVec2 GetCenter() const { return ImVec2((Min.x + Max.x) * 0.5f, (Min.y + Max.y) * 0.5f); } + ImVec2 GetSize() const { return ImVec2(Max.x - Min.x, Max.y - Min.y); } + float GetWidth() const { return Max.x - Min.x; } + float GetHeight() const { return Max.y - Min.y; } + ImVec2 GetTL() const { return Min; } // Top-left + ImVec2 GetTR() const { return ImVec2(Max.x, Min.y); } // Top-right + ImVec2 GetBL() const { return ImVec2(Min.x, Max.y); } // Bottom-left + ImVec2 GetBR() const { return Max; } // Bottom-right + bool Contains(const ImVec2& p) const { return p.x >= Min.x && p.y >= Min.y && p.x < Max.x && p.y < Max.y; } + bool Contains(const ImRect& r) const { return r.Min.x >= Min.x && r.Min.y >= Min.y && r.Max.x <= Max.x && r.Max.y <= Max.y; } + bool Overlaps(const ImRect& r) const { return r.Min.y < Max.y && r.Max.y > Min.y && r.Min.x < Max.x && r.Max.x > Min.x; } + void Add(const ImVec2& p) { if (Min.x > p.x) Min.x = p.x; if (Min.y > p.y) Min.y = p.y; if (Max.x < p.x) Max.x = p.x; if (Max.y < p.y) Max.y = p.y; } + void Add(const ImRect& r) { if (Min.x > r.Min.x) Min.x = r.Min.x; if (Min.y > r.Min.y) Min.y = r.Min.y; if (Max.x < r.Max.x) Max.x = r.Max.x; if (Max.y < r.Max.y) Max.y = r.Max.y; } + void Expand(const float amount) { Min.x -= amount; Min.y -= amount; Max.x += amount; Max.y += amount; } + void Expand(const ImVec2& amount) { Min.x -= amount.x; Min.y -= amount.y; Max.x += amount.x; Max.y += amount.y; } + void 
Translate(const ImVec2& d) { Min.x += d.x; Min.y += d.y; Max.x += d.x; Max.y += d.y; } + void TranslateX(float dx) { Min.x += dx; Max.x += dx; } + void TranslateY(float dy) { Min.y += dy; Max.y += dy; } + void ClipWith(const ImRect& r) { Min = ImMax(Min, r.Min); Max = ImMin(Max, r.Max); } // Simple version, may lead to an inverted rectangle, which is fine for Contains/Overlaps test but not for display. + void ClipWithFull(const ImRect& r) { Min = ImClamp(Min, r.Min, r.Max); Max = ImClamp(Max, r.Min, r.Max); } // Full version, ensure both points are fully clipped. + void Floor() { Min.x = IM_FLOOR(Min.x); Min.y = IM_FLOOR(Min.y); Max.x = IM_FLOOR(Max.x); Max.y = IM_FLOOR(Max.y); } + bool IsInverted() const { return Min.x > Max.x || Min.y > Max.y; } + ImVec4 ToVec4() const { return ImVec4(Min.x, Min.y, Max.x, Max.y); } +}; + +// Helper: ImBitArray +inline bool ImBitArrayTestBit(const ImU32* arr, int n) { ImU32 mask = (ImU32)1 << (n & 31); return (arr[n >> 5] & mask) != 0; } +inline void ImBitArrayClearBit(ImU32* arr, int n) { ImU32 mask = (ImU32)1 << (n & 31); arr[n >> 5] &= ~mask; } +inline void ImBitArraySetBit(ImU32* arr, int n) { ImU32 mask = (ImU32)1 << (n & 31); arr[n >> 5] |= mask; } +inline void ImBitArraySetBitRange(ImU32* arr, int n, int n2) // Works on range [n..n2) +{ + n2--; + while (n <= n2) + { + int a_mod = (n & 31); + int b_mod = (n2 > (n | 31) ? 31 : (n2 & 31)) + 1; + ImU32 mask = (ImU32)(((ImU64)1 << b_mod) - 1) & ~(ImU32)(((ImU64)1 << a_mod) - 1); + arr[n >> 5] |= mask; + n = (n + 32) & ~31; + } +} + +// Helper: ImBitArray class (wrapper over ImBitArray functions) +// Store 1-bit per value. NOT CLEARED by constructor. 
+template +struct IMGUI_API ImBitArray +{ + ImU32 Storage[(BITCOUNT + 31) >> 5]; + ImBitArray() { } + void ClearAllBits() { memset(Storage, 0, sizeof(Storage)); } + void SetAllBits() { memset(Storage, 255, sizeof(Storage)); } + bool TestBit(int n) const { IM_ASSERT(n < BITCOUNT); return ImBitArrayTestBit(Storage, n); } + void SetBit(int n) { IM_ASSERT(n < BITCOUNT); ImBitArraySetBit(Storage, n); } + void ClearBit(int n) { IM_ASSERT(n < BITCOUNT); ImBitArrayClearBit(Storage, n); } + void SetBitRange(int n, int n2) { ImBitArraySetBitRange(Storage, n, n2); } // Works on range [n..n2) +}; + +// Helper: ImBitVector +// Store 1-bit per value. +struct IMGUI_API ImBitVector +{ + ImVector Storage; + void Create(int sz) { Storage.resize((sz + 31) >> 5); memset(Storage.Data, 0, (size_t)Storage.Size * sizeof(Storage.Data[0])); } + void Clear() { Storage.clear(); } + bool TestBit(int n) const { IM_ASSERT(n < (Storage.Size << 5)); return ImBitArrayTestBit(Storage.Data, n); } + void SetBit(int n) { IM_ASSERT(n < (Storage.Size << 5)); ImBitArraySetBit(Storage.Data, n); } + void ClearBit(int n) { IM_ASSERT(n < (Storage.Size << 5)); ImBitArrayClearBit(Storage.Data, n); } +}; + +// Helper: ImSpan<> +// Pointing to a span of data we don't own. 
+template +struct ImSpan +{ + T* Data; + T* DataEnd; + + // Constructors, destructor + inline ImSpan() { Data = DataEnd = NULL; } + inline ImSpan(T* data, int size) { Data = data; DataEnd = data + size; } + inline ImSpan(T* data, T* data_end) { Data = data; DataEnd = data_end; } + + inline void set(T* data, int size) { Data = data; DataEnd = data + size; } + inline void set(T* data, T* data_end) { Data = data; DataEnd = data_end; } + inline int size() const { return (int)(ptrdiff_t)(DataEnd - Data); } + inline int size_in_bytes() const { return (int)(ptrdiff_t)(DataEnd - Data) * (int)sizeof(T); } + inline T& operator[](int i) { T* p = Data + i; IM_ASSERT(p >= Data && p < DataEnd); return *p; } + inline const T& operator[](int i) const { const T* p = Data + i; IM_ASSERT(p >= Data && p < DataEnd); return *p; } + + inline T* begin() { return Data; } + inline const T* begin() const { return Data; } + inline T* end() { return DataEnd; } + inline const T* end() const { return DataEnd; } + + // Utilities + inline int index_from_ptr(const T* it) const { IM_ASSERT(it >= Data && it < DataEnd); const ptrdiff_t off = it - Data; return (int)off; } +}; + +// Helper: ImSpanAllocator<> +// Facilitate storing multiple chunks into a single large block (the "arena") +template +struct ImSpanAllocator +{ + char* BasePtr; + int TotalSize; + int CurrSpan; + int Offsets[CHUNKS]; + + ImSpanAllocator() { memset(this, 0, sizeof(*this)); } + inline void ReserveBytes(int n, size_t sz) { IM_ASSERT(n == CurrSpan && n < CHUNKS); IM_UNUSED(n); Offsets[CurrSpan++] = TotalSize; TotalSize += (int)sz; } + inline int GetArenaSizeInBytes() { return TotalSize; } + inline void SetArenaBasePtr(void* base_ptr) { BasePtr = (char*)base_ptr; } + inline void* GetSpanPtrBegin(int n) { IM_ASSERT(n >= 0 && n < CHUNKS && CurrSpan == CHUNKS); return (void*)(BasePtr + Offsets[n]); } + inline void* GetSpanPtrEnd(int n) { IM_ASSERT(n >= 0 && n < CHUNKS && CurrSpan == CHUNKS); return (n + 1 < CHUNKS) ? 
BasePtr + Offsets[n + 1] : (void*)(BasePtr + TotalSize); } + template + inline void GetSpan(int n, ImSpan* span) { span->set((T*)GetSpanPtrBegin(n), (T*)GetSpanPtrEnd(n)); } +}; + +// Helper: ImPool<> +// Basic keyed storage for contiguous instances, slow/amortized insertion, O(1) indexable, O(Log N) queries by ID over a dense/hot buffer, +// Honor constructor/destructor. Add/remove invalidate all pointers. Indexes have the same lifetime as the associated object. +typedef int ImPoolIdx; +template +struct IMGUI_API ImPool +{ + ImVector Buf; // Contiguous data + ImGuiStorage Map; // ID->Index + ImPoolIdx FreeIdx; // Next free idx to use + + ImPool() { FreeIdx = 0; } + ~ImPool() { Clear(); } + T* GetByKey(ImGuiID key) { int idx = Map.GetInt(key, -1); return (idx != -1) ? &Buf[idx] : NULL; } + T* GetByIndex(ImPoolIdx n) { return &Buf[n]; } + ImPoolIdx GetIndex(const T* p) const { IM_ASSERT(p >= Buf.Data && p < Buf.Data + Buf.Size); return (ImPoolIdx)(p - Buf.Data); } + T* GetOrAddByKey(ImGuiID key) { int* p_idx = Map.GetIntRef(key, -1); if (*p_idx != -1) return &Buf[*p_idx]; *p_idx = FreeIdx; return Add(); } + bool Contains(const T* p) const { return (p >= Buf.Data && p < Buf.Data + Buf.Size); } + void Clear() { for (int n = 0; n < Map.Data.Size; n++) { int idx = Map.Data[n].val_i; if (idx != -1) Buf[idx].~T(); } Map.Clear(); Buf.clear(); FreeIdx = 0; } + T* Add() { int idx = FreeIdx; if (idx == Buf.Size) { Buf.resize(Buf.Size + 1); FreeIdx++; } else { FreeIdx = *(int*)&Buf[idx]; } IM_PLACEMENT_NEW(&Buf[idx]) T(); return &Buf[idx]; } + void Remove(ImGuiID key, const T* p) { Remove(key, GetIndex(p)); } + void Remove(ImGuiID key, ImPoolIdx idx) { Buf[idx].~T(); *(int*)&Buf[idx] = FreeIdx; FreeIdx = idx; Map.SetInt(key, -1); } + void Reserve(int capacity) { Buf.reserve(capacity); Map.Data.reserve(capacity); } + int GetSize() const { return Buf.Size; } +}; + +// Helper: ImChunkStream<> +// Build and iterate a contiguous stream of variable-sized structures. 
+// This is used by Settings to store persistent data while reducing allocation count. +// We store the chunk size first, and align the final size on 4 bytes boundaries (this what the '(X + 3) & ~3' statement is for) +// The tedious/zealous amount of casting is to avoid -Wcast-align warnings. +template +struct IMGUI_API ImChunkStream +{ + ImVector Buf; + + void clear() { Buf.clear(); } + bool empty() const { return Buf.Size == 0; } + int size() const { return Buf.Size; } + T* alloc_chunk(size_t sz) { size_t HDR_SZ = 4; sz = ((HDR_SZ + sz) + 3u) & ~3u; int off = Buf.Size; Buf.resize(off + (int)sz); ((int*)(void*)(Buf.Data + off))[0] = (int)sz; return (T*)(void*)(Buf.Data + off + (int)HDR_SZ); } + T* begin() { size_t HDR_SZ = 4; if (!Buf.Data) return NULL; return (T*)(void*)(Buf.Data + HDR_SZ); } + T* next_chunk(T* p) { size_t HDR_SZ = 4; IM_ASSERT(p >= begin() && p < end()); p = (T*)(void*)((char*)(void*)p + chunk_size(p)); if (p == (T*)(void*)((char*)end() + HDR_SZ)) return (T*)0; IM_ASSERT(p < end()); return p; } + int chunk_size(const T* p) { return ((const int*)p)[-1]; } + T* end() { return (T*)(void*)(Buf.Data + Buf.Size); } + int offset_from_ptr(const T* p) { IM_ASSERT(p >= begin() && p < end()); const ptrdiff_t off = (const char*)p - Buf.Data; return (int)off; } + T* ptr_from_offset(int off) { IM_ASSERT(off >= 4 && off < Buf.Size); return (T*)(void*)(Buf.Data + off); } + void swap(ImChunkStream& rhs) { rhs.Buf.swap(Buf); } + +}; + +//----------------------------------------------------------------------------- +// [SECTION] ImDrawList support +//----------------------------------------------------------------------------- + +// ImDrawList: Helper function to calculate a circle's segment count given its radius and a "maximum error" value. +// FIXME: the minimum number of auto-segment may be undesirably high for very small radiuses (e.g. 
1.0f) +#define IM_DRAWLIST_CIRCLE_AUTO_SEGMENT_MIN 12 +#define IM_DRAWLIST_CIRCLE_AUTO_SEGMENT_MAX 512 +#define IM_DRAWLIST_CIRCLE_AUTO_SEGMENT_CALC(_RAD,_MAXERROR) ImClamp((int)((IM_PI * 2.0f) / ImAcos(((_RAD) - (_MAXERROR)) / (_RAD))), IM_DRAWLIST_CIRCLE_AUTO_SEGMENT_MIN, IM_DRAWLIST_CIRCLE_AUTO_SEGMENT_MAX) + +// ImDrawList: You may set this to higher values (e.g. 2 or 3) to increase tessellation of fast rounded corners path. +#ifndef IM_DRAWLIST_ARCFAST_TESSELLATION_MULTIPLIER +#define IM_DRAWLIST_ARCFAST_TESSELLATION_MULTIPLIER 1 +#endif + +// Data shared between all ImDrawList instances +// You may want to create your own instance of this if you want to use ImDrawList completely without ImGui. In that case, watch out for future changes to this structure. +struct IMGUI_API ImDrawListSharedData +{ + ImVec2 TexUvWhitePixel; // UV of white pixel in the atlas + ImFont* Font; // Current/default font (optional, for simplified AddText overload) + float FontSize; // Current/default font size (optional, for simplified AddText overload) + float CurveTessellationTol; // Tessellation tolerance when using PathBezierCurveTo() + float CircleSegmentMaxError; // Number of circle segments to use per pixel of radius for AddCircle() etc + ImVec4 ClipRectFullscreen; // Value for PushClipRectFullscreen() + ImDrawListFlags InitialFlags; // Initial flags at the beginning of the frame (it is possible to alter flags on a per-drawlist basis afterwards) + + // [Internal] Lookup tables + ImVec2 ArcFastVtx[12 * IM_DRAWLIST_ARCFAST_TESSELLATION_MULTIPLIER]; // FIXME: Bake rounded corners fill/borders in atlas + ImU8 CircleSegmentCounts[64]; // Precomputed segment count for given radius before we calculate it dynamically (to avoid calculation overhead) + const ImVec4* TexUvLines; // UV of anti-aliased lines in the atlas + + ImDrawListSharedData(); + void SetCircleSegmentMaxError(float max_error); +}; + +struct ImDrawDataBuilder +{ + ImVector Layers[2]; // Global layers for: regular, tooltip 
+ + void Clear() { for (int n = 0; n < IM_ARRAYSIZE(Layers); n++) Layers[n].resize(0); } + void ClearFreeMemory() { for (int n = 0; n < IM_ARRAYSIZE(Layers); n++) Layers[n].clear(); } + IMGUI_API void FlattenIntoSingleLayer(); +}; + +//----------------------------------------------------------------------------- +// [SECTION] Widgets support: flags, enums, data structures +//----------------------------------------------------------------------------- + +// Transient per-window flags, reset at the beginning of the frame. For child window, inherited from parent on first Begin(). +// This is going to be exposed in imgui.h when stabilized enough. +enum ImGuiItemFlags_ +{ + ImGuiItemFlags_None = 0, + ImGuiItemFlags_NoTabStop = 1 << 0, // false + ImGuiItemFlags_ButtonRepeat = 1 << 1, // false // Button() will return true multiple times based on io.KeyRepeatDelay and io.KeyRepeatRate settings. + ImGuiItemFlags_Disabled = 1 << 2, // false // [BETA] Disable interactions but doesn't affect visuals yet. See github.com/ocornut/imgui/issues/211 + ImGuiItemFlags_NoNav = 1 << 3, // false + ImGuiItemFlags_NoNavDefaultFocus = 1 << 4, // false + ImGuiItemFlags_SelectableDontClosePopup = 1 << 5, // false // MenuItem/Selectable() automatically closes current Popup window + ImGuiItemFlags_MixedValue = 1 << 6, // false // [BETA] Represent a mixed/indeterminate value, generally multi-selection where values differ. Currently only supported by Checkbox() (later should support all sorts of widgets) + ImGuiItemFlags_ReadOnly = 1 << 7, // false // [ALPHA] Allow hovering interactions but underlying value is not changed. 
+ ImGuiItemFlags_Default_ = 0 +}; + +// Storage for LastItem data +enum ImGuiItemStatusFlags_ +{ + ImGuiItemStatusFlags_None = 0, + ImGuiItemStatusFlags_HoveredRect = 1 << 0, + ImGuiItemStatusFlags_HasDisplayRect = 1 << 1, + ImGuiItemStatusFlags_Edited = 1 << 2, // Value exposed by item was edited in the current frame (should match the bool return value of most widgets) + ImGuiItemStatusFlags_ToggledSelection = 1 << 3, // Set when Selectable(), TreeNode() reports toggling a selection. We can't report "Selected" because reporting the change allows us to handle clipping with less issues. + ImGuiItemStatusFlags_ToggledOpen = 1 << 4, // Set when TreeNode() reports toggling their open state. + ImGuiItemStatusFlags_HasDeactivated = 1 << 5, // Set if the widget/group is able to provide data for the ImGuiItemStatusFlags_Deactivated flag. + ImGuiItemStatusFlags_Deactivated = 1 << 6 // Only valid if ImGuiItemStatusFlags_HasDeactivated is set. + +#ifdef IMGUI_ENABLE_TEST_ENGINE + , // [imgui_tests only] + ImGuiItemStatusFlags_Openable = 1 << 10, // + ImGuiItemStatusFlags_Opened = 1 << 11, // + ImGuiItemStatusFlags_Checkable = 1 << 12, // + ImGuiItemStatusFlags_Checked = 1 << 13 // +#endif +}; + +// Extend ImGuiButtonFlags_ +enum ImGuiButtonFlagsPrivate_ +{ + ImGuiButtonFlags_PressedOnClick = 1 << 4, // return true on click (mouse down event) + ImGuiButtonFlags_PressedOnClickRelease = 1 << 5, // [Default] return true on click + release on same item <-- this is what the majority of Button are using + ImGuiButtonFlags_PressedOnClickReleaseAnywhere = 1 << 6, // return true on click + release even if the release event is not done while hovering the item + ImGuiButtonFlags_PressedOnRelease = 1 << 7, // return true on release (default requires click+release) + ImGuiButtonFlags_PressedOnDoubleClick = 1 << 8, // return true on double-click (default requires click+release) + ImGuiButtonFlags_PressedOnDragDropHold = 1 << 9, // return true when held into while we are drag and dropping 
another item (used by e.g. tree nodes, collapsing headers) + ImGuiButtonFlags_Repeat = 1 << 10, // hold to repeat + ImGuiButtonFlags_FlattenChildren = 1 << 11, // allow interactions even if a child window is overlapping + ImGuiButtonFlags_AllowItemOverlap = 1 << 12, // require previous frame HoveredId to either match id or be null before being usable, use along with SetItemAllowOverlap() + ImGuiButtonFlags_DontClosePopups = 1 << 13, // disable automatically closing parent popup on press // [UNUSED] + ImGuiButtonFlags_Disabled = 1 << 14, // disable interactions + ImGuiButtonFlags_AlignTextBaseLine = 1 << 15, // vertically align button to match text baseline - ButtonEx() only // FIXME: Should be removed and handled by SmallButton(), not possible currently because of DC.CursorPosPrevLine + ImGuiButtonFlags_NoKeyModifiers = 1 << 16, // disable mouse interaction if a key modifier is held + ImGuiButtonFlags_NoHoldingActiveId = 1 << 17, // don't set ActiveId while holding the mouse (ImGuiButtonFlags_PressedOnClick only) + ImGuiButtonFlags_NoNavFocus = 1 << 18, // don't override navigation focus when activated + ImGuiButtonFlags_NoHoveredOnFocus = 1 << 19, // don't report as hovered when nav focus is on this item + ImGuiButtonFlags_PressedOnMask_ = ImGuiButtonFlags_PressedOnClick | ImGuiButtonFlags_PressedOnClickRelease | ImGuiButtonFlags_PressedOnClickReleaseAnywhere | ImGuiButtonFlags_PressedOnRelease | ImGuiButtonFlags_PressedOnDoubleClick | ImGuiButtonFlags_PressedOnDragDropHold, + ImGuiButtonFlags_PressedOnDefault_ = ImGuiButtonFlags_PressedOnClickRelease +}; + +// Extend ImGuiSliderFlags_ +enum ImGuiSliderFlagsPrivate_ +{ + ImGuiSliderFlags_Vertical = 1 << 20, // Should this slider be orientated vertically? 
+ ImGuiSliderFlags_ReadOnly = 1 << 21 +}; + +// Extend ImGuiSelectableFlags_ +enum ImGuiSelectableFlagsPrivate_ +{ + // NB: need to be in sync with last value of ImGuiSelectableFlags_ + ImGuiSelectableFlags_NoHoldingActiveID = 1 << 20, + ImGuiSelectableFlags_SelectOnClick = 1 << 21, // Override button behavior to react on Click (default is Click+Release) + ImGuiSelectableFlags_SelectOnRelease = 1 << 22, // Override button behavior to react on Release (default is Click+Release) + ImGuiSelectableFlags_SpanAvailWidth = 1 << 23, // Span all avail width even if we declared less for layout purpose. FIXME: We may be able to remove this (added in 6251d379, 2bcafc86 for menus) + ImGuiSelectableFlags_DrawHoveredWhenHeld = 1 << 24, // Always show active when held, even is not hovered. This concept could probably be renamed/formalized somehow. + ImGuiSelectableFlags_SetNavIdOnHover = 1 << 25, // Set Nav/Focus ID on mouse hover (used by MenuItem) + ImGuiSelectableFlags_NoPadWithHalfSpacing = 1 << 26 // Disable padding each side with ItemSpacing * 0.5f +}; + +// Extend ImGuiTreeNodeFlags_ +enum ImGuiTreeNodeFlagsPrivate_ +{ + ImGuiTreeNodeFlags_ClipLabelForTrailingButton = 1 << 20 +}; + +enum ImGuiSeparatorFlags_ +{ + ImGuiSeparatorFlags_None = 0, + ImGuiSeparatorFlags_Horizontal = 1 << 0, // Axis default to current layout type, so generally Horizontal unless e.g. in a menu bar + ImGuiSeparatorFlags_Vertical = 1 << 1, + ImGuiSeparatorFlags_SpanAllColumns = 1 << 2 +}; + +enum ImGuiTextFlags_ +{ + ImGuiTextFlags_None = 0, + ImGuiTextFlags_NoWidthForLargeClippedText = 1 << 0 +}; + +enum ImGuiTooltipFlags_ +{ + ImGuiTooltipFlags_None = 0, + ImGuiTooltipFlags_OverridePreviousTooltip = 1 << 0 // Override will clear/ignore previously submitted tooltip (defaults to append) +}; + +// FIXME: this is in development, not exposed/functional as a generic feature yet. 
// Horizontal/Vertical enums are fixed to 0/1 so they may be used to index ImVec2
enum ImGuiLayoutType_
{
    ImGuiLayoutType_Horizontal = 0,
    ImGuiLayoutType_Vertical = 1
};

// Log destination selector. Only None is given an explicit value; the others are numbered 1..4 in declaration order.
enum ImGuiLogType
{
    ImGuiLogType_None = 0,
    ImGuiLogType_TTY,
    ImGuiLogType_File,
    ImGuiLogType_Buffer,
    ImGuiLogType_Clipboard
};

// X/Y enums are fixed to 0/1 so they may be used to index ImVec2
// (ImGuiAxis_None is -1 and therefore must not be used as an index.)
enum ImGuiAxis
{
    ImGuiAxis_None = -1,
    ImGuiAxis_X = 0,
    ImGuiAxis_Y = 1
};

// Plot rendering style (implicitly numbered 0 and 1).
enum ImGuiPlotType
{
    ImGuiPlotType_Lines,
    ImGuiPlotType_Histogram
};

// Origin of an input. ImGuiInputSource_COUNT is the number of entries declared before it.
enum ImGuiInputSource
{
    ImGuiInputSource_None = 0,
    ImGuiInputSource_Mouse,
    ImGuiInputSource_Nav,
    ImGuiInputSource_NavKeyboard,   // Only used occasionally for storage, not tested/handled by most code
    ImGuiInputSource_NavGamepad,    // Only used occasionally for storage, not tested/handled by most code
    ImGuiInputSource_COUNT
};

// FIXME-NAV: Clarify/expose various repeat delay/rate
enum ImGuiInputReadMode
{
    ImGuiInputReadMode_Down,
    ImGuiInputReadMode_Pressed,
    ImGuiInputReadMode_Released,
    ImGuiInputReadMode_Repeat,
    ImGuiInputReadMode_RepeatSlow,
    ImGuiInputReadMode_RepeatFast
};

// Bit flags (combinable with |) controlling how the navigation highlight is drawn.
enum ImGuiNavHighlightFlags_
{
    ImGuiNavHighlightFlags_None         = 0,
    ImGuiNavHighlightFlags_TypeDefault  = 1 << 0,
    ImGuiNavHighlightFlags_TypeThin     = 1 << 1,
    ImGuiNavHighlightFlags_AlwaysDraw   = 1 << 2,       // Draw rectangular highlight if (g.NavId == id) _even_ when using the mouse.
    ImGuiNavHighlightFlags_NoRounding   = 1 << 3
};

// Bit flags (combinable with |) selecting which devices provide a navigation direction.
enum ImGuiNavDirSourceFlags_
{
    ImGuiNavDirSourceFlags_None         = 0,
    ImGuiNavDirSourceFlags_Keyboard     = 1 << 0,
    ImGuiNavDirSourceFlags_PadDPad      = 1 << 1,
    ImGuiNavDirSourceFlags_PadLStick    = 1 << 2
};

// Bit flags (combinable with |) tuning a navigation move request.
enum ImGuiNavMoveFlags_
{
    ImGuiNavMoveFlags_None                  = 0,
    ImGuiNavMoveFlags_LoopX                 = 1 << 0,   // On failed request, restart from opposite side
    ImGuiNavMoveFlags_LoopY                 = 1 << 1,
    ImGuiNavMoveFlags_WrapX                 = 1 << 2,   // On failed request, request from opposite side one line down (when NavDir==right) or one line up (when NavDir==left)
    ImGuiNavMoveFlags_WrapY                 = 1 << 3,   // This is not super useful but provided for completeness
    ImGuiNavMoveFlags_AllowCurrentNavId     = 1 << 4,   // Allow scoring and considering the current NavId as a move target candidate. This is used when the move source is offset (e.g. pressing PageDown actually needs to send a Up move request, if we are pressing PageDown from the bottom-most item we need to stay in place)
    ImGuiNavMoveFlags_AlsoScoreVisibleSet   = 1 << 5,   // Store alternate result in NavMoveResultLocalVisibleSet that only comprise elements that are already fully visible.
    ImGuiNavMoveFlags_ScrollToEdge          = 1 << 6
};

// State of a forwarded navigation request (implicitly numbered 0..2).
enum ImGuiNavForward
{
    ImGuiNavForward_None,
    ImGuiNavForward_ForwardQueued,
    ImGuiNavForward_ForwardActive
};

// Navigation layers. ImGuiNavLayer_COUNT is the number of layers (2), usable as an array size.
enum ImGuiNavLayer
{
    ImGuiNavLayer_Main  = 0,    // Main scrolling layer
    ImGuiNavLayer_Menu  = 1,    // Menu layer (access with Alt/ImGuiNavInput_Menu)
    ImGuiNavLayer_COUNT
};

// Placement policy selector for popups (implicitly numbered 0..2).
enum ImGuiPopupPositionPolicy
{
    ImGuiPopupPositionPolicy_Default,
    ImGuiPopupPositionPolicy_ComboBox,
    ImGuiPopupPositionPolicy_Tooltip
};

// Raw 8-byte scratch buffer for holding one value of a data type.
struct ImGuiDataTypeTempStorage
{
    ImU8        Data[8];        // Can fit any data up to ImGuiDataType_COUNT
};

// Type information associated to one ImGuiDataType. Retrieve with DataTypeGetInfo().
+struct ImGuiDataTypeInfo +{ + size_t Size; // Size in bytes + const char* Name; // Short descriptive name for the type, for debugging + const char* PrintFmt; // Default printf format for the type + const char* ScanFmt; // Default scanf format for the type +}; + +// Extend ImGuiDataType_ +enum ImGuiDataTypePrivate_ +{ + ImGuiDataType_String = ImGuiDataType_COUNT + 1, + ImGuiDataType_Pointer, + ImGuiDataType_ID +}; + +// Stacked color modifier, backup of modified data so we can restore it +struct ImGuiColorMod +{ + ImGuiCol Col; + ImVec4 BackupValue; +}; + +// Stacked style modifier, backup of modified data so we can restore it. Data type inferred from the variable. +struct ImGuiStyleMod +{ + ImGuiStyleVar VarIdx; + union { int BackupInt[2]; float BackupFloat[2]; }; + ImGuiStyleMod(ImGuiStyleVar idx, int v) { VarIdx = idx; BackupInt[0] = v; } + ImGuiStyleMod(ImGuiStyleVar idx, float v) { VarIdx = idx; BackupFloat[0] = v; } + ImGuiStyleMod(ImGuiStyleVar idx, ImVec2 v) { VarIdx = idx; BackupFloat[0] = v.x; BackupFloat[1] = v.y; } +}; + +// Stacked storage data for BeginGroup()/EndGroup() +struct ImGuiGroupData +{ + ImGuiID WindowID; + ImVec2 BackupCursorPos; + ImVec2 BackupCursorMaxPos; + ImVec1 BackupIndent; + ImVec1 BackupGroupOffset; + ImVec2 BackupCurrLineSize; + float BackupCurrLineTextBaseOffset; + ImGuiID BackupActiveIdIsAlive; + bool BackupActiveIdPreviousFrameIsAlive; + bool EmitItem; +}; + +// Simple column measurement, currently used for MenuItem() only.. This is very short-sighted/throw-away code and NOT a generic helper. 
+struct IMGUI_API ImGuiMenuColumns +{ + float Spacing; + float Width, NextWidth; + float Pos[3], NextWidths[3]; + + ImGuiMenuColumns() { memset(this, 0, sizeof(*this)); } + void Update(int count, float spacing, bool clear); + float DeclColumns(float w0, float w1, float w2); + float CalcExtraSpace(float avail_w) const; +}; + +// Internal state of the currently focused/edited text input box +// For a given item ID, access with ImGui::GetInputTextState() +struct IMGUI_API ImGuiInputTextState +{ + ImGuiID ID; // widget id owning the text state + int CurLenW, CurLenA; // we need to maintain our buffer length in both UTF-8 and wchar format. UTF-8 length is valid even if TextA is not. + ImVector TextW; // edit buffer, we need to persist but can't guarantee the persistence of the user-provided buffer, so we copy into our own buffer. + ImVector TextA; // temporary UTF8 buffer for callbacks and other operations. this is not updated in every code-path! size=capacity. + ImVector InitialTextA; // backup of end-user buffer at the time of focus (in UTF-8, unaltered) + bool TextAIsValid; // temporary UTF8 buffer is not initially valid before we make the widget active (until then we pull the data from user argument) + int BufCapacityA; // end-user buffer capacity + float ScrollX; // horizontal scrolling/offset + ImStb::STB_TexteditState Stb; // state for stb_textedit.h + float CursorAnim; // timer for cursor blink, reset on every user action so the cursor reappears immediately + bool CursorFollow; // set when we want scrolling to follow the current cursor position (not always!) 
+ bool SelectedAllMouseLock; // after a double-click to select all, we ignore further mouse drags to update selection + bool Edited; // edited this frame + ImGuiInputTextFlags UserFlags; // Temporarily set while we call user's callback + ImGuiInputTextCallback UserCallback; // Same as UserFlags: temporarily set while we call the user callback + void* UserCallbackData; // Same as UserFlags: temporarily set while we call the user callback + + ImGuiInputTextState() { memset(this, 0, sizeof(*this)); } + void ClearText() { CurLenW = CurLenA = 0; TextW[0] = 0; TextA[0] = 0; CursorClamp(); } // Zeroes both lengths, writes a terminator at index 0 of TextW and TextA, then clamps cursor/selection. NOTE(review): indexes element 0 of both buffers, so presumably they must already be non-empty - confirm + void ClearFreeMemory() { TextW.clear(); TextA.clear(); InitialTextA.clear(); } // Calls clear() on all three buffers + int GetUndoAvailCount() const { return Stb.undostate.undo_point; } + int GetRedoAvailCount() const { return STB_TEXTEDIT_UNDOSTATECOUNT - Stb.undostate.redo_point; } + void OnKeyPressed(int key); // Cannot be inline because we call into code in the stb_textedit.h implementation + + // Cursor & Selection + void CursorAnimReset() { CursorAnim = -0.30f; } // After a user-input the cursor stays on for a while without blinking + void CursorClamp() { Stb.cursor = ImMin(Stb.cursor, CurLenW); Stb.select_start = ImMin(Stb.select_start, CurLenW); Stb.select_end = ImMin(Stb.select_end, CurLenW); } // Caps the cursor and both selection ends at CurLenW + bool HasSelection() const { return Stb.select_start != Stb.select_end; } + void ClearSelection() { Stb.select_start = Stb.select_end = Stb.cursor; } // Collapses the selection onto the current cursor position + void SelectAll() { Stb.select_start = 0; Stb.cursor = Stb.select_end = CurLenW; Stb.has_preferred_x = 0; } // Selection spans 0 to CurLenW with the cursor placed at CurLenW +}; + +// Storage for current popup stack +struct ImGuiPopupData +{ + ImGuiID PopupId; // Set on OpenPopup() + ImGuiWindow* Window; // Resolved on BeginPopup() - may stay unresolved if user never calls OpenPopup() + ImGuiWindow* SourceWindow; // Set on OpenPopup(), copy of NavWindow at the time of opening the popup + int OpenFrameCount; // Set on OpenPopup() + ImGuiID OpenParentId; // Set on OpenPopup(), we need this to differentiate multiple menu sets from each other (e.g. 
inside menu bar vs loose menu items) + ImVec2 OpenPopupPos; // Set on OpenPopup(), preferred popup position (typically == OpenMousePos when using mouse) + ImVec2 OpenMousePos; // Set on OpenPopup(), copy of mouse position at the time of opening popup + + ImGuiPopupData() { memset(this, 0, sizeof(*this)); OpenFrameCount = -1; } // Zero every field, then set OpenFrameCount to -1 +}; + +struct ImGuiNavMoveResult +{ + ImGuiWindow* Window; // Best candidate window + ImGuiID ID; // Best candidate ID + ImGuiID FocusScopeId; // Best candidate focus scope ID + float DistBox; // Best candidate box distance to current NavId + float DistCenter; // Best candidate center distance to current NavId + float DistAxial; + ImRect RectRel; // Best candidate bounding box in window relative space + + ImGuiNavMoveResult() { Clear(); } + void Clear() { Window = NULL; ID = FocusScopeId = 0; DistBox = DistCenter = DistAxial = FLT_MAX; RectRel = ImRect(); } // Reset: NULL window, zero IDs, all three distances at FLT_MAX, default-constructed RectRel +}; + +enum ImGuiNextWindowDataFlags_ +{ + ImGuiNextWindowDataFlags_None = 0, + ImGuiNextWindowDataFlags_HasPos = 1 << 0, + ImGuiNextWindowDataFlags_HasSize = 1 << 1, + ImGuiNextWindowDataFlags_HasContentSize = 1 << 2, + ImGuiNextWindowDataFlags_HasCollapsed = 1 << 3, + ImGuiNextWindowDataFlags_HasSizeConstraint = 1 << 4, + ImGuiNextWindowDataFlags_HasFocus = 1 << 5, + ImGuiNextWindowDataFlags_HasBgAlpha = 1 << 6, + ImGuiNextWindowDataFlags_HasScroll = 1 << 7 +}; + +// Storage for SetNextWindow** functions +struct ImGuiNextWindowData +{ + ImGuiNextWindowDataFlags Flags; + ImGuiCond PosCond; + ImGuiCond SizeCond; + ImGuiCond CollapsedCond; + ImVec2 PosVal; + ImVec2 PosPivotVal; + ImVec2 SizeVal; + ImVec2 ContentSizeVal; + ImVec2 ScrollVal; + bool CollapsedVal; + ImRect SizeConstraintRect; + ImGuiSizeCallback SizeCallback; + void* SizeCallbackUserData; + float BgAlphaVal; // Override background alpha + ImVec2 MenuBarOffsetMinVal; // *Always on* This is not exposed publicly, so we don't clear it. 
+ + ImGuiNextWindowData() { memset(this, 0, sizeof(*this)); } + inline void ClearFlags() { Flags = ImGuiNextWindowDataFlags_None; } +}; + +enum ImGuiNextItemDataFlags_ +{ + ImGuiNextItemDataFlags_None = 0, + ImGuiNextItemDataFlags_HasWidth = 1 << 0, + ImGuiNextItemDataFlags_HasOpen = 1 << 1 +}; + +struct ImGuiNextItemData +{ + ImGuiNextItemDataFlags Flags; + float Width; // Set by SetNextItemWidth() + ImGuiID FocusScopeId; // Set by SetNextItemMultiSelectData() (!= 0 signifies value has been set, so it's an alternate version of HasSelectionData, we don't use Flags for this because they are cleared too early. This is mostly used for debugging) + ImGuiCond OpenCond; + bool OpenVal; // Set by SetNextItemOpen() + + ImGuiNextItemData() { memset(this, 0, sizeof(*this)); } + inline void ClearFlags() { Flags = ImGuiNextItemDataFlags_None; } // Also cleared manually by ItemAdd()! +}; + +struct ImGuiShrinkWidthItem +{ + int Index; + float Width; +}; + +struct ImGuiPtrOrIndex +{ + void* Ptr; // Either field can be set, not both. e.g. Dock node tab bars are loose while BeginTabBar() ones are in a pool. + int Index; // Usually index in a main pool. + + ImGuiPtrOrIndex(void* ptr) { Ptr = ptr; Index = -1; } // Pointer form: Index is set to -1 + ImGuiPtrOrIndex(int index) { Ptr = NULL; Index = index; } // Index form: Ptr is set to NULL +}; + +//----------------------------------------------------------------------------- +// [SECTION] Columns support +//----------------------------------------------------------------------------- + +// Flags for internal's BeginColumns(). Prefer using BeginTable() nowadays! 
+enum ImGuiOldColumnFlags_ +{ + ImGuiOldColumnFlags_None = 0, + ImGuiOldColumnFlags_NoBorder = 1 << 0, // Disable column dividers + ImGuiOldColumnFlags_NoResize = 1 << 1, // Disable resizing columns when clicking on the dividers + ImGuiOldColumnFlags_NoPreserveWidths = 1 << 2, // Disable column width preservation when adjusting columns + ImGuiOldColumnFlags_NoForceWithinWindow = 1 << 3, // Disable forcing columns to fit within window + ImGuiOldColumnFlags_GrowParentContentsSize = 1 << 4 // (WIP) Restore pre-1.51 behavior of extending the parent window contents size but _without affecting the columns width at all_. Will eventually remove. + + // Obsolete names (will be removed). Each alias below maps one-to-one onto the ImGuiOldColumnFlags_ value of the same suffix, and all of them are compiled out when IMGUI_DISABLE_OBSOLETE_FUNCTIONS is defined. +#ifndef IMGUI_DISABLE_OBSOLETE_FUNCTIONS + , ImGuiColumnsFlags_None = ImGuiOldColumnFlags_None, + ImGuiColumnsFlags_NoBorder = ImGuiOldColumnFlags_NoBorder, + ImGuiColumnsFlags_NoResize = ImGuiOldColumnFlags_NoResize, + ImGuiColumnsFlags_NoPreserveWidths = ImGuiOldColumnFlags_NoPreserveWidths, + ImGuiColumnsFlags_NoForceWithinWindow = ImGuiOldColumnFlags_NoForceWithinWindow, + ImGuiColumnsFlags_GrowParentContentsSize = ImGuiOldColumnFlags_GrowParentContentsSize +#endif +}; + +struct ImGuiOldColumnData +{ + float OffsetNorm; // Column start offset, normalized 0.0 (far left) -> 1.0 (far right) + float OffsetNormBeforeResize; + ImGuiOldColumnFlags Flags; // Not exposed + ImRect ClipRect; + + ImGuiOldColumnData() { memset(this, 0, sizeof(*this)); } +}; + +struct ImGuiOldColumns +{ + ImGuiID ID; + ImGuiOldColumnFlags Flags; + bool IsFirstFrame; + bool IsBeingResized; + int Current; + int Count; + float OffMinX, OffMaxX; // Offsets from HostWorkRect.Min.x + float LineMinY, LineMaxY; + float HostCursorPosY; // Backup of CursorPos at the time of BeginColumns() + float HostCursorMaxPosX; // Backup of CursorMaxPos at the time of BeginColumns() + ImRect HostInitialClipRect; // Backup of ClipRect at the time of BeginColumns() + ImRect HostBackupClipRect; // Backup of ClipRect during 
PushColumnsBackground()/PopColumnsBackground() + ImRect HostBackupParentWorkRect;// Backup of WorkRect at the time of BeginColumns() + ImVector Columns; + ImDrawListSplitter Splitter; + + ImGuiOldColumns() { memset(this, 0, sizeof(*this)); } +}; + +//----------------------------------------------------------------------------- +// [SECTION] Multi-select support +//----------------------------------------------------------------------------- + +#ifdef IMGUI_HAS_MULTI_SELECT +// +#endif // #ifdef IMGUI_HAS_MULTI_SELECT + +//----------------------------------------------------------------------------- +// [SECTION] Docking support +//----------------------------------------------------------------------------- + +#ifdef IMGUI_HAS_DOCK +// +#endif // #ifdef IMGUI_HAS_DOCK + +//----------------------------------------------------------------------------- +// [SECTION] Viewport support +//----------------------------------------------------------------------------- + +#ifdef IMGUI_HAS_VIEWPORT +// +#endif // #ifdef IMGUI_HAS_VIEWPORT + +//----------------------------------------------------------------------------- +// [SECTION] Settings support +//----------------------------------------------------------------------------- + +// Windows data saved in imgui.ini file +// Because we never destroy or rename ImGuiWindowSettings, we can store the names in a separate buffer easily. +// (this is designed to be stored in an ImChunkStream buffer, with the variable-length Name following our structure) +struct ImGuiWindowSettings +{ + ImGuiID ID; + ImVec2ih Pos; + ImVec2ih Size; + bool Collapsed; + bool WantApply; // Set when loaded from .ini data (to enable merging/loading .ini data into an already running context) + + ImGuiWindowSettings() { memset(this, 0, sizeof(*this)); } + char* GetName() { return (char*)(this + 1); } // Returns the address just past this struct, which is where the variable-length name is expected to be stored +}; + +struct ImGuiSettingsHandler +{ + const char* TypeName; // Short description stored in .ini file. 
Disallowed characters: '[' ']' + ImGuiID TypeHash; // == ImHashStr(TypeName) + void (*ClearAllFn)(ImGuiContext* ctx, ImGuiSettingsHandler* handler); // Clear all settings data + void (*ReadInitFn)(ImGuiContext* ctx, ImGuiSettingsHandler* handler); // Read: Called before reading (in registration order) + void* (*ReadOpenFn)(ImGuiContext* ctx, ImGuiSettingsHandler* handler, const char* name); // Read: Called when entering into a new ini entry e.g. "[Window][Name]" + void (*ReadLineFn)(ImGuiContext* ctx, ImGuiSettingsHandler* handler, void* entry, const char* line); // Read: Called for every line of text within an ini entry + void (*ApplyAllFn)(ImGuiContext* ctx, ImGuiSettingsHandler* handler); // Read: Called after reading (in registration order) + void (*WriteAllFn)(ImGuiContext* ctx, ImGuiSettingsHandler* handler, ImGuiTextBuffer* out_buf); // Write: Output all entries into 'out_buf' + void* UserData; + + ImGuiSettingsHandler() { memset(this, 0, sizeof(*this)); } +}; + +//----------------------------------------------------------------------------- +// [SECTION] Metrics, Debug +//----------------------------------------------------------------------------- + +struct ImGuiMetricsConfig +{ + bool ShowWindowsRects; + bool ShowWindowsBeginOrder; + bool ShowTablesRects; + bool ShowDrawCmdMesh; + bool ShowDrawCmdBoundingBoxes; + int ShowWindowsRectsType; + int ShowTablesRectsType; + + ImGuiMetricsConfig() // Defaults: ShowDrawCmdMesh and ShowDrawCmdBoundingBoxes start enabled, the other toggles start disabled, and both RectsType fields start at -1 + { + ShowWindowsRects = false; + ShowWindowsBeginOrder = false; + ShowTablesRects = false; + ShowDrawCmdMesh = true; + ShowDrawCmdBoundingBoxes = true; + ShowWindowsRectsType = -1; + ShowTablesRectsType = -1; + } +}; + +struct IMGUI_API ImGuiStackSizes +{ + short SizeOfIDStack; + short SizeOfColorStack; + short SizeOfStyleVarStack; + short SizeOfFontStack; + short SizeOfFocusScopeStack; + short SizeOfGroupStack; + short SizeOfBeginPopupStack; + + ImGuiStackSizes() { memset(this, 0, sizeof(*this)); } // Every recorded stack size starts at zero + void SetToCurrentState(); + void CompareWithCurrentState(); +}; + 
+//----------------------------------------------------------------------------- +// [SECTION] Generic context hooks +//----------------------------------------------------------------------------- + +typedef void (*ImGuiContextHookCallback)(ImGuiContext* ctx, ImGuiContextHook* hook); +enum ImGuiContextHookType { ImGuiContextHookType_NewFramePre, ImGuiContextHookType_NewFramePost, ImGuiContextHookType_EndFramePre, ImGuiContextHookType_EndFramePost, ImGuiContextHookType_RenderPre, ImGuiContextHookType_RenderPost, ImGuiContextHookType_Shutdown, ImGuiContextHookType_PendingRemoval_ }; + +struct ImGuiContextHook +{ + ImGuiID HookId; // A unique ID assigned by AddContextHook() + ImGuiContextHookType Type; + ImGuiID Owner; + ImGuiContextHookCallback Callback; + void* UserData; + + ImGuiContextHook() { memset(this, 0, sizeof(*this)); } +}; + +//----------------------------------------------------------------------------- +// [SECTION] ImGuiContext (main imgui context) +//----------------------------------------------------------------------------- + +struct ImGuiContext +{ + bool Initialized; + bool FontAtlasOwnedByContext; // IO.Fonts-> is owned by the ImGuiContext and will be destructed along with it. + ImGuiIO IO; + ImGuiStyle Style; + ImFont* Font; // (Shortcut) == FontStack.empty() ? IO.Font : FontStack.back() + float FontSize; // (Shortcut) == FontBaseSize * g.CurrentWindow->FontWindowScale == window->FontSize(). Text height for current window. + float FontBaseSize; // (Shortcut) == IO.FontGlobalScale * Font->Scale * Font->FontSize. Base text height. 
+ ImDrawListSharedData DrawListSharedData; + double Time; + int FrameCount; + int FrameCountEnded; + int FrameCountRendered; + bool WithinFrameScope; // Set by NewFrame(), cleared by EndFrame() + bool WithinFrameScopeWithImplicitWindow; // Set by NewFrame(), cleared by EndFrame() when the implicit debug window has been pushed + bool WithinEndChild; // Set within EndChild() + bool GcCompactAll; // Request full GC + bool TestEngineHookItems; // Will call test engine hooks: ImGuiTestEngineHook_ItemAdd(), ImGuiTestEngineHook_ItemInfo(), ImGuiTestEngineHook_Log() + ImGuiID TestEngineHookIdInfo; // Will call test engine hooks: ImGuiTestEngineHook_IdInfo() from GetID() + void* TestEngine; // Test engine user data + + // Windows state + ImVector Windows; // Windows, sorted in display order, back to front + ImVector WindowsFocusOrder; // Windows, sorted in focus order, back to front. (FIXME: We could only store root windows here! Need to sort out the Docking equivalent which is RootWindowDockStop and is unfortunately a little more dynamic) + ImVector WindowsTempSortBuffer; // Temporary buffer used in EndFrame() to reorder windows so parents are kept before their child + ImVector CurrentWindowStack; + ImGuiStorage WindowsById; // Map window's ImGuiID to ImGuiWindow* + int WindowsActiveCount; // Number of unique windows submitted by frame + ImGuiWindow* CurrentWindow; // Window being drawn into + ImGuiWindow* HoveredWindow; // Window the mouse is hovering. Will typically catch mouse inputs. + ImGuiWindow* HoveredRootWindow; // == HoveredWindow ? HoveredWindow->RootWindow : NULL, merely a shortcut to avoid null test in some situation. + ImGuiWindow* HoveredWindowUnderMovingWindow; // Hovered window ignoring MovingWindow. Only set if MovingWindow is set. + ImGuiWindow* MovingWindow; // Track the window we clicked on (in order to preserve focus). The actual window that is moved is generally MovingWindow->RootWindow. 
+ ImGuiWindow* WheelingWindow; // Track the window we started mouse-wheeling on. Until a timer elapse or mouse has moved, generally keep scrolling the same window even if during the course of scrolling the mouse ends up hovering a child window. + ImVec2 WheelingWindowRefMousePos; + float WheelingWindowTimer; + + // Item/widgets state and tracking information + ImGuiID HoveredId; // Hovered widget, filled during the frame + ImGuiID HoveredIdPreviousFrame; + bool HoveredIdAllowOverlap; + bool HoveredIdUsingMouseWheel; // Hovered widget will use mouse wheel. Blocks scrolling the underlying window. + bool HoveredIdPreviousFrameUsingMouseWheel; + bool HoveredIdDisabled; // At least one widget passed the rect test, but has been discarded by disabled flag or popup inhibit. May be true even if HoveredId == 0. + float HoveredIdTimer; // Measure contiguous hovering time + float HoveredIdNotActiveTimer; // Measure contiguous hovering time where the item has not been active + ImGuiID ActiveId; // Active widget + ImGuiID ActiveIdIsAlive; // Active widget has been seen this frame (we can't use a bool as the ActiveId may change within the frame) + float ActiveIdTimer; + bool ActiveIdIsJustActivated; // Set at the time of activation for one frame + bool ActiveIdAllowOverlap; // Active widget allows another widget to steal active id (generally for overlapping widgets, but not always) + bool ActiveIdNoClearOnFocusLoss; // Disable losing active id if the active id window gets unfocused. + bool ActiveIdHasBeenPressedBefore; // Track whether the active id led to a press (this is to allow changing between PressOnClick and PressOnRelease without pressing twice). Used by range_select branch. + bool ActiveIdHasBeenEditedBefore; // Was the value associated to the widget Edited over the course of the Active state. + bool ActiveIdHasBeenEditedThisFrame; + bool ActiveIdUsingMouseWheel; // Active widget will want to read mouse wheel. Blocks scrolling the underlying window. 
+ ImU32 ActiveIdUsingNavDirMask; // Active widget will want to read those nav move requests (e.g. can activate a button and move away from it) + ImU32 ActiveIdUsingNavInputMask; // Active widget will want to read those nav inputs. + ImU64 ActiveIdUsingKeyInputMask; // Active widget will want to read those key inputs. When we grow the ImGuiKey enum we'll need to either to order the enum to make useful keys come first, either redesign this into e.g. a small array. + ImVec2 ActiveIdClickOffset; // Clicked offset from upper-left corner, if applicable (currently only set by ButtonBehavior) + ImGuiWindow* ActiveIdWindow; + ImGuiInputSource ActiveIdSource; // Activating with mouse or nav (gamepad/keyboard) + int ActiveIdMouseButton; + ImGuiID ActiveIdPreviousFrame; + bool ActiveIdPreviousFrameIsAlive; + bool ActiveIdPreviousFrameHasBeenEditedBefore; + ImGuiWindow* ActiveIdPreviousFrameWindow; + ImGuiID LastActiveId; // Store the last non-zero ActiveId, useful for animation. + float LastActiveIdTimer; // Store the last non-zero ActiveId timer since the beginning of activation, useful for animation. 
+ + // Next window/item data + ImGuiNextWindowData NextWindowData; // Storage for SetNextWindow** functions + ImGuiNextItemData NextItemData; // Storage for SetNextItem** functions + + // Shared stacks + ImVector ColorStack; // Stack for PushStyleColor()/PopStyleColor() - inherited by Begin() + ImVector StyleVarStack; // Stack for PushStyleVar()/PopStyleVar() - inherited by Begin() + ImVector FontStack; // Stack for PushFont()/PopFont() - inherited by Begin() + ImVector FocusScopeStack; // Stack for PushFocusScope()/PopFocusScope() - not inherited by Begin(), unless child window + ImVectorItemFlagsStack; // Stack for PushItemFlag()/PopItemFlag() - inherited by Begin() + ImVectorGroupStack; // Stack for BeginGroup()/EndGroup() - not inherited by Begin() + ImVectorOpenPopupStack; // Which popups are open (persistent) + ImVectorBeginPopupStack; // Which level of BeginPopup() we are in (reset every frame) + + // Gamepad/keyboard Navigation + ImGuiWindow* NavWindow; // Focused window for navigation. Could be called 'FocusWindow' + ImGuiID NavId; // Focused item for navigation + ImGuiID NavFocusScopeId; // Identify a selection scope (selection code often wants to "clear other items" when landing on an item of the selection set) + ImGuiID NavActivateId; // ~~ (g.ActiveId == 0) && IsNavInputPressed(ImGuiNavInput_Activate) ? NavId : 0, also set when calling ActivateItem() + ImGuiID NavActivateDownId; // ~~ IsNavInputDown(ImGuiNavInput_Activate) ? NavId : 0 + ImGuiID NavActivatePressedId; // ~~ IsNavInputPressed(ImGuiNavInput_Activate) ? NavId : 0 + ImGuiID NavInputId; // ~~ IsNavInputPressed(ImGuiNavInput_Input) ? NavId : 0 + ImGuiID NavJustTabbedId; // Just tabbed to this id. + ImGuiID NavJustMovedToId; // Just navigated to this id (result of a successfully MoveRequest). + ImGuiID NavJustMovedToFocusScopeId; // Just navigated to this focus scope id (result of a successfully MoveRequest). 
+ ImGuiKeyModFlags NavJustMovedToKeyMods; + ImGuiID NavNextActivateId; // Set by ActivateItem(), queued until next frame. + ImGuiInputSource NavInputSource; // Keyboard or Gamepad mode? THIS WILL ONLY BE None or NavGamepad or NavKeyboard. + ImRect NavScoringRect; // Rectangle used for scoring, in screen space. Based of window->NavRectRel[], modified for directional navigation scoring. + int NavScoringCount; // Metrics for debugging + ImGuiNavLayer NavLayer; // Layer we are navigating on. For now the system is hard-coded for 0=main contents and 1=menu/title bar, may expose layers later. + int NavIdTabCounter; // == NavWindow->DC.FocusIdxTabCounter at time of NavId processing + bool NavIdIsAlive; // Nav widget has been seen this frame ~~ NavRectRel is valid + bool NavMousePosDirty; // When set we will update mouse position if (io.ConfigFlags & ImGuiConfigFlags_NavEnableSetMousePos) if set (NB: this not enabled by default) + bool NavDisableHighlight; // When user starts using mouse, we hide gamepad/keyboard highlight (NB: but they are still available, which is why NavDisableHighlight isn't always != NavDisableMouseHover) + bool NavDisableMouseHover; // When user starts using gamepad/keyboard, we hide mouse hovering highlight until mouse is touched again. 
+ bool NavAnyRequest; // ~~ NavMoveRequest || NavInitRequest + bool NavInitRequest; // Init request for appearing window to select first item + bool NavInitRequestFromMove; + ImGuiID NavInitResultId; // Init request result (first item of the window, or one for which SetItemDefaultFocus() was called) + ImRect NavInitResultRectRel; // Init request result rectangle (relative to parent window) + bool NavMoveRequest; // Move request for this frame + ImGuiNavMoveFlags NavMoveRequestFlags; + ImGuiNavForward NavMoveRequestForward; // None / ForwardQueued / ForwardActive (this is used to navigate sibling parent menus from a child menu) + ImGuiKeyModFlags NavMoveRequestKeyMods; + ImGuiDir NavMoveDir, NavMoveDirLast; // Direction of the move request (left/right/up/down), direction of the previous move request + ImGuiDir NavMoveClipDir; // FIXME-NAV: Describe the purpose of this better. Might want to rename? + ImGuiNavMoveResult NavMoveResultLocal; // Best move request candidate within NavWindow + ImGuiNavMoveResult NavMoveResultLocalVisibleSet; // Best move request candidate within NavWindow that are mostly visible (when using ImGuiNavMoveFlags_AlsoScoreVisibleSet flag) + ImGuiNavMoveResult NavMoveResultOther; // Best move request candidate within NavWindow's flattened hierarchy (when using ImGuiWindowFlags_NavFlattened flag) + ImGuiWindow* NavWrapRequestWindow; // Window which requested trying nav wrap-around. + ImGuiNavMoveFlags NavWrapRequestFlags; // Wrap-around operation flags. + + // Navigation: Windowing (CTRL+TAB for list, or Menu button + keys or directional pads to move/resize) + ImGuiWindow* NavWindowingTarget; // Target window when doing CTRL+Tab (or Pad Menu + FocusPrev/Next), this window is temporarily displayed top-most! + ImGuiWindow* NavWindowingTargetAnim; // Record of last valid NavWindowingTarget until DimBgRatio and NavWindowingHighlightAlpha becomes 0.0f, so the fade-out can stay on it. 
+ ImGuiWindow* NavWindowingListWindow; // Internal window actually listing the CTRL+Tab contents + float NavWindowingTimer; + float NavWindowingHighlightAlpha; + bool NavWindowingToggleLayer; + + // Legacy Focus/Tabbing system (older than Nav, active even if Nav is disabled, misnamed. FIXME-NAV: This needs a redesign!) + ImGuiWindow* FocusRequestCurrWindow; // + ImGuiWindow* FocusRequestNextWindow; // + int FocusRequestCurrCounterRegular; // Any item being requested for focus, stored as an index (we on layout to be stable between the frame pressing TAB and the next frame, semi-ouch) + int FocusRequestCurrCounterTabStop; // Tab item being requested for focus, stored as an index + int FocusRequestNextCounterRegular; // Stored for next frame + int FocusRequestNextCounterTabStop; // " + bool FocusTabPressed; // + + // Render + ImDrawData DrawData; // Main ImDrawData instance to pass render information to the user + ImDrawDataBuilder DrawDataBuilder; + float DimBgRatio; // 0.0..1.0 animation when fading in a dimming background (for modal window and CTRL+TAB list) + ImDrawList BackgroundDrawList; // First draw list to be rendered. + ImDrawList ForegroundDrawList; // Last draw list to be rendered. This is where we the render software mouse cursor (if io.MouseDrawCursor is set) and most debug overlays. + ImGuiMouseCursor MouseCursor; + + // Drag and Drop + bool DragDropActive; + bool DragDropWithinSource; // Set when within a BeginDragDropXXX/EndDragDropXXX block for a drag source. + bool DragDropWithinTarget; // Set when within a BeginDragDropXXX/EndDragDropXXX block for a drag target. 
+ ImGuiDragDropFlags DragDropSourceFlags; + int DragDropSourceFrameCount; + int DragDropMouseButton; + ImGuiPayload DragDropPayload; + ImRect DragDropTargetRect; // Store rectangle of current target candidate (we favor small targets when overlapping) + ImGuiID DragDropTargetId; + ImGuiDragDropFlags DragDropAcceptFlags; + float DragDropAcceptIdCurrRectSurface; // Target item surface (we resolve overlapping targets by prioritizing the smaller surface) + ImGuiID DragDropAcceptIdCurr; // Target item id (set at the time of accepting the payload) + ImGuiID DragDropAcceptIdPrev; // Target item id from previous frame (we need to store this to allow for overlapping drag and drop targets) + int DragDropAcceptFrameCount; // Last time a target expressed a desire to accept the source + ImGuiID DragDropHoldJustPressedId; // Set when holding a payload just made ButtonBehavior() return a press. + ImVector DragDropPayloadBufHeap; // We don't expose the ImVector<> directly, ImGuiPayload only holds pointer+size + unsigned char DragDropPayloadBufLocal[16]; // Local buffer for small payloads + + // Table + ImGuiTable* CurrentTable; + ImPool Tables; + ImVector CurrentTableStack; + ImVector TablesLastTimeActive; // Last used timestamp of each tables (SOA, for efficient GC) + ImVector DrawChannelsTempMergeBuffer; + + // Tab bars + ImGuiTabBar* CurrentTabBar; + ImPool TabBars; + ImVector CurrentTabBarStack; + ImVector ShrinkWidthBuffer; + + // Widget state + ImVec2 LastValidMousePos; + ImGuiInputTextState InputTextState; + ImFont InputTextPasswordFont; + ImGuiID TempInputId; // Temporary text input when CTRL+clicking on a slider, etc. 
+ ImGuiColorEditFlags ColorEditOptions; // Store user options for color edit widgets + float ColorEditLastHue; // Backup of last Hue associated to LastColor[3], so we can restore Hue in lossy RGB<>HSV round trips + float ColorEditLastSat; // Backup of last Saturation associated to LastColor[3], so we can restore Saturation in lossy RGB<>HSV round trips + float ColorEditLastColor[3]; + ImVec4 ColorPickerRef; // Initial/reference color at the time of opening the color picker. + float SliderCurrentAccum; // Accumulated slider delta when using navigation controls. + bool SliderCurrentAccumDirty; // Has the accumulated slider delta changed since last time we tried to apply it? + bool DragCurrentAccumDirty; + float DragCurrentAccum; // Accumulator for dragging modification. Always high-precision, not rounded by end-user precision settings + float DragSpeedDefaultRatio; // If speed == 0.0f, uses (max-min) * DragSpeedDefaultRatio + float ScrollbarClickDeltaToGrabCenter; // Distance between mouse and center of grab box, normalized in parent space. Use storage? + int TooltipOverrideCount; + float TooltipSlowDelay; // Time before slow tooltips appears (FIXME: This is temporary until we merge in tooltip timer+priority work) + ImVector ClipboardHandlerData; // If no custom clipboard handler is defined + ImVector MenusIdSubmittedThisFrame; // A list of menu IDs that were rendered at least once + + // Platform support + ImVec2 PlatformImePos; // Cursor position request & last passed to the OS Input Method Editor + ImVec2 PlatformImeLastPos; + char PlatformLocaleDecimalPoint; // '.' 
or *localeconv()->decimal_point + + // Settings + bool SettingsLoaded; + float SettingsDirtyTimer; // Save .ini Settings to memory when time reaches zero + ImGuiTextBuffer SettingsIniData; // In memory .ini settings + ImVector SettingsHandlers; // List of .ini settings handlers + ImChunkStream SettingsWindows; // ImGuiWindow .ini settings entries + ImChunkStream SettingsTables; // ImGuiTable .ini settings entries + ImVector Hooks; // Hooks for extensions (e.g. test engine) + ImGuiID HookIdNext; // Next available HookId + + // Capture/Logging + bool LogEnabled; // Currently capturing + ImGuiLogType LogType; // Capture target + ImFileHandle LogFile; // If != NULL log to stdout/ file + ImGuiTextBuffer LogBuffer; // Accumulation buffer when log to clipboard. This is pointer so our GImGui static constructor doesn't call heap allocators. + float LogLinePosY; + bool LogLineFirstItem; + int LogDepthRef; + int LogDepthToExpand; + int LogDepthToExpandDefault; // Default/stored value for LogDepthMaxExpand if not specified in the LogXXX function call. + + // Debug Tools + bool DebugItemPickerActive; // Item picker is active (started with DebugStartItemPicker()) + ImGuiID DebugItemPickerBreakId; // Will call IM_DEBUG_BREAK() when encountering this id + ImGuiMetricsConfig DebugMetricsConfig; + + // Misc + float FramerateSecPerFrame[120]; // Calculate estimate of framerate for user over the last 2 seconds. + int FramerateSecPerFrameIdx; + float FramerateSecPerFrameAccum; + int WantCaptureMouseNextFrame; // Explicit capture via CaptureKeyboardFromApp()/CaptureMouseFromApp() sets those flags + int WantCaptureKeyboardNextFrame; + int WantTextInputNextFrame; + char TempBuffer[1024 * 3 + 1]; // Temporary text buffer + + ImGuiContext(ImFontAtlas* shared_font_atlas) : BackgroundDrawList(&DrawListSharedData), ForegroundDrawList(&DrawListSharedData) + { + Initialized = false; + FontAtlasOwnedByContext = shared_font_atlas ? 
false : true; + Font = NULL; + FontSize = FontBaseSize = 0.0f; + IO.Fonts = shared_font_atlas ? shared_font_atlas : IM_NEW(ImFontAtlas)(); + Time = 0.0f; + FrameCount = 0; + FrameCountEnded = FrameCountRendered = -1; + WithinFrameScope = WithinFrameScopeWithImplicitWindow = WithinEndChild = false; + GcCompactAll = false; + TestEngineHookItems = false; + TestEngineHookIdInfo = 0; + TestEngine = NULL; + + WindowsActiveCount = 0; + CurrentWindow = NULL; + HoveredWindow = NULL; + HoveredRootWindow = NULL; + HoveredWindowUnderMovingWindow = NULL; + MovingWindow = NULL; + WheelingWindow = NULL; + WheelingWindowTimer = 0.0f; + + HoveredId = HoveredIdPreviousFrame = 0; + HoveredIdAllowOverlap = false; + HoveredIdUsingMouseWheel = HoveredIdPreviousFrameUsingMouseWheel = false; + HoveredIdDisabled = false; + HoveredIdTimer = HoveredIdNotActiveTimer = 0.0f; + ActiveId = 0; + ActiveIdIsAlive = 0; + ActiveIdTimer = 0.0f; + ActiveIdIsJustActivated = false; + ActiveIdAllowOverlap = false; + ActiveIdNoClearOnFocusLoss = false; + ActiveIdHasBeenPressedBefore = false; + ActiveIdHasBeenEditedBefore = false; + ActiveIdHasBeenEditedThisFrame = false; + ActiveIdUsingMouseWheel = false; + ActiveIdUsingNavDirMask = 0x00; + ActiveIdUsingNavInputMask = 0x00; + ActiveIdUsingKeyInputMask = 0x00; + ActiveIdClickOffset = ImVec2(-1, -1); + ActiveIdWindow = NULL; + ActiveIdSource = ImGuiInputSource_None; + ActiveIdMouseButton = 0; + ActiveIdPreviousFrame = 0; + ActiveIdPreviousFrameIsAlive = false; + ActiveIdPreviousFrameHasBeenEditedBefore = false; + ActiveIdPreviousFrameWindow = NULL; + LastActiveId = 0; + LastActiveIdTimer = 0.0f; + + NavWindow = NULL; + NavId = NavFocusScopeId = NavActivateId = NavActivateDownId = NavActivatePressedId = NavInputId = 0; + NavJustTabbedId = NavJustMovedToId = NavJustMovedToFocusScopeId = NavNextActivateId = 0; + NavJustMovedToKeyMods = ImGuiKeyModFlags_None; + NavInputSource = ImGuiInputSource_None; + NavScoringRect = ImRect(); + NavScoringCount = 0; + NavLayer 
= ImGuiNavLayer_Main; + NavIdTabCounter = INT_MAX; + NavIdIsAlive = false; + NavMousePosDirty = false; + NavDisableHighlight = true; + NavDisableMouseHover = false; + NavAnyRequest = false; + NavInitRequest = false; + NavInitRequestFromMove = false; + NavInitResultId = 0; + NavMoveRequest = false; + NavMoveRequestFlags = ImGuiNavMoveFlags_None; + NavMoveRequestForward = ImGuiNavForward_None; + NavMoveRequestKeyMods = ImGuiKeyModFlags_None; + NavMoveDir = NavMoveDirLast = NavMoveClipDir = ImGuiDir_None; + NavWrapRequestWindow = NULL; + NavWrapRequestFlags = ImGuiNavMoveFlags_None; + + NavWindowingTarget = NavWindowingTargetAnim = NavWindowingListWindow = NULL; + NavWindowingTimer = NavWindowingHighlightAlpha = 0.0f; + NavWindowingToggleLayer = false; + + FocusRequestCurrWindow = FocusRequestNextWindow = NULL; + FocusRequestCurrCounterRegular = FocusRequestCurrCounterTabStop = INT_MAX; + FocusRequestNextCounterRegular = FocusRequestNextCounterTabStop = INT_MAX; + FocusTabPressed = false; + + DimBgRatio = 0.0f; + BackgroundDrawList._OwnerName = "##Background"; // Give it a name for debugging + ForegroundDrawList._OwnerName = "##Foreground"; // Give it a name for debugging + MouseCursor = ImGuiMouseCursor_Arrow; + + DragDropActive = DragDropWithinSource = DragDropWithinTarget = false; + DragDropSourceFlags = ImGuiDragDropFlags_None; + DragDropSourceFrameCount = -1; + DragDropMouseButton = -1; + DragDropTargetId = 0; + DragDropAcceptFlags = ImGuiDragDropFlags_None; + DragDropAcceptIdCurrRectSurface = 0.0f; + DragDropAcceptIdPrev = DragDropAcceptIdCurr = 0; + DragDropAcceptFrameCount = -1; + DragDropHoldJustPressedId = 0; + memset(DragDropPayloadBufLocal, 0, sizeof(DragDropPayloadBufLocal)); + + CurrentTable = NULL; + CurrentTabBar = NULL; + + LastValidMousePos = ImVec2(0.0f, 0.0f); + TempInputId = 0; + ColorEditOptions = ImGuiColorEditFlags__OptionsDefault; + ColorEditLastHue = ColorEditLastSat = 0.0f; + ColorEditLastColor[0] = ColorEditLastColor[1] = 
ColorEditLastColor[2] = FLT_MAX; + SliderCurrentAccum = 0.0f; + SliderCurrentAccumDirty = false; + DragCurrentAccumDirty = false; + DragCurrentAccum = 0.0f; + DragSpeedDefaultRatio = 1.0f / 100.0f; + ScrollbarClickDeltaToGrabCenter = 0.0f; + TooltipOverrideCount = 0; + TooltipSlowDelay = 0.50f; + + PlatformImePos = PlatformImeLastPos = ImVec2(FLT_MAX, FLT_MAX); + PlatformLocaleDecimalPoint = '.'; + + SettingsLoaded = false; + SettingsDirtyTimer = 0.0f; + HookIdNext = 0; + + LogEnabled = false; + LogType = ImGuiLogType_None; + LogFile = NULL; + LogLinePosY = FLT_MAX; + LogLineFirstItem = false; + LogDepthRef = 0; + LogDepthToExpand = LogDepthToExpandDefault = 2; + + DebugItemPickerActive = false; + DebugItemPickerBreakId = 0; + + memset(FramerateSecPerFrame, 0, sizeof(FramerateSecPerFrame)); + FramerateSecPerFrameIdx = 0; + FramerateSecPerFrameAccum = 0.0f; + WantCaptureMouseNextFrame = WantCaptureKeyboardNextFrame = WantTextInputNextFrame = -1; + memset(TempBuffer, 0, sizeof(TempBuffer)); + } +}; + +//----------------------------------------------------------------------------- +// [SECTION] ImGuiWindowTempData, ImGuiWindow +//----------------------------------------------------------------------------- + +// Transient per-window data, reset at the beginning of the frame. This used to be called ImGuiDrawContext, hence the DC variable name in ImGuiWindow. +// (That's theory, in practice the delimitation between ImGuiWindow and ImGuiWindowTempData is quite tenuous and could be reconsidered..) +// (This doesn't need a constructor because we zero-clear it as part of ImGuiWindow and all frame-temporary data are setup on Begin) +struct IMGUI_API ImGuiWindowTempData +{ + // Layout + ImVec2 CursorPos; // Current emitting position, in absolute coordinates. + ImVec2 CursorPosPrevLine; + ImVec2 CursorStartPos; // Initial position after Begin(), generally ~ window position + WindowPadding. 
+ ImVec2 CursorMaxPos; // Used to implicitly calculate ContentSize at the beginning of next frame, for scrolling range and auto-resize. Always growing during the frame. + ImVec2 IdealMaxPos; // Used to implicitly calculate ContentSizeIdeal at the beginning of next frame, for auto-resize only. Always growing during the frame. + ImVec2 CurrLineSize; + ImVec2 PrevLineSize; + float CurrLineTextBaseOffset; // Baseline offset (0.0f by default on a new line, generally == style.FramePadding.y when a framed item has been added). + float PrevLineTextBaseOffset; + ImVec1 Indent; // Indentation / start position from left of window (increased by TreePush/TreePop, etc.) + ImVec1 ColumnsOffset; // Offset to the current column (if ColumnsCurrent > 0). FIXME: This and the above should be a stack to allow use cases like Tree->Column->Tree. Need revamp columns API. + ImVec1 GroupOffset; + + // Last item status + ImGuiID LastItemId; // ID for last item + ImGuiItemStatusFlags LastItemStatusFlags; // Status flags for last item (see ImGuiItemStatusFlags_) + ImRect LastItemRect; // Interaction rect for last item + ImRect LastItemDisplayRect; // End-user display rect for last item (only valid if LastItemStatusFlags & ImGuiItemStatusFlags_HasDisplayRect) + + // Keyboard/Gamepad navigation + ImGuiNavLayer NavLayerCurrent; // Current layer, 0..31 (we currently only use 0..1) + int NavLayerActiveMask; // Which layers have been written to (result from previous frame) + int NavLayerActiveMaskNext; // Which layers have been written to (accumulator for current frame) + ImGuiID NavFocusScopeIdCurrent; // Current focus scope ID while appending + bool NavHideHighlightOneFrame; + bool NavHasScroll; // Set when scrolling can be used (ScrollMax > 0.0f) + + // Miscellaneous + bool MenuBarAppending; // FIXME: Remove this + ImVec2 MenuBarOffset; // MenuBarOffset.x is sort of equivalent of a per-layer CursorPos.x, saved/restored as we switch to the menu bar. 
The only situation when MenuBarOffset.y is > 0 is when (SafeAreaPadding.y > FramePadding.y), often used on TVs. + ImGuiMenuColumns MenuColumns; // Simplified columns storage for menu items measurement + int TreeDepth; // Current tree depth. + ImU32 TreeJumpToParentOnPopMask; // Store a copy of !g.NavIdIsAlive for TreeDepth 0..31.. Could be turned into a ImU64 if necessary. + ImVector<ImGuiWindow*> ChildWindows; + ImGuiStorage* StateStorage; // Current persistent per-window storage (store e.g. tree node open/close state) + ImGuiOldColumns* CurrentColumns; // Current columns set + int CurrentTableIdx; // Current table index (into g.Tables) + ImGuiLayoutType LayoutType; + ImGuiLayoutType ParentLayoutType; // Layout type of parent window at the time of Begin() + int FocusCounterRegular; // (Legacy Focus/Tabbing system) Sequential counter, start at -1 and increase as assigned via FocusableItemRegister() (FIXME-NAV: Needs redesign) + int FocusCounterTabStop; // (Legacy Focus/Tabbing system) Same, but only count widgets which you can Tab through. + + // Local parameters stacks + // We store the current settings outside of the vectors to increase memory locality (reduce cache misses). The vectors are rarely modified. Also it allows us to not heap allocate for short-lived windows which are not using those settings. + ImGuiItemFlags ItemFlags; // == g.ItemFlagsStack.back() + float ItemWidth; // Current item width (>0.0: width in pixels, <0.0: align xx pixels to the right of window). + float TextWrapPos; // Current text wrap pos. + ImVector<float> ItemWidthStack; // Store item widths to restore (attention: .back() is not == ItemWidth) + ImVector<float> TextWrapPosStack; // Store text wrap pos to restore (attention: .back() is not == TextWrapPos) + ImGuiStackSizes StackSizesOnBegin; // Store size of various stacks for asserting +}; + +// Storage for one window +struct IMGUI_API ImGuiWindow +{ + char* Name; // Window name, owned by the window.
+ ImGuiID ID; // == ImHashStr(Name) + ImGuiWindowFlags Flags; // See enum ImGuiWindowFlags_ + ImVec2 Pos; // Position (always rounded-up to nearest pixel) + ImVec2 Size; // Current size (==SizeFull or collapsed title bar size) + ImVec2 SizeFull; // Size when non collapsed + ImVec2 ContentSize; // Size of contents/scrollable client area (calculated from the extents reach of the cursor) from previous frame. Does not include window decoration or window padding. + ImVec2 ContentSizeIdeal; + ImVec2 ContentSizeExplicit; // Size of contents/scrollable client area explicitly request by the user via SetNextWindowContentSize(). + ImVec2 WindowPadding; // Window padding at the time of Begin(). + float WindowRounding; // Window rounding at the time of Begin(). May be clamped lower to avoid rendering artifacts with title bar, menu bar etc. + float WindowBorderSize; // Window border size at the time of Begin(). + int NameBufLen; // Size of buffer storing Name. May be larger than strlen(Name)! + ImGuiID MoveId; // == window->GetID("#MOVE") + ImGuiID ChildId; // ID of corresponding item in parent window (for navigation to return from child window to parent window) + ImVec2 Scroll; + ImVec2 ScrollMax; + ImVec2 ScrollTarget; // target scroll position. stored as cursor position with scrolling canceled out, so the highest point is always 0.0f. (FLT_MAX for no change) + ImVec2 ScrollTargetCenterRatio; // 0.0f = scroll so that target position is at top, 0.5f = scroll so that target position is centered + ImVec2 ScrollTargetEdgeSnapDist; // 0.0f = no snapping, >0.0f snapping threshold + ImVec2 ScrollbarSizes; // Size taken by each scrollbars on their smaller axis. Pay attention! ScrollbarSizes.x == width of the vertical scrollbar, ScrollbarSizes.y = height of the horizontal scrollbar. + bool ScrollbarX, ScrollbarY; // Are scrollbars visible? 
+ bool Active; // Set to true on Begin(), unless Collapsed + bool WasActive; + bool WriteAccessed; // Set to true when any widget access the current window + bool Collapsed; // Set when collapsing window to become only title-bar + bool WantCollapseToggle; + bool SkipItems; // Set when items can safely be all clipped (e.g. window not visible or collapsed) + bool Appearing; // Set during the frame where the window is appearing (or re-appearing) + bool Hidden; // Do not display (== HiddenFrames*** > 0) + bool IsFallbackWindow; // Set on the "Debug##Default" window. + bool HasCloseButton; // Set when the window has a close button (p_open != NULL) + signed char ResizeBorderHeld; // Current border being held for resize (-1: none, otherwise 0-3) + short BeginCount; // Number of Begin() during the current frame (generally 0 or 1, 1+ if appending via multiple Begin/End pairs) + short BeginOrderWithinParent; // Order within immediate parent window, if we are a child window. Otherwise 0. + short BeginOrderWithinContext; // Order within entire imgui context. This is mostly used for debugging submission order related issues. + ImGuiID PopupId; // ID in the popup stack when this window is used as a popup/menu (because we use generic Name/ID for recycling) + ImS8 AutoFitFramesX, AutoFitFramesY; + ImS8 AutoFitChildAxises; + bool AutoFitOnlyGrows; + ImGuiDir AutoPosLastDirection; + ImS8 HiddenFramesCanSkipItems; // Hide the window for N frames + ImS8 HiddenFramesCannotSkipItems; // Hide the window for N frames while allowing items to be submitted so we can measure their size + ImS8 HiddenFramesForRenderOnly; // Hide the window until frame N at Render() time only + ImGuiCond SetWindowPosAllowFlags : 8; // store acceptable condition flags for SetNextWindowPos() use. + ImGuiCond SetWindowSizeAllowFlags : 8; // store acceptable condition flags for SetNextWindowSize() use. + ImGuiCond SetWindowCollapsedAllowFlags : 8; // store acceptable condition flags for SetNextWindowCollapsed() use. 
+ ImVec2 SetWindowPosVal; // store window position when using a non-zero Pivot (position set needs to be processed when we know the window size) + ImVec2 SetWindowPosPivot; // store window pivot for positioning. ImVec2(0, 0) when positioning from top-left corner; ImVec2(0.5f, 0.5f) for centering; ImVec2(1, 1) for bottom right. + + ImVector<ImGuiID> IDStack; // ID stack. ID are hashes seeded with the value at the top of the stack. (In theory this should be in the TempData structure) + ImGuiWindowTempData DC; // Temporary per-window data, reset at the beginning of the frame. This used to be called ImGuiDrawContext, hence the "DC" variable name. + + // The best way to understand what those rectangles are is to use the 'Metrics->Tools->Show Windows Rectangles' viewer. + // The main 'OuterRect', omitted as a field, is window->Rect(). + ImRect OuterRectClipped; // == Window->Rect() just after setup in Begin(). == window->Rect() for root window. + ImRect InnerRect; // Inner rectangle (omit title bar, menu bar, scroll bar) + ImRect InnerClipRect; // == InnerRect shrunk by WindowPadding*0.5f on each side, clipped within viewport or parent clip rect. + ImRect WorkRect; // Initially covers the whole scrolling region. Reduced by containers e.g columns/tables when active. Shrunk by WindowPadding*1.0f on each side. This is meant to replace ContentRegionRect over time (from 1.71+ onward). + ImRect ParentWorkRect; // Backup of WorkRect before entering a container such as columns/tables. Used by e.g. SpanAllColumns functions to easily access. Stacked containers are responsible for maintaining this. // FIXME-WORKRECT: Could be a stack? + ImRect ClipRect; // Current clipping/scissoring rectangle, evolve as we are using PushClipRect(), etc. == DrawList->clip_rect_stack.back(). + ImRect ContentRegionRect; // FIXME: This is currently confusing/misleading. It is essentially WorkRect but not handling of scrolling.
We currently rely on it as right/bottom aligned sizing operation need some size to rely on. + ImVec2ih HitTestHoleSize; // Define an optional rectangular hole where mouse will pass-through the window. + ImVec2ih HitTestHoleOffset; + + int LastFrameActive; // Last frame number the window was Active. + float LastTimeActive; // Last timestamp the window was Active (using float as we don't need high precision there) + float ItemWidthDefault; + ImGuiStorage StateStorage; + ImVector<ImGuiOldColumns> ColumnsStorage; + float FontWindowScale; // User scale multiplier per-window, via SetWindowFontScale() + int SettingsOffset; // Offset into SettingsWindows[] (offsets are always valid as we only grow the array from the back) + + ImDrawList* DrawList; // == &DrawListInst (for backward compatibility reason with code using imgui_internal.h we keep this a pointer) + ImDrawList DrawListInst; + ImGuiWindow* ParentWindow; // If we are a child _or_ popup window, this is pointing to our parent. Otherwise NULL. + ImGuiWindow* RootWindow; // Point to ourself or first ancestor that is not a child window == Top-level window. + ImGuiWindow* RootWindowForTitleBarHighlight; // Point to ourself or first ancestor which will display TitleBgActive color when this window is active. + ImGuiWindow* RootWindowForNav; // Point to ourself or first ancestor which doesn't have the NavFlattened flag. + + ImGuiWindow* NavLastChildNavWindow; // When going to the menu bar, we remember the child window we came from. (This could probably be made implicit if we kept g.Windows sorted by last focused including child window.)
+ ImGuiID NavLastIds[ImGuiNavLayer_COUNT]; // Last known NavId for this window, per layer (0/1) + ImRect NavRectRel[ImGuiNavLayer_COUNT]; // Reference rectangle, in window relative space + + int MemoryDrawListIdxCapacity; // Backup of last idx/vtx count, so when waking up the window we can preallocate and avoid iterative alloc/copy + int MemoryDrawListVtxCapacity; + bool MemoryCompacted; // Set when window extraneous data have been garbage collected + +public: + ImGuiWindow(ImGuiContext* context, const char* name); + ~ImGuiWindow(); + + ImGuiID GetID(const char* str, const char* str_end = NULL); + ImGuiID GetID(const void* ptr); + ImGuiID GetID(int n); + ImGuiID GetIDNoKeepAlive(const char* str, const char* str_end = NULL); + ImGuiID GetIDNoKeepAlive(const void* ptr); + ImGuiID GetIDNoKeepAlive(int n); + ImGuiID GetIDFromRectangle(const ImRect& r_abs); + + // We don't use g.FontSize because the window may be != g.CurrentWindow. + ImRect Rect() const { return ImRect(Pos.x, Pos.y, Pos.x + Size.x, Pos.y + Size.y); } + float CalcFontSize() const { ImGuiContext& g = *GImGui; float scale = g.FontBaseSize * FontWindowScale; if (ParentWindow) scale *= ParentWindow->FontWindowScale; return scale; } + float TitleBarHeight() const { ImGuiContext& g = *GImGui; return (Flags & ImGuiWindowFlags_NoTitleBar) ? 0.0f : CalcFontSize() + g.Style.FramePadding.y * 2.0f; } + ImRect TitleBarRect() const { return ImRect(Pos, ImVec2(Pos.x + SizeFull.x, Pos.y + TitleBarHeight())); } + float MenuBarHeight() const { ImGuiContext& g = *GImGui; return (Flags & ImGuiWindowFlags_MenuBar) ? DC.MenuBarOffset.y + CalcFontSize() + g.Style.FramePadding.y * 2.0f : 0.0f; } + ImRect MenuBarRect() const { float y1 = Pos.y + TitleBarHeight(); return ImRect(Pos.x, y1, Pos.x + SizeFull.x, y1 + MenuBarHeight()); } +}; + +// Backup and restore just enough data to be able to use IsItemHovered() on item A after another B in the same window has overwritten the data.
+struct ImGuiLastItemDataBackup +{ + ImGuiID LastItemId; + ImGuiItemStatusFlags LastItemStatusFlags; + ImRect LastItemRect; + ImRect LastItemDisplayRect; + + ImGuiLastItemDataBackup() { Backup(); } + void Backup() { ImGuiWindow* window = GImGui->CurrentWindow; LastItemId = window->DC.LastItemId; LastItemStatusFlags = window->DC.LastItemStatusFlags; LastItemRect = window->DC.LastItemRect; LastItemDisplayRect = window->DC.LastItemDisplayRect; } + void Restore() const { ImGuiWindow* window = GImGui->CurrentWindow; window->DC.LastItemId = LastItemId; window->DC.LastItemStatusFlags = LastItemStatusFlags; window->DC.LastItemRect = LastItemRect; window->DC.LastItemDisplayRect = LastItemDisplayRect; } +}; + +//----------------------------------------------------------------------------- +// [SECTION] Tab bar, Tab item support +//----------------------------------------------------------------------------- + +// Extend ImGuiTabBarFlags_ +enum ImGuiTabBarFlagsPrivate_ +{ + ImGuiTabBarFlags_DockNode = 1 << 20, // Part of a dock node [we don't use this in the master branch but it facilitate branch syncing to keep this around] + ImGuiTabBarFlags_IsFocused = 1 << 21, + ImGuiTabBarFlags_SaveSettings = 1 << 22 // FIXME: Settings are handled by the docking system, this only request the tab bar to mark settings dirty when reordering tabs +}; + +// Extend ImGuiTabItemFlags_ +enum ImGuiTabItemFlagsPrivate_ +{ + ImGuiTabItemFlags_NoCloseButton = 1 << 20, // Track whether p_open was set or not (we'll need this info on the next frame to recompute ContentWidth during layout) + ImGuiTabItemFlags_Button = 1 << 21 // Used by TabItemButton, change the tab item behavior to mimic a button +}; + +// Storage for one active tab item (sizeof() 28~32 bytes) +struct ImGuiTabItem +{ + ImGuiID ID; + ImGuiTabItemFlags Flags; + int LastFrameVisible; + int LastFrameSelected; // This allows us to infer an ordered list of the last activated tabs with little maintenance + float Offset; // Position relative 
to beginning of tab + float Width; // Width currently displayed + float ContentWidth; // Width of label, stored during BeginTabItem() call + ImS16 NameOffset; // When Window==NULL, offset to name within parent ImGuiTabBar::TabsNames + ImS16 BeginOrder; // BeginTabItem() order, used to re-order tabs after toggling ImGuiTabBarFlags_Reorderable + ImS16 IndexDuringLayout; // Index only used during TabBarLayout() + bool WantClose; // Marked as closed by SetTabItemClosed() + + ImGuiTabItem() { memset(this, 0, sizeof(*this)); LastFrameVisible = LastFrameSelected = -1; NameOffset = BeginOrder = IndexDuringLayout = -1; } +}; + +// Storage for a tab bar (sizeof() 152 bytes) +struct ImGuiTabBar +{ + ImVector<ImGuiTabItem> Tabs; + ImGuiTabBarFlags Flags; + ImGuiID ID; // Zero for tab-bars used by docking + ImGuiID SelectedTabId; // Selected tab/window + ImGuiID NextSelectedTabId; + ImGuiID VisibleTabId; // Can occasionally be != SelectedTabId (e.g. when previewing contents for CTRL+TAB preview) + int CurrFrameVisible; + int PrevFrameVisible; + ImRect BarRect; + float CurrTabsContentsHeight; + float PrevTabsContentsHeight; // Record the height of contents submitted below the tab bar + float WidthAllTabs; // Actual width of all tabs (locked during layout) + float WidthAllTabsIdeal; // Ideal width if all tabs were visible and not clipped + float ScrollingAnim; + float ScrollingTarget; + float ScrollingTargetDistToVisibility; + float ScrollingSpeed; + float ScrollingRectMinX; + float ScrollingRectMaxX; + ImGuiID ReorderRequestTabId; + ImS8 ReorderRequestDir; + ImS8 BeginCount; + bool WantLayout; + bool VisibleTabWasSubmitted; + bool TabsAddedNew; // Set to true when a new tab item or button has been added to the tab bar during last frame + ImS16 TabsActiveCount; // Number of tabs submitted this frame.
+ ImS16 LastTabItemIdx; // Index of last BeginTabItem() tab for use by EndTabItem() + float ItemSpacingY; + ImVec2 FramePadding; // style.FramePadding locked at the time of BeginTabBar() + ImVec2 BackupCursorPos; + ImGuiTextBuffer TabsNames; // For non-docking tab bar we re-append names in a contiguous buffer. + + ImGuiTabBar(); + int GetTabOrder(const ImGuiTabItem* tab) const { return Tabs.index_from_ptr(tab); } + const char* GetTabName(const ImGuiTabItem* tab) const + { + IM_ASSERT(tab->NameOffset != -1 && (int)tab->NameOffset < TabsNames.Buf.Size); + return TabsNames.Buf.Data + tab->NameOffset; + } +}; + +//----------------------------------------------------------------------------- +// [SECTION] Table support +//----------------------------------------------------------------------------- + +#ifdef IMGUI_HAS_TABLE + +#define IM_COL32_DISABLE IM_COL32(0,0,0,1) // Special sentinel code which cannot be used as a regular color. +#define IMGUI_TABLE_MAX_COLUMNS 64 // sizeof(ImU64) * 8. This is solely because we frequently encode columns set in a ImU64. +#define IMGUI_TABLE_MAX_DRAW_CHANNELS (4 + 64 * 2) // See TableSetupDrawChannels() + +// Our current column maximum is 64 but we may raise that in the future. +typedef ImS8 ImGuiTableColumnIdx; +typedef ImU8 ImGuiTableDrawChannelIdx; + +// [Internal] sizeof() ~ 104 +// We use the terminology "Enabled" to refer to a column that is not Hidden by user/api. +// We use the terminology "Clipped" to refer to a column that is out of sight because of scrolling/clipping. +// This is in contrast with some user-facing api such as IsItemVisible() / IsRectVisible() which use "Visible" to mean "not clipped". +struct ImGuiTableColumn +{ + ImGuiTableColumnFlags Flags; // Flags after some patching (not directly same as provided by user). See ImGuiTableColumnFlags_ + float WidthGiven; // Final/actual width visible == (MaxX - MinX), locked in TableUpdateLayout(). 
May be > WidthRequest to honor minimum width, may be < WidthRequest to honor shrinking columns down in tight space. + float MinX; // Absolute positions + float MaxX; + float WidthRequest; // Master width absolute value when !(Flags & _WidthStretch). When Stretch this is derived every frame from StretchWeight in TableUpdateLayout() + float WidthAuto; // Automatic width + float StretchWeight; // Master width weight when (Flags & _WidthStretch). Often around ~1.0f initially. + float InitStretchWeightOrWidth; // Value passed to TableSetupColumn(). For Width it is a content width (_without padding_). + ImRect ClipRect; // Clipping rectangle for the column + ImGuiID UserID; // Optional, value passed to TableSetupColumn() + float WorkMinX; // Contents region min ~(MinX + CellPaddingX + CellSpacingX1) == cursor start position when entering column + float WorkMaxX; // Contents region max ~(MaxX - CellPaddingX - CellSpacingX2) + float ItemWidth; // Current item width for the column, preserved across rows + float ContentMaxXFrozen; // Contents maximum position for frozen rows (apart from headers), from which we can infer content width. + float ContentMaxXUnfrozen; + float ContentMaxXHeadersUsed; // Contents maximum position for headers rows (regardless of freezing). 
TableHeader() automatically softclip itself + report ideal desired size, to avoid creating extraneous draw calls + float ContentMaxXHeadersIdeal; + ImS16 NameOffset; // Offset into parent ColumnsNames[] + ImGuiTableColumnIdx DisplayOrder; // Index within Table's IndexToDisplayOrder[] (column may be reordered by users) + ImGuiTableColumnIdx IndexWithinEnabledSet; // Index within enabled/visible set (<= IndexToDisplayOrder) + ImGuiTableColumnIdx PrevEnabledColumn; // Index of prev enabled/visible column within Columns[], -1 if first enabled/visible column + ImGuiTableColumnIdx NextEnabledColumn; // Index of next enabled/visible column within Columns[], -1 if last enabled/visible column + ImGuiTableColumnIdx SortOrder; // Index of this column within sort specs, -1 if not sorting on this column, 0 for single-sort, may be >0 on multi-sort + ImGuiTableDrawChannelIdx DrawChannelCurrent; // Index within DrawSplitter.Channels[] + ImGuiTableDrawChannelIdx DrawChannelFrozen; + ImGuiTableDrawChannelIdx DrawChannelUnfrozen; + bool IsEnabled; // Is the column not marked Hidden by the user? (even if off view, e.g. clipped by scrolling). + bool IsEnabledNextFrame; + bool IsVisibleX; // Is actually in view (e.g. overlapping the host window clipping rectangle, not scrolled). + bool IsVisibleY; + bool IsRequestOutput; // Return value for TableSetColumnIndex() / TableNextColumn(): whether we request user to output contents or not. + bool IsSkipItems; // Do we want item submissions to this column to be completely ignored (no layout will happen). 
+ bool IsPreserveWidthAuto; + ImS8 NavLayerCurrent; // ImGuiNavLayer in 1 byte + ImU8 AutoFitQueue; // Queue of 8 values for the next 8 frames to request auto-fit + ImU8 CannotSkipItemsQueue; // Queue of 8 values for the next 8 frames to disable Clipped/SkipItem + ImU8 SortDirection : 2; // ImGuiSortDirection_Ascending or ImGuiSortDirection_Descending + ImU8 SortDirectionsAvailCount : 2; // Number of available sort directions (0 to 3) + ImU8 SortDirectionsAvailMask : 4; // Mask of available sort directions (1-bit each) + ImU8 SortDirectionsAvailList; // Ordered list of available sort directions (2-bits each) + + ImGuiTableColumn() + { + memset(this, 0, sizeof(*this)); + StretchWeight = WidthRequest = -1.0f; + NameOffset = -1; + DisplayOrder = IndexWithinEnabledSet = -1; + PrevEnabledColumn = NextEnabledColumn = -1; + SortOrder = -1; + SortDirection = ImGuiSortDirection_None; + DrawChannelCurrent = DrawChannelFrozen = DrawChannelUnfrozen = (ImU8)-1; + } +}; + +// Transient cell data stored per row. +// sizeof() ~ 6 +struct ImGuiTableCellData +{ + ImU32 BgColor; // Actual color + ImGuiTableColumnIdx Column; // Column number +}; + +// FIXME-TABLE: transient data could be stored in a per-stacked table structure: DrawSplitter, SortSpecs, incoming RowData +struct ImGuiTable +{ + ImGuiID ID; + ImGuiTableFlags Flags; + void* RawData; // Single allocation to hold Columns[], DisplayOrderToIndex[] and RowCellData[] + ImSpan<ImGuiTableColumn> Columns; // Point within RawData[] + ImSpan<ImGuiTableColumnIdx> DisplayOrderToIndex; // Point within RawData[]. Store display order of columns (when not reordered, the values are 0...Count-1) + ImSpan<ImGuiTableCellData> RowCellData; // Point within RawData[]. Store cells background requests for current row.
+ ImU64 EnabledMaskByDisplayOrder; // Column DisplayOrder -> IsEnabled map + ImU64 EnabledMaskByIndex; // Column Index -> IsEnabled map (== not hidden by user/api) in a format adequate for iterating column without touching cold data + ImU64 VisibleMaskByIndex; // Column Index -> IsVisibleX|IsVisibleY map (== not hidden by user/api && not hidden by scrolling/cliprect) + ImU64 RequestOutputMaskByIndex; // Column Index -> IsVisible || AutoFit (== expect user to submit items) + ImGuiTableFlags SettingsLoadedFlags; // Which data were loaded from the .ini file (e.g. when order is not altered we won't save order) + int SettingsOffset; // Offset in g.SettingsTables + int LastFrameActive; + int ColumnsCount; // Number of columns declared in BeginTable() + int CurrentRow; + int CurrentColumn; + ImS16 InstanceCurrent; // Count of BeginTable() calls with same ID in the same frame (generally 0). This is a little bit similar to BeginCount for a window, but multiple table with same ID look are multiple tables, they are just synched. + ImS16 InstanceInteracted; // Mark which instance (generally 0) of the same ID is being interacted with + float RowPosY1; + float RowPosY2; + float RowMinHeight; // Height submitted to TableNextRow() + float RowTextBaseline; + float RowIndentOffsetX; + ImGuiTableRowFlags RowFlags : 16; // Current row flags, see ImGuiTableRowFlags_ + ImGuiTableRowFlags LastRowFlags : 16; + int RowBgColorCounter; // Counter for alternating background colors (can be fast-forwarded by e.g clipper), not same as CurrentRow because header rows typically don't increase this. + ImU32 RowBgColor[2]; // Background color override for current row. 
+ ImU32 BorderColorStrong; + ImU32 BorderColorLight; + float BorderX1; + float BorderX2; + float HostIndentX; + float MinColumnWidth; + float OuterPaddingX; + float CellPaddingX; // Padding from each border + float CellPaddingY; + float CellSpacingX1; // Spacing between non-bordered cells + float CellSpacingX2; + float LastOuterHeight; // Outer height from last frame + float LastFirstRowHeight; // Height of first row from last frame + float InnerWidth; // User value passed to BeginTable(), see comments at the top of BeginTable() for details. + float ColumnsGivenWidth; // Sum of current column width + float ColumnsAutoFitWidth; // Sum of ideal column width in order nothing to be clipped, used for auto-fitting and content width submission in outer window + float ResizedColumnNextWidth; + float ResizeLockMinContentsX2; // Lock minimum contents width while resizing down in order to not create feedback loops. But we allow growing the table. + float RefScale; // Reference scale to be able to rescale columns on font/dpi changes. + ImRect OuterRect; // Note: for non-scrolling table, OuterRect.Max.y is often FLT_MAX until EndTable(), unless a height has been specified in BeginTable(). + ImRect InnerRect; // InnerRect but without decoration. As with OuterRect, for non-scrolling tables, InnerRect.Max.y is (NOTE(review): sentence was truncated here; presumably also FLT_MAX until EndTable() - TODO confirm) + ImRect WorkRect; + ImRect InnerClipRect; + ImRect BgClipRect; // We use this to cpu-clip cell background color fill + ImRect Bg0ClipRectForDrawCmd; // Actual ImDrawCmd clip rect for BG0/1 channel. This tends to be == OuterWindow->ClipRect at BeginTable() because output in BG0/BG1 is cpu-clipped + ImRect Bg2ClipRectForDrawCmd; // Actual ImDrawCmd clip rect for BG2 channel. This tends to be a correct, tight-fit, because output to BG2 are done by widgets relying on regular ClipRect. + ImRect HostClipRect; // This is used to check if we can eventually merge our columns draw calls into the current draw call of the current window.
+ ImRect HostBackupWorkRect; // Backup of InnerWindow->WorkRect at the end of BeginTable() + ImRect HostBackupParentWorkRect; // Backup of InnerWindow->ParentWorkRect at the end of BeginTable() + ImRect HostBackupInnerClipRect; // Backup of InnerWindow->ClipRect during PushTableBackground()/PopTableBackground() + ImVec2 HostBackupPrevLineSize; // Backup of InnerWindow->DC.PrevLineSize at the end of BeginTable() + ImVec2 HostBackupCurrLineSize; // Backup of InnerWindow->DC.CurrLineSize at the end of BeginTable() + ImVec2 HostBackupCursorMaxPos; // Backup of InnerWindow->DC.CursorMaxPos at the end of BeginTable() + ImVec2 UserOuterSize; // outer_size.x passed to BeginTable() + ImVec1 HostBackupColumnsOffset; // Backup of OuterWindow->DC.ColumnsOffset at the end of BeginTable() + float HostBackupItemWidth; // Backup of OuterWindow->DC.ItemWidth at the end of BeginTable() + int HostBackupItemWidthStackSize;// Backup of OuterWindow->DC.ItemWidthStack.Size at the end of BeginTable() + ImGuiWindow* OuterWindow; // Parent window for the table + ImGuiWindow* InnerWindow; // Window holding the table data (== OuterWindow or a child window) + ImGuiTextBuffer ColumnsNames; // Contiguous buffer holding columns names + ImDrawListSplitter DrawSplitter; // We carry our own ImDrawList splitter to allow recursion (FIXME: could be stored outside, worst case we need 1 splitter per recursing table) + ImGuiTableColumnSortSpecs SortSpecsSingle; + ImVector<ImGuiTableColumnSortSpecs> SortSpecsMulti; // FIXME-OPT: Using a small-vector pattern would be good.
+ ImGuiTableSortSpecs SortSpecs; // Public facing sorts specs, this is what we return in TableGetSortSpecs() + ImGuiTableColumnIdx SortSpecsCount; + ImGuiTableColumnIdx ColumnsEnabledCount; // Number of enabled columns (<= ColumnsCount) + ImGuiTableColumnIdx ColumnsEnabledFixedCount; // Number of enabled columns (<= ColumnsCount) + ImGuiTableColumnIdx DeclColumnsCount; // Count calls to TableSetupColumn() + ImGuiTableColumnIdx HoveredColumnBody; // Index of column whose visible region is being hovered. Important: == ColumnsCount when hovering empty region after the right-most column! + ImGuiTableColumnIdx HoveredColumnBorder; // Index of column whose right-border is being hovered (for resizing). + ImGuiTableColumnIdx AutoFitSingleColumn; // Index of single column requesting auto-fit. + ImGuiTableColumnIdx ResizedColumn; // Index of column being resized. Reset when InstanceCurrent==0. + ImGuiTableColumnIdx LastResizedColumn; // Index of column being resized from previous frame. + ImGuiTableColumnIdx HeldHeaderColumn; // Index of column header being held. + ImGuiTableColumnIdx ReorderColumn; // Index of column being reordered. (not cleared) + ImGuiTableColumnIdx ReorderColumnDir; // -1 or +1 + ImGuiTableColumnIdx LeftMostStretchedColumn; // Index of left-most stretched column. + ImGuiTableColumnIdx RightMostStretchedColumn; // Index of right-most stretched column. + ImGuiTableColumnIdx RightMostEnabledColumn; // Index of right-most non-hidden column. 
+ ImGuiTableColumnIdx ContextPopupColumn; // Column right-clicked on, or -1 if opening context menu from a neutral/empty spot + ImGuiTableColumnIdx FreezeRowsRequest; // Requested frozen rows count + ImGuiTableColumnIdx FreezeRowsCount; // Actual frozen row count (== FreezeRowsRequest, or == 0 when no scrolling offset) + ImGuiTableColumnIdx FreezeColumnsRequest; // Requested frozen columns count + ImGuiTableColumnIdx FreezeColumnsCount; // Actual frozen columns count (== FreezeColumnsRequest, or == 0 when no scrolling offset) + ImGuiTableColumnIdx RowCellDataCurrent; // Index of current RowCellData[] entry in current row + ImGuiTableDrawChannelIdx DummyDrawChannel; // Redirect non-visible columns here. + ImGuiTableDrawChannelIdx Bg2DrawChannelCurrent; // For Selectable() and other widgets drawing across columns after the freezing line. Index within DrawSplitter.Channels[] + ImGuiTableDrawChannelIdx Bg2DrawChannelUnfrozen; + bool IsLayoutLocked; // Set by TableUpdateLayout() which is called when beginning the first row. + bool IsInsideRow; // Set when inside TableBeginRow()/TableEndRow(). + bool IsInitializing; + bool IsSortSpecsDirty; + bool IsUsingHeaders; // Set when the first row had the ImGuiTableRowFlags_Headers flag. + bool IsContextPopupOpen; // Set when default context menu is open (also see: ContextPopupColumn, InstanceInteracted). + bool IsSettingsRequestLoad; + bool IsSettingsDirty; // Set when table settings have changed and needs to be reported into ImGuiTableSettings data. + bool IsDefaultDisplayOrder; // Set when display order is unchanged from default (DisplayOrder contains 0...Count-1) + bool IsResetAllRequest; + bool IsResetDisplayOrderRequest; + bool IsUnfrozenRows; // Set when we got past the frozen row.
+ bool IsDefaultSizingPolicy; // Set if user didn't explicitly set a sizing policy in BeginTable() + bool MemoryCompacted; + bool HostSkipItems; // Backup of InnerWindow->SkipItem at the end of BeginTable(), because we will overwrite InnerWindow->SkipItem on a per-column basis + + IMGUI_API ImGuiTable() { memset(this, 0, sizeof(*this)); LastFrameActive = -1; } + IMGUI_API ~ImGuiTable() { IM_FREE(RawData); } +}; + +// sizeof() ~ 12 +struct ImGuiTableColumnSettings +{ + float WidthOrWeight; + ImGuiID UserID; + ImGuiTableColumnIdx Index; + ImGuiTableColumnIdx DisplayOrder; + ImGuiTableColumnIdx SortOrder; + ImU8 SortDirection : 2; + ImU8 IsEnabled : 1; // "Visible" in ini file + ImU8 IsStretch : 1; + + ImGuiTableColumnSettings() + { + WidthOrWeight = 0.0f; + UserID = 0; + Index = -1; + DisplayOrder = SortOrder = -1; + SortDirection = ImGuiSortDirection_None; + IsEnabled = 1; + IsStretch = 0; + } +}; + +// This is designed to be stored in a single ImChunkStream (1 header followed by N ImGuiTableColumnSettings, etc.) +struct ImGuiTableSettings +{ + ImGuiID ID; // Set to 0 to invalidate/delete the setting + ImGuiTableFlags SaveFlags; // Indicate data we want to save using the Resizable/Reorderable/Sortable/Hideable flags (could be using its own flags..) + float RefScale; // Reference scale to be able to rescale columns on font/dpi changes.
+ ImGuiTableColumnIdx ColumnsCount; + ImGuiTableColumnIdx ColumnsCountMax; // Maximum number of columns this settings instance can store, we can recycle a settings instance with lower number of columns but not higher + bool WantApply; // Set when loaded from .ini data (to enable merging/loading .ini data into an already running context) + + ImGuiTableSettings() { memset(this, 0, sizeof(*this)); } + ImGuiTableColumnSettings* GetColumnSettings() { return (ImGuiTableColumnSettings*)(this + 1); } +}; + +#endif // #ifdef IMGUI_HAS_TABLE + +//----------------------------------------------------------------------------- +// [SECTION] ImGui internal API +// No guarantee of forward compatibility here! +//----------------------------------------------------------------------------- + +namespace ImGui +{ + // Windows + // We should always have a CurrentWindow in the stack (there is an implicit "Debug" window) + // If this ever crash because g.CurrentWindow is NULL it means that either + // - ImGui::NewFrame() has never been called, which is illegal. + // - You are calling ImGui functions after ImGui::EndFrame()/ImGui::Render() and before the next ImGui::NewFrame(), which is also illegal. 
+ inline ImGuiWindow* GetCurrentWindowRead() { ImGuiContext& g = *GImGui; return g.CurrentWindow; } + inline ImGuiWindow* GetCurrentWindow() { ImGuiContext& g = *GImGui; g.CurrentWindow->WriteAccessed = true; return g.CurrentWindow; } + IMGUI_API ImGuiWindow* FindWindowByID(ImGuiID id); + IMGUI_API ImGuiWindow* FindWindowByName(const char* name); + IMGUI_API void UpdateWindowParentAndRootLinks(ImGuiWindow* window, ImGuiWindowFlags flags, ImGuiWindow* parent_window); + IMGUI_API ImVec2 CalcWindowNextAutoFitSize(ImGuiWindow* window); + IMGUI_API bool IsWindowChildOf(ImGuiWindow* window, ImGuiWindow* potential_parent); + IMGUI_API bool IsWindowAbove(ImGuiWindow* potential_above, ImGuiWindow* potential_below); + IMGUI_API bool IsWindowNavFocusable(ImGuiWindow* window); + IMGUI_API ImRect GetWindowAllowedExtentRect(ImGuiWindow* window); + IMGUI_API void SetWindowPos(ImGuiWindow* window, const ImVec2& pos, ImGuiCond cond = 0); + IMGUI_API void SetWindowSize(ImGuiWindow* window, const ImVec2& size, ImGuiCond cond = 0); + IMGUI_API void SetWindowCollapsed(ImGuiWindow* window, bool collapsed, ImGuiCond cond = 0); + IMGUI_API void SetWindowHitTestHole(ImGuiWindow* window, const ImVec2& pos, const ImVec2& size); + + // Windows: Display Order and Focus Order + IMGUI_API void FocusWindow(ImGuiWindow* window); + IMGUI_API void FocusTopMostWindowUnderOne(ImGuiWindow* under_this_window, ImGuiWindow* ignore_window); + IMGUI_API void BringWindowToFocusFront(ImGuiWindow* window); + IMGUI_API void BringWindowToDisplayFront(ImGuiWindow* window); + IMGUI_API void BringWindowToDisplayBack(ImGuiWindow* window); + + // Fonts, drawing + IMGUI_API void SetCurrentFont(ImFont* font); + inline ImFont* GetDefaultFont() { ImGuiContext& g = *GImGui; return g.IO.FontDefault ? 
g.IO.FontDefault : g.IO.Fonts->Fonts[0]; } + inline ImDrawList* GetForegroundDrawList(ImGuiWindow* window) { IM_UNUSED(window); ImGuiContext& g = *GImGui; return &g.ForegroundDrawList; } // This seemingly unnecessary wrapper simplifies compatibility between the 'master' and 'docking' branches. + + // Init + IMGUI_API void Initialize(ImGuiContext* context); + IMGUI_API void Shutdown(ImGuiContext* context); // Since 1.60 this is a _private_ function. You can call DestroyContext() to destroy the context created by CreateContext(). + + // NewFrame + IMGUI_API void UpdateHoveredWindowAndCaptureFlags(); + IMGUI_API void StartMouseMovingWindow(ImGuiWindow* window); + IMGUI_API void UpdateMouseMovingWindowNewFrame(); + IMGUI_API void UpdateMouseMovingWindowEndFrame(); + + // Generic context hooks + IMGUI_API ImGuiID AddContextHook(ImGuiContext* context, const ImGuiContextHook* hook); + IMGUI_API void RemoveContextHook(ImGuiContext* context, ImGuiID hook_to_remove); + IMGUI_API void CallContextHooks(ImGuiContext* context, ImGuiContextHookType type); + + // Settings + IMGUI_API void MarkIniSettingsDirty(); + IMGUI_API void MarkIniSettingsDirty(ImGuiWindow* window); + IMGUI_API void ClearIniSettings(); + IMGUI_API ImGuiWindowSettings* CreateNewWindowSettings(const char* name); + IMGUI_API ImGuiWindowSettings* FindWindowSettings(ImGuiID id); + IMGUI_API ImGuiWindowSettings* FindOrCreateWindowSettings(const char* name); + IMGUI_API ImGuiSettingsHandler* FindSettingsHandler(const char* type_name); + + // Scrolling + IMGUI_API void SetNextWindowScroll(const ImVec2& scroll); // Use -1.0f on one axis to leave as-is + IMGUI_API void SetScrollX(ImGuiWindow* window, float scroll_x); + IMGUI_API void SetScrollY(ImGuiWindow* window, float scroll_y); + IMGUI_API void SetScrollFromPosX(ImGuiWindow* window, float local_x, float center_x_ratio); + IMGUI_API void SetScrollFromPosY(ImGuiWindow* window, float local_y, float center_y_ratio); + IMGUI_API ImVec2 
ScrollToBringRectIntoView(ImGuiWindow* window, const ImRect& item_rect); + + // Basic Accessors + inline ImGuiID GetItemID() { ImGuiContext& g = *GImGui; return g.CurrentWindow->DC.LastItemId; } // Get ID of last item (~~ often same ImGui::GetID(label) beforehand) + inline ImGuiItemStatusFlags GetItemStatusFlags() { ImGuiContext& g = *GImGui; return g.CurrentWindow->DC.LastItemStatusFlags; } + inline ImGuiID GetActiveID() { ImGuiContext& g = *GImGui; return g.ActiveId; } + inline ImGuiID GetFocusID() { ImGuiContext& g = *GImGui; return g.NavId; } + inline ImGuiItemFlags GetItemsFlags() { ImGuiContext& g = *GImGui; return g.CurrentWindow->DC.ItemFlags; } + IMGUI_API void SetActiveID(ImGuiID id, ImGuiWindow* window); + IMGUI_API void SetFocusID(ImGuiID id, ImGuiWindow* window); + IMGUI_API void ClearActiveID(); + IMGUI_API ImGuiID GetHoveredID(); + IMGUI_API void SetHoveredID(ImGuiID id); + IMGUI_API void KeepAliveID(ImGuiID id); + IMGUI_API void MarkItemEdited(ImGuiID id); // Mark data associated to given item as "edited", used by IsItemDeactivatedAfterEdit() function. 
+ IMGUI_API void PushOverrideID(ImGuiID id); // Push given value as-is at the top of the ID stack (whereas PushID combines old and new hashes) + IMGUI_API ImGuiID GetIDWithSeed(const char* str_id_begin, const char* str_id_end, ImGuiID seed); + + // Basic Helpers for widget code + IMGUI_API void ItemSize(const ImVec2& size, float text_baseline_y = -1.0f); + IMGUI_API void ItemSize(const ImRect& bb, float text_baseline_y = -1.0f); + IMGUI_API bool ItemAdd(const ImRect& bb, ImGuiID id, const ImRect* nav_bb = NULL); + IMGUI_API bool ItemHoverable(const ImRect& bb, ImGuiID id); + IMGUI_API bool IsClippedEx(const ImRect& bb, ImGuiID id, bool clip_even_when_logged); + IMGUI_API void SetLastItemData(ImGuiWindow* window, ImGuiID item_id, ImGuiItemStatusFlags status_flags, const ImRect& item_rect); + IMGUI_API bool FocusableItemRegister(ImGuiWindow* window, ImGuiID id); // Return true if focus is requested + IMGUI_API void FocusableItemUnregister(ImGuiWindow* window); + IMGUI_API ImVec2 CalcItemSize(ImVec2 size, float default_w, float default_h); + IMGUI_API float CalcWrapWidthForPos(const ImVec2& pos, float wrap_pos_x); + IMGUI_API void PushMultiItemsWidths(int components, float width_full); + IMGUI_API void PushItemFlag(ImGuiItemFlags option, bool enabled); + IMGUI_API void PopItemFlag(); + IMGUI_API bool IsItemToggledSelection(); // Was the last item selection toggled? (after Selectable(), TreeNode() etc. We only returns toggle _event_ in order to handle clipping correctly) + IMGUI_API ImVec2 GetContentRegionMaxAbs(); + IMGUI_API void ShrinkWidths(ImGuiShrinkWidthItem* items, int count, float width_excess); + + // Logging/Capture + IMGUI_API void LogBegin(ImGuiLogType type, int auto_open_depth); // -> BeginCapture() when we design v2 api, for now stay under the radar by using the old name. 
+ IMGUI_API void LogToBuffer(int auto_open_depth = -1); // Start logging/capturing to internal buffer + + // Popups, Modals, Tooltips + IMGUI_API bool BeginChildEx(const char* name, ImGuiID id, const ImVec2& size_arg, bool border, ImGuiWindowFlags flags); + IMGUI_API void OpenPopupEx(ImGuiID id, ImGuiPopupFlags popup_flags = ImGuiPopupFlags_None); + IMGUI_API void ClosePopupToLevel(int remaining, bool restore_focus_to_window_under_popup); + IMGUI_API void ClosePopupsOverWindow(ImGuiWindow* ref_window, bool restore_focus_to_window_under_popup); + IMGUI_API bool IsPopupOpen(ImGuiID id, ImGuiPopupFlags popup_flags); + IMGUI_API bool BeginPopupEx(ImGuiID id, ImGuiWindowFlags extra_flags); + IMGUI_API void BeginTooltipEx(ImGuiWindowFlags extra_flags, ImGuiTooltipFlags tooltip_flags); + IMGUI_API ImGuiWindow* GetTopMostPopupModal(); + IMGUI_API ImVec2 FindBestWindowPosForPopup(ImGuiWindow* window); + IMGUI_API ImVec2 FindBestWindowPosForPopupEx(const ImVec2& ref_pos, const ImVec2& size, ImGuiDir* last_dir, const ImRect& r_outer, const ImRect& r_avoid, ImGuiPopupPositionPolicy policy); + + // Gamepad/Keyboard Navigation + IMGUI_API void NavInitWindow(ImGuiWindow* window, bool force_reinit); + IMGUI_API bool NavMoveRequestButNoResultYet(); + IMGUI_API void NavMoveRequestCancel(); + IMGUI_API void NavMoveRequestForward(ImGuiDir move_dir, ImGuiDir clip_dir, const ImRect& bb_rel, ImGuiNavMoveFlags move_flags); + IMGUI_API void NavMoveRequestTryWrapping(ImGuiWindow* window, ImGuiNavMoveFlags move_flags); + IMGUI_API float GetNavInputAmount(ImGuiNavInput n, ImGuiInputReadMode mode); + IMGUI_API ImVec2 GetNavInputAmount2d(ImGuiNavDirSourceFlags dir_sources, ImGuiInputReadMode mode, float slow_factor = 0.0f, float fast_factor = 0.0f); + IMGUI_API int CalcTypematicRepeatAmount(float t0, float t1, float repeat_delay, float repeat_rate); + IMGUI_API void ActivateItem(ImGuiID id); // Remotely activate a button, checkbox, tree node etc. given its unique ID. 
activation is queued and processed on the next frame when the item is encountered again. + IMGUI_API void SetNavID(ImGuiID id, int nav_layer, ImGuiID focus_scope_id); + IMGUI_API void SetNavIDWithRectRel(ImGuiID id, int nav_layer, ImGuiID focus_scope_id, const ImRect& rect_rel); + + // Focus Scope (WIP) + // This is generally used to identify a selection set (multiple of which may be in the same window), as selection + // patterns generally need to react (e.g. clear selection) when landing on an item of the set. + IMGUI_API void PushFocusScope(ImGuiID id); + IMGUI_API void PopFocusScope(); + inline ImGuiID GetFocusedFocusScope() { ImGuiContext& g = *GImGui; return g.NavFocusScopeId; } // Focus scope which is actually active + inline ImGuiID GetFocusScope() { ImGuiContext& g = *GImGui; return g.CurrentWindow->DC.NavFocusScopeIdCurrent; } // Focus scope we are outputting into, set by PushFocusScope() + + // Inputs + // FIXME: Eventually we should aim to move e.g. IsActiveIdUsingKey() into IsKeyXXX functions. + IMGUI_API void SetItemUsingMouseWheel(); + inline bool IsActiveIdUsingNavDir(ImGuiDir dir) { ImGuiContext& g = *GImGui; return (g.ActiveIdUsingNavDirMask & (1 << dir)) != 0; } + inline bool IsActiveIdUsingNavInput(ImGuiNavInput input) { ImGuiContext& g = *GImGui; return (g.ActiveIdUsingNavInputMask & (1 << input)) != 0; } + inline bool IsActiveIdUsingKey(ImGuiKey key) { ImGuiContext& g = *GImGui; IM_ASSERT(key < 64); return (g.ActiveIdUsingKeyInputMask & ((ImU64)1 << key)) != 0; } + IMGUI_API bool IsMouseDragPastThreshold(ImGuiMouseButton button, float lock_threshold = -1.0f); + inline bool IsKeyPressedMap(ImGuiKey key, bool repeat = true) { ImGuiContext& g = *GImGui; const int key_index = g.IO.KeyMap[key]; return (key_index >= 0) ? 
IsKeyPressed(key_index, repeat) : false; } + inline bool IsNavInputDown(ImGuiNavInput n) { ImGuiContext& g = *GImGui; return g.IO.NavInputs[n] > 0.0f; } + inline bool IsNavInputTest(ImGuiNavInput n, ImGuiInputReadMode rm) { return (GetNavInputAmount(n, rm) > 0.0f); } + IMGUI_API ImGuiKeyModFlags GetMergedKeyModFlags(); + + // Drag and Drop + IMGUI_API bool BeginDragDropTargetCustom(const ImRect& bb, ImGuiID id); + IMGUI_API void ClearDragDrop(); + IMGUI_API bool IsDragDropPayloadBeingAccepted(); + + // Internal Columns API (this is not exposed because we will encourage transitioning to the Tables API) + IMGUI_API void SetWindowClipRectBeforeSetChannel(ImGuiWindow* window, const ImRect& clip_rect); + IMGUI_API void BeginColumns(const char* str_id, int count, ImGuiOldColumnFlags flags = 0); // setup number of columns. use an identifier to distinguish multiple column sets. close with EndColumns(). + IMGUI_API void EndColumns(); // close columns + IMGUI_API void PushColumnClipRect(int column_index); + IMGUI_API void PushColumnsBackground(); + IMGUI_API void PopColumnsBackground(); + IMGUI_API ImGuiID GetColumnsID(const char* str_id, int count); + IMGUI_API ImGuiOldColumns* FindOrCreateColumns(ImGuiWindow* window, ImGuiID id); + IMGUI_API float GetColumnOffsetFromNorm(const ImGuiOldColumns* columns, float offset_norm); + IMGUI_API float GetColumnNormFromOffset(const ImGuiOldColumns* columns, float offset); + + // Tables: Candidates for public API + IMGUI_API void TableOpenContextMenu(int column_n = -1); + IMGUI_API void TableSetColumnWidth(int column_n, float width); + IMGUI_API void TableSetColumnSortDirection(int column_n, ImGuiSortDirection sort_direction, bool append_to_sort_specs); + IMGUI_API int TableGetHoveredColumn(); // May use (TableGetColumnFlags() & ImGuiTableColumnFlags_IsHovered) instead. Return hovered column. return -1 when table is not hovered. return columns_count if the unused space at the right of visible columns is hovered. 
+ IMGUI_API float TableGetHeaderRowHeight(); + IMGUI_API void TablePushBackgroundChannel(); + IMGUI_API void TablePopBackgroundChannel(); + + // Tables: Internals + IMGUI_API ImGuiTable* TableFindByID(ImGuiID id); + IMGUI_API bool BeginTableEx(const char* name, ImGuiID id, int columns_count, ImGuiTableFlags flags = 0, const ImVec2& outer_size = ImVec2(0, 0), float inner_width = 0.0f); + IMGUI_API void TableBeginInitMemory(ImGuiTable* table, int columns_count); + IMGUI_API void TableBeginApplyRequests(ImGuiTable* table); + IMGUI_API void TableSetupDrawChannels(ImGuiTable* table); + IMGUI_API void TableUpdateLayout(ImGuiTable* table); + IMGUI_API void TableUpdateBorders(ImGuiTable* table); + IMGUI_API void TableUpdateColumnsWeightFromWidth(ImGuiTable* table); + IMGUI_API void TableDrawBorders(ImGuiTable* table); + IMGUI_API void TableDrawContextMenu(ImGuiTable* table); + IMGUI_API void TableMergeDrawChannels(ImGuiTable* table); + IMGUI_API void TableSortSpecsSanitize(ImGuiTable* table); + IMGUI_API void TableSortSpecsBuild(ImGuiTable* table); + IMGUI_API ImGuiSortDirection TableGetColumnNextSortDirection(ImGuiTableColumn* column); + IMGUI_API void TableFixColumnSortDirection(ImGuiTable* table, ImGuiTableColumn* column); + IMGUI_API float TableGetColumnWidthAuto(ImGuiTable* table, ImGuiTableColumn* column); + IMGUI_API void TableBeginRow(ImGuiTable* table); + IMGUI_API void TableEndRow(ImGuiTable* table); + IMGUI_API void TableBeginCell(ImGuiTable* table, int column_n); + IMGUI_API void TableEndCell(ImGuiTable* table); + IMGUI_API ImRect TableGetCellBgRect(const ImGuiTable* table, int column_n); + IMGUI_API const char* TableGetColumnName(const ImGuiTable* table, int column_n); + IMGUI_API ImGuiID TableGetColumnResizeID(const ImGuiTable* table, int column_n, int instance_no = 0); + IMGUI_API float TableGetMaxColumnWidth(const ImGuiTable* table, int column_n); + IMGUI_API void TableSetColumnWidthAutoSingle(ImGuiTable* table, int column_n); + IMGUI_API void 
TableSetColumnWidthAutoAll(ImGuiTable* table); + IMGUI_API void TableRemove(ImGuiTable* table); + IMGUI_API void TableGcCompactTransientBuffers(ImGuiTable* table); + IMGUI_API void TableGcCompactSettings(); + + // Tables: Settings + IMGUI_API void TableLoadSettings(ImGuiTable* table); + IMGUI_API void TableSaveSettings(ImGuiTable* table); + IMGUI_API void TableResetSettings(ImGuiTable* table); + IMGUI_API ImGuiTableSettings* TableGetBoundSettings(ImGuiTable* table); + IMGUI_API void TableSettingsInstallHandler(ImGuiContext* context); + IMGUI_API ImGuiTableSettings* TableSettingsCreate(ImGuiID id, int columns_count); + IMGUI_API ImGuiTableSettings* TableSettingsFindByID(ImGuiID id); + + // Tab Bars + IMGUI_API bool BeginTabBarEx(ImGuiTabBar* tab_bar, const ImRect& bb, ImGuiTabBarFlags flags); + IMGUI_API ImGuiTabItem* TabBarFindTabByID(ImGuiTabBar* tab_bar, ImGuiID tab_id); + IMGUI_API void TabBarRemoveTab(ImGuiTabBar* tab_bar, ImGuiID tab_id); + IMGUI_API void TabBarCloseTab(ImGuiTabBar* tab_bar, ImGuiTabItem* tab); + IMGUI_API void TabBarQueueReorder(ImGuiTabBar* tab_bar, const ImGuiTabItem* tab, int dir); + IMGUI_API bool TabBarProcessReorder(ImGuiTabBar* tab_bar); + IMGUI_API bool TabItemEx(ImGuiTabBar* tab_bar, const char* label, bool* p_open, ImGuiTabItemFlags flags); + IMGUI_API ImVec2 TabItemCalcSize(const char* label, bool has_close_button); + IMGUI_API void TabItemBackground(ImDrawList* draw_list, const ImRect& bb, ImGuiTabItemFlags flags, ImU32 col); + IMGUI_API void TabItemLabelAndCloseButton(ImDrawList* draw_list, const ImRect& bb, ImGuiTabItemFlags flags, ImVec2 frame_padding, const char* label, ImGuiID tab_id, ImGuiID close_button_id, bool is_contents_visible, bool* out_just_closed, bool* out_text_clipped); + + // Render helpers + // AVOID USING OUTSIDE OF IMGUI.CPP! NOT FOR PUBLIC CONSUMPTION. THOSE FUNCTIONS ARE A MESS. THEIR SIGNATURE AND BEHAVIOR WILL CHANGE, THEY NEED TO BE REFACTORED INTO SOMETHING DECENT. 
+ // NB: All position are in absolute pixels coordinates (we are never using window coordinates internally) + IMGUI_API void RenderText(ImVec2 pos, const char* text, const char* text_end = NULL, bool hide_text_after_hash = true); + IMGUI_API void RenderTextWrapped(ImVec2 pos, const char* text, const char* text_end, float wrap_width); + IMGUI_API void RenderTextClipped(const ImVec2& pos_min, const ImVec2& pos_max, const char* text, const char* text_end, const ImVec2* text_size_if_known, const ImVec2& align = ImVec2(0, 0), const ImRect* clip_rect = NULL); + IMGUI_API void RenderTextClippedEx(ImDrawList* draw_list, const ImVec2& pos_min, const ImVec2& pos_max, const char* text, const char* text_end, const ImVec2* text_size_if_known, const ImVec2& align = ImVec2(0, 0), const ImRect* clip_rect = NULL); + IMGUI_API void RenderTextEllipsis(ImDrawList* draw_list, const ImVec2& pos_min, const ImVec2& pos_max, float clip_max_x, float ellipsis_max_x, const char* text, const char* text_end, const ImVec2* text_size_if_known); + IMGUI_API void RenderFrame(ImVec2 p_min, ImVec2 p_max, ImU32 fill_col, bool border = true, float rounding = 0.0f); + IMGUI_API void RenderFrameBorder(ImVec2 p_min, ImVec2 p_max, float rounding = 0.0f); + IMGUI_API void RenderColorRectWithAlphaCheckerboard(ImDrawList* draw_list, ImVec2 p_min, ImVec2 p_max, ImU32 fill_col, float grid_step, ImVec2 grid_off, float rounding = 0.0f, int rounding_corners_flags = ~0); + IMGUI_API void RenderNavHighlight(const ImRect& bb, ImGuiID id, ImGuiNavHighlightFlags flags = ImGuiNavHighlightFlags_TypeDefault); // Navigation highlight + IMGUI_API const char* FindRenderedTextEnd(const char* text, const char* text_end = NULL); // Find the optional ## from which we stop displaying text. + IMGUI_API void LogRenderedText(const ImVec2* ref_pos, const char* text, const char* text_end = NULL); + + // Render helpers (those functions don't access any ImGui state!) 
+ IMGUI_API void RenderArrow(ImDrawList* draw_list, ImVec2 pos, ImU32 col, ImGuiDir dir, float scale = 1.0f); + IMGUI_API void RenderBullet(ImDrawList* draw_list, ImVec2 pos, ImU32 col); + IMGUI_API void RenderCheckMark(ImDrawList* draw_list, ImVec2 pos, ImU32 col, float sz); + IMGUI_API void RenderMouseCursor(ImDrawList* draw_list, ImVec2 pos, float scale, ImGuiMouseCursor mouse_cursor, ImU32 col_fill, ImU32 col_border, ImU32 col_shadow); + IMGUI_API void RenderArrowPointingAt(ImDrawList* draw_list, ImVec2 pos, ImVec2 half_sz, ImGuiDir direction, ImU32 col); + IMGUI_API void RenderRectFilledRangeH(ImDrawList* draw_list, const ImRect& rect, ImU32 col, float x_start_norm, float x_end_norm, float rounding); + IMGUI_API void RenderRectFilledWithHole(ImDrawList* draw_list, ImRect outer, ImRect inner, ImU32 col, float rounding); + +#ifndef IMGUI_DISABLE_OBSOLETE_FUNCTIONS + // [1.71: 2019/06/07: Updating prototypes of some of the internal functions. Leaving those for reference for a short while] + inline void RenderArrow(ImVec2 pos, ImGuiDir dir, float scale=1.0f) { ImGuiWindow* window = GetCurrentWindow(); RenderArrow(window->DrawList, pos, GetColorU32(ImGuiCol_Text), dir, scale); } + inline void RenderBullet(ImVec2 pos) { ImGuiWindow* window = GetCurrentWindow(); RenderBullet(window->DrawList, pos, GetColorU32(ImGuiCol_Text)); } +#endif + + // Widgets + IMGUI_API void TextEx(const char* text, const char* text_end = NULL, ImGuiTextFlags flags = 0); + IMGUI_API bool ButtonEx(const char* label, const ImVec2& size_arg = ImVec2(0, 0), ImGuiButtonFlags flags = 0); + IMGUI_API bool CloseButton(ImGuiID id, const ImVec2& pos); + IMGUI_API bool CollapseButton(ImGuiID id, const ImVec2& pos); + IMGUI_API bool ArrowButtonEx(const char* str_id, ImGuiDir dir, ImVec2 size_arg, ImGuiButtonFlags flags = 0); + IMGUI_API void Scrollbar(ImGuiAxis axis); + IMGUI_API bool ScrollbarEx(const ImRect& bb, ImGuiID id, ImGuiAxis axis, float* p_scroll_v, float avail_v, float contents_v, 
ImDrawCornerFlags rounding_corners); + IMGUI_API bool ImageButtonEx(ImGuiID id, ImTextureID texture_id, const ImVec2& size, const ImVec2& uv0, const ImVec2& uv1, const ImVec2& padding, const ImVec4& bg_col, const ImVec4& tint_col); + IMGUI_API ImRect GetWindowScrollbarRect(ImGuiWindow* window, ImGuiAxis axis); + IMGUI_API ImGuiID GetWindowScrollbarID(ImGuiWindow* window, ImGuiAxis axis); + IMGUI_API ImGuiID GetWindowResizeID(ImGuiWindow* window, int n); // 0..3: corners, 4..7: borders + IMGUI_API void SeparatorEx(ImGuiSeparatorFlags flags); + IMGUI_API bool CheckboxFlags(const char* label, ImS64* flags, ImS64 flags_value); + IMGUI_API bool CheckboxFlags(const char* label, ImU64* flags, ImU64 flags_value); + + // Widgets low-level behaviors + IMGUI_API bool ButtonBehavior(const ImRect& bb, ImGuiID id, bool* out_hovered, bool* out_held, ImGuiButtonFlags flags = 0); + IMGUI_API bool DragBehavior(ImGuiID id, ImGuiDataType data_type, void* p_v, float v_speed, const void* p_min, const void* p_max, const char* format, ImGuiSliderFlags flags); + IMGUI_API bool SliderBehavior(const ImRect& bb, ImGuiID id, ImGuiDataType data_type, void* p_v, const void* p_min, const void* p_max, const char* format, ImGuiSliderFlags flags, ImRect* out_grab_bb); + IMGUI_API bool SplitterBehavior(const ImRect& bb, ImGuiID id, ImGuiAxis axis, float* size1, float* size2, float min_size1, float min_size2, float hover_extend = 0.0f, float hover_visibility_delay = 0.0f); + IMGUI_API bool TreeNodeBehavior(ImGuiID id, ImGuiTreeNodeFlags flags, const char* label, const char* label_end = NULL); + IMGUI_API bool TreeNodeBehaviorIsOpen(ImGuiID id, ImGuiTreeNodeFlags flags = 0); // Consume previous SetNextItemOpen() data, if any. May return true when logging + IMGUI_API void TreePushOverrideID(ImGuiID id); + + // Template functions are instantiated in imgui_widgets.cpp for a finite number of types. 
+ // To use them externally (for custom widget) you may need an "extern template" statement in your code in order to link to existing instances and silence Clang warnings (see #2036). + // e.g. " extern template IMGUI_API float RoundScalarWithFormatT<float, float>(const char* format, ImGuiDataType data_type, float v); " + template<typename T, typename SIGNED_T, typename FLOAT_T> IMGUI_API float ScaleRatioFromValueT(ImGuiDataType data_type, T v, T v_min, T v_max, bool is_logarithmic, float logarithmic_zero_epsilon, float zero_deadzone_size); + template<typename T, typename SIGNED_T, typename FLOAT_T> IMGUI_API T ScaleValueFromRatioT(ImGuiDataType data_type, float t, T v_min, T v_max, bool is_logarithmic, float logarithmic_zero_epsilon, float zero_deadzone_size); + template<typename T, typename SIGNED_T, typename FLOAT_T> IMGUI_API bool DragBehaviorT(ImGuiDataType data_type, T* v, float v_speed, T v_min, T v_max, const char* format, ImGuiSliderFlags flags); + template<typename T, typename SIGNED_T, typename FLOAT_T> IMGUI_API bool SliderBehaviorT(const ImRect& bb, ImGuiID id, ImGuiDataType data_type, T* v, T v_min, T v_max, const char* format, ImGuiSliderFlags flags, ImRect* out_grab_bb); + template<typename T, typename SIGNED_T> IMGUI_API T RoundScalarWithFormatT(const char* format, ImGuiDataType data_type, T v); + template<typename T> IMGUI_API bool CheckboxFlagsT(const char* label, T* flags, T flags_value); + + // Data type helpers + IMGUI_API const ImGuiDataTypeInfo* DataTypeGetInfo(ImGuiDataType data_type); + IMGUI_API int DataTypeFormatString(char* buf, int buf_size, ImGuiDataType data_type, const void* p_data, const char* format); + IMGUI_API void DataTypeApplyOp(ImGuiDataType data_type, int op, void* output, const void* arg_1, const void* arg_2); + IMGUI_API bool DataTypeApplyOpFromText(const char* buf, const char* initial_value_buf, ImGuiDataType data_type, void* p_data, const char* format); + IMGUI_API int DataTypeCompare(ImGuiDataType data_type, const void* arg_1, const void* arg_2); + IMGUI_API bool DataTypeClamp(ImGuiDataType data_type, void* p_data, const void* p_min, const void* p_max); + + // InputText + IMGUI_API bool InputTextEx(const char* label, const char* hint, char* buf, int buf_size, const
ImVec2& size_arg, ImGuiInputTextFlags flags, ImGuiInputTextCallback callback = NULL, void* user_data = NULL); + IMGUI_API bool TempInputText(const ImRect& bb, ImGuiID id, const char* label, char* buf, int buf_size, ImGuiInputTextFlags flags); + IMGUI_API bool TempInputScalar(const ImRect& bb, ImGuiID id, const char* label, ImGuiDataType data_type, void* p_data, const char* format, const void* p_clamp_min = NULL, const void* p_clamp_max = NULL); + inline bool TempInputIsActive(ImGuiID id) { ImGuiContext& g = *GImGui; return (g.ActiveId == id && g.TempInputId == id); } + inline ImGuiInputTextState* GetInputTextState(ImGuiID id) { ImGuiContext& g = *GImGui; return (g.InputTextState.ID == id) ? &g.InputTextState : NULL; } // Get input text state if active + + // Color + IMGUI_API void ColorTooltip(const char* text, const float* col, ImGuiColorEditFlags flags); + IMGUI_API void ColorEditOptionsPopup(const float* col, ImGuiColorEditFlags flags); + IMGUI_API void ColorPickerOptionsPopup(const float* ref_col, ImGuiColorEditFlags flags); + + // Plot + IMGUI_API int PlotEx(ImGuiPlotType plot_type, const char* label, float (*values_getter)(void* data, int idx), void* data, int values_count, int values_offset, const char* overlay_text, float scale_min, float scale_max, ImVec2 frame_size); + + // Shade functions (write over already created vertices) + IMGUI_API void ShadeVertsLinearColorGradientKeepAlpha(ImDrawList* draw_list, int vert_start_idx, int vert_end_idx, ImVec2 gradient_p0, ImVec2 gradient_p1, ImU32 col0, ImU32 col1); + IMGUI_API void ShadeVertsLinearUV(ImDrawList* draw_list, int vert_start_idx, int vert_end_idx, const ImVec2& a, const ImVec2& b, const ImVec2& uv_a, const ImVec2& uv_b, bool clamp); + + // Garbage collection + IMGUI_API void GcCompactTransientMiscBuffers(); + IMGUI_API void GcCompactTransientWindowBuffers(ImGuiWindow* window); + IMGUI_API void GcAwakeTransientWindowBuffers(ImGuiWindow* window); + + // Debug Tools + IMGUI_API void 
ErrorCheckEndFrameRecover(ImGuiErrorLogCallback log_callback, void* user_data = NULL); + inline void DebugDrawItemRect(ImU32 col = IM_COL32(255,0,0,255)) { ImGuiContext& g = *GImGui; ImGuiWindow* window = g.CurrentWindow; GetForegroundDrawList(window)->AddRect(window->DC.LastItemRect.Min, window->DC.LastItemRect.Max, col); } + inline void DebugStartItemPicker() { ImGuiContext& g = *GImGui; g.DebugItemPickerActive = true; } + + IMGUI_API void DebugNodeColumns(ImGuiOldColumns* columns); + IMGUI_API void DebugNodeDrawList(ImGuiWindow* window, const ImDrawList* draw_list, const char* label); + IMGUI_API void DebugNodeDrawCmdShowMeshAndBoundingBox(ImGuiWindow* window, const ImDrawList* draw_list, const ImDrawCmd* draw_cmd, bool show_mesh, bool show_aabb); + IMGUI_API void DebugNodeStorage(ImGuiStorage* storage, const char* label); + IMGUI_API void DebugNodeTabBar(ImGuiTabBar* tab_bar, const char* label); + IMGUI_API void DebugNodeTable(ImGuiTable* table); + IMGUI_API void DebugNodeTableSettings(ImGuiTableSettings* settings); + IMGUI_API void DebugNodeWindow(ImGuiWindow* window, const char* label); + IMGUI_API void DebugNodeWindowSettings(ImGuiWindowSettings* settings); + IMGUI_API void DebugNodeWindowsList(ImVector<ImGuiWindow*>* windows, const char* label); + +} // namespace ImGui + + +//----------------------------------------------------------------------------- +// [SECTION] ImFontAtlas internal API +//----------------------------------------------------------------------------- + +// This structure is likely to evolve as we add support for incremental atlas updates +struct ImFontBuilderIO +{ + bool (*FontBuilder_Build)(ImFontAtlas* atlas); +}; + +// Helper for font builder +IMGUI_API const ImFontBuilderIO* ImFontAtlasGetBuilderForStbTruetype(); +IMGUI_API void ImFontAtlasBuildInit(ImFontAtlas* atlas); +IMGUI_API void ImFontAtlasBuildSetupFont(ImFontAtlas* atlas, ImFont* font, ImFontConfig* font_config, float ascent, float descent); +IMGUI_API void
ImFontAtlasBuildPackCustomRects(ImFontAtlas* atlas, void* stbrp_context_opaque); +IMGUI_API void ImFontAtlasBuildFinish(ImFontAtlas* atlas); +IMGUI_API void ImFontAtlasBuildRender8bppRectFromString(ImFontAtlas* atlas, int atlas_x, int atlas_y, int w, int h, const char* in_str, char in_marker_char, unsigned char in_marker_pixel_value); +IMGUI_API void ImFontAtlasBuildMultiplyCalcLookupTable(unsigned char out_table[256], float in_multiply_factor); +IMGUI_API void ImFontAtlasBuildMultiplyRectAlpha8(const unsigned char table[256], unsigned char* pixels, int x, int y, int w, int h, int stride); + +//----------------------------------------------------------------------------- +// [SECTION] Test Engine specific hooks (imgui_test_engine) +//----------------------------------------------------------------------------- + +#ifdef IMGUI_ENABLE_TEST_ENGINE +extern void ImGuiTestEngineHook_ItemAdd(ImGuiContext* ctx, const ImRect& bb, ImGuiID id); +extern void ImGuiTestEngineHook_ItemInfo(ImGuiContext* ctx, ImGuiID id, const char* label, ImGuiItemStatusFlags flags); +extern void ImGuiTestEngineHook_IdInfo(ImGuiContext* ctx, ImGuiDataType data_type, ImGuiID id, const void* data_id); +extern void ImGuiTestEngineHook_IdInfo(ImGuiContext* ctx, ImGuiDataType data_type, ImGuiID id, const void* data_id, const void* data_id_end); +extern void ImGuiTestEngineHook_Log(ImGuiContext* ctx, const char* fmt, ...); +#define IMGUI_TEST_ENGINE_ITEM_ADD(_BB,_ID) if (g.TestEngineHookItems) ImGuiTestEngineHook_ItemAdd(&g, _BB, _ID) // Register item bounding box +#define IMGUI_TEST_ENGINE_ITEM_INFO(_ID,_LABEL,_FLAGS) if (g.TestEngineHookItems) ImGuiTestEngineHook_ItemInfo(&g, _ID, _LABEL, _FLAGS) // Register item label and status flags (optional) +#define IMGUI_TEST_ENGINE_LOG(_FMT,...) 
if (g.TestEngineHookItems) ImGuiTestEngineHook_Log(&g, _FMT, __VA_ARGS__) // Custom log entry from user land into test log +#define IMGUI_TEST_ENGINE_ID_INFO(_ID,_TYPE,_DATA) if (g.TestEngineHookIdInfo == id) ImGuiTestEngineHook_IdInfo(&g, _TYPE, _ID, (const void*)(_DATA)); +#define IMGUI_TEST_ENGINE_ID_INFO2(_ID,_TYPE,_DATA,_DATA2) if (g.TestEngineHookIdInfo == id) ImGuiTestEngineHook_IdInfo(&g, _TYPE, _ID, (const void*)(_DATA), (const void*)(_DATA2)); +#else +#define IMGUI_TEST_ENGINE_ITEM_ADD(_BB,_ID) do { } while (0) +#define IMGUI_TEST_ENGINE_ITEM_INFO(_ID,_LABEL,_FLAGS) do { } while (0) +#define IMGUI_TEST_ENGINE_LOG(_FMT,...) do { } while (0) +#define IMGUI_TEST_ENGINE_ID_INFO(_ID,_TYPE,_DATA) do { } while (0) +#define IMGUI_TEST_ENGINE_ID_INFO2(_ID,_TYPE,_DATA,_DATA2) do { } while (0) +#endif + +//----------------------------------------------------------------------------- + +#if defined(__clang__) +#pragma clang diagnostic pop +#elif defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + +#ifdef _MSC_VER +#pragma warning (pop) +#endif + +#endif // #ifndef IMGUI_DISABLE diff --git a/cpp-projects/3d-engine/imgui/imgui_tables.cpp b/cpp-projects/3d-engine/imgui/imgui_tables.cpp new file mode 100644 index 0000000..ac38185 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/imgui_tables.cpp @@ -0,0 +1,3923 @@ +// dear imgui, v1.81 WIP +// (tables and columns code) + +/* + +Index of this file: + +// [SECTION] Commentary +// [SECTION] Header mess +// [SECTION] Tables: Main code +// [SECTION] Tables: Row changes +// [SECTION] Tables: Columns changes +// [SECTION] Tables: Columns width management +// [SECTION] Tables: Drawing +// [SECTION] Tables: Sorting +// [SECTION] Tables: Headers +// [SECTION] Tables: Context Menu +// [SECTION] Tables: Settings (.ini data) +// [SECTION] Tables: Garbage Collection +// [SECTION] Tables: Debugging +// [SECTION] Columns, BeginColumns, EndColumns, etc. 
+ +*/ + +// Navigating this file: +// - In Visual Studio IDE: CTRL+comma ("Edit.NavigateTo") can follow symbols in comments, whereas CTRL+F12 ("Edit.GoToImplementation") cannot. +// - With Visual Assist installed: ALT+G ("VAssistX.GoToImplementation") can also follow symbols in comments. + +//----------------------------------------------------------------------------- +// [SECTION] Commentary +//----------------------------------------------------------------------------- + +//----------------------------------------------------------------------------- +// Typical tables call flow: (root level is generally public API): +//----------------------------------------------------------------------------- +// - BeginTable() user begin into a table +// | BeginChild() - (if ScrollX/ScrollY is set) +// | TableBeginInitMemory() - first time table is used +// | TableResetSettings() - on settings reset +// | TableLoadSettings() - on settings load +// | TableBeginApplyRequests() - apply queued resizing/reordering/hiding requests +// | - TableSetColumnWidth() - apply resizing width (for mouse resize, often requested by previous frame) +// | - TableUpdateColumnsWeightFromWidth()- recompute columns weights (of stretch columns) from their respective width +// - TableSetupColumn() user submit columns details (optional) +// - TableSetupScrollFreeze() user submit scroll freeze information (optional) +//----------------------------------------------------------------------------- +// - TableUpdateLayout() [Internal] followup to BeginTable(): setup everything: widths, columns positions, clipping rectangles. Automatically called by the FIRST call to TableNextRow() or TableHeadersRow(). 
+// | TableSetupDrawChannels() - setup ImDrawList channels +// | TableUpdateBorders() - detect hovering columns for resize, ahead of contents submission +// | TableDrawContextMenu() - draw right-click context menu +//----------------------------------------------------------------------------- +// - TableHeadersRow() or TableHeader() user submit a headers row (optional) +// | TableSortSpecsClickColumn() - when left-clicked: alter sort order and sort direction +// | TableOpenContextMenu() - when right-clicked: trigger opening of the default context menu +// - TableGetSortSpecs() user queries updated sort specs (optional, generally after submitting headers) +// - TableNextRow() user begin into a new row (also automatically called by TableHeadersRow()) +// | TableEndRow() - finish existing row +// | TableBeginRow() - add a new row +// - TableSetColumnIndex() / TableNextColumn() user begin into a cell +// | TableEndCell() - close existing column/cell +// | TableBeginCell() - enter into current column/cell +// - [...] user emit contents +//----------------------------------------------------------------------------- +// - EndTable() user ends the table +// | TableDrawBorders() - draw outer borders, inner vertical borders +// | TableMergeDrawChannels() - merge draw channels if clipping isn't required +// | EndChild() - (if ScrollX/ScrollY is set) +//----------------------------------------------------------------------------- + +//----------------------------------------------------------------------------- +// TABLE SIZING +//----------------------------------------------------------------------------- +// (Read carefully because this is subtle but it does make sense!) +//----------------------------------------------------------------------------- +// About 'outer_size': +// Its meaning needs to differ slightly depending of if we are using ScrollX/ScrollY flags. +// Default value is ImVec2(0.0f, 0.0f). 
+// X +// - outer_size.x <= 0.0f -> Right-align from window/work-rect right-most edge. With -FLT_MIN or 0.0f will align exactly on right-most edge. +// - outer_size.x > 0.0f -> Set Fixed width. +// Y with ScrollX/ScrollY disabled: we output table directly in current window +// - outer_size.y < 0.0f -> Bottom-align (but will auto extend, unless _NoHostExtendY is set). Not meaningful if parent window can vertically scroll. +// - outer_size.y = 0.0f -> No minimum height (but will auto extend, unless _NoHostExtendY is set) +// - outer_size.y > 0.0f -> Set Minimum height (but will auto extend, unless _NoHostExtendY is set) +// Y with ScrollX/ScrollY enabled: using a child window for scrolling +// - outer_size.y < 0.0f -> Bottom-align. Not meaningful if parent window can vertically scroll. +// - outer_size.y = 0.0f -> Bottom-align, consistent with BeginChild(). Not recommended unless table is last item in parent window. +// - outer_size.y > 0.0f -> Set Exact height. Recommended when using Scrolling on any axis. +//----------------------------------------------------------------------------- +// Outer size is also affected by the NoHostExtendX/NoHostExtendY flags. +// Important to note that the two flags have slightly different behaviors! +// - ImGuiTableFlags_NoHostExtendX -> Make outer width auto-fit to columns (overriding outer_size.x value). Only available when ScrollX/ScrollY are disabled and Stretch columns are not used. +// - ImGuiTableFlags_NoHostExtendY -> Make outer height stop exactly at outer_size.y (prevent auto-extending table past the limit). Only available when ScrollX/ScrollY are disabled. Data below the limit will be clipped and not visible. +// In theory ImGuiTableFlags_NoHostExtendY could be the default and any non-scrolling tables with outer_size.y != 0.0f would use exact height. 
+// This would be consistent but perhaps less useful and more confusing (as vertically clipped items are not easily noticeable) +//----------------------------------------------------------------------------- +// About 'inner_width': +// With ScrollX disabled: +// - inner_width -> *ignored* +// With ScrollX enabled: +// - inner_width < 0.0f -> *illegal* fit in known width (right align from outer_size.x) <-- weird +// - inner_width = 0.0f -> fit in outer_width: Fixed size columns will take space they need (if avail, otherwise shrink down), Stretch columns becomes Fixed columns. +// - inner_width > 0.0f -> override scrolling width, generally to be larger than outer_size.x. Fixed column take space they need (if avail, otherwise shrink down), Stretch columns share remaining space! +//----------------------------------------------------------------------------- +// Details: +// - If you want to use Stretch columns with ScrollX, you generally need to specify 'inner_width' otherwise the concept +// of "available space" doesn't make sense. +// - Even if not really useful, we allow 'inner_width < outer_size.x' for consistency and to facilitate understanding +// of what the value does. +//----------------------------------------------------------------------------- + +//----------------------------------------------------------------------------- +// COLUMNS SIZING POLICIES +//----------------------------------------------------------------------------- +// About overriding column sizing policy and width/weight with TableSetupColumn(): +// We use a default parameter of 'init_width_or_weight == -1'. 
+// - with ImGuiTableColumnFlags_WidthFixed, init_width <= 0 (default) --> width is automatic +// - with ImGuiTableColumnFlags_WidthFixed, init_width > 0 (explicit) --> width is custom +// - with ImGuiTableColumnFlags_WidthStretch, init_weight <= 0 (default) --> weight is 1.0f +// - with ImGuiTableColumnFlags_WidthStretch, init_weight > 0 (explicit) --> weight is custom +// Widths are specified _without_ CellPadding. If you specify a width of 100.0f, the column will cover (100.0f + Padding * 2.0f) +// and you can fit a 100.0f wide item in it without clipping and with full padding. +//----------------------------------------------------------------------------- +// About default sizing policy (if you don't specify a ImGuiTableColumnFlags_WidthXXXX flag) +// - with Table policy ImGuiTableFlags_SizingFixedFit --> default Column policy is ImGuiTableColumnFlags_WidthFixed, default Width is equal to contents width +// - with Table policy ImGuiTableFlags_SizingFixedSame --> default Column policy is ImGuiTableColumnFlags_WidthFixed, default Width is max of all contents width +// - with Table policy ImGuiTableFlags_SizingStretchSame --> default Column policy is ImGuiTableColumnFlags_WidthStretch, default Weight is 1.0f +// - with Table policy ImGuiTableFlags_SizingStretchWeight --> default Column policy is ImGuiTableColumnFlags_WidthStretch, default Weight is proportional to contents +// Default Width and default Weight can be overridden when calling TableSetupColumn(). +//----------------------------------------------------------------------------- +// About mixing Fixed/Auto and Stretch columns together: +// - the typical use of mixing sizing policies is: any number of LEADING Fixed columns, followed by one or two TRAILING Stretch columns. +// - using mixed policies with ScrollX does not make much sense, as using Stretch columns with ScrollX does not make much sense in the first place! 
+// that is, unless 'inner_width' is passed to BeginTable() to explicitly provide a total width to layout columns in. +// - when using ImGuiTableFlags_SizingFixedSame with mixed columns, only the Fixed/Auto columns will match their widths to the maximum contents width. +// - when using ImGuiTableFlags_SizingStretchSame with mixed columns, only the Stretch columns will match their weight/widths. +//----------------------------------------------------------------------------- +// About using column width: +// If a column is manually resizable or has a width specified with TableSetupColumn(): +// - you may use GetContentRegionAvail().x to query the width available in a given column. +// - right-side alignment features such as SetNextItemWidth(-x) or PushItemWidth(-x) will rely on this width. +// If the column is not resizable and has no width specified with TableSetupColumn(): +// - its width will be automatic and be set to the max of items submitted. +// - therefore you generally cannot have ALL items of the columns use e.g. SetNextItemWidth(-FLT_MIN). +// - but if the column has one or more items of known/fixed size, this will become the reference width used by SetNextItemWidth(-FLT_MIN). +//----------------------------------------------------------------------------- + + +//----------------------------------------------------------------------------- +// TABLES CLIPPING/CULLING +//----------------------------------------------------------------------------- +// About clipping/culling of Rows in Tables: +// - For large numbers of rows, it is recommended you use ImGuiListClipper to only submit visible rows. +// ImGuiListClipper is reliant on the fact that rows are of equal height. +// See 'Demo->Tables->Vertical Scrolling' or 'Demo->Tables->Advanced' for a demo of using the clipper. +// - Note that auto-resizing columns don't play well with using the clipper. +// By default a table with _ScrollX but without _Resizable will have column auto-resize. 
+// So, if you want to use the clipper, make sure to either enable _Resizable, or set up columns width explicitly with _WidthFixed. +//----------------------------------------------------------------------------- +// About clipping/culling of Columns in Tables: +// - Both TableSetColumnIndex() and TableNextColumn() return true when the column is visible or performing +// width measurements. Otherwise, you may skip submitting the contents of a cell/column, BUT ONLY if you know +// it is not going to contribute to row height. +// In many situations, you may skip submitting contents for every column but one (e.g. the first one). +// - Case A: column is not hidden by user, and at least partially in sight (most common case). +// - Case B: column is clipped / out of sight (because of scrolling or parent ClipRect): TableNextColumn() returns false as a hint but we still allow layout output. +// - Case C: column is hidden explicitly by the user (e.g. via the context menu, or _DefaultHide column flag, etc.). +// +// [A] [B] [C] +// TableNextColumn(): true false false -> [userland] when TableNextColumn() / TableSetColumnIndex() return false, user can skip submitting items but only if the column doesn't contribute to row height. +// SkipItems: false false true -> [internal] when SkipItems is true, most widgets will early out if submitted, resulting in no layout output. +// ClipRect: normal zero-width zero-width -> [internal] when ClipRect is zero, ItemAdd() will return false and most widgets will early out mid-way. +// ImDrawList output: normal dummy dummy -> [internal] when using the dummy channel, ImDrawList submissions (if any) will be wasted (because cliprect is zero-width anyway). +// +// - We need to distinguish those cases because non-hidden columns that are clipped outside of scrolling bounds should still contribute their height to the row. 
+// However, in the majority of cases, the contribution to row height is the same for all columns, or the tallest cells are known by the programmer. +//----------------------------------------------------------------------------- +// About clipping/culling of whole Tables: +// - Scrolling tables with a known outer size can be clipped earlier as BeginTable() will return false. +//----------------------------------------------------------------------------- + +//----------------------------------------------------------------------------- +// [SECTION] Header mess +//----------------------------------------------------------------------------- + +#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS) +#define _CRT_SECURE_NO_WARNINGS +#endif + +#include "imgui.h" +#ifndef IMGUI_DISABLE + +#ifndef IMGUI_DEFINE_MATH_OPERATORS +#define IMGUI_DEFINE_MATH_OPERATORS +#endif +#include "imgui_internal.h" + +// System includes +#if defined(_MSC_VER) && _MSC_VER <= 1500 // MSVC 2008 or earlier +#include // intptr_t +#else +#include // intptr_t +#endif + +// Visual Studio warnings +#ifdef _MSC_VER +#pragma warning (disable: 4127) // condition expression is constant +#pragma warning (disable: 4996) // 'This function or variable may be unsafe': strcpy, strdup, sprintf, vsnprintf, sscanf, fopen +#if defined(_MSC_VER) && _MSC_VER >= 1922 // MSVC 2019 16.2 or later +#pragma warning (disable: 5054) // operator '|': deprecated between enumerations of different types +#endif +#endif + +// Clang/GCC warnings with -Weverything +#if defined(__clang__) +#if __has_warning("-Wunknown-warning-option") +#pragma clang diagnostic ignored "-Wunknown-warning-option" // warning: unknown warning group 'xxx' // not all warnings are known by all Clang versions and they tend to be rename-happy.. so ignoring warnings triggers new warnings on some configuration. Great! 
+#endif +#pragma clang diagnostic ignored "-Wunknown-pragmas" // warning: unknown warning group 'xxx' +#pragma clang diagnostic ignored "-Wold-style-cast" // warning: use of old-style cast // yes, they are more terse. +#pragma clang diagnostic ignored "-Wfloat-equal" // warning: comparing floating point with == or != is unsafe // storing and comparing against same constants (typically 0.0f) is ok. +#pragma clang diagnostic ignored "-Wformat-nonliteral" // warning: format string is not a string literal // passing non-literal to vsnformat(). yes, user passing incorrect format strings can crash the code. +#pragma clang diagnostic ignored "-Wsign-conversion" // warning: implicit conversion changes signedness +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" // warning: zero as null pointer constant // some standard header variations use #define NULL 0 +#pragma clang diagnostic ignored "-Wdouble-promotion" // warning: implicit conversion from 'float' to 'double' when passing argument to function // using printf() is a misery with this as C++ va_arg ellipsis changes float to double. 
+#pragma clang diagnostic ignored "-Wenum-enum-conversion" // warning: bitwise operation between different enumeration types ('XXXFlags_' and 'XXXFlagsPrivate_') +#pragma clang diagnostic ignored "-Wdeprecated-enum-enum-conversion"// warning: bitwise operation between different enumeration types ('XXXFlags_' and 'XXXFlagsPrivate_') is deprecated +#pragma clang diagnostic ignored "-Wimplicit-int-float-conversion" // warning: implicit conversion from 'xxx' to 'float' may lose precision +#elif defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wpragmas" // warning: unknown option after '#pragma GCC diagnostic' kind +#pragma GCC diagnostic ignored "-Wformat-nonliteral" // warning: format not a string literal, format string not checked +#pragma GCC diagnostic ignored "-Wclass-memaccess" // [__GNUC__ >= 8] warning: 'memset/memcpy' clearing/writing an object of type 'xxxx' with no trivial copy-assignment; use assignment or value-initialization instead +#endif + +//----------------------------------------------------------------------------- +// [SECTION] Tables: Main code +//----------------------------------------------------------------------------- + +// Configuration +static const int TABLE_DRAW_CHANNEL_BG0 = 0; +static const int TABLE_DRAW_CHANNEL_BG2_FROZEN = 1; +static const int TABLE_DRAW_CHANNEL_NOCLIP = 2; // When using ImGuiTableFlags_NoClip (this becomes the last visible channel) +static const float TABLE_BORDER_SIZE = 1.0f; // FIXME-TABLE: Currently hard-coded because of clipping assumptions with outer borders rendering. +static const float TABLE_RESIZE_SEPARATOR_HALF_THICKNESS = 4.0f; // Extend outside inner borders. +static const float TABLE_RESIZE_SEPARATOR_FEEDBACK_TIMER = 0.06f; // Delay/timer before making the hover feedback (color+cursor) visible because tables/columns tends to be more cramped. 
+ +// Helper +inline ImGuiTableFlags TableFixFlags(ImGuiTableFlags flags, ImGuiWindow* outer_window) +{ + // Adjust flags: set default sizing policy + if ((flags & ImGuiTableFlags_SizingMask_) == 0) + flags |= ((flags & ImGuiTableFlags_ScrollX) || (outer_window->Flags & ImGuiWindowFlags_AlwaysAutoResize)) ? ImGuiTableFlags_SizingFixedFit : ImGuiTableFlags_SizingStretchSame; + + // Adjust flags: enable NoKeepColumnsVisible when using ImGuiTableFlags_SizingFixedSame + if ((flags & ImGuiTableFlags_SizingMask_) == ImGuiTableFlags_SizingFixedSame) + flags |= ImGuiTableFlags_NoKeepColumnsVisible; + + // Adjust flags: enforce borders when resizable + if (flags & ImGuiTableFlags_Resizable) + flags |= ImGuiTableFlags_BordersInnerV; + + // Adjust flags: disable NoHostExtendX/NoHostExtendY if we have any scrolling going on + if (flags & (ImGuiTableFlags_ScrollX | ImGuiTableFlags_ScrollY)) + flags &= ~(ImGuiTableFlags_NoHostExtendX | ImGuiTableFlags_NoHostExtendY); + + // Adjust flags: NoBordersInBodyUntilResize takes priority over NoBordersInBody + if (flags & ImGuiTableFlags_NoBordersInBodyUntilResize) + flags &= ~ImGuiTableFlags_NoBordersInBody; + + // Adjust flags: disable saved settings if there's nothing to save + if ((flags & (ImGuiTableFlags_Resizable | ImGuiTableFlags_Hideable | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Sortable)) == 0) + flags |= ImGuiTableFlags_NoSavedSettings; + + // Inherit _NoSavedSettings from top-level window (child windows always have _NoSavedSettings set) +#ifdef IMGUI_HAS_DOCK + ImGuiWindow* window_for_settings = outer_window->RootWindowDockStop; +#else + ImGuiWindow* window_for_settings = outer_window->RootWindow; +#endif + if (window_for_settings->Flags & ImGuiWindowFlags_NoSavedSettings) + flags |= ImGuiTableFlags_NoSavedSettings; + + return flags; +} + +ImGuiTable* ImGui::TableFindByID(ImGuiID id) +{ + ImGuiContext& g = *GImGui; + return g.Tables.GetByKey(id); +} + +// Read about "TABLE SIZING" at the top of this file. 
// Public entry point: derives the table ID from 'str_id' with GetID() and forwards everything to BeginTableEx().
bool ImGui::BeginTable(const char* str_id, int columns_count, ImGuiTableFlags flags, const ImVec2& outer_size, float inner_width)
{
    ImGuiID id = GetID(str_id);
    return BeginTableEx(str_id, id, columns_count, flags, outer_size, inner_width);
}

// Begin a table with an explicit ID.
// - 'name' is passed to BeginChildEx() when a scrolling child window is needed.
// - Returns false (without making a table current) when the host window has SkipItems set, or when a
//   scrolling table's outer rectangle is clipped; in the latter case ItemSize() is still submitted.
// - Otherwise acquires/initializes the ImGuiTable for 'id', pushes it on g.CurrentTableStack and returns true.
bool ImGui::BeginTableEx(const char* name, ImGuiID id, int columns_count, ImGuiTableFlags flags, const ImVec2& outer_size, float inner_width)
{
    ImGuiContext& g = *GImGui;
    ImGuiWindow* outer_window = GetCurrentWindow();
    if (outer_window->SkipItems) // Consistent with other tables + beneficial side effect that assert on miscalling EndTable() will be more visible.
        return false;

    // Sanity checks
    IM_ASSERT(columns_count > 0 && columns_count <= IMGUI_TABLE_MAX_COLUMNS && "Only 1..64 columns allowed!");
    if (flags & ImGuiTableFlags_ScrollX)
        IM_ASSERT(inner_width >= 0.0f);

    // If an outer size is specified ahead we will be able to early out when not visible. Exact clipping rules may evolve.
    const bool use_child_window = (flags & (ImGuiTableFlags_ScrollX | ImGuiTableFlags_ScrollY)) != 0;
    const ImVec2 avail_size = GetContentRegionAvail();
    ImVec2 actual_outer_size = CalcItemSize(outer_size, ImMax(avail_size.x, 1.0f), use_child_window ? ImMax(avail_size.y, 1.0f) : 0.0f);
    ImRect outer_rect(outer_window->DC.CursorPos, outer_window->DC.CursorPos + actual_outer_size);
    if (use_child_window && IsClippedEx(outer_rect, 0, false))
    {
        ItemSize(outer_rect);
        return false;
    }

    // Acquire storage for the table
    // (instance_no is 0 for the first BeginTable() with this ID in the frame, and increments for each further one)
    ImGuiTable* table = g.Tables.GetOrAddByKey(id);
    const int instance_no = (table->LastFrameActive != g.FrameCount) ? 0 : table->InstanceCurrent + 1;
    const ImGuiID instance_id = id + instance_no;
    const ImGuiTableFlags table_last_flags = table->Flags;
    if (instance_no > 0)
        IM_ASSERT(table->ColumnsCount == columns_count && "BeginTable(): Cannot change columns count mid-frame while preserving same ID");

    // Fix flags
    // (IsDefaultSizingPolicy must be recorded before TableFixFlags() fills in a sizing policy)
    table->IsDefaultSizingPolicy = (flags & ImGuiTableFlags_SizingMask_) == 0;
    flags = TableFixFlags(flags, outer_window);

    // Initialize
    table->ID = id;
    table->Flags = flags;
    table->InstanceCurrent = (ImS16)instance_no;
    table->LastFrameActive = g.FrameCount;
    table->OuterWindow = table->InnerWindow = outer_window;
    table->ColumnsCount = columns_count;
    table->IsLayoutLocked = false;
    table->InnerWidth = inner_width;
    table->UserOuterSize = outer_size;

    // When not using a child window, WorkRect.Max will grow as we append contents.
    if (use_child_window)
    {
        // Ensure no vertical scrollbar appears if we only want horizontal one, to make flag consistent
        // (we have no other way to disable vertical scrollbar of a window while keeping the horizontal one showing)
        ImVec2 override_content_size(FLT_MAX, FLT_MAX);
        if ((flags & ImGuiTableFlags_ScrollX) && !(flags & ImGuiTableFlags_ScrollY))
            override_content_size.y = FLT_MIN;

        // Ensure specified width (when not specified, Stretched columns will act as if the width == OuterWidth and
        // never lead to any scrolling). We don't handle inner_width < 0.0f, we could potentially use it to right-align
        // based on the right side of the child window work rect, which would require knowing ahead if we are going to
        // have decoration taking horizontal spaces (typically a vertical scrollbar).
        if ((flags & ImGuiTableFlags_ScrollX) && inner_width > 0.0f)
            override_content_size.x = inner_width;

        // FLT_MAX is used above as the "not overridden" marker; such axes are passed as 0.0f
        if (override_content_size.x != FLT_MAX || override_content_size.y != FLT_MAX)
            SetNextWindowContentSize(ImVec2(override_content_size.x != FLT_MAX ? override_content_size.x : 0.0f, override_content_size.y != FLT_MAX ? override_content_size.y : 0.0f));

        // Reset scroll if we are reactivating it
        if ((table_last_flags & (ImGuiTableFlags_ScrollX | ImGuiTableFlags_ScrollY)) == 0)
            SetNextWindowScroll(ImVec2(0.0f, 0.0f));

        // Create scrolling region (without border and zero window padding)
        ImGuiWindowFlags child_flags = (flags & ImGuiTableFlags_ScrollX) ? ImGuiWindowFlags_HorizontalScrollbar : ImGuiWindowFlags_None;
        BeginChildEx(name, instance_id, outer_rect.GetSize(), false, child_flags);
        table->InnerWindow = g.CurrentWindow;
        table->WorkRect = table->InnerWindow->WorkRect;
        table->OuterRect = table->InnerWindow->Rect();
        table->InnerRect = table->InnerWindow->InnerRect;
        IM_ASSERT(table->InnerWindow->WindowPadding.x == 0.0f && table->InnerWindow->WindowPadding.y == 0.0f && table->InnerWindow->WindowBorderSize == 0.0f);
    }
    else
    {
        // For non-scrolling tables, WorkRect == OuterRect == InnerRect.
        // But at this point we do NOT have a correct value for .Max.y (unless a height has been explicitly passed in). It will only be updated in EndTable().
        table->WorkRect = table->OuterRect = table->InnerRect = outer_rect;
    }

    // Push a standardized ID for both child-using and not-child-using tables
    PushOverrideID(instance_id);

    // Backup a copy of host window members we will modify
    // (note that ColumnsOffset, ItemWidth and ItemWidthStack.Size are read from outer_window, the others from inner_window)
    ImGuiWindow* inner_window = table->InnerWindow;
    table->HostIndentX = inner_window->DC.Indent.x;
    table->HostClipRect = inner_window->ClipRect;
    table->HostSkipItems = inner_window->SkipItems;
    table->HostBackupWorkRect = inner_window->WorkRect;
    table->HostBackupParentWorkRect = inner_window->ParentWorkRect;
    table->HostBackupColumnsOffset = outer_window->DC.ColumnsOffset;
    table->HostBackupPrevLineSize = inner_window->DC.PrevLineSize;
    table->HostBackupCurrLineSize = inner_window->DC.CurrLineSize;
    table->HostBackupCursorMaxPos = inner_window->DC.CursorMaxPos;
    table->HostBackupItemWidth = outer_window->DC.ItemWidth;
    table->HostBackupItemWidthStackSize = outer_window->DC.ItemWidthStack.Size;
    inner_window->DC.PrevLineSize = inner_window->DC.CurrLineSize = ImVec2(0.0f, 0.0f);

    // Padding and Spacing
    // - None               ........Content..... Pad .....Content........
    // - PadOuter           | Pad ..Content..... Pad .....Content.. Pad |
    // - PadInner           ........Content.. Pad | Pad ..Content........
    // - PadOuter+PadInner  | Pad ..Content.. Pad | Pad ..Content.. Pad |
    const bool pad_outer_x = (flags & ImGuiTableFlags_NoPadOuterX) ? false : (flags & ImGuiTableFlags_PadOuterX) ? true : (flags & ImGuiTableFlags_BordersOuterV) != 0;
    const bool pad_inner_x = (flags & ImGuiTableFlags_NoPadInnerX) ? false : true;
    const float inner_spacing_for_border = (flags & ImGuiTableFlags_BordersInnerV) ? TABLE_BORDER_SIZE : 0.0f;
    const float inner_spacing_explicit = (pad_inner_x && (flags & ImGuiTableFlags_BordersInnerV) == 0) ? g.Style.CellPadding.x : 0.0f;
    const float inner_padding_explicit = (pad_inner_x && (flags & ImGuiTableFlags_BordersInnerV) != 0) ? g.Style.CellPadding.x : 0.0f;
    table->CellSpacingX1 = inner_spacing_explicit + inner_spacing_for_border;
    table->CellSpacingX2 = inner_spacing_explicit;
    table->CellPaddingX = inner_padding_explicit;
    table->CellPaddingY = g.Style.CellPadding.y;

    const float outer_padding_for_border = (flags & ImGuiTableFlags_BordersOuterV) ? TABLE_BORDER_SIZE : 0.0f;
    const float outer_padding_explicit = pad_outer_x ? g.Style.CellPadding.x : 0.0f;
    table->OuterPaddingX = (outer_padding_for_border + outer_padding_explicit) - table->CellPaddingX;

    // Reset per-frame row/column cursors and compute the inner clipping rectangle
    table->CurrentColumn = -1;
    table->CurrentRow = -1;
    table->RowBgColorCounter = 0;
    table->LastRowFlags = ImGuiTableRowFlags_None;
    table->InnerClipRect = (inner_window == outer_window) ? table->WorkRect : inner_window->ClipRect;
    table->InnerClipRect.ClipWith(table->WorkRect);     // We need this to honor inner_width
    table->InnerClipRect.ClipWithFull(table->HostClipRect);
    table->InnerClipRect.Max.y = (flags & ImGuiTableFlags_NoHostExtendY) ? ImMin(table->InnerClipRect.Max.y, inner_window->WorkRect.Max.y) : inner_window->ClipRect.Max.y;

    table->RowPosY1 = table->RowPosY2 = table->WorkRect.Min.y; // This is needed somehow
    table->RowTextBaseline = 0.0f; // This will be cleared again by TableBeginRow()
    table->FreezeRowsRequest = table->FreezeRowsCount = 0; // This will be setup by TableSetupScrollFreeze(), if any
    table->FreezeColumnsRequest = table->FreezeColumnsCount = 0;
    table->IsUnfrozenRows = true;
    table->DeclColumnsCount = 0;

    // Using opaque colors facilitate overlapping elements of the grid
    table->BorderColorStrong = GetColorU32(ImGuiCol_TableBorderStrong);
    table->BorderColorLight = GetColorU32(ImGuiCol_TableBorderLight);

    // Make table current
    const int table_idx = g.Tables.GetIndex(table);
    g.CurrentTableStack.push_back(ImGuiPtrOrIndex(table_idx));
    g.CurrentTable = table;
    outer_window->DC.CurrentTableIdx = table_idx;
    if (inner_window != outer_window) // So EndChild() within the inner window can restore the table properly.
        inner_window->DC.CurrentTableIdx = table_idx;

    // Reorderable was set on the previous use of this table but is not anymore: request a display order reset
    if ((table_last_flags & ImGuiTableFlags_Reorderable) && (flags & ImGuiTableFlags_Reorderable) == 0)
        table->IsResetDisplayOrderRequest = true;

    // Mark as used
    if (table_idx >= g.TablesLastTimeActive.Size)
        g.TablesLastTimeActive.resize(table_idx + 1, -1.0f);
    g.TablesLastTimeActive[table_idx] = (float)g.Time;
    table->MemoryCompacted = false;

    // Setup memory buffer (clear data if columns count changed)
    const int stored_size = table->Columns.size();
    if (stored_size != 0 && stored_size != columns_count)
    {
        IM_FREE(table->RawData);
        table->RawData = NULL;
    }
    if (table->RawData == NULL)
    {
        TableBeginInitMemory(table, columns_count);
        table->IsInitializing = table->IsSettingsRequestLoad = true;
    }
    if (table->IsResetAllRequest)
        TableResetSettings(table);
    if (table->IsInitializing)
    {
        // Initialize
        table->SettingsOffset = -1;
        table->IsSortSpecsDirty = true;
        table->InstanceInteracted = -1;
        table->ContextPopupColumn = -1;
        table->ReorderColumn = table->ResizedColumn = table->LastResizedColumn = -1;
        table->AutoFitSingleColumn = -1;
        table->HoveredColumnBody = table->HoveredColumnBorder = -1;
        for (int n = 0; n < columns_count; n++)
        {
            // Reset the column to a default-constructed state, carrying over only WidthAuto
            ImGuiTableColumn* column = &table->Columns[n];
            float width_auto = column->WidthAuto;
            *column = ImGuiTableColumn();
            column->WidthAuto = width_auto;
            column->IsPreserveWidthAuto = true; // Preserve WidthAuto when reinitializing a live table: not technically necessary but remove a visible flicker
            column->DisplayOrder = table->DisplayOrderToIndex[n] = (ImGuiTableColumnIdx)n;
            column->IsEnabled = column->IsEnabledNextFrame = true;
        }
    }

    // Load settings
    if (table->IsSettingsRequestLoad)
        TableLoadSettings(table);

    // Handle DPI/font resize
    // This is designed to facilitate DPI changes with the assumption that e.g. style.CellPadding has been scaled as well.
    // It will also react to changing fonts with mixed results. It doesn't need to be perfect but merely provide a decent transition.
    // FIXME-DPI: Provide consistent standards for reference size. Perhaps using g.CurrentDpiScale would be more self explanatory.
    // This will lead us to non-rounded WidthRequest in columns, which should work but is a poorly tested path.
    const float new_ref_scale_unit = g.FontSize; // g.Font->GetCharAdvance('A') ?
    if (table->RefScale != 0.0f && table->RefScale != new_ref_scale_unit)
    {
        const float scale_factor = new_ref_scale_unit / table->RefScale;
        //IMGUI_DEBUG_LOG("[table] %08X RefScaleUnit %.3f -> %.3f, scaling width by %.3f\n", table->ID, table->RefScaleUnit, new_ref_scale_unit, scale_factor);
        for (int n = 0; n < columns_count; n++)
            table->Columns[n].WidthRequest = table->Columns[n].WidthRequest * scale_factor;
    }
    table->RefScale = new_ref_scale_unit;

    // Disable output until user calls TableNextRow() or TableNextColumn() leading to the TableUpdateLayout() call..
    // This is not strictly necessary but will reduce cases where "out of table" output will be misleading to the user.
    // Because we cannot safely assert in EndTable() when no rows have been created, this seems like our best option.
    inner_window->SkipItems = true;

    // Clear names
    // At this point the ->NameOffset field of each column will be invalid until TableUpdateLayout() or the first call to TableSetupColumn()
    if (table->ColumnsNames.Buf.Size > 0)
        table->ColumnsNames.Buf.resize(0);

    // Apply queued resizing/reordering/hiding requests
    TableBeginApplyRequests(table);

    return true;
}

// For reference, the average total _allocation count_ for a table is:
// + 0 (for ImGuiTable instance, we are pooling allocations in g.Tables)
// + 1 (for table->RawData allocated below)
// + 1 (for table->ColumnsNames, if names are used)
// + 1 (for table->Splitter._Channels)
// + 2 * active_channels_count (for ImDrawCmd and ImDrawIdx buffers inside channels)
// Where active_channels_count is variable but often == columns_count or columns_count + 1, see TableSetupDrawChannels() for details.
// Unused channels don't perform their +2 allocations.
//
// Allocates table->RawData as a single zero-filled buffer sized for 'columns_count' entries of each of the three
// per-column arrays, then points table->Columns, table->DisplayOrderToIndex and table->RowCellData into it.
// The previous RawData pointer is overwritten without being freed here (BeginTableEx() frees it before calling).
void ImGui::TableBeginInitMemory(ImGuiTable* table, int columns_count)
{
    // Allocate single buffer for our arrays
    ImSpanAllocator<3> span_allocator;
    span_allocator.ReserveBytes(0, columns_count * sizeof(ImGuiTableColumn));
    span_allocator.ReserveBytes(1, columns_count * sizeof(ImGuiTableColumnIdx));
    span_allocator.ReserveBytes(2, columns_count * sizeof(ImGuiTableCellData));
    table->RawData = IM_ALLOC(span_allocator.GetArenaSizeInBytes());
    memset(table->RawData, 0, span_allocator.GetArenaSizeInBytes());
    span_allocator.SetArenaBasePtr(table->RawData);
    span_allocator.GetSpan(0, &table->Columns);
    span_allocator.GetSpan(1, &table->DisplayOrderToIndex);
    span_allocator.GetSpan(2, &table->RowCellData);
}

// Apply queued resizing/reordering/hiding requests
void ImGui::TableBeginApplyRequests(ImGuiTable* table)
{
    // Handle resizing request
    // (We process this at the first TableBegin of the frame)
    // FIXME-TABLE: Contains columns if our work area doesn't allow for scrolling?
+ if (table->InstanceCurrent == 0) + { + if (table->ResizedColumn != -1 && table->ResizedColumnNextWidth != FLT_MAX) + TableSetColumnWidth(table->ResizedColumn, table->ResizedColumnNextWidth); + table->LastResizedColumn = table->ResizedColumn; + table->ResizedColumnNextWidth = FLT_MAX; + table->ResizedColumn = -1; + + // Process auto-fit for single column, which is a special case for stretch columns and fixed columns with FixedSame policy. + // FIXME-TABLE: Would be nice to redistribute available stretch space accordingly to other weights, instead of giving it all to siblings. + if (table->AutoFitSingleColumn != -1) + { + TableSetColumnWidth(table->AutoFitSingleColumn, table->Columns[table->AutoFitSingleColumn].WidthAuto); + table->AutoFitSingleColumn = -1; + } + } + + // Handle reordering request + // Note: we don't clear ReorderColumn after handling the request. + if (table->InstanceCurrent == 0) + { + if (table->HeldHeaderColumn == -1 && table->ReorderColumn != -1) + table->ReorderColumn = -1; + table->HeldHeaderColumn = -1; + if (table->ReorderColumn != -1 && table->ReorderColumnDir != 0) + { + // We need to handle reordering across hidden columns. + // In the configuration below, moving C to the right of E will lead to: + // ... C [D] E ---> ... [D] E C (Column name/index) + // ... 2 3 4 ... 2 3 4 (Display order) + const int reorder_dir = table->ReorderColumnDir; + IM_ASSERT(reorder_dir == -1 || reorder_dir == +1); + IM_ASSERT(table->Flags & ImGuiTableFlags_Reorderable); + ImGuiTableColumn* src_column = &table->Columns[table->ReorderColumn]; + ImGuiTableColumn* dst_column = &table->Columns[(reorder_dir == -1) ? 
src_column->PrevEnabledColumn : src_column->NextEnabledColumn]; + IM_UNUSED(dst_column); + const int src_order = src_column->DisplayOrder; + const int dst_order = dst_column->DisplayOrder; + src_column->DisplayOrder = (ImGuiTableColumnIdx)dst_order; + for (int order_n = src_order + reorder_dir; order_n != dst_order + reorder_dir; order_n += reorder_dir) + table->Columns[table->DisplayOrderToIndex[order_n]].DisplayOrder -= (ImGuiTableColumnIdx)reorder_dir; + IM_ASSERT(dst_column->DisplayOrder == dst_order - reorder_dir); + + // Display order is stored in both columns->IndexDisplayOrder and table->DisplayOrder[], + // rebuild the later from the former. + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + table->DisplayOrderToIndex[table->Columns[column_n].DisplayOrder] = (ImGuiTableColumnIdx)column_n; + table->ReorderColumnDir = 0; + table->IsSettingsDirty = true; + } + } + + // Handle display order reset request + if (table->IsResetDisplayOrderRequest) + { + for (int n = 0; n < table->ColumnsCount; n++) + table->DisplayOrderToIndex[n] = table->Columns[n].DisplayOrder = (ImGuiTableColumnIdx)n; + table->IsResetDisplayOrderRequest = false; + table->IsSettingsDirty = true; + } +} + +// Adjust flags: default width mode + stretch columns are not allowed when auto extending +static void TableSetupColumnFlags(ImGuiTable* table, ImGuiTableColumn* column, ImGuiTableColumnFlags flags_in) +{ + ImGuiTableColumnFlags flags = flags_in; + + // Sizing Policy + if ((flags & ImGuiTableColumnFlags_WidthMask_) == 0) + { + const ImGuiTableFlags table_sizing_policy = (table->Flags & ImGuiTableFlags_SizingMask_); + if (table_sizing_policy == ImGuiTableFlags_SizingFixedFit || table_sizing_policy == ImGuiTableFlags_SizingFixedSame) + flags |= ImGuiTableColumnFlags_WidthFixed; + else + flags |= ImGuiTableColumnFlags_WidthStretch; + } + else + { + IM_ASSERT(ImIsPowerOfTwo(flags & ImGuiTableColumnFlags_WidthMask_)); // Check that only 1 of each set is used. 
+ } + + // Resize + if ((table->Flags & ImGuiTableFlags_Resizable) == 0) + flags |= ImGuiTableColumnFlags_NoResize; + + // Sorting + if ((flags & ImGuiTableColumnFlags_NoSortAscending) && (flags & ImGuiTableColumnFlags_NoSortDescending)) + flags |= ImGuiTableColumnFlags_NoSort; + + // Indentation + if ((flags & ImGuiTableColumnFlags_IndentMask_) == 0) + flags |= (table->Columns.index_from_ptr(column) == 0) ? ImGuiTableColumnFlags_IndentEnable : ImGuiTableColumnFlags_IndentDisable; + + // Alignment + //if ((flags & ImGuiTableColumnFlags_AlignMask_) == 0) + // flags |= ImGuiTableColumnFlags_AlignCenter; + //IM_ASSERT(ImIsPowerOfTwo(flags & ImGuiTableColumnFlags_AlignMask_)); // Check that only 1 of each set is used. + + // Preserve status flags + column->Flags = flags | (column->Flags & ImGuiTableColumnFlags_StatusMask_); + + // Build an ordered list of available sort directions + column->SortDirectionsAvailCount = column->SortDirectionsAvailMask = column->SortDirectionsAvailList = 0; + if (table->Flags & ImGuiTableFlags_Sortable) + { + int count = 0, mask = 0, list = 0; + if ((flags & ImGuiTableColumnFlags_PreferSortAscending) != 0 && (flags & ImGuiTableColumnFlags_NoSortAscending) == 0) { mask |= 1 << ImGuiSortDirection_Ascending; list |= ImGuiSortDirection_Ascending << (count << 1); count++; } + if ((flags & ImGuiTableColumnFlags_PreferSortDescending) != 0 && (flags & ImGuiTableColumnFlags_NoSortDescending) == 0) { mask |= 1 << ImGuiSortDirection_Descending; list |= ImGuiSortDirection_Descending << (count << 1); count++; } + if ((flags & ImGuiTableColumnFlags_PreferSortAscending) == 0 && (flags & ImGuiTableColumnFlags_NoSortAscending) == 0) { mask |= 1 << ImGuiSortDirection_Ascending; list |= ImGuiSortDirection_Ascending << (count << 1); count++; } + if ((flags & ImGuiTableColumnFlags_PreferSortDescending) == 0 && (flags & ImGuiTableColumnFlags_NoSortDescending) == 0) { mask |= 1 << ImGuiSortDirection_Descending; list |= ImGuiSortDirection_Descending << (count << 
1); count++; } + if ((table->Flags & ImGuiTableFlags_SortTristate) || count == 0) { mask |= 1 << ImGuiSortDirection_None; count++; } + column->SortDirectionsAvailList = (ImU8)list; + column->SortDirectionsAvailMask = (ImU8)mask; + column->SortDirectionsAvailCount = (ImU8)count; + ImGui::TableFixColumnSortDirection(table, column); + } +} + +// Layout columns for the frame. This is in essence the followup to BeginTable(). +// Runs on the first call to TableNextRow(), to give a chance for TableSetupColumn() to be called first. +// FIXME-TABLE: Our width (and therefore our WorkRect) will be minimal in the first frame for _WidthAuto columns. +// Increase feedback side-effect with widgets relying on WorkRect.Max.x... Maybe provide a default distribution for _WidthAuto columns? +void ImGui::TableUpdateLayout(ImGuiTable* table) +{ + ImGuiContext& g = *GImGui; + IM_ASSERT(table->IsLayoutLocked == false); + + const ImGuiTableFlags table_sizing_policy = (table->Flags & ImGuiTableFlags_SizingMask_); + table->IsDefaultDisplayOrder = true; + table->ColumnsEnabledCount = 0; + table->EnabledMaskByIndex = 0x00; + table->EnabledMaskByDisplayOrder = 0x00; + table->MinColumnWidth = ImMax(1.0f, g.Style.FramePadding.x * 1.0f); // g.Style.ColumnsMinSpacing; // FIXME-TABLE + + // [Part 1] Apply/lock Enabled and Order states. Calculate auto/ideal width for columns. Count fixed/stretch columns. + // Process columns in their visible orders as we are building the Prev/Next indices. 
// Layout columns for the frame. This is in essence the followup to BeginTable().
// Runs on the first call to TableNextRow(), to give a chance for TableSetupColumn() to be called first.
//
// In order, this function:
//   [Part 1]  locks enabled/order state, links enabled columns together, gathers auto widths and fixed/stretch counts
//   [Part 2]  lifts inner window item skipping while any column is auto-fitting
//   [Part 3]  derives WidthRequest (fixed columns) or StretchWeight (stretch columns)
//   [Part 4]  converts weights into widths and assigns WidthGiven
//   [Part 5]  hands out the rounding remainder to stretch columns, right to left
//   [Part 6]  locks positions and clip rectangles, computes visibility/skip state, detects the hovered column
//   [Part 7+] trailing space hover, Resizable flag cleanup, right edge lock, draw channels, border hit-testing,
//             context menu, sort specs, and the initial draw channel / clip rect.
// On exit table->IsLayoutLocked is true (and asserted false on entry).
//
// FIXME-TABLE: Our width (and therefore our WorkRect) will be minimal in the first frame for _WidthAuto columns.
// Increase feedback side-effect with widgets relying on WorkRect.Max.x... Maybe provide a default distribution for _WidthAuto columns?
void ImGui::TableUpdateLayout(ImGuiTable* table)
{
    ImGuiContext& g = *GImGui;
    IM_ASSERT(table->IsLayoutLocked == false);

    const ImGuiTableFlags table_sizing_policy = (table->Flags & ImGuiTableFlags_SizingMask_);
    table->IsDefaultDisplayOrder = true;
    table->ColumnsEnabledCount = 0;
    table->EnabledMaskByIndex = 0x00;
    table->EnabledMaskByDisplayOrder = 0x00;
    table->MinColumnWidth = ImMax(1.0f, g.Style.FramePadding.x * 1.0f); // g.Style.ColumnsMinSpacing; // FIXME-TABLE

    // [Part 1] Apply/lock Enabled and Order states. Calculate auto/ideal width for columns. Count fixed/stretch columns.
    // Process columns in their visible orders as we are building the Prev/Next indices.
    int count_fixed = 0;                // Number of columns that have fixed sizing policies
    int count_stretch = 0;              // Number of columns that have stretch sizing policies
    int last_visible_column_idx = -1;   // Index of the previous enabled column in display order, -1 until one is found
    bool has_auto_fit_request = false;  // Set when any enabled column has a non-zero AutoFitQueue
    bool has_resizable = false;         // Set when any enabled column lacks the NoResize flag
    float stretch_sum_width_auto = 0.0f; // Sum of WidthAuto over enabled stretch columns
    float fixed_max_width_auto = 0.0f;   // Largest WidthAuto among enabled fixed columns
    for (int order_n = 0; order_n < table->ColumnsCount; order_n++)
    {
        const int column_n = table->DisplayOrderToIndex[order_n];
        if (column_n != order_n)
            table->IsDefaultDisplayOrder = false;
        ImGuiTableColumn* column = &table->Columns[column_n];

        // Clear column setup if not submitted by user. Currently we make it mandatory to call TableSetupColumn() every frame.
        // It would easily work without but we're not ready to guarantee it since e.g. names need resubmission anyway.
        // We take a slight shortcut but in theory we could be calling TableSetupColumn() here with dummy values, it should yield the same effect.
        if (table->DeclColumnsCount <= column_n)
        {
            TableSetupColumnFlags(table, column, ImGuiTableColumnFlags_None);
            column->NameOffset = -1;
            column->UserID = 0;
            column->InitStretchWeightOrWidth = -1.0f;
        }

        // Update Enabled state, mark settings/sortspecs dirty
        // (columns cannot be hidden unless the table is Hideable and the column lacks NoHide)
        if (!(table->Flags & ImGuiTableFlags_Hideable) || (column->Flags & ImGuiTableColumnFlags_NoHide))
            column->IsEnabledNextFrame = true;
        if (column->IsEnabled != column->IsEnabledNextFrame)
        {
            column->IsEnabled = column->IsEnabledNextFrame;
            table->IsSettingsDirty = true;
            if (!column->IsEnabled && column->SortOrder != -1)
                table->IsSortSpecsDirty = true;
        }
        // A secondary sort column (SortOrder > 0) is not allowed without SortMulti: request a sort specs rebuild.
        if (column->SortOrder > 0 && !(table->Flags & ImGuiTableFlags_SortMulti))
            table->IsSortSpecsDirty = true;

        // Auto-fit unsized columns (a negative WidthRequest/StretchWeight marks a column that has no size yet)
        const bool start_auto_fit = (column->Flags & ImGuiTableColumnFlags_WidthFixed) ? (column->WidthRequest < 0.0f) : (column->StretchWeight < 0.0f);
        if (start_auto_fit)
            column->AutoFitQueue = column->CannotSkipItemsQueue = (1 << 3) - 1; // Fit for three frames

        if (!column->IsEnabled)
        {
            column->IndexWithinEnabledSet = -1;
            continue;
        }

        // Mark as enabled and link to previous/next enabled column
        column->PrevEnabledColumn = (ImGuiTableColumnIdx)last_visible_column_idx;
        column->NextEnabledColumn = -1;
        if (last_visible_column_idx != -1)
            table->Columns[last_visible_column_idx].NextEnabledColumn = (ImGuiTableColumnIdx)column_n;
        column->IndexWithinEnabledSet = table->ColumnsEnabledCount++;
        table->EnabledMaskByIndex |= (ImU64)1 << column_n;
        table->EnabledMaskByDisplayOrder |= (ImU64)1 << column->DisplayOrder;
        last_visible_column_idx = column_n;
        IM_ASSERT(column->IndexWithinEnabledSet <= column->DisplayOrder);

        // Calculate ideal/auto column width (that's the width required for all contents to be visible without clipping)
        // Combine width from regular rows + width from headers unless requested not to.
        if (!column->IsPreserveWidthAuto)
            column->WidthAuto = TableGetColumnWidthAuto(table, column);

        // Non-resizable columns keep their requested width (apply user value regardless of IsPreserveWidthAuto)
        const bool column_is_resizable = (column->Flags & ImGuiTableColumnFlags_NoResize) == 0;
        if (column_is_resizable)
            has_resizable = true;
        if ((column->Flags & ImGuiTableColumnFlags_WidthFixed) && column->InitStretchWeightOrWidth > 0.0f && !column_is_resizable)
            column->WidthAuto = column->InitStretchWeightOrWidth;

        if (column->AutoFitQueue != 0x00)
            has_auto_fit_request = true;
        if (column->Flags & ImGuiTableColumnFlags_WidthStretch)
        {
            stretch_sum_width_auto += column->WidthAuto;
            count_stretch++;
        }
        else
        {
            fixed_max_width_auto = ImMax(fixed_max_width_auto, column->WidthAuto);
            count_fixed++;
        }
    }
    // A sortable, non-tristate table with no sort specs yet needs them to be built.
    if ((table->Flags & ImGuiTableFlags_Sortable) && table->SortSpecsCount == 0 && !(table->Flags & ImGuiTableFlags_SortTristate))
        table->IsSortSpecsDirty = true;
    table->RightMostEnabledColumn = (ImGuiTableColumnIdx)last_visible_column_idx;
    IM_ASSERT(table->RightMostEnabledColumn >= 0); // At least one column must be enabled

    // [Part 2] Disable child window clipping while fitting columns. This is not strictly necessary but makes it possible
    // to avoid the column fitting having to wait until the first visible frame of the child container (may or not be a good thing).
    // FIXME-TABLE: for always auto-resizing columns may not want to do that all the time.
    if (has_auto_fit_request && table->OuterWindow != table->InnerWindow)
        table->InnerWindow->SkipItems = false;
    if (has_auto_fit_request)
        table->IsSettingsDirty = true;

    // [Part 3] Fix column flags and record some extra information.
    float sum_width_requests = 0.0f;        // Sum of all width for fixed and auto-resize columns, excluding width contributed by Stretch columns but including spacing/padding.
    float stretch_sum_weights = 0.0f;       // Sum of all weights for stretch columns.
    table->LeftMostStretchedColumn = table->RightMostStretchedColumn = -1;
    for (int column_n = 0; column_n < table->ColumnsCount; column_n++)
    {
        if (!(table->EnabledMaskByIndex & ((ImU64)1 << column_n)))
            continue;
        ImGuiTableColumn* column = &table->Columns[column_n];

        const bool column_is_resizable = (column->Flags & ImGuiTableColumnFlags_NoResize) == 0;
        if (column->Flags & ImGuiTableColumnFlags_WidthFixed)
        {
            // Apply same widths policy
            float width_auto = column->WidthAuto;
            if (table_sizing_policy == ImGuiTableFlags_SizingFixedSame && (column->AutoFitQueue != 0x00 || !column_is_resizable))
                width_auto = fixed_max_width_auto;

            // Apply automatic width
            // Latch initial size for fixed columns and update it constantly for auto-resizing column (unless clipped!)
            // Note: RequestOutputMaskByIndex is only reset in [Part 6] below, so the value tested here is the one
            // left by the previous run of this function.
            if (column->AutoFitQueue != 0x00)
                column->WidthRequest = width_auto;
            else if ((column->Flags & ImGuiTableColumnFlags_WidthFixed) && !column_is_resizable && (table->RequestOutputMaskByIndex & ((ImU64)1 << column_n)))
                column->WidthRequest = width_auto;

            // FIXME-TABLE: Increase minimum size during init frame to avoid biasing auto-fitting widgets
            // (e.g. TextWrapped) too much. Otherwise what tends to happen is that TextWrapped would output a very
            // large height (= first frame scrollbar display very off + clipper would skip lots of items).
            // This is merely making the side-effect less extreme, but doesn't properly fix it.
            // FIXME: Move this to ->WidthGiven to avoid temporary lossyless?
            // FIXME: This breaks IsPreserveWidthAuto from not flickering if the stored WidthAuto was smaller.
            if (column->AutoFitQueue > 0x01 && table->IsInitializing && !column->IsPreserveWidthAuto)
                column->WidthRequest = ImMax(column->WidthRequest, table->MinColumnWidth * 4.0f); // FIXME-TABLE: Another constant/scale?
            sum_width_requests += column->WidthRequest;
        }
        else
        {
            // Initialize stretch weight
            // NOTE(review): the SizingStretchProp branch divides by stretch_sum_width_auto, and [Part 4] divides by
            // stretch_sum_weights; nothing visible here guards against those sums being zero -- confirm that
            // WidthAuto/StretchWeight are always positive for enabled stretch columns.
            if (column->AutoFitQueue != 0x00 || column->StretchWeight < 0.0f || !column_is_resizable)
            {
                if (column->InitStretchWeightOrWidth > 0.0f)
                    column->StretchWeight = column->InitStretchWeightOrWidth;
                else if (table_sizing_policy == ImGuiTableFlags_SizingStretchProp)
                    column->StretchWeight = (column->WidthAuto / stretch_sum_width_auto) * count_stretch;
                else
                    column->StretchWeight = 1.0f;
            }

            stretch_sum_weights += column->StretchWeight;
            // Track the left-most/right-most stretch columns in display order
            if (table->LeftMostStretchedColumn == -1 || table->Columns[table->LeftMostStretchedColumn].DisplayOrder > column->DisplayOrder)
                table->LeftMostStretchedColumn = (ImGuiTableColumnIdx)column_n;
            if (table->RightMostStretchedColumn == -1 || table->Columns[table->RightMostStretchedColumn].DisplayOrder < column->DisplayOrder)
                table->RightMostStretchedColumn = (ImGuiTableColumnIdx)column_n;
        }
        column->IsPreserveWidthAuto = false;
        sum_width_requests += table->CellPaddingX * 2.0f;
    }
    table->ColumnsEnabledFixedCount = (ImGuiTableColumnIdx)count_fixed;

    // [Part 4] Apply final widths based on requested widths
    const ImRect work_rect = table->WorkRect;
    const float width_spacings = (table->OuterPaddingX * 2.0f) + (table->CellSpacingX1 + table->CellSpacingX2) * (table->ColumnsEnabledCount - 1);
    const float width_avail = ((table->Flags & ImGuiTableFlags_ScrollX) && table->InnerWidth == 0.0f) ? table->InnerClipRect.GetWidth() : work_rect.GetWidth();
    const float width_avail_for_stretched_columns = width_avail - width_spacings - sum_width_requests;
    float width_remaining_for_stretched_columns = width_avail_for_stretched_columns;
    table->ColumnsGivenWidth = width_spacings + (table->CellPaddingX * 2.0f) * table->ColumnsEnabledCount;
    for (int column_n = 0; column_n < table->ColumnsCount; column_n++)
    {
        if (!(table->EnabledMaskByIndex & ((ImU64)1 << column_n)))
            continue;
        ImGuiTableColumn* column = &table->Columns[column_n];

        // Allocate width for stretched/weighted columns (StretchWeight gets converted into WidthRequest)
        if (column->Flags & ImGuiTableColumnFlags_WidthStretch)
        {
            float weight_ratio = column->StretchWeight / stretch_sum_weights;
            column->WidthRequest = IM_FLOOR(ImMax(width_avail_for_stretched_columns * weight_ratio, table->MinColumnWidth) + 0.01f);
            width_remaining_for_stretched_columns -= column->WidthRequest;
        }

        // [Resize Rule 1] The right-most Visible column is not resizable if there is at least one Stretch column
        // See additional comments in TableSetColumnWidth().
        if (column->NextEnabledColumn == -1 && table->LeftMostStretchedColumn != -1)
            column->Flags |= ImGuiTableColumnFlags_NoDirectResize_;

        // Assign final width, record width in case we will need to shrink
        column->WidthGiven = ImFloor(ImMax(column->WidthRequest, table->MinColumnWidth));
        table->ColumnsGivenWidth += column->WidthGiven;
    }

    // [Part 5] Redistribute stretch remainder width due to rounding (remainder width is < 1.0f * number of Stretch column).
    // Using right-to-left distribution (more likely to match resizing cursor).
    // Each visited stretch column receives one extra unit until less than one unit remains.
    if (width_remaining_for_stretched_columns >= 1.0f && !(table->Flags & ImGuiTableFlags_PreciseWidths))
        for (int order_n = table->ColumnsCount - 1; stretch_sum_weights > 0.0f && width_remaining_for_stretched_columns >= 1.0f && order_n >= 0; order_n--)
        {
            if (!(table->EnabledMaskByDisplayOrder & ((ImU64)1 << order_n)))
                continue;
            ImGuiTableColumn* column = &table->Columns[table->DisplayOrderToIndex[order_n]];
            if (!(column->Flags & ImGuiTableColumnFlags_WidthStretch))
                continue;
            column->WidthRequest += 1.0f;
            column->WidthGiven += 1.0f;
            width_remaining_for_stretched_columns -= 1.0f;
        }

    // Reset hover state; the hit rectangle extends down to at least LastOuterHeight below the top of OuterRect.
    table->HoveredColumnBody = -1;
    table->HoveredColumnBorder = -1;
    const ImRect mouse_hit_rect(table->OuterRect.Min.x, table->OuterRect.Min.y, table->OuterRect.Max.x, ImMax(table->OuterRect.Max.y, table->OuterRect.Min.y + table->LastOuterHeight));
    const bool is_hovering_table = ItemHoverable(mouse_hit_rect, 0);

    // [Part 6] Setup final position, offset, skip/clip states and clipping rectangles, detect hovered column
    // Process columns in their visible orders as we are comparing the visible order and adjusting host_clip_rect while looping.
    int visible_n = 0;                  // Number of enabled columns processed so far
    bool offset_x_frozen = (table->FreezeColumnsCount > 0);
    float offset_x = ((table->FreezeColumnsCount > 0) ? table->OuterRect.Min.x : work_rect.Min.x) + table->OuterPaddingX - table->CellSpacingX1;
    ImRect host_clip_rect = table->InnerClipRect;
    //host_clip_rect.Max.x += table->CellPaddingX + table->CellSpacingX2;
    table->VisibleMaskByIndex = 0x00;
    table->RequestOutputMaskByIndex = 0x00;
    for (int order_n = 0; order_n < table->ColumnsCount; order_n++)
    {
        const int column_n = table->DisplayOrderToIndex[order_n];
        ImGuiTableColumn* column = &table->Columns[column_n];

        column->NavLayerCurrent = (ImS8)((table->FreezeRowsCount > 0 || column_n < table->FreezeColumnsCount) ? ImGuiNavLayer_Menu : ImGuiNavLayer_Main);

        // Once all frozen columns have been laid out, switch the running offset from OuterRect-based to WorkRect-based.
        if (offset_x_frozen && table->FreezeColumnsCount == visible_n)
        {
            offset_x += work_rect.Min.x - table->OuterRect.Min.x;
            offset_x_frozen = false;
        }

        // Clear status flags
        column->Flags &= ~ImGuiTableColumnFlags_StatusMask_;

        if ((table->EnabledMaskByDisplayOrder & ((ImU64)1 << order_n)) == 0)
        {
            // Hidden column: clear a few fields and we are done with it for the remainder of the function.
            // We set a zero-width clip rect but set Min.y/Max.y properly to not interfere with the clipper.
            column->MinX = column->MaxX = column->WorkMinX = column->ClipRect.Min.x = column->ClipRect.Max.x = offset_x;
            column->WidthGiven = 0.0f;
            column->ClipRect.Min.y = work_rect.Min.y;
            column->ClipRect.Max.y = FLT_MAX;
            column->ClipRect.ClipWithFull(host_clip_rect);
            column->IsVisibleX = column->IsVisibleY = column->IsRequestOutput = false;
            column->IsSkipItems = true;
            column->ItemWidth = 1.0f;
            continue;
        }

        // Detect hovered column
        // (column->ClipRect is only reassigned further down, so this test uses the rectangle stored by an earlier layout)
        if (is_hovering_table && g.IO.MousePos.x >= column->ClipRect.Min.x && g.IO.MousePos.x < column->ClipRect.Max.x)
            table->HoveredColumnBody = (ImGuiTableColumnIdx)column_n;

        // Lock start position
        column->MinX = offset_x;

        // Lock width based on start position and minimum/maximum width for this position
        float max_width = TableGetMaxColumnWidth(table, column_n);
        column->WidthGiven = ImMin(column->WidthGiven, max_width);
        column->WidthGiven = ImMax(column->WidthGiven, ImMin(column->WidthRequest, table->MinColumnWidth));
        column->MaxX = offset_x + column->WidthGiven + table->CellSpacingX1 + table->CellSpacingX2 + table->CellPaddingX * 2.0f;

        // Lock other positions
        // - ClipRect.Min.x: Because merging draw commands doesn't compare min boundaries, we make ClipRect.Min.x match left bounds to be consistent regardless of merging.
        // - ClipRect.Max.x: using WorkMaxX instead of MaxX (aka including padding) makes things more consistent when resizing down, tho slightly detrimental to visibility in very-small column.
        // - ClipRect.Max.x: using MaxX makes it easier for header to receive hover highlight with no discontinuity and display sorting arrow.
        // - FIXME-TABLE: We want equal width columns to have equal (ClipRect.Max.x - WorkMinX) width, which means ClipRect.max.x cannot stray off host_clip_rect.Max.x else right-most column may appear shorter.
        column->WorkMinX = column->MinX + table->CellPaddingX + table->CellSpacingX1;
        column->WorkMaxX = column->MaxX - table->CellPaddingX - table->CellSpacingX2; // Expected max
        column->ItemWidth = ImFloor(column->WidthGiven * 0.65f);
        column->ClipRect.Min.x = column->MinX;
        column->ClipRect.Min.y = work_rect.Min.y;
        column->ClipRect.Max.x = column->MaxX; //column->WorkMaxX;
        column->ClipRect.Max.y = FLT_MAX;
        column->ClipRect.ClipWithFull(host_clip_rect);

        // Mark column as Clipped (not in sight)
        // Note that scrolling tables (where inner_window != outer_window) handle Y clipped earlier in BeginTable() so IsVisibleY really only applies to non-scrolling tables.
        // FIXME-TABLE: Because InnerClipRect.Max.y is conservatively ==outer_window->ClipRect.Max.y, we never can mark columns _Above_ the scroll line as not IsVisibleY.
        // Taking advantage of LastOuterHeight would yield good results there...
        // FIXME-TABLE: Y clipping is disabled because it effectively means not submitting will reduce contents width which is fed to outer_window->DC.CursorMaxPos.x,
        // and this may be used (e.g. typically by outer_window using AlwaysAutoResize or outer_window's horizontal scrollbar, but could be something else).
        // Possible solution to preserve last known content width for clipped column. Test 'table_reported_size' fails when enabling Y clipping and window is resized small.
        column->IsVisibleX = (column->ClipRect.Max.x > column->ClipRect.Min.x);
        column->IsVisibleY = true; // (column->ClipRect.Max.y > column->ClipRect.Min.y);
        const bool is_visible = column->IsVisibleX; //&& column->IsVisibleY;
        if (is_visible)
            table->VisibleMaskByIndex |= ((ImU64)1 << column_n);

        // Mark column as requesting output from user. Note that fixed + non-resizable sets are auto-fitting at all times and therefore always request output.
        column->IsRequestOutput = is_visible || column->AutoFitQueue != 0 || column->CannotSkipItemsQueue != 0;
        if (column->IsRequestOutput)
            table->RequestOutputMaskByIndex |= ((ImU64)1 << column_n);

        // Mark column as SkipItems (ignoring all items/layout)
        column->IsSkipItems = !column->IsEnabled || table->HostSkipItems;
        if (column->IsSkipItems)
            IM_ASSERT(!is_visible);

        // Update status flags
        column->Flags |= ImGuiTableColumnFlags_IsEnabled;
        if (is_visible)
            column->Flags |= ImGuiTableColumnFlags_IsVisible;
        if (column->SortOrder != -1)
            column->Flags |= ImGuiTableColumnFlags_IsSorted;
        if (table->HoveredColumnBody == column_n)
            column->Flags |= ImGuiTableColumnFlags_IsHovered;

        // Alignment
        // FIXME-TABLE: This align based on the whole column width, not per-cell, and therefore isn't useful in
        // many cases (to be able to honor this we might be able to store a log of cells width, per row, for
        // visible rows, but nav/programmatic scroll would have visible artifacts.)
        //if (column->Flags & ImGuiTableColumnFlags_AlignRight)
        //    column->WorkMinX = ImMax(column->WorkMinX, column->MaxX - column->ContentWidthRowsUnfrozen);
        //else if (column->Flags & ImGuiTableColumnFlags_AlignCenter)
        //    column->WorkMinX = ImLerp(column->WorkMinX, ImMax(column->StartX, column->MaxX - column->ContentWidthRowsUnfrozen), 0.5f);

        // Reset content width variables
        column->ContentMaxXFrozen = column->ContentMaxXUnfrozen = column->WorkMinX;
        column->ContentMaxXHeadersUsed = column->ContentMaxXHeadersIdeal = column->WorkMinX;

        // Don't decrement auto-fit counters until container window got a chance to submit its items
        if (table->HostSkipItems == false)
        {
            column->AutoFitQueue >>= 1;
            column->CannotSkipItemsQueue >>= 1;
        }

        // Frozen columns push the left edge of the clip rect used by the columns that follow them.
        if (visible_n < table->FreezeColumnsCount)
            host_clip_rect.Min.x = ImClamp(column->MaxX + TABLE_BORDER_SIZE, host_clip_rect.Min.x, host_clip_rect.Max.x);

        offset_x += column->WidthGiven + table->CellSpacingX1 + table->CellSpacingX2 + table->CellPaddingX * 2.0f;
        visible_n++;
    }

    // [Part 7] Detect/store when we are hovering the unused space after the right-most column (so e.g. context menus can react on it)
    // Clear Resizable flag if none of our columns are actually resizable (either via an explicit _NoResize flag, either
    // because of using _WidthAuto/_WidthStretch). This will hide the resizing option from the context menu.
    // (HoveredColumnBody == ColumnsCount is the value stored for "hovering the space after the last column".)
    const float unused_x1 = ImMax(table->WorkRect.Min.x, table->Columns[table->RightMostEnabledColumn].ClipRect.Max.x);
    if (is_hovering_table && table->HoveredColumnBody == -1)
    {
        if (g.IO.MousePos.x >= unused_x1)
            table->HoveredColumnBody = (ImGuiTableColumnIdx)table->ColumnsCount;
    }
    if (has_resizable == false && (table->Flags & ImGuiTableFlags_Resizable))
        table->Flags &= ~ImGuiTableFlags_Resizable;

    // [Part 8] Lock actual OuterRect/WorkRect right-most position.
    // This is done late to handle the case of fixed-columns tables not claiming more widths that they need.
    // Because of this we are careful with uses of WorkRect and InnerClipRect before this point.
    // NoHostExtendX is dropped as soon as the table has at least one stretch column.
    if (table->RightMostStretchedColumn != -1)
        table->Flags &= ~ImGuiTableFlags_NoHostExtendX;
    if (table->Flags & ImGuiTableFlags_NoHostExtendX)
    {
        table->OuterRect.Max.x = table->WorkRect.Max.x = unused_x1;
        table->InnerClipRect.Max.x = ImMin(table->InnerClipRect.Max.x, unused_x1);
    }
    table->InnerWindow->ParentWorkRect = table->WorkRect;
    table->BorderX1 = table->InnerClipRect.Min.x;// +((table->Flags & ImGuiTableFlags_BordersOuter) ? 0.0f : -1.0f);
    table->BorderX2 = table->InnerClipRect.Max.x;// +((table->Flags & ImGuiTableFlags_BordersOuter) ? 0.0f : +1.0f);

    // [Part 9] Allocate draw channels and setup background cliprect
    TableSetupDrawChannels(table);

    // [Part 10] Hit testing on borders
    // (LastFirstRowHeight and IsUsingHeaders are reset right after, so TableUpdateBorders() reads their prior values)
    if (table->Flags & ImGuiTableFlags_Resizable)
        TableUpdateBorders(table);
    table->LastFirstRowHeight = 0.0f;
    table->IsLayoutLocked = true;
    table->IsUsingHeaders = false;

    // [Part 11] Context menu
    // Only the instance that was interacted with submits the popup; the open flag is cleared once the popup is gone.
    if (table->IsContextPopupOpen && table->InstanceCurrent == table->InstanceInteracted)
    {
        const ImGuiID context_menu_id = ImHashStr("##ContextMenu", 0, table->ID);
        if (BeginPopupEx(context_menu_id, ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoSavedSettings))
        {
            TableDrawContextMenu(table);
            EndPopup();
        }
        else
        {
            table->IsContextPopupOpen = false;
        }
    }

    // [Part 12] Sanitize and build sort specs before we have a chance to use them for display.
    // This path will only be exercised when sort specs are modified before header rows (e.g. init or visibility change)
    if (table->IsSortSpecsDirty && (table->Flags & ImGuiTableFlags_Sortable))
        TableSortSpecsBuild(table);

    // Initial state: either select the no-clip draw channel, or push the inner window clip rect on its draw list.
    ImGuiWindow* inner_window = table->InnerWindow;
    if (table->Flags & ImGuiTableFlags_NoClip)
        table->DrawSplitter.SetCurrentChannel(inner_window->DrawList, TABLE_DRAW_CHANNEL_NOCLIP);
    else
        inner_window->DrawList->PushClipRect(inner_window->ClipRect.Min, inner_window->ClipRect.Max, false);
}
+ const float hit_half_width = TABLE_RESIZE_SEPARATOR_HALF_THICKNESS; + const float hit_y1 = table->OuterRect.Min.y; + const float hit_y2_body = ImMax(table->OuterRect.Max.y, hit_y1 + table->LastOuterHeight); + const float hit_y2_head = hit_y1 + table->LastFirstRowHeight; + + for (int order_n = 0; order_n < table->ColumnsCount; order_n++) + { + if (!(table->EnabledMaskByDisplayOrder & ((ImU64)1 << order_n))) + continue; + + const int column_n = table->DisplayOrderToIndex[order_n]; + ImGuiTableColumn* column = &table->Columns[column_n]; + if (column->Flags & (ImGuiTableColumnFlags_NoResize | ImGuiTableColumnFlags_NoDirectResize_)) + continue; + + // ImGuiTableFlags_NoBordersInBodyUntilResize will be honored in TableDrawBorders() + const float border_y2_hit = (table->Flags & ImGuiTableFlags_NoBordersInBody) ? hit_y2_head : hit_y2_body; + if ((table->Flags & ImGuiTableFlags_NoBordersInBody) && table->IsUsingHeaders == false) + continue; + + if (table->FreezeColumnsCount > 0) + if (column->MaxX < table->Columns[table->DisplayOrderToIndex[table->FreezeColumnsCount - 1]].MaxX) + continue; + + ImGuiID column_id = TableGetColumnResizeID(table, column_n, table->InstanceCurrent); + ImRect hit_rect(column->MaxX - hit_half_width, hit_y1, column->MaxX + hit_half_width, border_y2_hit); + //GetForegroundDrawList()->AddRect(hit_rect.Min, hit_rect.Max, IM_COL32(255, 0, 0, 100)); + KeepAliveID(column_id); + + bool hovered = false, held = false; + bool pressed = ButtonBehavior(hit_rect, column_id, &hovered, &held, ImGuiButtonFlags_FlattenChildren | ImGuiButtonFlags_AllowItemOverlap | ImGuiButtonFlags_PressedOnClick | ImGuiButtonFlags_PressedOnDoubleClick); + if (pressed && IsMouseDoubleClicked(0)) + { + TableSetColumnWidthAutoSingle(table, column_n); + ClearActiveID(); + held = hovered = false; + } + if (held) + { + if (table->LastResizedColumn == -1) + table->ResizeLockMinContentsX2 = table->RightMostEnabledColumn != -1 ? 
table->Columns[table->RightMostEnabledColumn].MaxX : -FLT_MAX; + table->ResizedColumn = (ImGuiTableColumnIdx)column_n; + table->InstanceInteracted = table->InstanceCurrent; + } + if ((hovered && g.HoveredIdTimer > TABLE_RESIZE_SEPARATOR_FEEDBACK_TIMER) || held) + { + table->HoveredColumnBorder = (ImGuiTableColumnIdx)column_n; + SetMouseCursor(ImGuiMouseCursor_ResizeEW); + } + } +} + +void ImGui::EndTable() +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + IM_ASSERT(table != NULL && "Only call EndTable() if BeginTable() returns true!"); + + // This assert would be very useful to catch a common error... unfortunately it would probably trigger in some + // cases, and for consistency user may sometimes output empty tables (and still benefit from e.g. outer border) + //IM_ASSERT(table->IsLayoutLocked && "Table unused: never called TableNextRow(), is that the intent?"); + + // If the user never got to call TableNextRow() or TableNextColumn(), we call layout ourselves to ensure all our + // code paths are consistent (instead of just hoping that TableBegin/TableEnd will work), get borders drawn, etc. 
+ if (!table->IsLayoutLocked) + TableUpdateLayout(table); + + const ImGuiTableFlags flags = table->Flags; + ImGuiWindow* inner_window = table->InnerWindow; + ImGuiWindow* outer_window = table->OuterWindow; + IM_ASSERT(inner_window == g.CurrentWindow); + IM_ASSERT(outer_window == inner_window || outer_window == inner_window->ParentWindow); + + if (table->IsInsideRow) + TableEndRow(table); + + // Context menu in columns body + if (flags & ImGuiTableFlags_ContextMenuInBody) + if (table->HoveredColumnBody != -1 && !IsAnyItemHovered() && IsMouseReleased(ImGuiMouseButton_Right)) + TableOpenContextMenu((int)table->HoveredColumnBody); + + // Finalize table height + inner_window->DC.PrevLineSize = table->HostBackupPrevLineSize; + inner_window->DC.CurrLineSize = table->HostBackupCurrLineSize; + inner_window->DC.CursorMaxPos = table->HostBackupCursorMaxPos; + const float inner_content_max_y = table->RowPosY2; + IM_ASSERT(table->RowPosY2 == inner_window->DC.CursorPos.y); + if (inner_window != outer_window) + inner_window->DC.CursorMaxPos.y = inner_content_max_y; + else if (!(flags & ImGuiTableFlags_NoHostExtendY)) + table->OuterRect.Max.y = table->InnerRect.Max.y = ImMax(table->OuterRect.Max.y, inner_content_max_y); // Patch OuterRect/InnerRect height + table->WorkRect.Max.y = ImMax(table->WorkRect.Max.y, table->OuterRect.Max.y); + table->LastOuterHeight = table->OuterRect.GetHeight(); + + // Setup inner scrolling range + // FIXME: This ideally should be done earlier, in BeginTable() SetNextWindowContentSize call, just like writing to inner_window->DC.CursorMaxPos.y, + // but since the later is likely to be impossible to do we'd rather update both axises together. + if (table->Flags & ImGuiTableFlags_ScrollX) + { + const float outer_padding_for_border = (table->Flags & ImGuiTableFlags_BordersOuterV) ? 
TABLE_BORDER_SIZE : 0.0f; + float max_pos_x = table->InnerWindow->DC.CursorMaxPos.x; + if (table->RightMostEnabledColumn != -1) + max_pos_x = ImMax(max_pos_x, table->Columns[table->RightMostEnabledColumn].WorkMaxX + table->CellPaddingX + table->OuterPaddingX - outer_padding_for_border); + if (table->ResizedColumn != -1) + max_pos_x = ImMax(max_pos_x, table->ResizeLockMinContentsX2); + table->InnerWindow->DC.CursorMaxPos.x = max_pos_x; + } + + // Pop clipping rect + if (!(flags & ImGuiTableFlags_NoClip)) + inner_window->DrawList->PopClipRect(); + inner_window->ClipRect = inner_window->DrawList->_ClipRectStack.back(); + + // Draw borders + if ((flags & ImGuiTableFlags_Borders) != 0) + TableDrawBorders(table); + +#if 0 + // Strip out dummy channel draw calls + // We have no way to prevent user submitting direct ImDrawList calls into a hidden column (but ImGui:: calls will be clipped out) + // Pros: remove draw calls which will have no effect. since they'll have zero-size cliprect they may be early out anyway. + // Cons: making it harder for users watching metrics/debugger to spot the wasted vertices. 
+ if (table->DummyDrawChannel != (ImGuiTableColumnIdx)-1) + { + ImDrawChannel* dummy_channel = &table->DrawSplitter._Channels[table->DummyDrawChannel]; + dummy_channel->_CmdBuffer.resize(0); + dummy_channel->_IdxBuffer.resize(0); + } +#endif + + // Flatten channels and merge draw calls + table->DrawSplitter.SetCurrentChannel(inner_window->DrawList, 0); + if ((table->Flags & ImGuiTableFlags_NoClip) == 0) + TableMergeDrawChannels(table); + table->DrawSplitter.Merge(inner_window->DrawList); + + // Update ColumnsAutoFitWidth to get us ahead for host using our size to auto-resize without waiting for next BeginTable() + const float width_spacings = (table->OuterPaddingX * 2.0f) + (table->CellSpacingX1 + table->CellSpacingX2) * (table->ColumnsEnabledCount - 1); + table->ColumnsAutoFitWidth = width_spacings + (table->CellPaddingX * 2.0f) * table->ColumnsEnabledCount; + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + if (table->EnabledMaskByIndex & ((ImU64)1 << column_n)) + { + ImGuiTableColumn* column = &table->Columns[column_n]; + if ((column->Flags & ImGuiTableColumnFlags_WidthFixed) && !(column->Flags & ImGuiTableColumnFlags_NoResize)) + table->ColumnsAutoFitWidth += column->WidthRequest; + else + table->ColumnsAutoFitWidth += TableGetColumnWidthAuto(table, column); + } + + // Update scroll + if ((table->Flags & ImGuiTableFlags_ScrollX) == 0 && inner_window != outer_window) + { + inner_window->Scroll.x = 0.0f; + } + else if (table->LastResizedColumn != -1 && table->ResizedColumn == -1 && inner_window->ScrollbarX && table->InstanceInteracted == table->InstanceCurrent) + { + // When releasing a column being resized, scroll to keep the resulting column in sight + const float neighbor_width_to_keep_visible = table->MinColumnWidth + table->CellPaddingX * 2.0f; + ImGuiTableColumn* column = &table->Columns[table->LastResizedColumn]; + if (column->MaxX < table->InnerClipRect.Min.x) + SetScrollFromPosX(inner_window, column->MaxX - inner_window->Pos.x - 
neighbor_width_to_keep_visible, 1.0f); + else if (column->MaxX > table->InnerClipRect.Max.x) + SetScrollFromPosX(inner_window, column->MaxX - inner_window->Pos.x + neighbor_width_to_keep_visible, 1.0f); + } + + // Apply resizing/dragging at the end of the frame + if (table->ResizedColumn != -1 && table->InstanceCurrent == table->InstanceInteracted) + { + ImGuiTableColumn* column = &table->Columns[table->ResizedColumn]; + const float new_x2 = (g.IO.MousePos.x - g.ActiveIdClickOffset.x + TABLE_RESIZE_SEPARATOR_HALF_THICKNESS); + const float new_width = ImFloor(new_x2 - column->MinX - table->CellSpacingX1 - table->CellPaddingX * 2.0f); + table->ResizedColumnNextWidth = new_width; + } + + // Pop from id stack + IM_ASSERT_USER_ERROR(inner_window->IDStack.back() == table->ID + table->InstanceCurrent, "Mismatching PushID/PopID!"); + IM_ASSERT_USER_ERROR(outer_window->DC.ItemWidthStack.Size >= table->HostBackupItemWidthStackSize, "Too many PopItemWidth!"); + PopID(); + + // Restore window data that we modified + const ImVec2 backup_outer_max_pos = outer_window->DC.CursorMaxPos; + inner_window->WorkRect = table->HostBackupWorkRect; + inner_window->ParentWorkRect = table->HostBackupParentWorkRect; + inner_window->SkipItems = table->HostSkipItems; + outer_window->DC.CursorPos = table->OuterRect.Min; + outer_window->DC.ItemWidth = table->HostBackupItemWidth; + outer_window->DC.ItemWidthStack.Size = table->HostBackupItemWidthStackSize; + outer_window->DC.ColumnsOffset = table->HostBackupColumnsOffset; + + // Layout in outer window + // (FIXME: To allow auto-fit and allow desirable effect of SameLine() we dissociate 'used' vs 'ideal' size by overriding + // CursorPosPrevLine and CursorMaxPos manually. That should be a more general layout feature, see same problem e.g. 
#3414) + if (inner_window != outer_window) + { + EndChild(); + } + else + { + ItemSize(table->OuterRect.GetSize()); + ItemAdd(table->OuterRect, 0); + } + + // Override declared contents width/height to enable auto-resize while not needlessly adding a scrollbar + if (table->Flags & ImGuiTableFlags_NoHostExtendX) + { + // FIXME-TABLE: Could we remove this section? + // ColumnsAutoFitWidth may be one frame ahead here since for Fixed+NoResize is calculated from latest contents + IM_ASSERT((table->Flags & ImGuiTableFlags_ScrollX) == 0); + outer_window->DC.CursorMaxPos.x = ImMax(backup_outer_max_pos.x, table->OuterRect.Min.x + table->ColumnsAutoFitWidth); + } + else if (table->UserOuterSize.x <= 0.0f) + { + const float decoration_size = (table->Flags & ImGuiTableFlags_ScrollX) ? inner_window->ScrollbarSizes.x : 0.0f; + outer_window->DC.IdealMaxPos.x = ImMax(outer_window->DC.IdealMaxPos.x, table->OuterRect.Min.x + table->ColumnsAutoFitWidth + decoration_size - table->UserOuterSize.x); + outer_window->DC.CursorMaxPos.x = ImMax(backup_outer_max_pos.x, ImMin(table->OuterRect.Max.x, table->OuterRect.Min.x + table->ColumnsAutoFitWidth)); + } + else + { + outer_window->DC.CursorMaxPos.x = ImMax(backup_outer_max_pos.x, table->OuterRect.Max.x); + } + if (table->UserOuterSize.y <= 0.0f) + { + const float decoration_size = (table->Flags & ImGuiTableFlags_ScrollY) ? 
inner_window->ScrollbarSizes.y : 0.0f; + outer_window->DC.IdealMaxPos.y = ImMax(outer_window->DC.IdealMaxPos.y, inner_content_max_y + decoration_size - table->UserOuterSize.y); + outer_window->DC.CursorMaxPos.y = ImMax(backup_outer_max_pos.y, ImMin(table->OuterRect.Max.y, inner_content_max_y)); + } + else + { + // OuterRect.Max.y may already have been pushed downward from the initial value (unless ImGuiTableFlags_NoHostExtendY is set) + outer_window->DC.CursorMaxPos.y = ImMax(backup_outer_max_pos.y, table->OuterRect.Max.y); + } + + // Save settings + if (table->IsSettingsDirty) + TableSaveSettings(table); + table->IsInitializing = false; + + // Clear or restore current table, if any + IM_ASSERT(g.CurrentWindow == outer_window && g.CurrentTable == table); + g.CurrentTableStack.pop_back(); + g.CurrentTable = g.CurrentTableStack.Size ? g.Tables.GetByIndex(g.CurrentTableStack.back().Index) : NULL; + outer_window->DC.CurrentTableIdx = g.CurrentTable ? g.Tables.GetIndex(g.CurrentTable) : -1; +} + +// See "COLUMN SIZING POLICIES" comments at the top of this file +// If (init_width_or_weight <= 0.0f) it is ignored +void ImGui::TableSetupColumn(const char* label, ImGuiTableColumnFlags flags, float init_width_or_weight, ImGuiID user_id) +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + IM_ASSERT(table != NULL && "Need to call TableSetupColumn() after BeginTable()!"); + IM_ASSERT(table->IsLayoutLocked == false && "Need to call call TableSetupColumn() before first row!"); + IM_ASSERT((flags & ImGuiTableColumnFlags_StatusMask_) == 0 && "Illegal to pass StatusMask values to TableSetupColumn()"); + if (table->DeclColumnsCount >= table->ColumnsCount) + { + IM_ASSERT_USER_ERROR(table->DeclColumnsCount < table->ColumnsCount, "Called TableSetupColumn() too many times!"); + return; + } + + ImGuiTableColumn* column = &table->Columns[table->DeclColumnsCount]; + table->DeclColumnsCount++; + + // Assert when passing a width or weight if policy is entirely left to 
default, to avoid storing width into weight and vice-versa. + // Give a grace to users of ImGuiTableFlags_ScrollX. + if (table->IsDefaultSizingPolicy && (flags & ImGuiTableColumnFlags_WidthMask_) == 0 && (flags & ImGuiTableFlags_ScrollX) == 0) + IM_ASSERT(init_width_or_weight <= 0.0f && "Can only specify width/weight if sizing policy is set explicitely in either Table or Column."); + + // When passing a width automatically enforce WidthFixed policy + // (whereas TableSetupColumnFlags would default to WidthAuto if table is not Resizable) + if ((flags & ImGuiTableColumnFlags_WidthMask_) == 0 && init_width_or_weight > 0.0f) + if ((table->Flags & ImGuiTableFlags_SizingMask_) == ImGuiTableFlags_SizingFixedFit || (table->Flags & ImGuiTableFlags_SizingMask_) == ImGuiTableFlags_SizingFixedSame) + flags |= ImGuiTableColumnFlags_WidthFixed; + + TableSetupColumnFlags(table, column, flags); + column->UserID = user_id; + flags = column->Flags; + + // Initialize defaults + column->InitStretchWeightOrWidth = init_width_or_weight; + if (table->IsInitializing) + { + // Init width or weight + if (column->WidthRequest < 0.0f && column->StretchWeight < 0.0f) + { + if ((flags & ImGuiTableColumnFlags_WidthFixed) && init_width_or_weight > 0.0f) + column->WidthRequest = init_width_or_weight; + if (flags & ImGuiTableColumnFlags_WidthStretch) + column->StretchWeight = (init_width_or_weight > 0.0f) ? 
init_width_or_weight : -1.0f; + + // Disable auto-fit if an explicit width/weight has been specified + if (init_width_or_weight > 0.0f) + column->AutoFitQueue = 0x00; + } + + // Init default visibility/sort state + if ((flags & ImGuiTableColumnFlags_DefaultHide) && (table->SettingsLoadedFlags & ImGuiTableFlags_Hideable) == 0) + column->IsEnabled = column->IsEnabledNextFrame = false; + if (flags & ImGuiTableColumnFlags_DefaultSort && (table->SettingsLoadedFlags & ImGuiTableFlags_Sortable) == 0) + { + column->SortOrder = 0; // Multiple columns using _DefaultSort will be reassigned unique SortOrder values when building the sort specs. + column->SortDirection = (column->Flags & ImGuiTableColumnFlags_PreferSortDescending) ? (ImS8)ImGuiSortDirection_Descending : (ImU8)(ImGuiSortDirection_Ascending); + } + } + + // Store name (append with zero-terminator in contiguous buffer) + column->NameOffset = -1; + if (label != NULL && label[0] != 0) + { + column->NameOffset = (ImS16)table->ColumnsNames.size(); + table->ColumnsNames.append(label, label + strlen(label) + 1); + } +} + +// [Public] +void ImGui::TableSetupScrollFreeze(int columns, int rows) +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + IM_ASSERT(table != NULL && "Need to call TableSetupColumn() after BeginTable()!"); + IM_ASSERT(table->IsLayoutLocked == false && "Need to call TableSetupColumn() before first row!"); + IM_ASSERT(columns >= 0 && columns < IMGUI_TABLE_MAX_COLUMNS); + IM_ASSERT(rows >= 0 && rows < 128); // Arbitrary limit + + table->FreezeColumnsRequest = (table->Flags & ImGuiTableFlags_ScrollX) ? (ImGuiTableColumnIdx)columns : 0; + table->FreezeColumnsCount = (table->InnerWindow->Scroll.x != 0.0f) ? table->FreezeColumnsRequest : 0; + table->FreezeRowsRequest = (table->Flags & ImGuiTableFlags_ScrollY) ? (ImGuiTableColumnIdx)rows : 0; + table->FreezeRowsCount = (table->InnerWindow->Scroll.y != 0.0f) ? 
table->FreezeRowsRequest : 0; + table->IsUnfrozenRows = (table->FreezeRowsCount == 0); // Make sure this is set before TableUpdateLayout() so ImGuiListClipper can benefit from it.b +} + +int ImGui::TableGetColumnCount() +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + return table ? table->ColumnsCount : 0; +} + +const char* ImGui::TableGetColumnName(int column_n) +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + if (!table) + return NULL; + if (column_n < 0) + column_n = table->CurrentColumn; + return TableGetColumnName(table, column_n); +} + +const char* ImGui::TableGetColumnName(const ImGuiTable* table, int column_n) +{ + if (table->IsLayoutLocked == false && column_n >= table->DeclColumnsCount) + return ""; // NameOffset is invalid at this point + const ImGuiTableColumn* column = &table->Columns[column_n]; + if (column->NameOffset == -1) + return ""; + return &table->ColumnsNames.Buf[column->NameOffset]; +} + +// We allow querying for an extra column in order to poll the IsHovered state of the right-most section +ImGuiTableColumnFlags ImGui::TableGetColumnFlags(int column_n) +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + if (!table) + return ImGuiTableColumnFlags_None; + if (column_n < 0) + column_n = table->CurrentColumn; + if (column_n == table->ColumnsCount) + return (table->HoveredColumnBody == column_n) ? ImGuiTableColumnFlags_IsHovered : ImGuiTableColumnFlags_None; + return table->Columns[column_n].Flags; +} + +// Return the cell rectangle based on currently known height. +// - Important: we generally don't know our row height until the end of the row, so Max.y will be incorrect in many situations. +// The only case where this is correct is if we provided a min_row_height to TableNextRow() and don't go below it. 
+// - Important: if ImGuiTableFlags_PadOuterX is set but ImGuiTableFlags_PadInnerX is not set, the outer-most left and right +// columns report a small offset so their CellBgRect can extend up to the outer border. +ImRect ImGui::TableGetCellBgRect(const ImGuiTable* table, int column_n) +{ + const ImGuiTableColumn* column = &table->Columns[column_n]; + float x1 = column->MinX; + float x2 = column->MaxX; + if (column->PrevEnabledColumn == -1) + x1 -= table->CellSpacingX1; + if (column->NextEnabledColumn == -1) + x2 += table->CellSpacingX2; + return ImRect(x1, table->RowPosY1, x2, table->RowPosY2); +} + +// Return the resizing ID for the right-side of the given column. +ImGuiID ImGui::TableGetColumnResizeID(const ImGuiTable* table, int column_n, int instance_no) +{ + IM_ASSERT(column_n >= 0 && column_n < table->ColumnsCount); + ImGuiID id = table->ID + 1 + (instance_no * table->ColumnsCount) + column_n; + return id; +} + +// Return -1 when table is not hovered. return columns_count if the unused space at the right of visible columns is hovered. +int ImGui::TableGetHoveredColumn() +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + if (!table) + return -1; + return (int)table->HoveredColumnBody; +} + +void ImGui::TableSetBgColor(ImGuiTableBgTarget target, ImU32 color, int column_n) +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + IM_ASSERT(target != ImGuiTableBgTarget_None); + + if (color == IM_COL32_DISABLE) + color = 0; + + // We cannot draw neither the cell or row background immediately as we don't know the row height at this point in time. 
+ switch (target) + { + case ImGuiTableBgTarget_CellBg: + { + if (table->RowPosY1 > table->InnerClipRect.Max.y) // Discard + return; + if (column_n == -1) + column_n = table->CurrentColumn; + if ((table->VisibleMaskByIndex & ((ImU64)1 << column_n)) == 0) + return; + if (table->RowCellDataCurrent < 0 || table->RowCellData[table->RowCellDataCurrent].Column != column_n) + table->RowCellDataCurrent++; + ImGuiTableCellData* cell_data = &table->RowCellData[table->RowCellDataCurrent]; + cell_data->BgColor = color; + cell_data->Column = (ImGuiTableColumnIdx)column_n; + break; + } + case ImGuiTableBgTarget_RowBg0: + case ImGuiTableBgTarget_RowBg1: + { + if (table->RowPosY1 > table->InnerClipRect.Max.y) // Discard + return; + IM_ASSERT(column_n == -1); + int bg_idx = (target == ImGuiTableBgTarget_RowBg1) ? 1 : 0; + table->RowBgColor[bg_idx] = color; + break; + } + default: + IM_ASSERT(0); + } +} + +//------------------------------------------------------------------------- +// [SECTION] Tables: Row changes +//------------------------------------------------------------------------- +// - TableGetRowIndex() +// - TableNextRow() +// - TableBeginRow() [Internal] +// - TableEndRow() [Internal] +//------------------------------------------------------------------------- + +// [Public] Note: for row coloring we use ->RowBgColorCounter which is the same value without counting header rows +int ImGui::TableGetRowIndex() +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + if (!table) + return 0; + return table->CurrentRow; +} + +// [Public] Starts into the first cell of a new row +void ImGui::TableNextRow(ImGuiTableRowFlags row_flags, float row_min_height) +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + + if (!table->IsLayoutLocked) + TableUpdateLayout(table); + if (table->IsInsideRow) + TableEndRow(table); + + table->LastRowFlags = table->RowFlags; + table->RowFlags = row_flags; + table->RowMinHeight = row_min_height; + 
TableBeginRow(table); + + // We honor min_row_height requested by user, but cannot guarantee per-row maximum height, + // because that would essentially require a unique clipping rectangle per-cell. + table->RowPosY2 += table->CellPaddingY * 2.0f; + table->RowPosY2 = ImMax(table->RowPosY2, table->RowPosY1 + row_min_height); + + // Disable output until user calls TableNextColumn() + table->InnerWindow->SkipItems = true; +} + +// [Internal] Called by TableNextRow() +void ImGui::TableBeginRow(ImGuiTable* table) +{ + ImGuiWindow* window = table->InnerWindow; + IM_ASSERT(!table->IsInsideRow); + + // New row + table->CurrentRow++; + table->CurrentColumn = -1; + table->RowBgColor[0] = table->RowBgColor[1] = IM_COL32_DISABLE; + table->RowCellDataCurrent = -1; + table->IsInsideRow = true; + + // Begin frozen rows + float next_y1 = table->RowPosY2; + if (table->CurrentRow == 0 && table->FreezeRowsCount > 0) + next_y1 = window->DC.CursorPos.y = table->OuterRect.Min.y; + + table->RowPosY1 = table->RowPosY2 = next_y1; + table->RowTextBaseline = 0.0f; + table->RowIndentOffsetX = window->DC.Indent.x - table->HostIndentX; // Lock indent + window->DC.PrevLineTextBaseOffset = 0.0f; + window->DC.CursorMaxPos.y = next_y1; + + // Making the header BG color non-transparent will allow us to overlay it multiple times when handling smooth dragging. + if (table->RowFlags & ImGuiTableRowFlags_Headers) + { + TableSetBgColor(ImGuiTableBgTarget_RowBg0, GetColorU32(ImGuiCol_TableHeaderBg)); + if (table->CurrentRow == 0) + table->IsUsingHeaders = true; + } +} + +// [Internal] Called by TableNextRow() +void ImGui::TableEndRow(ImGuiTable* table) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + IM_ASSERT(window == table->InnerWindow); + IM_ASSERT(table->IsInsideRow); + + if (table->CurrentColumn != -1) + TableEndCell(table); + + // Position cursor at the bottom of our row so it can be used for e.g. clipping calculation. 
However it is + // likely that the next call to TableBeginCell() will reposition the cursor to take account of vertical padding. + window->DC.CursorPos.y = table->RowPosY2; + + // Row background fill + const float bg_y1 = table->RowPosY1; + const float bg_y2 = table->RowPosY2; + const bool unfreeze_rows_actual = (table->CurrentRow + 1 == table->FreezeRowsCount); + const bool unfreeze_rows_request = (table->CurrentRow + 1 == table->FreezeRowsRequest); + if (table->CurrentRow == 0) + table->LastFirstRowHeight = bg_y2 - bg_y1; + + const bool is_visible = (bg_y2 >= table->InnerClipRect.Min.y && bg_y1 <= table->InnerClipRect.Max.y); + if (is_visible) + { + // Decide of background color for the row + ImU32 bg_col0 = 0; + ImU32 bg_col1 = 0; + if (table->RowBgColor[0] != IM_COL32_DISABLE) + bg_col0 = table->RowBgColor[0]; + else if (table->Flags & ImGuiTableFlags_RowBg) + bg_col0 = GetColorU32((table->RowBgColorCounter & 1) ? ImGuiCol_TableRowBgAlt : ImGuiCol_TableRowBg); + if (table->RowBgColor[1] != IM_COL32_DISABLE) + bg_col1 = table->RowBgColor[1]; + + // Decide of top border color + ImU32 border_col = 0; + const float border_size = TABLE_BORDER_SIZE; + if (table->CurrentRow > 0 || table->InnerWindow == table->OuterWindow) + if (table->Flags & ImGuiTableFlags_BordersInnerH) + border_col = (table->LastRowFlags & ImGuiTableRowFlags_Headers) ? table->BorderColorStrong : table->BorderColorLight; + + const bool draw_cell_bg_color = table->RowCellDataCurrent >= 0; + const bool draw_strong_bottom_border = unfreeze_rows_actual; + if ((bg_col0 | bg_col1 | border_col) != 0 || draw_strong_bottom_border || draw_cell_bg_color) + { + // In theory we could call SetWindowClipRectBeforeSetChannel() but since we know TableEndRow() is + // always followed by a change of clipping rectangle we perform the smallest overwrite possible here. 
+ if ((table->Flags & ImGuiTableFlags_NoClip) == 0) + window->DrawList->_CmdHeader.ClipRect = table->Bg0ClipRectForDrawCmd.ToVec4(); + table->DrawSplitter.SetCurrentChannel(window->DrawList, TABLE_DRAW_CHANNEL_BG0); + } + + // Draw row background + // We soft/cpu clip this so all backgrounds and borders can share the same clipping rectangle + if (bg_col0 || bg_col1) + { + ImRect row_rect(table->WorkRect.Min.x, bg_y1, table->WorkRect.Max.x, bg_y2); + row_rect.ClipWith(table->BgClipRect); + if (bg_col0 != 0 && row_rect.Min.y < row_rect.Max.y) + window->DrawList->AddRectFilled(row_rect.Min, row_rect.Max, bg_col0); + if (bg_col1 != 0 && row_rect.Min.y < row_rect.Max.y) + window->DrawList->AddRectFilled(row_rect.Min, row_rect.Max, bg_col1); + } + + // Draw cell background color + if (draw_cell_bg_color) + { + ImGuiTableCellData* cell_data_end = &table->RowCellData[table->RowCellDataCurrent]; + for (ImGuiTableCellData* cell_data = &table->RowCellData[0]; cell_data <= cell_data_end; cell_data++) + { + const ImGuiTableColumn* column = &table->Columns[cell_data->Column]; + ImRect cell_bg_rect = TableGetCellBgRect(table, cell_data->Column); + cell_bg_rect.ClipWith(table->BgClipRect); + cell_bg_rect.Min.x = ImMax(cell_bg_rect.Min.x, column->ClipRect.Min.x); // So that first column after frozen one gets clipped + cell_bg_rect.Max.x = ImMin(cell_bg_rect.Max.x, column->MaxX); + window->DrawList->AddRectFilled(cell_bg_rect.Min, cell_bg_rect.Max, cell_data->BgColor); + } + } + + // Draw top border + if (border_col && bg_y1 >= table->BgClipRect.Min.y && bg_y1 < table->BgClipRect.Max.y) + window->DrawList->AddLine(ImVec2(table->BorderX1, bg_y1), ImVec2(table->BorderX2, bg_y1), border_col, border_size); + + // Draw bottom border at the row unfreezing mark (always strong) + if (draw_strong_bottom_border && bg_y2 >= table->BgClipRect.Min.y && bg_y2 < table->BgClipRect.Max.y) + window->DrawList->AddLine(ImVec2(table->BorderX1, bg_y2), ImVec2(table->BorderX2, bg_y2), 
table->BorderColorStrong, border_size); + } + + // End frozen rows (when we are past the last frozen row line, teleport cursor and alter clipping rectangle) + // We need to do that in TableEndRow() instead of TableBeginRow() so the list clipper can mark end of row and + // get the new cursor position. + if (unfreeze_rows_request) + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + { + ImGuiTableColumn* column = &table->Columns[column_n]; + column->NavLayerCurrent = (ImS8)((column_n < table->FreezeColumnsCount) ? ImGuiNavLayer_Menu : ImGuiNavLayer_Main); + } + if (unfreeze_rows_actual) + { + IM_ASSERT(table->IsUnfrozenRows == false); + table->IsUnfrozenRows = true; + + // BgClipRect starts as table->InnerClipRect, reduce it now and make BgClipRectForDrawCmd == BgClipRect + float y0 = ImMax(table->RowPosY2 + 1, window->InnerClipRect.Min.y); + table->BgClipRect.Min.y = table->Bg2ClipRectForDrawCmd.Min.y = ImMin(y0, window->InnerClipRect.Max.y); + table->BgClipRect.Max.y = table->Bg2ClipRectForDrawCmd.Max.y = window->InnerClipRect.Max.y; + table->Bg2DrawChannelCurrent = table->Bg2DrawChannelUnfrozen; + IM_ASSERT(table->Bg2ClipRectForDrawCmd.Min.y <= table->Bg2ClipRectForDrawCmd.Max.y); + + float row_height = table->RowPosY2 - table->RowPosY1; + table->RowPosY2 = window->DC.CursorPos.y = table->WorkRect.Min.y + table->RowPosY2 - table->OuterRect.Min.y; + table->RowPosY1 = table->RowPosY2 - row_height; + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + { + ImGuiTableColumn* column = &table->Columns[column_n]; + column->DrawChannelCurrent = column->DrawChannelUnfrozen; + column->ClipRect.Min.y = table->Bg2ClipRectForDrawCmd.Min.y; + } + + // Update cliprect ahead of TableBeginCell() so clipper can access to new ClipRect->Min.y + SetWindowClipRectBeforeSetChannel(window, table->Columns[0].ClipRect); + table->DrawSplitter.SetCurrentChannel(window->DrawList, table->Columns[0].DrawChannelCurrent); + } + + if (!(table->RowFlags & 
ImGuiTableRowFlags_Headers)) + table->RowBgColorCounter++; + table->IsInsideRow = false; +} + +//------------------------------------------------------------------------- +// [SECTION] Tables: Columns changes +//------------------------------------------------------------------------- +// - TableGetColumnIndex() +// - TableSetColumnIndex() +// - TableNextColumn() +// - TableBeginCell() [Internal] +// - TableEndCell() [Internal] +//------------------------------------------------------------------------- + +int ImGui::TableGetColumnIndex() +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + if (!table) + return 0; + return table->CurrentColumn; +} + +// [Public] Append into a specific column +bool ImGui::TableSetColumnIndex(int column_n) +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + if (!table) + return false; + + if (table->CurrentColumn != column_n) + { + if (table->CurrentColumn != -1) + TableEndCell(table); + IM_ASSERT(column_n >= 0 && table->ColumnsCount); + TableBeginCell(table, column_n); + } + + // Return whether the column is visible. User may choose to skip submitting items based on this return value, + // however they shouldn't skip submitting for columns that may have the tallest contribution to row height. + return (table->RequestOutputMaskByIndex & ((ImU64)1 << column_n)) != 0; +} + +// [Public] Append into the next column, wrap and create a new row when already on last column +bool ImGui::TableNextColumn() +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + if (!table) + return false; + + if (table->IsInsideRow && table->CurrentColumn + 1 < table->ColumnsCount) + { + if (table->CurrentColumn != -1) + TableEndCell(table); + TableBeginCell(table, table->CurrentColumn + 1); + } + else + { + TableNextRow(); + TableBeginCell(table, 0); + } + + // Return whether the column is visible. 
User may choose to skip submitting items based on this return value, + // however they shouldn't skip submitting for columns that may have the tallest contribution to row height. + int column_n = table->CurrentColumn; + return (table->RequestOutputMaskByIndex & ((ImU64)1 << column_n)) != 0; +} + + +// [Internal] Called by TableSetColumnIndex()/TableNextColumn() +// This is called very frequently, so we need to be mindful of unnecessary overhead. +// FIXME-TABLE FIXME-OPT: Could probably shortcut some things for non-active or clipped columns. +void ImGui::TableBeginCell(ImGuiTable* table, int column_n) +{ + ImGuiTableColumn* column = &table->Columns[column_n]; + ImGuiWindow* window = table->InnerWindow; + table->CurrentColumn = column_n; + + // Start position is roughly ~~ CellRect.Min + CellPadding + Indent + float start_x = column->WorkMinX; + if (column->Flags & ImGuiTableColumnFlags_IndentEnable) + start_x += table->RowIndentOffsetX; // ~~ += window.DC.Indent.x - table->HostIndentX, except we locked it for the row. 
+ + window->DC.CursorPos.x = start_x; + window->DC.CursorPos.y = table->RowPosY1 + table->CellPaddingY; + window->DC.CursorMaxPos.x = window->DC.CursorPos.x; + window->DC.ColumnsOffset.x = start_x - window->Pos.x - window->DC.Indent.x; // FIXME-WORKRECT + window->DC.CurrLineTextBaseOffset = table->RowTextBaseline; + window->DC.NavLayerCurrent = (ImGuiNavLayer)column->NavLayerCurrent; + + window->WorkRect.Min.y = window->DC.CursorPos.y; + window->WorkRect.Min.x = column->WorkMinX; + window->WorkRect.Max.x = column->WorkMaxX; + window->DC.ItemWidth = column->ItemWidth; + + // To allow ImGuiListClipper to function we propagate our row height + if (!column->IsEnabled) + window->DC.CursorPos.y = ImMax(window->DC.CursorPos.y, table->RowPosY2); + + window->SkipItems = column->IsSkipItems; + if (column->IsSkipItems) + { + window->DC.LastItemId = 0; + window->DC.LastItemStatusFlags = 0; + } + + if (table->Flags & ImGuiTableFlags_NoClip) + { + // FIXME: if we end up drawing all borders/bg in EndTable, could remove this and just assert that channel hasn't changed. + table->DrawSplitter.SetCurrentChannel(window->DrawList, TABLE_DRAW_CHANNEL_NOCLIP); + //IM_ASSERT(table->DrawSplitter._Current == TABLE_DRAW_CHANNEL_NOCLIP); + } + else + { + // FIXME-TABLE: Could avoid this if draw channel is dummy channel? + SetWindowClipRectBeforeSetChannel(window, column->ClipRect); + table->DrawSplitter.SetCurrentChannel(window->DrawList, column->DrawChannelCurrent); + } +} + +// [Internal] Called by TableNextRow()/TableSetColumnIndex()/TableNextColumn() +void ImGui::TableEndCell(ImGuiTable* table) +{ + ImGuiTableColumn* column = &table->Columns[table->CurrentColumn]; + ImGuiWindow* window = table->InnerWindow; + + // Report maximum position so we can infer content size per column. 
+ float* p_max_pos_x; + if (table->RowFlags & ImGuiTableRowFlags_Headers) + p_max_pos_x = &column->ContentMaxXHeadersUsed; // Useful in case user submit contents in header row that is not a TableHeader() call + else + p_max_pos_x = table->IsUnfrozenRows ? &column->ContentMaxXUnfrozen : &column->ContentMaxXFrozen; + *p_max_pos_x = ImMax(*p_max_pos_x, window->DC.CursorMaxPos.x); + table->RowPosY2 = ImMax(table->RowPosY2, window->DC.CursorMaxPos.y + table->CellPaddingY); + column->ItemWidth = window->DC.ItemWidth; + + // Propagate text baseline for the entire row + // FIXME-TABLE: Here we propagate text baseline from the last line of the cell.. instead of the first one. + table->RowTextBaseline = ImMax(table->RowTextBaseline, window->DC.PrevLineTextBaseOffset); +} + +//------------------------------------------------------------------------- +// [SECTION] Tables: Columns width management +//------------------------------------------------------------------------- +// - TableGetMaxColumnWidth() [Internal] +// - TableGetColumnWidthAuto() [Internal] +// - TableSetColumnWidth() +// - TableSetColumnWidthAutoSingle() [Internal] +// - TableSetColumnWidthAutoAll() [Internal] +// - TableUpdateColumnsWeightFromWidth() [Internal] +//------------------------------------------------------------------------- + +// Maximum column content width given current layout. Use column->MinX so this value on a per-column basis. +float ImGui::TableGetMaxColumnWidth(const ImGuiTable* table, int column_n) +{ + const ImGuiTableColumn* column = &table->Columns[column_n]; + float max_width = FLT_MAX; + const float min_column_distance = table->MinColumnWidth + table->CellPaddingX * 2.0f + table->CellSpacingX1 + table->CellSpacingX2; + if (table->Flags & ImGuiTableFlags_ScrollX) + { + // Frozen columns can't reach beyond visible width else scrolling will naturally break. 
+ if (column->DisplayOrder < table->FreezeColumnsRequest) + { + max_width = (table->InnerClipRect.Max.x - (table->FreezeColumnsRequest - column->DisplayOrder) * min_column_distance) - column->MinX; + max_width = max_width - table->OuterPaddingX - table->CellPaddingX - table->CellSpacingX2; + } + } + else if ((table->Flags & ImGuiTableFlags_NoKeepColumnsVisible) == 0) + { + // If horizontal scrolling if disabled, we apply a final lossless shrinking of columns in order to make + // sure they are all visible. Because of this we also know that all of the columns will always fit in + // table->WorkRect and therefore in table->InnerRect (because ScrollX is off) + // FIXME-TABLE: This is solved incorrectly but also quite a difficult problem to fix as we also want ClipRect width to match. + // See "table_width_distrib" and "table_width_keep_visible" tests + max_width = table->WorkRect.Max.x - (table->ColumnsEnabledCount - column->IndexWithinEnabledSet - 1) * min_column_distance - column->MinX; + //max_width -= table->CellSpacingX1; + max_width -= table->CellSpacingX2; + max_width -= table->CellPaddingX * 2.0f; + max_width -= table->OuterPaddingX; + } + return max_width; +} + +// Note this is meant to be stored in column->WidthAuto, please generally use the WidthAuto field +float ImGui::TableGetColumnWidthAuto(ImGuiTable* table, ImGuiTableColumn* column) +{ + const float content_width_body = ImMax(column->ContentMaxXFrozen, column->ContentMaxXUnfrozen) - column->WorkMinX; + const float content_width_headers = column->ContentMaxXHeadersIdeal - column->WorkMinX; + float width_auto = content_width_body; + if (!(column->Flags & ImGuiTableColumnFlags_NoHeaderWidth)) + width_auto = ImMax(width_auto, content_width_headers); + + // Non-resizable fixed columns preserve their requested width + if ((column->Flags & ImGuiTableColumnFlags_WidthFixed) && column->InitStretchWeightOrWidth > 0.0f) + if (!(table->Flags & ImGuiTableFlags_Resizable) || (column->Flags & 
ImGuiTableColumnFlags_NoResize)) + width_auto = column->InitStretchWeightOrWidth; + + return ImMax(width_auto, table->MinColumnWidth); +} + +// 'width' = inner column width, without padding +void ImGui::TableSetColumnWidth(int column_n, float width) +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + IM_ASSERT(table != NULL && table->IsLayoutLocked == false); + IM_ASSERT(column_n >= 0 && column_n < table->ColumnsCount); + ImGuiTableColumn* column_0 = &table->Columns[column_n]; + float column_0_width = width; + + // Apply constraints early + // Compare both requested and actual given width to avoid overwriting requested width when column is stuck (minimum size, bounded) + IM_ASSERT(table->MinColumnWidth > 0.0f); + const float min_width = table->MinColumnWidth; + const float max_width = ImMax(min_width, TableGetMaxColumnWidth(table, column_n)); + column_0_width = ImClamp(column_0_width, min_width, max_width); + if (column_0->WidthGiven == column_0_width || column_0->WidthRequest == column_0_width) + return; + + //IMGUI_DEBUG_LOG("TableSetColumnWidth(%d, %.1f->%.1f)\n", column_0_idx, column_0->WidthGiven, column_0_width); + ImGuiTableColumn* column_1 = (column_0->NextEnabledColumn != -1) ? &table->Columns[column_0->NextEnabledColumn] : NULL; + + // In this surprisingly not simple because of how we support mixing Fixed and multiple Stretch columns. + // - All fixed: easy. + // - All stretch: easy. + // - One or more fixed + one stretch: easy. + // - One or more fixed + more than one stretch: tricky. + // Qt when manual resize is enabled only support a single _trailing_ stretch column. + + // When forwarding resize from Wn| to Fn+1| we need to be considerate of the _NoResize flag on Fn+1. + // FIXME-TABLE: Find a way to rewrite all of this so interactions feel more consistent for the user. + // Scenarios: + // - F1 F2 F3 resize from F1| or F2| --> ok: alter ->WidthRequested of Fixed column. Subsequent columns will be offset. 
+ // - F1 F2 F3 resize from F3| --> ok: alter ->WidthRequested of Fixed column. If active, ScrollX extent can be altered. + // - F1 F2 W3 resize from F1| or F2| --> ok: alter ->WidthRequested of Fixed column. If active, ScrollX extent can be altered, but it doesn't make much sense as the Stretch column will always be minimal size. + // - F1 F2 W3 resize from W3| --> ok: no-op (disabled by Resize Rule 1) + // - W1 W2 W3 resize from W1| or W2| --> ok + // - W1 W2 W3 resize from W3| --> ok: no-op (disabled by Resize Rule 1) + // - W1 F2 F3 resize from F3| --> ok: no-op (disabled by Resize Rule 1) + // - W1 F2 resize from F2| --> ok: no-op (disabled by Resize Rule 1) + // - W1 W2 F3 resize from W1| or W2| --> ok + // - W1 F2 W3 resize from W1| or F2| --> ok + // - F1 W2 F3 resize from W2| --> ok + // - F1 W3 F2 resize from W3| --> ok + // - W1 F2 F3 resize from W1| --> ok: equivalent to resizing |F2. F3 will not move. + // - W1 F2 F3 resize from F2| --> ok + // All resizes from a Wx columns are locking other columns. + + // Possible improvements: + // - W1 W2 W3 resize W1| --> to not be stuck, both W2 and W3 would stretch down. Seems possible to fix. Would be most beneficial to simplify resize of all-weighted columns. + // - W3 F1 F2 resize W3| --> to not be stuck past F1|, both F1 and F2 would need to stretch down, which would be lossy or ambiguous. Seems hard to fix. + + // [Resize Rule 1] Can't resize from right of right-most visible column if there is any Stretch column. Implemented in TableUpdateLayout(). + + // If we have all Fixed columns OR resizing a Fixed column that doesn't come after a Stretch one, we can do an offsetting resize. 
+ // This is the preferred resize path + if (column_0->Flags & ImGuiTableColumnFlags_WidthFixed) + if (!column_1 || table->LeftMostStretchedColumn == -1 || table->Columns[table->LeftMostStretchedColumn].DisplayOrder >= column_0->DisplayOrder) + { + column_0->WidthRequest = column_0_width; + table->IsSettingsDirty = true; + return; + } + + // We can also use previous column if there's no next one (this is used when doing an auto-fit on the right-most stretch column) + if (column_1 == NULL) + column_1 = (column_0->PrevEnabledColumn != -1) ? &table->Columns[column_0->PrevEnabledColumn] : NULL; + if (column_1 == NULL) + return; + + // Resizing from right-side of a Stretch column before a Fixed column forward sizing to left-side of fixed column. + // (old_a + old_b == new_a + new_b) --> (new_a == old_a + old_b - new_b) + float column_1_width = ImMax(column_1->WidthRequest - (column_0_width - column_0->WidthRequest), min_width); + column_0_width = column_0->WidthRequest + column_1->WidthRequest - column_1_width; + IM_ASSERT(column_0_width > 0.0f && column_1_width > 0.0f); + column_0->WidthRequest = column_0_width; + column_1->WidthRequest = column_1_width; + if ((column_0->Flags | column_1->Flags) & ImGuiTableColumnFlags_WidthStretch) + TableUpdateColumnsWeightFromWidth(table); + table->IsSettingsDirty = true; +} + +// Disable clipping then auto-fit, will take 2 frames +// (we don't take a shortcut for unclipped columns to reduce inconsistencies when e.g. 
resizing multiple columns) +void ImGui::TableSetColumnWidthAutoSingle(ImGuiTable* table, int column_n) +{ + // Single auto width uses auto-fit + ImGuiTableColumn* column = &table->Columns[column_n]; + if (!column->IsEnabled) + return; + column->CannotSkipItemsQueue = (1 << 0); + table->AutoFitSingleColumn = (ImGuiTableColumnIdx)column_n; +} + +void ImGui::TableSetColumnWidthAutoAll(ImGuiTable* table) +{ + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + { + ImGuiTableColumn* column = &table->Columns[column_n]; + if (!column->IsEnabled && !(column->Flags & ImGuiTableColumnFlags_WidthStretch)) // Cannot reset weight of hidden stretch column + continue; + column->CannotSkipItemsQueue = (1 << 0); + column->AutoFitQueue = (1 << 1); + } +} + +void ImGui::TableUpdateColumnsWeightFromWidth(ImGuiTable* table) +{ + IM_ASSERT(table->LeftMostStretchedColumn != -1 && table->RightMostStretchedColumn != -1); + + // Measure existing quantity + float visible_weight = 0.0f; + float visible_width = 0.0f; + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + { + ImGuiTableColumn* column = &table->Columns[column_n]; + if (!column->IsEnabled || !(column->Flags & ImGuiTableColumnFlags_WidthStretch)) + continue; + IM_ASSERT(column->StretchWeight > 0.0f); + visible_weight += column->StretchWeight; + visible_width += column->WidthRequest; + } + IM_ASSERT(visible_weight > 0.0f && visible_width > 0.0f); + + // Apply new weights + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + { + ImGuiTableColumn* column = &table->Columns[column_n]; + if (!column->IsEnabled || !(column->Flags & ImGuiTableColumnFlags_WidthStretch)) + continue; + column->StretchWeight = (column->WidthRequest / visible_width) * visible_weight; + IM_ASSERT(column->StretchWeight > 0.0f); + } +} + +//------------------------------------------------------------------------- +// [SECTION] Tables: Drawing 
+//------------------------------------------------------------------------- +// - TablePushBackgroundChannel() [Internal] +// - TablePopBackgroundChannel() [Internal] +// - TableSetupDrawChannels() [Internal] +// - TableMergeDrawChannels() [Internal] +// - TableDrawBorders() [Internal] +//------------------------------------------------------------------------- + +// Bg2 is used by Selectable (and possibly other widgets) to render to the background. +// Unlike our Bg0/1 channel which we uses for RowBg/CellBg/Borders and where we guarantee all shapes to be CPU-clipped, the Bg2 channel being widgets-facing will rely on regular ClipRect. +void ImGui::TablePushBackgroundChannel() +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + ImGuiTable* table = g.CurrentTable; + + // Optimization: avoid SetCurrentChannel() + PushClipRect() + table->HostBackupInnerClipRect = window->ClipRect; + SetWindowClipRectBeforeSetChannel(window, table->Bg2ClipRectForDrawCmd); + table->DrawSplitter.SetCurrentChannel(window->DrawList, table->Bg2DrawChannelCurrent); +} + +void ImGui::TablePopBackgroundChannel() +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + ImGuiTable* table = g.CurrentTable; + ImGuiTableColumn* column = &table->Columns[table->CurrentColumn]; + + // Optimization: avoid PopClipRect() + SetCurrentChannel() + SetWindowClipRectBeforeSetChannel(window, table->HostBackupInnerClipRect); + table->DrawSplitter.SetCurrentChannel(window->DrawList, column->DrawChannelCurrent); +} + +// Allocate draw channels. Called by TableUpdateLayout() +// - We allocate them following storage order instead of display order so reordering columns won't needlessly +// increase overall dormant memory cost. +// - We isolate headers draw commands in their own channels instead of just altering clip rects. +// This is in order to facilitate merging of draw commands. 
// - After crossing FreezeRowsCount, all columns see their current draw channel changed to a second set of channels.
// - We only use the dummy draw channel so we can push a null clipping rectangle into it without affecting other
//   channels, while simplifying per-row/per-cell overhead. It will be empty and discarded when merged.
// - We allocate 1 or 2 background draw channels. This is because we know TablePushBackgroundChannel() is only used for
//   horizontal spanning. If we allowed vertical spanning we'd need one background draw channel per merge group (1-4).
// Draw channel allocation (before merging):
// - NoClip                       --> 2+D+1 channels: bg0/1 + bg2 + foreground (same clip rect == always 1 draw call)
// - Clip                         --> 2+D+N channels
// - FreezeRows                   --> 2+D+N*2 (unless scrolling value is zero)
// - FreezeRows || FreezeColumns  --> 3+D+N*2 (unless scrolling value is zero)
// Where D is 1 if any column is clipped or hidden (dummy channel) otherwise 0.
void ImGui::TableSetupDrawChannels(ImGuiTable* table)
{
    // Channel budget: background channels + per-row channels (doubled when rows are frozen) + optional dummy.
    const int freeze_row_multiplier = (table->FreezeRowsCount > 0) ? 2 : 1;
    const int channels_for_row = (table->Flags & ImGuiTableFlags_NoClip) ? 1 : table->ColumnsEnabledCount;
    const int channels_for_bg = 1 + 1 * freeze_row_multiplier;
    const int channels_for_dummy = (table->ColumnsEnabledCount < table->ColumnsCount || table->VisibleMaskByIndex != table->EnabledMaskByIndex) ? +1 : 0;
    const int channels_total = channels_for_bg + (channels_for_row * freeze_row_multiplier) + channels_for_dummy;
    table->DrawSplitter.Split(table->InnerWindow->DrawList, channels_total);

    // The dummy channel, when present, is the last one; -1 means there is none.
    table->DummyDrawChannel = (ImGuiTableDrawChannelIdx)((channels_for_dummy > 0) ? channels_total - 1 : -1);
    table->Bg2DrawChannelCurrent = TABLE_DRAW_CHANNEL_BG2_FROZEN;
    table->Bg2DrawChannelUnfrozen = (ImGuiTableDrawChannelIdx)((table->FreezeRowsCount > 0) ? 2 + channels_for_row : TABLE_DRAW_CHANNEL_BG2_FROZEN);

    // Column channels are handed out starting at index 2. With frozen rows, the unfrozen channel of a column
    // sits (channels_for_row + 1) slots after its frozen one, i.e. past the unfrozen Bg2 channel set above.
    int draw_channel_current = 2;
    for (int column_n = 0; column_n < table->ColumnsCount; column_n++)
    {
        ImGuiTableColumn* column = &table->Columns[column_n];
        if (column->IsVisibleX && column->IsVisibleY)
        {
            column->DrawChannelFrozen = (ImGuiTableDrawChannelIdx)(draw_channel_current);
            column->DrawChannelUnfrozen = (ImGuiTableDrawChannelIdx)(draw_channel_current + (table->FreezeRowsCount > 0 ? channels_for_row + 1 : 0));
            // With NoClip every visible column shares the same channel, so the index is not advanced.
            if (!(table->Flags & ImGuiTableFlags_NoClip))
                draw_channel_current++;
        }
        else
        {
            // Non-visible columns all draw into the dummy channel.
            column->DrawChannelFrozen = column->DrawChannelUnfrozen = table->DummyDrawChannel;
        }
        column->DrawChannelCurrent = column->DrawChannelFrozen;
    }

    // Initial draw cmd starts with a BgClipRect that matches the one of its host, to facilitate merge draw commands by default.
    // All our cell highlight are manually clipped with BgClipRect. When unfreezing it will be made smaller to fit scrolling rect.
    // (This technically isn't part of setting up draw channels, but is reasonably related to be done here)
    table->BgClipRect = table->InnerClipRect;
    table->Bg0ClipRectForDrawCmd = table->OuterWindow->ClipRect;
    table->Bg2ClipRectForDrawCmd = table->HostClipRect;
    IM_ASSERT(table->BgClipRect.Min.y <= table->BgClipRect.Max.y);
}

// This function reorder draw channels based on matching clip rectangle, to facilitate merging them. Called by EndTable().
// For simplicity we call it TableMergeDrawChannels() but in fact it only reorder channels + overwrite ClipRect,
// actual merging is done by table->DrawSplitter.Merge() which is called right after TableMergeDrawChannels().
//
// Columns where the contents didn't stray off their local clip rectangle can be merged. To achieve
// this we merge their clip rect and make them contiguous in the channel list, so they can be merged
// by the call to DrawSplitter.Merge() following to the call to this function.
+// We reorder draw commands by arranging them into a maximum of 4 distinct groups: +// +// 1 group: 2 groups: 2 groups: 4 groups: +// [ 0. ] no freeze [ 0. ] row freeze [ 01 ] col freeze [ 01 ] row+col freeze +// [ .. ] or no scroll [ 2. ] and v-scroll [ .. ] and h-scroll [ 23 ] and v+h-scroll +// +// Each column itself can use 1 channel (row freeze disabled) or 2 channels (row freeze enabled). +// When the contents of a column didn't stray off its limit, we move its channels into the corresponding group +// based on its position (within frozen rows/columns groups or not). +// At the end of the operation our 1-4 groups will each have a ImDrawCmd using the same ClipRect. +// This function assume that each column are pointing to a distinct draw channel, +// otherwise merge_group->ChannelsCount will not match set bit count of merge_group->ChannelsMask. +// +// Column channels will not be merged into one of the 1-4 groups in the following cases: +// - The contents stray off its clipping rectangle (we only compare the MaxX value, not the MinX value). +// Direct ImDrawList calls won't be taken into account by default, if you use them make sure the ImGui:: bounds +// matches, by e.g. calling SetCursorScreenPos(). +// - The channel uses more than one draw command itself. We drop all our attempt at merging stuff here.. +// we could do better but it's going to be rare and probably not worth the hassle. +// Columns for which the draw channel(s) haven't been merged with other will use their own ImDrawCmd. +// +// This function is particularly tricky to understand.. take a breath. +void ImGui::TableMergeDrawChannels(ImGuiTable* table) +{ + ImGuiContext& g = *GImGui; + ImDrawListSplitter* splitter = &table->DrawSplitter; + const bool has_freeze_v = (table->FreezeRowsCount > 0); + const bool has_freeze_h = (table->FreezeColumnsCount > 0); + IM_ASSERT(splitter->_Current == 0); + + // Track which groups we are going to attempt to merge, and which channels goes into each group. 
+ struct MergeGroup + { + ImRect ClipRect; + int ChannelsCount; + ImBitArray ChannelsMask; + }; + int merge_group_mask = 0x00; + MergeGroup merge_groups[4]; + memset(merge_groups, 0, sizeof(merge_groups)); + + // 1. Scan channels and take note of those which can be merged + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + { + if ((table->VisibleMaskByIndex & ((ImU64)1 << column_n)) == 0) + continue; + ImGuiTableColumn* column = &table->Columns[column_n]; + + const int merge_group_sub_count = has_freeze_v ? 2 : 1; + for (int merge_group_sub_n = 0; merge_group_sub_n < merge_group_sub_count; merge_group_sub_n++) + { + const int channel_no = (merge_group_sub_n == 0) ? column->DrawChannelFrozen : column->DrawChannelUnfrozen; + + // Don't attempt to merge if there are multiple draw calls within the column + ImDrawChannel* src_channel = &splitter->_Channels[channel_no]; + if (src_channel->_CmdBuffer.Size > 0 && src_channel->_CmdBuffer.back().ElemCount == 0) + src_channel->_CmdBuffer.pop_back(); + if (src_channel->_CmdBuffer.Size != 1) + continue; + + // Find out the width of this merge group and check if it will fit in our column + // (note that we assume that rendering didn't stray on the left direction. we should need a CursorMinPos to detect it) + if (!(column->Flags & ImGuiTableColumnFlags_NoClip)) + { + float content_max_x; + if (!has_freeze_v) + content_max_x = ImMax(column->ContentMaxXUnfrozen, column->ContentMaxXHeadersUsed); // No row freeze + else if (merge_group_sub_n == 0) + content_max_x = ImMax(column->ContentMaxXFrozen, column->ContentMaxXHeadersUsed); // Row freeze: use width before freeze + else + content_max_x = column->ContentMaxXUnfrozen; // Row freeze: use width after freeze + if (content_max_x > column->ClipRect.Max.x) + continue; + } + + const int merge_group_n = (has_freeze_h && column_n < table->FreezeColumnsCount ? 0 : 1) + (has_freeze_v && merge_group_sub_n == 0 ? 
0 : 2); + IM_ASSERT(channel_no < IMGUI_TABLE_MAX_DRAW_CHANNELS); + MergeGroup* merge_group = &merge_groups[merge_group_n]; + if (merge_group->ChannelsCount == 0) + merge_group->ClipRect = ImRect(+FLT_MAX, +FLT_MAX, -FLT_MAX, -FLT_MAX); + merge_group->ChannelsMask.SetBit(channel_no); + merge_group->ChannelsCount++; + merge_group->ClipRect.Add(src_channel->_CmdBuffer[0].ClipRect); + merge_group_mask |= (1 << merge_group_n); + } + + // Invalidate current draw channel + // (we don't clear DrawChannelFrozen/DrawChannelUnfrozen solely to facilitate debugging/later inspection of data) + column->DrawChannelCurrent = (ImGuiTableDrawChannelIdx)-1; + } + + // [DEBUG] Display merge groups +#if 0 + if (g.IO.KeyShift) + for (int merge_group_n = 0; merge_group_n < IM_ARRAYSIZE(merge_groups); merge_group_n++) + { + MergeGroup* merge_group = &merge_groups[merge_group_n]; + if (merge_group->ChannelsCount == 0) + continue; + char buf[32]; + ImFormatString(buf, 32, "MG%d:%d", merge_group_n, merge_group->ChannelsCount); + ImVec2 text_pos = merge_group->ClipRect.Min + ImVec2(4, 4); + ImVec2 text_size = CalcTextSize(buf, NULL); + GetForegroundDrawList()->AddRectFilled(text_pos, text_pos + text_size, IM_COL32(0, 0, 0, 255)); + GetForegroundDrawList()->AddText(text_pos, IM_COL32(255, 255, 0, 255), buf, NULL); + GetForegroundDrawList()->AddRect(merge_group->ClipRect.Min, merge_group->ClipRect.Max, IM_COL32(255, 255, 0, 255)); + } +#endif + + // 2. Rewrite channel list in our preferred order + if (merge_group_mask != 0) + { + // We skip channel 0 (Bg0/Bg1) and 1 (Bg2 frozen) from the shuffling since they won't move - see channels allocation in TableSetupDrawChannels(). 
+ const int LEADING_DRAW_CHANNELS = 2; + g.DrawChannelsTempMergeBuffer.resize(splitter->_Count - LEADING_DRAW_CHANNELS); // Use shared temporary storage so the allocation gets amortized + ImDrawChannel* dst_tmp = g.DrawChannelsTempMergeBuffer.Data; + ImBitArray remaining_mask; // We need 132-bit of storage + remaining_mask.ClearAllBits(); + remaining_mask.SetBitRange(LEADING_DRAW_CHANNELS, splitter->_Count); + remaining_mask.ClearBit(table->Bg2DrawChannelUnfrozen); + IM_ASSERT(has_freeze_v == false || table->Bg2DrawChannelUnfrozen != TABLE_DRAW_CHANNEL_BG2_FROZEN); + int remaining_count = splitter->_Count - (has_freeze_v ? LEADING_DRAW_CHANNELS + 1 : LEADING_DRAW_CHANNELS); + //ImRect host_rect = (table->InnerWindow == table->OuterWindow) ? table->InnerClipRect : table->HostClipRect; + ImRect host_rect = table->HostClipRect; + for (int merge_group_n = 0; merge_group_n < IM_ARRAYSIZE(merge_groups); merge_group_n++) + { + if (int merge_channels_count = merge_groups[merge_group_n].ChannelsCount) + { + MergeGroup* merge_group = &merge_groups[merge_group_n]; + ImRect merge_clip_rect = merge_group->ClipRect; + + // Extend outer-most clip limits to match those of host, so draw calls can be merged even if + // outer-most columns have some outer padding offsetting them from their parent ClipRect. + // The principal cases this is dealing with are: + // - On a same-window table (not scrolling = single group), all fitting columns ClipRect -> will extend and match host ClipRect -> will merge + // - Columns can use padding and have left-most ClipRect.Min.x and right-most ClipRect.Max.x != from host ClipRect -> will extend and match host ClipRect -> will merge + // FIXME-TABLE FIXME-WORKRECT: We are wasting a merge opportunity on tables without scrolling if column doesn't fit + // within host clip rect, solely because of the half-padding difference between window->WorkRect and window->InnerClipRect. 
+ if ((merge_group_n & 1) == 0 || !has_freeze_h) + merge_clip_rect.Min.x = ImMin(merge_clip_rect.Min.x, host_rect.Min.x); + if ((merge_group_n & 2) == 0 || !has_freeze_v) + merge_clip_rect.Min.y = ImMin(merge_clip_rect.Min.y, host_rect.Min.y); + if ((merge_group_n & 1) != 0) + merge_clip_rect.Max.x = ImMax(merge_clip_rect.Max.x, host_rect.Max.x); + if ((merge_group_n & 2) != 0 && (table->Flags & ImGuiTableFlags_NoHostExtendY) == 0) + merge_clip_rect.Max.y = ImMax(merge_clip_rect.Max.y, host_rect.Max.y); +#if 0 + GetOverlayDrawList()->AddRect(merge_group->ClipRect.Min, merge_group->ClipRect.Max, IM_COL32(255, 0, 0, 200), 0.0f, ~0, 1.0f); + GetOverlayDrawList()->AddLine(merge_group->ClipRect.Min, merge_clip_rect.Min, IM_COL32(255, 100, 0, 200)); + GetOverlayDrawList()->AddLine(merge_group->ClipRect.Max, merge_clip_rect.Max, IM_COL32(255, 100, 0, 200)); +#endif + remaining_count -= merge_group->ChannelsCount; + for (int n = 0; n < IM_ARRAYSIZE(remaining_mask.Storage); n++) + remaining_mask.Storage[n] &= ~merge_group->ChannelsMask.Storage[n]; + for (int n = 0; n < splitter->_Count && merge_channels_count != 0; n++) + { + // Copy + overwrite new clip rect + if (!merge_group->ChannelsMask.TestBit(n)) + continue; + merge_group->ChannelsMask.ClearBit(n); + merge_channels_count--; + + ImDrawChannel* channel = &splitter->_Channels[n]; + IM_ASSERT(channel->_CmdBuffer.Size == 1 && merge_clip_rect.Contains(ImRect(channel->_CmdBuffer[0].ClipRect))); + channel->_CmdBuffer[0].ClipRect = merge_clip_rect.ToVec4(); + memcpy(dst_tmp++, channel, sizeof(ImDrawChannel)); + } + } + + // Make sure Bg2DrawChannelUnfrozen appears in the middle of our groups (whereas Bg0/Bg1 and Bg2 frozen are fixed to 0 and 1) + if (merge_group_n == 1 && has_freeze_v) + memcpy(dst_tmp++, &splitter->_Channels[table->Bg2DrawChannelUnfrozen], sizeof(ImDrawChannel)); + } + + // Append unmergeable channels that we didn't reorder at the end of the list + for (int n = 0; n < splitter->_Count && remaining_count != 
0; n++) + { + if (!remaining_mask.TestBit(n)) + continue; + ImDrawChannel* channel = &splitter->_Channels[n]; + memcpy(dst_tmp++, channel, sizeof(ImDrawChannel)); + remaining_count--; + } + IM_ASSERT(dst_tmp == g.DrawChannelsTempMergeBuffer.Data + g.DrawChannelsTempMergeBuffer.Size); + memcpy(splitter->_Channels.Data + LEADING_DRAW_CHANNELS, g.DrawChannelsTempMergeBuffer.Data, (splitter->_Count - LEADING_DRAW_CHANNELS) * sizeof(ImDrawChannel)); + } +} + +// FIXME-TABLE: This is a mess, need to redesign how we render borders (as some are also done in TableEndRow) +void ImGui::TableDrawBorders(ImGuiTable* table) +{ + ImGuiWindow* inner_window = table->InnerWindow; + if (!table->OuterWindow->ClipRect.Overlaps(table->OuterRect)) + return; + + ImDrawList* inner_drawlist = inner_window->DrawList; + table->DrawSplitter.SetCurrentChannel(inner_drawlist, TABLE_DRAW_CHANNEL_BG0); + inner_drawlist->PushClipRect(table->Bg0ClipRectForDrawCmd.Min, table->Bg0ClipRectForDrawCmd.Max, false); + + // Draw inner border and resizing feedback + const float border_size = TABLE_BORDER_SIZE; + const float draw_y1 = table->InnerRect.Min.y; + const float draw_y2_body = table->InnerRect.Max.y; + const float draw_y2_head = table->IsUsingHeaders ? ImMin(table->InnerRect.Max.y, (table->FreezeRowsCount >= 1 ? 
table->InnerRect.Min.y : table->WorkRect.Min.y) + table->LastFirstRowHeight) : draw_y1; + if (table->Flags & ImGuiTableFlags_BordersInnerV) + { + for (int order_n = 0; order_n < table->ColumnsCount; order_n++) + { + if (!(table->EnabledMaskByDisplayOrder & ((ImU64)1 << order_n))) + continue; + + const int column_n = table->DisplayOrderToIndex[order_n]; + ImGuiTableColumn* column = &table->Columns[column_n]; + const bool is_hovered = (table->HoveredColumnBorder == column_n); + const bool is_resized = (table->ResizedColumn == column_n) && (table->InstanceInteracted == table->InstanceCurrent); + const bool is_resizable = (column->Flags & (ImGuiTableColumnFlags_NoResize | ImGuiTableColumnFlags_NoDirectResize_)) == 0; + const bool is_frozen_separator = (table->FreezeColumnsCount != -1 && table->FreezeColumnsCount == order_n + 1); + if (column->MaxX > table->InnerClipRect.Max.x && !is_resized) + continue; + + // Decide whether right-most column is visible + if (column->NextEnabledColumn == -1 && !is_resizable) + if ((table->Flags & ImGuiTableFlags_SizingMask_) != ImGuiTableFlags_SizingFixedSame || (table->Flags & ImGuiTableFlags_NoHostExtendX)) + continue; + if (column->MaxX <= column->ClipRect.Min.x) // FIXME-TABLE FIXME-STYLE: Assume BorderSize==1, this is problematic if we want to increase the border size.. + continue; + + // Draw in outer window so right-most column won't be clipped + // Always draw full height border when being resized/hovered, or on the delimitation of frozen column scrolling. + ImU32 col; + float draw_y2; + if (is_hovered || is_resized || is_frozen_separator) + { + draw_y2 = draw_y2_body; + col = is_resized ? GetColorU32(ImGuiCol_SeparatorActive) : is_hovered ? GetColorU32(ImGuiCol_SeparatorHovered) : table->BorderColorStrong; + } + else + { + draw_y2 = (table->Flags & (ImGuiTableFlags_NoBordersInBody | ImGuiTableFlags_NoBordersInBodyUntilResize)) ? 
draw_y2_head : draw_y2_body; + col = (table->Flags & (ImGuiTableFlags_NoBordersInBody | ImGuiTableFlags_NoBordersInBodyUntilResize)) ? table->BorderColorStrong : table->BorderColorLight; + } + + if (draw_y2 > draw_y1) + inner_drawlist->AddLine(ImVec2(column->MaxX, draw_y1), ImVec2(column->MaxX, draw_y2), col, border_size); + } + } + + // Draw outer border + // FIXME: could use AddRect or explicit VLine/HLine helper? + if (table->Flags & ImGuiTableFlags_BordersOuter) + { + // Display outer border offset by 1 which is a simple way to display it without adding an extra draw call + // (Without the offset, in outer_window it would be rendered behind cells, because child windows are above their + // parent. In inner_window, it won't reach out over scrollbars. Another weird solution would be to display part + // of it in inner window, and the part that's over scrollbars in the outer window..) + // Either solution currently won't allow us to use a larger border size: the border would clipped. + const ImRect outer_border = table->OuterRect; + const ImU32 outer_col = table->BorderColorStrong; + if ((table->Flags & ImGuiTableFlags_BordersOuter) == ImGuiTableFlags_BordersOuter) + { + inner_drawlist->AddRect(outer_border.Min, outer_border.Max, outer_col, 0.0f, ~0, border_size); + } + else if (table->Flags & ImGuiTableFlags_BordersOuterV) + { + inner_drawlist->AddLine(outer_border.Min, ImVec2(outer_border.Min.x, outer_border.Max.y), outer_col, border_size); + inner_drawlist->AddLine(ImVec2(outer_border.Max.x, outer_border.Min.y), outer_border.Max, outer_col, border_size); + } + else if (table->Flags & ImGuiTableFlags_BordersOuterH) + { + inner_drawlist->AddLine(outer_border.Min, ImVec2(outer_border.Max.x, outer_border.Min.y), outer_col, border_size); + inner_drawlist->AddLine(ImVec2(outer_border.Min.x, outer_border.Max.y), outer_border.Max, outer_col, border_size); + } + } + if ((table->Flags & ImGuiTableFlags_BordersInnerH) && table->RowPosY2 < table->OuterRect.Max.y) + { + // 
Draw bottom-most row border + const float border_y = table->RowPosY2; + if (border_y >= table->BgClipRect.Min.y && border_y < table->BgClipRect.Max.y) + inner_drawlist->AddLine(ImVec2(table->BorderX1, border_y), ImVec2(table->BorderX2, border_y), table->BorderColorLight, border_size); + } + + inner_drawlist->PopClipRect(); +} + +//------------------------------------------------------------------------- +// [SECTION] Tables: Sorting +//------------------------------------------------------------------------- +// - TableGetSortSpecs() +// - TableFixColumnSortDirection() [Internal] +// - TableGetColumnNextSortDirection() [Internal] +// - TableSetColumnSortDirection() [Internal] +// - TableSortSpecsSanitize() [Internal] +// - TableSortSpecsBuild() [Internal] +//------------------------------------------------------------------------- + +// Return NULL if no sort specs (most often when ImGuiTableFlags_Sortable is not set) +// You can sort your data again when 'SpecsChanged == true'. It will be true with sorting specs have changed since +// last call, or the first time. +// Lifetime: don't hold on this pointer over multiple frames or past any subsequent call to BeginTable()! +ImGuiTableSortSpecs* ImGui::TableGetSortSpecs() +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + IM_ASSERT(table != NULL); + + if (!(table->Flags & ImGuiTableFlags_Sortable)) + return NULL; + + // Require layout (in case TableHeadersRow() hasn't been called) as it may alter IsSortSpecsDirty in some paths. + if (!table->IsLayoutLocked) + TableUpdateLayout(table); + + if (table->IsSortSpecsDirty) + TableSortSpecsBuild(table); + + return &table->SortSpecs; +} + +static inline ImGuiSortDirection TableGetColumnAvailSortDirection(ImGuiTableColumn* column, int n) +{ + IM_ASSERT(n < column->SortDirectionsAvailCount); + return (column->SortDirectionsAvailList >> (n << 1)) & 0x03; +} + +// Fix sort direction if currently set on a value which is unavailable (e.g. 
activating NoSortAscending/NoSortDescending) +void ImGui::TableFixColumnSortDirection(ImGuiTable* table, ImGuiTableColumn* column) +{ + if (column->SortOrder == -1 || (column->SortDirectionsAvailMask & (1 << column->SortDirection)) != 0) + return; + column->SortDirection = (ImU8)TableGetColumnAvailSortDirection(column, 0); + table->IsSortSpecsDirty = true; +} + +// Calculate next sort direction that would be set after clicking the column +// - If the PreferSortDescending flag is set, we will default to a Descending direction on the first click. +// - Note that the PreferSortAscending flag is never checked, it is essentially the default and therefore a no-op. +IM_STATIC_ASSERT(ImGuiSortDirection_None == 0 && ImGuiSortDirection_Ascending == 1 && ImGuiSortDirection_Descending == 2); +ImGuiSortDirection ImGui::TableGetColumnNextSortDirection(ImGuiTableColumn* column) +{ + IM_ASSERT(column->SortDirectionsAvailCount > 0); + if (column->SortOrder == -1) + return TableGetColumnAvailSortDirection(column, 0); + for (int n = 0; n < 3; n++) + if (column->SortDirection == TableGetColumnAvailSortDirection(column, n)) + return TableGetColumnAvailSortDirection(column, (n + 1) % column->SortDirectionsAvailCount); + IM_ASSERT(0); + return ImGuiSortDirection_None; +} + +// Note that the NoSortAscending/NoSortDescending flags are processed in TableSortSpecsSanitize(), and they may change/revert +// the value of SortDirection. We could technically also do it here but it would be unnecessary and duplicate code. 
+void ImGui::TableSetColumnSortDirection(int column_n, ImGuiSortDirection sort_direction, bool append_to_sort_specs) +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + + if (!(table->Flags & ImGuiTableFlags_SortMulti)) + append_to_sort_specs = false; + if (!(table->Flags & ImGuiTableFlags_SortTristate)) + IM_ASSERT(sort_direction != ImGuiSortDirection_None); + + ImGuiTableColumnIdx sort_order_max = 0; + if (append_to_sort_specs) + for (int other_column_n = 0; other_column_n < table->ColumnsCount; other_column_n++) + sort_order_max = ImMax(sort_order_max, table->Columns[other_column_n].SortOrder); + + ImGuiTableColumn* column = &table->Columns[column_n]; + column->SortDirection = (ImU8)sort_direction; + if (column->SortDirection == ImGuiSortDirection_None) + column->SortOrder = -1; + else if (column->SortOrder == -1 || !append_to_sort_specs) + column->SortOrder = append_to_sort_specs ? sort_order_max + 1 : 0; + + for (int other_column_n = 0; other_column_n < table->ColumnsCount; other_column_n++) + { + ImGuiTableColumn* other_column = &table->Columns[other_column_n]; + if (other_column != column && !append_to_sort_specs) + other_column->SortOrder = -1; + TableFixColumnSortDirection(table, other_column); + } + table->IsSettingsDirty = true; + table->IsSortSpecsDirty = true; +} + +void ImGui::TableSortSpecsSanitize(ImGuiTable* table) +{ + IM_ASSERT(table->Flags & ImGuiTableFlags_Sortable); + + // Clear SortOrder from hidden column and verify that there's no gap or duplicate. 
+ int sort_order_count = 0; + ImU64 sort_order_mask = 0x00; + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + { + ImGuiTableColumn* column = &table->Columns[column_n]; + if (column->SortOrder != -1 && !column->IsEnabled) + column->SortOrder = -1; + if (column->SortOrder == -1) + continue; + sort_order_count++; + sort_order_mask |= ((ImU64)1 << column->SortOrder); + IM_ASSERT(sort_order_count < (int)sizeof(sort_order_mask) * 8); + } + + const bool need_fix_linearize = ((ImU64)1 << sort_order_count) != (sort_order_mask + 1); + const bool need_fix_single_sort_order = (sort_order_count > 1) && !(table->Flags & ImGuiTableFlags_SortMulti); + if (need_fix_linearize || need_fix_single_sort_order) + { + ImU64 fixed_mask = 0x00; + for (int sort_n = 0; sort_n < sort_order_count; sort_n++) + { + // Fix: Rewrite sort order fields if needed so they have no gap or duplicate. + // (e.g. SortOrder 0 disappeared, SortOrder 1..2 exists --> rewrite then as SortOrder 0..1) + int column_with_smallest_sort_order = -1; + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + if ((fixed_mask & ((ImU64)1 << (ImU64)column_n)) == 0 && table->Columns[column_n].SortOrder != -1) + if (column_with_smallest_sort_order == -1 || table->Columns[column_n].SortOrder < table->Columns[column_with_smallest_sort_order].SortOrder) + column_with_smallest_sort_order = column_n; + IM_ASSERT(column_with_smallest_sort_order != -1); + fixed_mask |= ((ImU64)1 << column_with_smallest_sort_order); + table->Columns[column_with_smallest_sort_order].SortOrder = (ImGuiTableColumnIdx)sort_n; + + // Fix: Make sure only one column has a SortOrder if ImGuiTableFlags_MultiSortable is not set. 
+ if (need_fix_single_sort_order) + { + sort_order_count = 1; + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + if (column_n != column_with_smallest_sort_order) + table->Columns[column_n].SortOrder = -1; + break; + } + } + } + + // Fallback default sort order (if no column had the ImGuiTableColumnFlags_DefaultSort flag) + if (sort_order_count == 0 && !(table->Flags & ImGuiTableFlags_SortTristate)) + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + { + ImGuiTableColumn* column = &table->Columns[column_n]; + if (column->IsEnabled && !(column->Flags & ImGuiTableColumnFlags_NoSort)) + { + sort_order_count = 1; + column->SortOrder = 0; + column->SortDirection = (ImU8)TableGetColumnAvailSortDirection(column, 0); + break; + } + } + + table->SortSpecsCount = (ImGuiTableColumnIdx)sort_order_count; +} + +void ImGui::TableSortSpecsBuild(ImGuiTable* table) +{ + IM_ASSERT(table->IsSortSpecsDirty); + TableSortSpecsSanitize(table); + + // Write output + table->SortSpecsMulti.resize(table->SortSpecsCount <= 1 ? 0 : table->SortSpecsCount); + ImGuiTableColumnSortSpecs* sort_specs = (table->SortSpecsCount == 0) ? NULL : (table->SortSpecsCount == 1) ? 
&table->SortSpecsSingle : table->SortSpecsMulti.Data; + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + { + ImGuiTableColumn* column = &table->Columns[column_n]; + if (column->SortOrder == -1) + continue; + IM_ASSERT(column->SortOrder < table->SortSpecsCount); + ImGuiTableColumnSortSpecs* sort_spec = &sort_specs[column->SortOrder]; + sort_spec->ColumnUserID = column->UserID; + sort_spec->ColumnIndex = (ImGuiTableColumnIdx)column_n; + sort_spec->SortOrder = (ImGuiTableColumnIdx)column->SortOrder; + sort_spec->SortDirection = column->SortDirection; + } + table->SortSpecs.Specs = sort_specs; + table->SortSpecs.SpecsCount = table->SortSpecsCount; + table->SortSpecs.SpecsDirty = true; // Mark as dirty for user + table->IsSortSpecsDirty = false; // Mark as not dirty for us +} + +//------------------------------------------------------------------------- +// [SECTION] Tables: Headers +//------------------------------------------------------------------------- +// - TableGetHeaderRowHeight() [Internal] +// - TableHeadersRow() +// - TableHeader() +//------------------------------------------------------------------------- + +float ImGui::TableGetHeaderRowHeight() +{ + // Caring for a minor edge case: + // Calculate row height, for the unlikely case that some labels may be taller than others. + // If we didn't do that, uneven header height would highlight but smaller one before the tallest wouldn't catch input for all height. + // In your custom header row you may omit this all together and just call TableNextRow() without a height... 
+ float row_height = GetTextLineHeight(); + int columns_count = TableGetColumnCount(); + for (int column_n = 0; column_n < columns_count; column_n++) + if (TableGetColumnFlags(column_n) & ImGuiTableColumnFlags_IsEnabled) + row_height = ImMax(row_height, CalcTextSize(TableGetColumnName(column_n)).y); + row_height += GetStyle().CellPadding.y * 2.0f; + return row_height; +} + +// [Public] This is a helper to output TableHeader() calls based on the column names declared in TableSetupColumn(). +// The intent is that advanced users willing to create customized headers would not need to use this helper +// and can create their own! For example: TableHeader() may be preceeded by Checkbox() or other custom widgets. +// See 'Demo->Tables->Custom headers' for a demonstration of implementing a custom version of this. +// This code is constructed to not make much use of internal functions, as it is intended to be a template to copy. +// FIXME-TABLE: TableOpenContextMenu() and TableGetHeaderRowHeight() are not public. +void ImGui::TableHeadersRow() +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + IM_ASSERT(table != NULL && "Need to call TableHeadersRow() after BeginTable()!"); + + // Layout if not already done (this is automatically done by TableNextRow, we do it here solely to facilitate stepping in debugger as it is frequent to step in TableUpdateLayout) + if (!table->IsLayoutLocked) + TableUpdateLayout(table); + + // Open row + const float row_y1 = GetCursorScreenPos().y; + const float row_height = TableGetHeaderRowHeight(); + TableNextRow(ImGuiTableRowFlags_Headers, row_height); + if (table->HostSkipItems) // Merely an optimization, you may skip in your own code. 
+ return; + + const int columns_count = TableGetColumnCount(); + for (int column_n = 0; column_n < columns_count; column_n++) + { + if (!TableSetColumnIndex(column_n)) + continue; + + // Push an id to allow unnamed labels (generally accidental, but let's behave nicely with them) + // - in your own code you may omit the PushID/PopID all-together, provided you know they won't collide + // - table->InstanceCurrent is only >0 when we use multiple BeginTable/EndTable calls with same identifier. + const char* name = TableGetColumnName(column_n); + PushID(table->InstanceCurrent * table->ColumnsCount + column_n); + TableHeader(name); + PopID(); + } + + // Allow opening popup from the right-most section after the last column. + ImVec2 mouse_pos = ImGui::GetMousePos(); + if (IsMouseReleased(1) && TableGetHoveredColumn() == columns_count) + if (mouse_pos.y >= row_y1 && mouse_pos.y < row_y1 + row_height) + TableOpenContextMenu(-1); // Will open a non-column-specific popup. +} + +// Emit a column header (text + optional sort order) +// We cpu-clip text here so that all columns headers can be merged into a same draw call. +// Note that because of how we cpu-clip and display sorting indicators, you _cannot_ use SameLine() after a TableHeader() +void ImGui::TableHeader(const char* label) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (window->SkipItems) + return; + + ImGuiTable* table = g.CurrentTable; + IM_ASSERT(table != NULL && "Need to call TableHeader() after BeginTable()!"); + IM_ASSERT(table->CurrentColumn != -1); + const int column_n = table->CurrentColumn; + ImGuiTableColumn* column = &table->Columns[column_n]; + + // Label + if (label == NULL) + label = ""; + const char* label_end = FindRenderedTextEnd(label); + ImVec2 label_size = CalcTextSize(label, label_end, true); + ImVec2 label_pos = window->DC.CursorPos; + + // If we already got a row height, there's use that. 
+ // FIXME-TABLE: Padding problem if the correct outer-padding CellBgRect strays off our ClipRect? + ImRect cell_r = TableGetCellBgRect(table, column_n); + float label_height = ImMax(label_size.y, table->RowMinHeight - table->CellPaddingY * 2.0f); + + // Calculate ideal size for sort order arrow + float w_arrow = 0.0f; + float w_sort_text = 0.0f; + char sort_order_suf[4] = ""; + const float ARROW_SCALE = 0.65f; + if ((table->Flags & ImGuiTableFlags_Sortable) && !(column->Flags & ImGuiTableColumnFlags_NoSort)) + { + w_arrow = ImFloor(g.FontSize * ARROW_SCALE + g.Style.FramePadding.x); + if (column->SortOrder > 0) + { + ImFormatString(sort_order_suf, IM_ARRAYSIZE(sort_order_suf), "%d", column->SortOrder + 1); + w_sort_text = g.Style.ItemInnerSpacing.x + CalcTextSize(sort_order_suf).x; + } + } + + // We feed our unclipped width to the column without writing on CursorMaxPos, so that column is still considering for merging. + float max_pos_x = label_pos.x + label_size.x + w_sort_text + w_arrow; + column->ContentMaxXHeadersUsed = ImMax(column->ContentMaxXHeadersUsed, column->WorkMaxX); + column->ContentMaxXHeadersIdeal = ImMax(column->ContentMaxXHeadersIdeal, max_pos_x); + + // Keep header highlighted when context menu is open. 
+ const bool selected = (table->IsContextPopupOpen && table->ContextPopupColumn == column_n && table->InstanceInteracted == table->InstanceCurrent); + ImGuiID id = window->GetID(label); + ImRect bb(cell_r.Min.x, cell_r.Min.y, cell_r.Max.x, ImMax(cell_r.Max.y, cell_r.Min.y + label_height + g.Style.CellPadding.y * 2.0f)); + ItemSize(ImVec2(0.0f, label_height)); // Don't declare unclipped width, it'll be fed ContentMaxPosHeadersIdeal + if (!ItemAdd(bb, id)) + return; + + //GetForegroundDrawList()->AddRect(cell_r.Min, cell_r.Max, IM_COL32(255, 0, 0, 255)); // [DEBUG] + //GetForegroundDrawList()->AddRect(bb.Min, bb.Max, IM_COL32(255, 0, 0, 255)); // [DEBUG] + + // Using AllowItemOverlap mode because we cover the whole cell, and we want user to be able to submit subsequent items. + bool hovered, held; + bool pressed = ButtonBehavior(bb, id, &hovered, &held, ImGuiButtonFlags_AllowItemOverlap); + if (g.ActiveId != id) + SetItemAllowOverlap(); + if (held || hovered || selected) + { + const ImU32 col = GetColorU32(held ? ImGuiCol_HeaderActive : hovered ? ImGuiCol_HeaderHovered : ImGuiCol_Header); + //RenderFrame(bb.Min, bb.Max, col, false, 0.0f); + TableSetBgColor(ImGuiTableBgTarget_CellBg, col, table->CurrentColumn); + RenderNavHighlight(bb, id, ImGuiNavHighlightFlags_TypeThin | ImGuiNavHighlightFlags_NoRounding); + } + else + { + // Submit single cell bg color in the case we didn't submit a full header row + if ((table->RowFlags & ImGuiTableRowFlags_Headers) == 0) + TableSetBgColor(ImGuiTableBgTarget_CellBg, GetColorU32(ImGuiCol_TableHeaderBg), table->CurrentColumn); + } + if (held) + table->HeldHeaderColumn = (ImGuiTableColumnIdx)column_n; + window->DC.CursorPos.y -= g.Style.ItemSpacing.y * 0.5f; + + // Drag and drop to re-order columns. + // FIXME-TABLE: Scroll request while reordering a column and it lands out of the scrolling zone. 
+ if (held && (table->Flags & ImGuiTableFlags_Reorderable) && IsMouseDragging(0) && !g.DragDropActive) + { + // While moving a column it will jump on the other side of the mouse, so we also test for MouseDelta.x + table->ReorderColumn = (ImGuiTableColumnIdx)column_n; + table->InstanceInteracted = table->InstanceCurrent; + + // We don't reorder: through the frozen<>unfrozen line, or through a column that is marked with ImGuiTableColumnFlags_NoReorder. + if (g.IO.MouseDelta.x < 0.0f && g.IO.MousePos.x < cell_r.Min.x) + if (ImGuiTableColumn* prev_column = (column->PrevEnabledColumn != -1) ? &table->Columns[column->PrevEnabledColumn] : NULL) + if (!((column->Flags | prev_column->Flags) & ImGuiTableColumnFlags_NoReorder)) + if ((column->IndexWithinEnabledSet < table->FreezeColumnsRequest) == (prev_column->IndexWithinEnabledSet < table->FreezeColumnsRequest)) + table->ReorderColumnDir = -1; + if (g.IO.MouseDelta.x > 0.0f && g.IO.MousePos.x > cell_r.Max.x) + if (ImGuiTableColumn* next_column = (column->NextEnabledColumn != -1) ? 
&table->Columns[column->NextEnabledColumn] : NULL) + if (!((column->Flags | next_column->Flags) & ImGuiTableColumnFlags_NoReorder)) + if ((column->IndexWithinEnabledSet < table->FreezeColumnsRequest) == (next_column->IndexWithinEnabledSet < table->FreezeColumnsRequest)) + table->ReorderColumnDir = +1; + } + + // Sort order arrow + const float ellipsis_max = cell_r.Max.x - w_arrow - w_sort_text; + if ((table->Flags & ImGuiTableFlags_Sortable) && !(column->Flags & ImGuiTableColumnFlags_NoSort)) + { + if (column->SortOrder != -1) + { + float x = ImMax(cell_r.Min.x, cell_r.Max.x - w_arrow - w_sort_text); + float y = label_pos.y; + if (column->SortOrder > 0) + { + PushStyleColor(ImGuiCol_Text, GetColorU32(ImGuiCol_Text, 0.70f)); + RenderText(ImVec2(x + g.Style.ItemInnerSpacing.x, y), sort_order_suf); + PopStyleColor(); + x += w_sort_text; + } + RenderArrow(window->DrawList, ImVec2(x, y), GetColorU32(ImGuiCol_Text), column->SortDirection == ImGuiSortDirection_Ascending ? ImGuiDir_Up : ImGuiDir_Down, ARROW_SCALE); + } + + // Handle clicking on column header to adjust Sort Order + if (pressed && table->ReorderColumn != column_n) + { + ImGuiSortDirection sort_direction = TableGetColumnNextSortDirection(column); + TableSetColumnSortDirection(column_n, sort_direction, g.IO.KeyShift); + } + } + + // Render clipped label. Clipping here ensure that in the majority of situations, all our header cells will + // be merged into a single draw call. 
+ //window->DrawList->AddCircleFilled(ImVec2(ellipsis_max, label_pos.y), 40, IM_COL32_WHITE); + RenderTextEllipsis(window->DrawList, label_pos, ImVec2(ellipsis_max, label_pos.y + label_height + g.Style.FramePadding.y), ellipsis_max, ellipsis_max, label, label_end, &label_size); + + const bool text_clipped = label_size.x > (ellipsis_max - label_pos.x); + if (text_clipped && hovered && g.HoveredIdNotActiveTimer > g.TooltipSlowDelay) + SetTooltip("%.*s", (int)(label_end - label), label); + + // We don't use BeginPopupContextItem() because we want the popup to stay up even after the column is hidden + if (IsMouseReleased(1) && IsItemHovered()) + TableOpenContextMenu(column_n); +} + +//------------------------------------------------------------------------- +// [SECTION] Tables: Context Menu +//------------------------------------------------------------------------- +// - TableOpenContextMenu() [Internal] +// - TableDrawContextMenu() [Internal] +//------------------------------------------------------------------------- + +// Use -1 to open menu not specific to a given column. 
+void ImGui::TableOpenContextMenu(int column_n) +{ + ImGuiContext& g = *GImGui; + ImGuiTable* table = g.CurrentTable; + if (column_n == -1 && table->CurrentColumn != -1) // When called within a column automatically use this one (for consistency) + column_n = table->CurrentColumn; + if (column_n == table->ColumnsCount) // To facilitate using with TableGetHoveredColumn() + column_n = -1; + IM_ASSERT(column_n >= -1 && column_n < table->ColumnsCount); + if (table->Flags & (ImGuiTableFlags_Resizable | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable)) + { + table->IsContextPopupOpen = true; + table->ContextPopupColumn = (ImGuiTableColumnIdx)column_n; + table->InstanceInteracted = table->InstanceCurrent; + const ImGuiID context_menu_id = ImHashStr("##ContextMenu", 0, table->ID); + OpenPopupEx(context_menu_id, ImGuiPopupFlags_None); + } +} + +// Output context menu into current window (generally a popup) +// FIXME-TABLE: Ideally this should be writable by the user. Full programmatic access to that data? +void ImGui::TableDrawContextMenu(ImGuiTable* table) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (window->SkipItems) + return; + + bool want_separator = false; + const int column_n = (table->ContextPopupColumn >= 0 && table->ContextPopupColumn < table->ColumnsCount) ? table->ContextPopupColumn : -1; + ImGuiTableColumn* column = (column_n != -1) ? 
&table->Columns[column_n] : NULL; + + // Sizing + if (table->Flags & ImGuiTableFlags_Resizable) + { + if (column != NULL) + { + const bool can_resize = !(column->Flags & ImGuiTableColumnFlags_NoResize) && column->IsEnabled; + if (MenuItem("Size column to fit###SizeOne", NULL, false, can_resize)) + TableSetColumnWidthAutoSingle(table, column_n); + } + + const char* size_all_desc; + if (table->ColumnsEnabledFixedCount == table->ColumnsEnabledCount && (table->Flags & ImGuiTableFlags_SizingMask_) != ImGuiTableFlags_SizingFixedSame) + size_all_desc = "Size all columns to fit###SizeAll"; // All fixed + else + size_all_desc = "Size all columns to default###SizeAll"; // All stretch or mixed + if (MenuItem(size_all_desc, NULL)) + TableSetColumnWidthAutoAll(table); + want_separator = true; + } + + // Ordering + if (table->Flags & ImGuiTableFlags_Reorderable) + { + if (MenuItem("Reset order", NULL, false, !table->IsDefaultDisplayOrder)) + table->IsResetDisplayOrderRequest = true; + want_separator = true; + } + + // Reset all (should work but seems unnecessary/noisy to expose?) 
+ //if (MenuItem("Reset all")) + // table->IsResetAllRequest = true; + + // Sorting + // (modify TableOpenContextMenu() to add _Sortable flag if enabling this) +#if 0 + if ((table->Flags & ImGuiTableFlags_Sortable) && column != NULL && (column->Flags & ImGuiTableColumnFlags_NoSort) == 0) + { + if (want_separator) + Separator(); + want_separator = true; + + bool append_to_sort_specs = g.IO.KeyShift; + if (MenuItem("Sort in Ascending Order", NULL, column->SortOrder != -1 && column->SortDirection == ImGuiSortDirection_Ascending, (column->Flags & ImGuiTableColumnFlags_NoSortAscending) == 0)) + TableSetColumnSortDirection(table, column_n, ImGuiSortDirection_Ascending, append_to_sort_specs); + if (MenuItem("Sort in Descending Order", NULL, column->SortOrder != -1 && column->SortDirection == ImGuiSortDirection_Descending, (column->Flags & ImGuiTableColumnFlags_NoSortDescending) == 0)) + TableSetColumnSortDirection(table, column_n, ImGuiSortDirection_Descending, append_to_sort_specs); + } +#endif + + // Hiding / Visibility + if (table->Flags & ImGuiTableFlags_Hideable) + { + if (want_separator) + Separator(); + want_separator = true; + + PushItemFlag(ImGuiItemFlags_SelectableDontClosePopup, true); + for (int other_column_n = 0; other_column_n < table->ColumnsCount; other_column_n++) + { + ImGuiTableColumn* other_column = &table->Columns[other_column_n]; + const char* name = TableGetColumnName(table, other_column_n); + if (name == NULL || name[0] == 0) + name = ""; + + // Make sure we can't hide the last active column + bool menu_item_active = (other_column->Flags & ImGuiTableColumnFlags_NoHide) ? 
false : true; + if (other_column->IsEnabled && table->ColumnsEnabledCount <= 1) + menu_item_active = false; + if (MenuItem(name, NULL, other_column->IsEnabled, menu_item_active)) + other_column->IsEnabledNextFrame = !other_column->IsEnabled; + } + PopItemFlag(); + } +} + +//------------------------------------------------------------------------- +// [SECTION] Tables: Settings (.ini data) +//------------------------------------------------------------------------- +// FIXME: The binding/finding/creating flow are too confusing. +//------------------------------------------------------------------------- +// - TableSettingsInit() [Internal] +// - TableSettingsCalcChunkSize() [Internal] +// - TableSettingsCreate() [Internal] +// - TableSettingsFindByID() [Internal] +// - TableGetBoundSettings() [Internal] +// - TableResetSettings() +// - TableSaveSettings() [Internal] +// - TableLoadSettings() [Internal] +// - TableSettingsHandler_ClearAll() [Internal] +// - TableSettingsHandler_ApplyAll() [Internal] +// - TableSettingsHandler_ReadOpen() [Internal] +// - TableSettingsHandler_ReadLine() [Internal] +// - TableSettingsHandler_WriteAll() [Internal] +// - TableSettingsInstallHandler() [Internal] +//------------------------------------------------------------------------- +// [Init] 1: TableSettingsHandler_ReadXXXX() Load and parse .ini file into TableSettings. +// [Main] 2: TableLoadSettings() When table is created, bind Table to TableSettings, serialize TableSettings data into Table. +// [Main] 3: TableSaveSettings() When table properties are modified, serialize Table data into bound or new TableSettings, mark .ini as dirty. +// [Main] 4: TableSettingsHandler_WriteAll() When .ini file is dirty (which can come from other source), save TableSettings into .ini file. 
+//------------------------------------------------------------------------- + +// Clear and initialize empty settings instance +static void TableSettingsInit(ImGuiTableSettings* settings, ImGuiID id, int columns_count, int columns_count_max) +{ + IM_PLACEMENT_NEW(settings) ImGuiTableSettings(); + ImGuiTableColumnSettings* settings_column = settings->GetColumnSettings(); + for (int n = 0; n < columns_count_max; n++, settings_column++) + IM_PLACEMENT_NEW(settings_column) ImGuiTableColumnSettings(); + settings->ID = id; + settings->ColumnsCount = (ImGuiTableColumnIdx)columns_count; + settings->ColumnsCountMax = (ImGuiTableColumnIdx)columns_count_max; + settings->WantApply = true; +} + +static size_t TableSettingsCalcChunkSize(int columns_count) +{ + return sizeof(ImGuiTableSettings) + (size_t)columns_count * sizeof(ImGuiTableColumnSettings); +} + +ImGuiTableSettings* ImGui::TableSettingsCreate(ImGuiID id, int columns_count) +{ + ImGuiContext& g = *GImGui; + ImGuiTableSettings* settings = g.SettingsTables.alloc_chunk(TableSettingsCalcChunkSize(columns_count)); + TableSettingsInit(settings, id, columns_count, columns_count); + return settings; +} + +// Find existing settings +ImGuiTableSettings* ImGui::TableSettingsFindByID(ImGuiID id) +{ + // FIXME-OPT: Might want to store a lookup map for this? 
+ ImGuiContext& g = *GImGui; + for (ImGuiTableSettings* settings = g.SettingsTables.begin(); settings != NULL; settings = g.SettingsTables.next_chunk(settings)) + if (settings->ID == id) + return settings; + return NULL; +} + +// Get settings for a given table, NULL if none +ImGuiTableSettings* ImGui::TableGetBoundSettings(ImGuiTable* table) +{ + if (table->SettingsOffset != -1) + { + ImGuiContext& g = *GImGui; + ImGuiTableSettings* settings = g.SettingsTables.ptr_from_offset(table->SettingsOffset); + IM_ASSERT(settings->ID == table->ID); + if (settings->ColumnsCountMax >= table->ColumnsCount) + return settings; // OK + settings->ID = 0; // Invalidate storage, we won't fit because of a count change + } + return NULL; +} + +// Restore initial state of table (with or without saved settings) +void ImGui::TableResetSettings(ImGuiTable* table) +{ + table->IsInitializing = table->IsSettingsDirty = true; + table->IsResetAllRequest = false; + table->IsSettingsRequestLoad = false; // Don't reload from ini + table->SettingsLoadedFlags = ImGuiTableFlags_None; // Mark as nothing loaded so our initialized data becomes authoritative +} + +void ImGui::TableSaveSettings(ImGuiTable* table) +{ + table->IsSettingsDirty = false; + if (table->Flags & ImGuiTableFlags_NoSavedSettings) + return; + + // Bind or create settings data + ImGuiContext& g = *GImGui; + ImGuiTableSettings* settings = TableGetBoundSettings(table); + if (settings == NULL) + { + settings = TableSettingsCreate(table->ID, table->ColumnsCount); + table->SettingsOffset = g.SettingsTables.offset_from_ptr(settings); + } + settings->ColumnsCount = (ImGuiTableColumnIdx)table->ColumnsCount; + + // Serialize ImGuiTable/ImGuiTableColumn into ImGuiTableSettings/ImGuiTableColumnSettings + IM_ASSERT(settings->ID == table->ID); + IM_ASSERT(settings->ColumnsCount == table->ColumnsCount && settings->ColumnsCountMax >= settings->ColumnsCount); + ImGuiTableColumn* column = table->Columns.Data; + ImGuiTableColumnSettings* 
column_settings = settings->GetColumnSettings(); + + bool save_ref_scale = false; + settings->SaveFlags = ImGuiTableFlags_None; + for (int n = 0; n < table->ColumnsCount; n++, column++, column_settings++) + { + const float width_or_weight = (column->Flags & ImGuiTableColumnFlags_WidthStretch) ? column->StretchWeight : column->WidthRequest; + column_settings->WidthOrWeight = width_or_weight; + column_settings->Index = (ImGuiTableColumnIdx)n; + column_settings->DisplayOrder = column->DisplayOrder; + column_settings->SortOrder = column->SortOrder; + column_settings->SortDirection = column->SortDirection; + column_settings->IsEnabled = column->IsEnabled; + column_settings->IsStretch = (column->Flags & ImGuiTableColumnFlags_WidthStretch) ? 1 : 0; + if ((column->Flags & ImGuiTableColumnFlags_WidthStretch) == 0) + save_ref_scale = true; + + // We skip saving some data in the .ini file when they are unnecessary to restore our state. + // Note that fixed width where initial width was derived from auto-fit will always be saved as InitStretchWeightOrWidth will be 0.0f. + // FIXME-TABLE: We don't have logic to easily compare SortOrder to DefaultSortOrder yet so it's always saved when present. + if (width_or_weight != column->InitStretchWeightOrWidth) + settings->SaveFlags |= ImGuiTableFlags_Resizable; + if (column->DisplayOrder != n) + settings->SaveFlags |= ImGuiTableFlags_Reorderable; + if (column->SortOrder != -1) + settings->SaveFlags |= ImGuiTableFlags_Sortable; + if (column->IsEnabled != ((column->Flags & ImGuiTableColumnFlags_DefaultHide) == 0)) + settings->SaveFlags |= ImGuiTableFlags_Hideable; + } + settings->SaveFlags &= table->Flags; + settings->RefScale = save_ref_scale ? 
table->RefScale : 0.0f; + + MarkIniSettingsDirty(); +} + +void ImGui::TableLoadSettings(ImGuiTable* table) +{ + ImGuiContext& g = *GImGui; + table->IsSettingsRequestLoad = false; + if (table->Flags & ImGuiTableFlags_NoSavedSettings) + return; + + // Bind settings + ImGuiTableSettings* settings; + if (table->SettingsOffset == -1) + { + settings = TableSettingsFindByID(table->ID); + if (settings == NULL) + return; + if (settings->ColumnsCount != table->ColumnsCount) // Allow settings if columns count changed. We could otherwise decide to return... + table->IsSettingsDirty = true; + table->SettingsOffset = g.SettingsTables.offset_from_ptr(settings); + } + else + { + settings = TableGetBoundSettings(table); + } + + table->SettingsLoadedFlags = settings->SaveFlags; + table->RefScale = settings->RefScale; + + // Serialize ImGuiTableSettings/ImGuiTableColumnSettings into ImGuiTable/ImGuiTableColumn + ImGuiTableColumnSettings* column_settings = settings->GetColumnSettings(); + ImU64 display_order_mask = 0; + for (int data_n = 0; data_n < settings->ColumnsCount; data_n++, column_settings++) + { + int column_n = column_settings->Index; + if (column_n < 0 || column_n >= table->ColumnsCount) + continue; + + ImGuiTableColumn* column = &table->Columns[column_n]; + if (settings->SaveFlags & ImGuiTableFlags_Resizable) + { + if (column_settings->IsStretch) + column->StretchWeight = column_settings->WidthOrWeight; + else + column->WidthRequest = column_settings->WidthOrWeight; + column->AutoFitQueue = 0x00; + } + if (settings->SaveFlags & ImGuiTableFlags_Reorderable) + column->DisplayOrder = column_settings->DisplayOrder; + else + column->DisplayOrder = (ImGuiTableColumnIdx)column_n; + display_order_mask |= (ImU64)1 << column->DisplayOrder; + column->IsEnabled = column->IsEnabledNextFrame = column_settings->IsEnabled; + column->SortOrder = column_settings->SortOrder; + column->SortDirection = column_settings->SortDirection; + } + + // Validate and fix invalid display order data + 
const ImU64 expected_display_order_mask = (settings->ColumnsCount == 64) ? ~0 : ((ImU64)1 << settings->ColumnsCount) - 1; + if (display_order_mask != expected_display_order_mask) + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + table->Columns[column_n].DisplayOrder = (ImGuiTableColumnIdx)column_n; + + // Rebuild index + for (int column_n = 0; column_n < table->ColumnsCount; column_n++) + table->DisplayOrderToIndex[table->Columns[column_n].DisplayOrder] = (ImGuiTableColumnIdx)column_n; +} + +// Settings handler "clear all" callback: unbinds every live table from its settings (SettingsOffset = -1) and empties the settings storage. +static void TableSettingsHandler_ClearAll(ImGuiContext* ctx, ImGuiSettingsHandler*) +{ + ImGuiContext& g = *ctx; + for (int i = 0; i != g.Tables.GetSize(); i++) + g.Tables.GetByIndex(i)->SettingsOffset = -1; + g.SettingsTables.clear(); +} + +// Apply to existing tables (if any): flag each one to reload its settings and drop its current binding. +static void TableSettingsHandler_ApplyAll(ImGuiContext* ctx, ImGuiSettingsHandler*) +{ + ImGuiContext& g = *ctx; + for (int i = 0; i != g.Tables.GetSize(); i++) + { + ImGuiTable* table = g.Tables.GetByIndex(i); + table->IsSettingsRequestLoad = true; + table->SettingsOffset = -1; + } +} + +// Settings handler "open entry" callback. 'name' is the "0x<ID>,<columns_count>" part of the section header; returns NULL when it does not parse. +// An existing entry for the same ID is recycled when it has room for columns_count columns, otherwise it is invalidated (ID = 0) and a new one is created. +static void* TableSettingsHandler_ReadOpen(ImGuiContext*, ImGuiSettingsHandler*, const char* name) +{ + ImGuiID id = 0; + int columns_count = 0; + if (sscanf(name, "0x%08X,%d", &id, &columns_count) < 2) + return NULL; + + if (ImGuiTableSettings* settings = ImGui::TableSettingsFindByID(id)) + { + if (settings->ColumnsCountMax >= columns_count) + { + TableSettingsInit(settings, id, columns_count, settings->ColumnsCountMax); // Recycle + return settings; + } + settings->ID = 0; // Invalidate storage, we won't fit because of a count change + } + return ImGui::TableSettingsCreate(id, columns_count); +} + +// Settings handler "read line" callback: parses one line of a [Table] section into 'entry' (an ImGuiTableSettings*). +// Handles "RefScale=<float>" and "Column <n> ..." lines; each optional column field that is present also sets the matching bit in settings->SaveFlags. +// Fields are parsed in a fixed order and 'line' only advances past a field that matched, so one field failing to parse makes every later field on that line fail too. +static void TableSettingsHandler_ReadLine(ImGuiContext*, ImGuiSettingsHandler*, void* entry, const char* line) +{ + // "Column 0 UserID=0x42AD2D21 Width=100 Visible=1 Order=0 Sort=0v" + ImGuiTableSettings* settings = (ImGuiTableSettings*)entry; + float f = 0.0f; + int column_n = 0, r = 0, n = 0; + + 
if (sscanf(line, "RefScale=%f", &f) == 1) { settings->RefScale = f; return; } + + if (sscanf(line, "Column %d%n", &column_n, &r) == 1) + { + if (column_n < 0 || column_n >= settings->ColumnsCount) + return; + line = ImStrSkipBlank(line + r); + char c = 0; + ImGuiTableColumnSettings* column = settings->GetColumnSettings() + column_n; + column->Index = (ImGuiTableColumnIdx)column_n; + // %X accepts an optional "0x" prefix, so both "UserID=0x42AD2D21" and the prefix-less "UserID=42AD2D21" emitted by earlier writers are read back. + if (sscanf(line, "UserID=%X%n", (ImU32*)&n, &r)==1) { line = ImStrSkipBlank(line + r); column->UserID = (ImGuiID)n; } + if (sscanf(line, "Width=%d%n", &n, &r) == 1) { line = ImStrSkipBlank(line + r); column->WidthOrWeight = (float)n; column->IsStretch = 0; settings->SaveFlags |= ImGuiTableFlags_Resizable; } + if (sscanf(line, "Weight=%f%n", &f, &r) == 1) { line = ImStrSkipBlank(line + r); column->WidthOrWeight = f; column->IsStretch = 1; settings->SaveFlags |= ImGuiTableFlags_Resizable; } + if (sscanf(line, "Visible=%d%n", &n, &r) == 1) { line = ImStrSkipBlank(line + r); column->IsEnabled = (ImU8)n; settings->SaveFlags |= ImGuiTableFlags_Hideable; } + if (sscanf(line, "Order=%d%n", &n, &r) == 1) { line = ImStrSkipBlank(line + r); column->DisplayOrder = (ImGuiTableColumnIdx)n; settings->SaveFlags |= ImGuiTableFlags_Reorderable; } + if (sscanf(line, "Sort=%d%c%n", &n, &c, &r) == 2) { line = ImStrSkipBlank(line + r); column->SortOrder = (ImGuiTableColumnIdx)n; column->SortDirection = (c == '^') ? ImGuiSortDirection_Descending : ImGuiSortDirection_Ascending; settings->SaveFlags |= ImGuiTableFlags_Sortable; } + } +} + +// Settings handler "write all" callback: appends one [Table] section per live settings entry to 'buf'. +// Entries with ID == 0 and entries with none of the four save flags set are not written. +static void TableSettingsHandler_WriteAll(ImGuiContext* ctx, ImGuiSettingsHandler* handler, ImGuiTextBuffer* buf) +{ + ImGuiContext& g = *ctx; + for (ImGuiTableSettings* settings = g.SettingsTables.begin(); settings != NULL; settings = g.SettingsTables.next_chunk(settings)) + { + if (settings->ID == 0) // Skip ditched settings + continue; + + // TableSaveSettings() may clear some of those flags when we establish that the data can be stripped + // (e.g. 
Order was unchanged) + const bool save_size = (settings->SaveFlags & ImGuiTableFlags_Resizable) != 0; + const bool save_visible = (settings->SaveFlags & ImGuiTableFlags_Hideable) != 0; + const bool save_order = (settings->SaveFlags & ImGuiTableFlags_Reorderable) != 0; + const bool save_sort = (settings->SaveFlags & ImGuiTableFlags_Sortable) != 0; + if (!save_size && !save_visible && !save_order && !save_sort) + continue; + + buf->reserve(buf->size() + 30 + settings->ColumnsCount * 50); // ballpark reserve + buf->appendf("[%s][0x%08X,%d]\n", handler->TypeName, settings->ID, settings->ColumnsCount); + if (settings->RefScale != 0.0f) + buf->appendf("RefScale=%g\n", settings->RefScale); + ImGuiTableColumnSettings* column = settings->GetColumnSettings(); + for (int column_n = 0; column_n < settings->ColumnsCount; column_n++, column++) + { + // "Column 0 UserID=0x42AD2D21 Width=100 Visible=1 Order=0 Sort=0v" + buf->appendf("Column %-2d", column_n); + // The "0x" prefix matches the format documented above; without it a reader expecting "UserID=0x..." fails on this field and then on every field after it. + if (column->UserID != 0) buf->appendf(" UserID=0x%08X", column->UserID); + if (save_size && column->IsStretch) buf->appendf(" Weight=%.4f", column->WidthOrWeight); + if (save_size && !column->IsStretch) buf->appendf(" Width=%d", (int)column->WidthOrWeight); + if (save_visible) buf->appendf(" Visible=%d", column->IsEnabled); + if (save_order) buf->appendf(" Order=%d", column->DisplayOrder); + if (save_sort && column->SortOrder != -1) buf->appendf(" Sort=%d%c", column->SortOrder, (column->SortDirection == ImGuiSortDirection_Ascending) ? 
'v' : '^'); + buf->append("\n"); + } + buf->append("\n"); + } +} + +// Registers the "Table" .ini settings handler (clear/open/read-line/apply/write callbacks) with the given context. +void ImGui::TableSettingsInstallHandler(ImGuiContext* context) +{ + ImGuiContext& g = *context; + ImGuiSettingsHandler ini_handler; + ini_handler.TypeName = "Table"; + ini_handler.TypeHash = ImHashStr("Table"); + ini_handler.ClearAllFn = TableSettingsHandler_ClearAll; + ini_handler.ReadOpenFn = TableSettingsHandler_ReadOpen; + ini_handler.ReadLineFn = TableSettingsHandler_ReadLine; + ini_handler.ApplyAllFn = TableSettingsHandler_ApplyAll; + ini_handler.WriteAllFn = TableSettingsHandler_WriteAll; + g.SettingsHandlers.push_back(ini_handler); +} + +//------------------------------------------------------------------------- +// [SECTION] Tables: Garbage Collection +//------------------------------------------------------------------------- +// - TableRemove() [Internal] +// - TableGcCompactTransientBuffers() [Internal] +// - TableGcCompactSettings() [Internal] +//------------------------------------------------------------------------- + +// Remove Table (currently only used by TestEngine) +// The pool index is read before removal so the matching TablesLastTimeActive slot can be reset afterwards. +void ImGui::TableRemove(ImGuiTable* table) +{ + //IMGUI_DEBUG_LOG("TableRemove() id=0x%08X\n", table->ID); + ImGuiContext& g = *GImGui; + int table_idx = g.Tables.GetIndex(table); + //memset(table->RawData.Data, 0, table->RawData.size_in_bytes()); + //memset(table, 0, sizeof(ImGuiTable)); + g.Tables.Remove(table->ID, table); + g.TablesLastTimeActive[table_idx] = -1.0f; +} + +// Free up/compact internal Table buffers for when it gets unused +void ImGui::TableGcCompactTransientBuffers(ImGuiTable* table) +{ + //IMGUI_DEBUG_LOG("TableGcCompactTransientBuffers() id=0x%08X\n", table->ID); + ImGuiContext& g = *GImGui; + IM_ASSERT(table->MemoryCompacted == false); + table->DrawSplitter.ClearFreeMemory(); + table->SortSpecsMulti.clear(); + table->SortSpecs.Specs = NULL; + table->IsSortSpecsDirty = true; + table->ColumnsNames.clear(); + table->MemoryCompacted = true; + for (int n = 0; n < table->ColumnsCount; n++) + 
table->Columns[n].NameOffset = -1; + g.TablesLastTimeActive[g.Tables.GetIndex(table)] = -1.0f; +} + +// Compact and remove unused settings data (currently only used by TestEngine) +/* Sums the chunk sizes of all settings entries whose ID is non-zero; when that total already equals the buffer size nothing is discarded and the function returns. Otherwise the live entries are copied into a new chunk stream that is swapped in for the old one. */ +void ImGui::TableGcCompactSettings() +{ + ImGuiContext& g = *GImGui; + int required_memory = 0; + for (ImGuiTableSettings* settings = g.SettingsTables.begin(); settings != NULL; settings = g.SettingsTables.next_chunk(settings)) + if (settings->ID != 0) + required_memory += (int)TableSettingsCalcChunkSize(settings->ColumnsCount); + if (required_memory == g.SettingsTables.Buf.Size) + return; + ImChunkStream new_chunk_stream; + new_chunk_stream.Buf.reserve(required_memory); + for (ImGuiTableSettings* settings = g.SettingsTables.begin(); settings != NULL; settings = g.SettingsTables.next_chunk(settings)) + if (settings->ID != 0) + memcpy(new_chunk_stream.alloc_chunk(TableSettingsCalcChunkSize(settings->ColumnsCount)), settings, TableSettingsCalcChunkSize(settings->ColumnsCount)); + g.SettingsTables.swap(new_chunk_stream); +} + + +//------------------------------------------------------------------------- +// [SECTION] Tables: Debugging +//------------------------------------------------------------------------- +// - DebugNodeTable() [Internal] +//------------------------------------------------------------------------- + +#ifndef IMGUI_DISABLE_METRICS_WINDOW + +/* Maps the sizing-policy bits of 'sizing_policy' (masked with ImGuiTableFlags_SizingMask_) to a short label; returns "N/A" when none of the four policies matches. */ +static const char* DebugNodeTableGetSizingPolicyDesc(ImGuiTableFlags sizing_policy) +{ + sizing_policy &= ImGuiTableFlags_SizingMask_; + if (sizing_policy == ImGuiTableFlags_SizingFixedFit) { return "FixedFit"; } + if (sizing_policy == ImGuiTableFlags_SizingFixedSame) { return "FixedSame"; } + if (sizing_policy == ImGuiTableFlags_SizingStretchProp) { return "StretchProp"; } + if (sizing_policy == ImGuiTableFlags_SizingStretchSame) { return "StretchSame"; } + return "N/A"; +} + +/* Emits a tree node describing 'table': table-level metrics, one selectable entry per column, the bound settings (if any) and a "Clear settings" button. */ +void ImGui::DebugNodeTable(ImGuiTable* table) +{ + char buf[512]; + char* p = buf; + const char* buf_end = buf + IM_ARRAYSIZE(buf); + const bool is_active = 
(table->LastFrameActive >= ImGui::GetFrameCount() - 2); // Note that fully clipped early out scrolling tables will appear as inactive here. + ImFormatString(p, buf_end - p, "Table 0x%08X (%d columns, in '%s')%s", table->ID, table->ColumnsCount, table->OuterWindow->Name, is_active ? "" : " *Inactive*"); + if (!is_active) { PushStyleColor(ImGuiCol_Text, GetStyleColorVec4(ImGuiCol_TextDisabled)); } + bool open = TreeNode(table, "%s", buf); + if (!is_active) { PopStyleColor(); } + if (IsItemHovered()) + GetForegroundDrawList()->AddRect(table->OuterRect.Min, table->OuterRect.Max, IM_COL32(255, 255, 0, 255)); + if (IsItemVisible() && table->HoveredColumnBody != -1) + GetForegroundDrawList()->AddRect(GetItemRectMin(), GetItemRectMax(), IM_COL32(255, 255, 0, 255)); + if (!open) + return; + /* The button result is captured here but only acted on at the end of the function, after the column list and the settings node have been emitted. */ + bool clear_settings = SmallButton("Clear settings"); + BulletText("OuterRect: Pos: (%.1f,%.1f) Size: (%.1f,%.1f) Sizing: '%s'", table->OuterRect.Min.x, table->OuterRect.Min.y, table->OuterRect.GetWidth(), table->OuterRect.GetHeight(), DebugNodeTableGetSizingPolicyDesc(table->Flags)); + BulletText("ColumnsGivenWidth: %.1f, ColumnsAutoFitWidth: %.1f, InnerWidth: %.1f%s", table->ColumnsGivenWidth, table->ColumnsAutoFitWidth, table->InnerWidth, table->InnerWidth == 0.0f ? 
" (auto)" : ""); + BulletText("CellPaddingX: %.1f, CellSpacingX: %.1f/%.1f, OuterPaddingX: %.1f", table->CellPaddingX, table->CellSpacingX1, table->CellSpacingX2, table->OuterPaddingX); + BulletText("HoveredColumnBody: %d, HoveredColumnBorder: %d", table->HoveredColumnBody, table->HoveredColumnBorder); + BulletText("ResizedColumn: %d, ReorderColumn: %d, HeldHeaderColumn: %d", table->ResizedColumn, table->ReorderColumn, table->HeldHeaderColumn); + //BulletText("BgDrawChannels: %d/%d", 0, table->BgDrawChannelUnfrozen); + /* Total StretchWeight over columns flagged WidthStretch; used below to print each column's weight as a percentage. */ + float sum_weights = 0.0f; + for (int n = 0; n < table->ColumnsCount; n++) + if (table->Columns[n].Flags & ImGuiTableColumnFlags_WidthStretch) + sum_weights += table->Columns[n].StretchWeight; + for (int n = 0; n < table->ColumnsCount; n++) + { + ImGuiTableColumn* column = &table->Columns[n]; + const char* name = TableGetColumnName(table, n); + ImFormatString(buf, IM_ARRAYSIZE(buf), + "Column %d order %d '%s': offset %+.2f to %+.2f%s\n" + "Enabled: %d, VisibleX/Y: %d/%d, RequestOutput: %d, SkipItems: %d, DrawChannels: %d,%d\n" + "WidthGiven: %.1f, Request/Auto: %.1f/%.1f, StretchWeight: %.3f (%.1f%%)\n" + "MinX: %.1f, MaxX: %.1f (%+.1f), ClipRect: %.1f to %.1f (+%.1f)\n" + "ContentWidth: %.1f,%.1f, HeadersUsed/Ideal %.1f/%.1f\n" + "Sort: %d%s, UserID: 0x%08X, Flags: 0x%04X: %s%s%s..", + n, column->DisplayOrder, name, column->MinX - table->WorkRect.Min.x, column->MaxX - table->WorkRect.Min.x, (n < table->FreezeColumnsRequest) ? " (Frozen)" : "", + column->IsEnabled, column->IsVisibleX, column->IsVisibleY, column->IsRequestOutput, column->IsSkipItems, column->DrawChannelFrozen, column->DrawChannelUnfrozen, + column->WidthGiven, column->WidthRequest, column->WidthAuto, column->StretchWeight, column->StretchWeight > 0.0f ? 
(column->StretchWeight / sum_weights) * 100.0f : 0.0f, + column->MinX, column->MaxX, column->MaxX - column->MinX, column->ClipRect.Min.x, column->ClipRect.Max.x, column->ClipRect.Max.x - column->ClipRect.Min.x, + column->ContentMaxXFrozen - column->WorkMinX, column->ContentMaxXUnfrozen - column->WorkMinX, column->ContentMaxXHeadersUsed - column->WorkMinX, column->ContentMaxXHeadersIdeal - column->WorkMinX, + column->SortOrder, (column->SortDirection == ImGuiSortDirection_Ascending) ? " (Asc)" : (column->SortDirection == ImGuiSortDirection_Descending) ? " (Des)" : "", column->UserID, column->Flags, + (column->Flags & ImGuiTableColumnFlags_WidthStretch) ? "WidthStretch " : "", + (column->Flags & ImGuiTableColumnFlags_WidthFixed) ? "WidthFixed " : "", + (column->Flags & ImGuiTableColumnFlags_NoResize) ? "NoResize " : ""); + Bullet(); + Selectable(buf); + if (IsItemHovered()) + { + ImRect r(column->MinX, table->OuterRect.Min.y, column->MaxX, table->OuterRect.Max.y); + GetForegroundDrawList()->AddRect(r.Min, r.Max, IM_COL32(255, 255, 0, 255)); + } + } + if (ImGuiTableSettings* settings = TableGetBoundSettings(table)) + DebugNodeTableSettings(settings); + if (clear_settings) + table->IsResetAllRequest = true; + TreePop(); +} + +/* Emits a tree node listing the contents of one ImGuiTableSettings entry: its SaveFlags, column counts and one line per stored column. A column's sort direction is shown as "---" when its SortOrder is -1. */ +void ImGui::DebugNodeTableSettings(ImGuiTableSettings* settings) +{ + if (!TreeNode((void*)(intptr_t)settings->ID, "Settings 0x%08X (%d columns)", settings->ID, settings->ColumnsCount)) + return; + BulletText("SaveFlags: 0x%08X", settings->SaveFlags); + BulletText("ColumnsCount: %d (max %d)", settings->ColumnsCount, settings->ColumnsCountMax); + for (int n = 0; n < settings->ColumnsCount; n++) + { + ImGuiTableColumnSettings* column_settings = &settings->GetColumnSettings()[n]; + ImGuiSortDirection sort_dir = (column_settings->SortOrder != -1) ? 
(ImGuiSortDirection)column_settings->SortDirection : ImGuiSortDirection_None; + BulletText("Column %d Order %d SortOrder %d %s Vis %d %s %7.3f UserID 0x%08X", + n, column_settings->DisplayOrder, column_settings->SortOrder, + (sort_dir == ImGuiSortDirection_Ascending) ? "Asc" : (sort_dir == ImGuiSortDirection_Descending) ? "Des" : "---", + column_settings->IsEnabled, column_settings->IsStretch ? "Weight" : "Width ", column_settings->WidthOrWeight, column_settings->UserID); + } + TreePop(); +} + +#else // #ifndef IMGUI_DISABLE_METRICS_WINDOW + +/* Empty definitions used when IMGUI_DISABLE_METRICS_WINDOW is defined. */ +void ImGui::DebugNodeTable(ImGuiTable*) {} +void ImGui::DebugNodeTableSettings(ImGuiTableSettings*) {} + +#endif + + +//------------------------------------------------------------------------- +// [SECTION] Columns, BeginColumns, EndColumns, etc. +// (This is a legacy API, prefer using BeginTable/EndTable!) +//------------------------------------------------------------------------- +// FIXME: sizing is lossy when columns width is very small (default width may turn negative etc.) +//------------------------------------------------------------------------- +// - SetWindowClipRectBeforeSetChannel() [Internal] +// - GetColumnIndex() +// - GetColumnsCount() +// - GetColumnOffset() +// - GetColumnWidth() +// - SetColumnOffset() +// - SetColumnWidth() +// - PushColumnClipRect() [Internal] +// - PushColumnsBackground() [Internal] +// - PopColumnsBackground() [Internal] +// - FindOrCreateColumns() [Internal] +// - GetColumnsID() [Internal] +// - BeginColumns() +// - NextColumn() +// - EndColumns() +// - Columns() +//------------------------------------------------------------------------- + +// [Internal] Small optimization to avoid calls to PopClipRect/SetCurrentChannel/PushClipRect in sequences, +// they would meddle many times with the underlying ImDrawCmd. 
+// Instead, we do a preemptive overwrite of the clipping rectangle _without_ altering the command-buffer and let +// the subsequent single call to SetCurrentChannel() do its thing once. +void ImGui::SetWindowClipRectBeforeSetChannel(ImGuiWindow* window, const ImRect& clip_rect) +{ + ImVec4 clip_rect_vec4 = clip_rect.ToVec4(); + window->ClipRect = clip_rect; + window->DrawList->_CmdHeader.ClipRect = clip_rect_vec4; + window->DrawList->_ClipRectStack.Data[window->DrawList->_ClipRectStack.Size - 1] = clip_rect_vec4; +} + +/* Index of the current column, or 0 when the current window has no active columns set. */ +int ImGui::GetColumnIndex() +{ + ImGuiWindow* window = GetCurrentWindowRead(); + return window->DC.CurrentColumns ? window->DC.CurrentColumns->Current : 0; +} + +/* Column count of the active columns set, or 1 when the current window has none. */ +int ImGui::GetColumnsCount() +{ + ImGuiWindow* window = GetCurrentWindowRead(); + return window->DC.CurrentColumns ? window->DC.CurrentColumns->Count : 1; +} + +/* Scales a normalized offset by the columns span (OffMaxX - OffMinX). */ +float ImGui::GetColumnOffsetFromNorm(const ImGuiOldColumns* columns, float offset_norm) +{ + return offset_norm * (columns->OffMaxX - columns->OffMinX); +} + +/* Inverse of GetColumnOffsetFromNorm(): divides an offset by the columns span (OffMaxX - OffMinX). */ +float ImGui::GetColumnNormFromOffset(const ImGuiOldColumns* columns, float offset) +{ + return offset / (columns->OffMaxX - columns->OffMinX); +} + +static const float COLUMNS_HIT_RECT_HALF_WIDTH = 4.0f; + +static float GetDraggedColumnOffset(ImGuiOldColumns* columns, int column_index) +{ + // Active (dragged) column always follow mouse. The reason we need this is that dragging a column to the right edge of an auto-resizing + // window creates a feedback loop because we store normalized positions. So while dragging we enforce absolute positioning. + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + IM_ASSERT(column_index > 0); // We are not supposed to drag column 0. 
+ IM_ASSERT(g.ActiveId == columns->ID + ImGuiID(column_index)); + + float x = g.IO.MousePos.x - g.ActiveIdClickOffset.x + COLUMNS_HIT_RECT_HALF_WIDTH - window->Pos.x; + x = ImMax(x, ImGui::GetColumnOffset(column_index - 1) + g.Style.ColumnsMinSpacing); + if ((columns->Flags & ImGuiOldColumnFlags_NoPreserveWidths)) + x = ImMin(x, ImGui::GetColumnOffset(column_index + 1) - g.Style.ColumnsMinSpacing); + + return x; +} + +/* Offset of a column's left edge, interpolated between OffMinX and OffMaxX by the column's OffsetNorm. A negative column_index selects the current column. Returns 0.0f when the current window has no active columns set. */ +float ImGui::GetColumnOffset(int column_index) +{ + ImGuiWindow* window = GetCurrentWindowRead(); + ImGuiOldColumns* columns = window->DC.CurrentColumns; + if (columns == NULL) + return 0.0f; + + if (column_index < 0) + column_index = columns->Current; + IM_ASSERT(column_index < columns->Columns.Size); + + const float t = columns->Columns[column_index].OffsetNorm; + const float x_offset = ImLerp(columns->OffMinX, columns->OffMaxX, t); + return x_offset; +} + +/* Width of a column computed from the normalized offsets of its two edges; with before_resize set, the OffsetNormBeforeResize values are used instead of the live ones. A negative column_index selects the current column. */ +static float GetColumnWidthEx(ImGuiOldColumns* columns, int column_index, bool before_resize = false) +{ + if (column_index < 0) + column_index = columns->Current; + + float offset_norm; + if (before_resize) + offset_norm = columns->Columns[column_index + 1].OffsetNormBeforeResize - columns->Columns[column_index].OffsetNormBeforeResize; + else + offset_norm = columns->Columns[column_index + 1].OffsetNorm - columns->Columns[column_index].OffsetNorm; + return ImGui::GetColumnOffsetFromNorm(columns, offset_norm); +} + +/* Width of a column of the active set (negative column_index = current column); when no columns set is active, returns the available content region width instead. */ +float ImGui::GetColumnWidth(int column_index) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + ImGuiOldColumns* columns = window->DC.CurrentColumns; + if (columns == NULL) + return GetContentRegionAvail().x; + + if (column_index < 0) + column_index = columns->Current; + return GetColumnOffsetFromNorm(columns, columns->Columns[column_index + 1].OffsetNorm - columns->Columns[column_index].OffsetNorm); +} + +void ImGui::SetColumnOffset(int column_index, float offset) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + ImGuiOldColumns* 
columns = window->DC.CurrentColumns; + IM_ASSERT(columns != NULL); + + if (column_index < 0) + column_index = columns->Current; + IM_ASSERT(column_index < columns->Columns.Size); + + /* Unless NoPreserveWidths is set (or this is the last column), the column's current width is captured here and re-applied below by recursively moving the next column's offset. */ + const bool preserve_width = !(columns->Flags & ImGuiOldColumnFlags_NoPreserveWidths) && (column_index < columns->Count - 1); + const float width = preserve_width ? GetColumnWidthEx(columns, column_index, columns->IsBeingResized) : 0.0f; + + if (!(columns->Flags & ImGuiOldColumnFlags_NoForceWithinWindow)) + offset = ImMin(offset, columns->OffMaxX - g.Style.ColumnsMinSpacing * (columns->Count - column_index)); + columns->Columns[column_index].OffsetNorm = GetColumnNormFromOffset(columns, offset - columns->OffMinX); + + if (preserve_width) + SetColumnOffset(column_index + 1, offset + ImMax(g.Style.ColumnsMinSpacing, width)); +} + +/* Sets a column's width by placing the next column's offset at (this column's offset + width). A negative column_index selects the current column. Asserts that a columns set is active. */ +void ImGui::SetColumnWidth(int column_index, float width) +{ + ImGuiWindow* window = GetCurrentWindowRead(); + ImGuiOldColumns* columns = window->DC.CurrentColumns; + IM_ASSERT(columns != NULL); + + if (column_index < 0) + column_index = columns->Current; + SetColumnOffset(column_index + 1, GetColumnOffset(column_index) + width); +} + +/* Pushes the stored ClipRect of the given column (negative column_index = current column) onto the clip stack, without intersecting it with the current clip rect. */ +void ImGui::PushColumnClipRect(int column_index) +{ + ImGuiWindow* window = GetCurrentWindowRead(); + ImGuiOldColumns* columns = window->DC.CurrentColumns; + if (column_index < 0) + column_index = columns->Current; + + ImGuiOldColumnData* column = &columns->Columns[column_index]; + PushClipRect(column->ClipRect.Min, column->ClipRect.Max, false); +} + +// Get into the columns background draw command (which is generally the same draw command as before we called BeginColumns) +void ImGui::PushColumnsBackground() +{ + ImGuiWindow* window = GetCurrentWindowRead(); + ImGuiOldColumns* columns = window->DC.CurrentColumns; + if (columns->Count == 1) + return; + + // Optimization: avoid SetCurrentChannel() + PushClipRect() + columns->HostBackupClipRect = window->ClipRect; + SetWindowClipRectBeforeSetChannel(window, 
columns->HostInitialClipRect); + columns->Splitter.SetCurrentChannel(window->DrawList, 0); +} + +/* Counterpart of PushColumnsBackground(): restores the clip rect saved in HostBackupClipRect and switches back to the current column's channel. Does nothing for a single-column set. */ +void ImGui::PopColumnsBackground() +{ + ImGuiWindow* window = GetCurrentWindowRead(); + ImGuiOldColumns* columns = window->DC.CurrentColumns; + if (columns->Count == 1) + return; + + // Optimization: avoid PopClipRect() + SetCurrentChannel() + SetWindowClipRectBeforeSetChannel(window, columns->HostBackupClipRect); + columns->Splitter.SetCurrentChannel(window->DrawList, columns->Current + 1); +} + +/* Returns the window's columns set with the given id, appending a new default-constructed one (with ID set) to window->ColumnsStorage when none exists. */ +ImGuiOldColumns* ImGui::FindOrCreateColumns(ImGuiWindow* window, ImGuiID id) +{ + // We have few columns per window so for now we don't need bother much with turning this into a faster lookup. + for (int n = 0; n < window->ColumnsStorage.Size; n++) + if (window->ColumnsStorage[n].ID == id) + return &window->ColumnsStorage[n]; + + window->ColumnsStorage.push_back(ImGuiOldColumns()); + ImGuiOldColumns* columns = &window->ColumnsStorage.back(); + columns->ID = id; + return columns; +} + +ImGuiID ImGui::GetColumnsID(const char* str_id, int columns_count) +{ + ImGuiWindow* window = GetCurrentWindow(); + + // Differentiate column ID with an arbitrary prefix for cases where users name their columns set the same as another widget. + // In addition, when an identifier isn't explicitly provided we include the number of columns in the hash to make it uniquer. + PushID(0x11223347 + (str_id ? 0 : columns_count)); + ImGuiID id = window->GetID(str_id ? 
str_id : "columns"); + PopID(); + + return id; +} + +void ImGui::BeginColumns(const char* str_id, int columns_count, ImGuiOldColumnFlags flags) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = GetCurrentWindow(); + + IM_ASSERT(columns_count >= 1); + IM_ASSERT(window->DC.CurrentColumns == NULL); // Nested columns are currently not supported + + // Acquire storage for the columns set + ImGuiID id = GetColumnsID(str_id, columns_count); + ImGuiOldColumns* columns = FindOrCreateColumns(window, id); + IM_ASSERT(columns->ID == id); + columns->Current = 0; + columns->Count = columns_count; + columns->Flags = flags; + window->DC.CurrentColumns = columns; + + /* Host window state saved here (cursor, clip rect, parent work rect) is what EndColumns() restores from. */ + columns->HostCursorPosY = window->DC.CursorPos.y; + columns->HostCursorMaxPosX = window->DC.CursorMaxPos.x; + columns->HostInitialClipRect = window->ClipRect; + columns->HostBackupParentWorkRect = window->ParentWorkRect; + window->ParentWorkRect = window->WorkRect; + + // Set state for first column + // We aim so that the right-most column will have the same clipping width as other after being clipped by parent ClipRect + const float column_padding = g.Style.ItemSpacing.x; + const float half_clip_extend_x = ImFloor(ImMax(window->WindowPadding.x * 0.5f, window->WindowBorderSize)); + const float max_1 = window->WorkRect.Max.x + column_padding - ImMax(column_padding - window->WindowPadding.x, 0.0f); + const float max_2 = window->WorkRect.Max.x + half_clip_extend_x; + columns->OffMinX = window->DC.Indent.x - column_padding + ImMax(column_padding - window->WindowPadding.x, 0.0f); + columns->OffMaxX = ImMax(ImMin(max_1, max_2) - window->Pos.x, columns->OffMinX + 1.0f); + columns->LineMinY = columns->LineMaxY = window->DC.CursorPos.y; + + // Clear data if columns count changed + if (columns->Columns.Size != 0 && columns->Columns.Size != columns_count + 1) + columns->Columns.resize(0); + + // Initialize default widths + /* columns_count + 1 entries are stored: one per column plus one for the right edge of the last column; defaults are evenly spaced normalized offsets. */ + columns->IsFirstFrame = (columns->Columns.Size == 0); + if (columns->Columns.Size == 0) + { + 
columns->Columns.reserve(columns_count + 1); + for (int n = 0; n < columns_count + 1; n++) + { + ImGuiOldColumnData column; + column.OffsetNorm = n / (float)columns_count; + columns->Columns.push_back(column); + } + } + + for (int n = 0; n < columns_count; n++) + { + // Compute clipping rectangle + ImGuiOldColumnData* column = &columns->Columns[n]; + float clip_x1 = IM_ROUND(window->Pos.x + GetColumnOffset(n)); + float clip_x2 = IM_ROUND(window->Pos.x + GetColumnOffset(n + 1) - 1.0f); + column->ClipRect = ImRect(clip_x1, -FLT_MAX, clip_x2, +FLT_MAX); + column->ClipRect.ClipWithFull(window->ClipRect); + } + + /* Channel 0 is the one PushColumnsBackground() switches to; column n draws into channel n + 1. Single-column sets skip the split entirely. */ + if (columns->Count > 1) + { + columns->Splitter.Split(window->DrawList, 1 + columns->Count); + columns->Splitter.SetCurrentChannel(window->DrawList, 1); + PushColumnClipRect(0); + } + + // We don't generally store Indent.x inside ColumnsOffset because it may be manipulated by the user. + float offset_0 = GetColumnOffset(columns->Current); + float offset_1 = GetColumnOffset(columns->Current + 1); + float width = offset_1 - offset_0; + PushItemWidth(width * 0.65f); + window->DC.ColumnsOffset.x = ImMax(column_padding - window->WindowPadding.x, 0.0f); + window->DC.CursorPos.x = IM_FLOOR(window->Pos.x + window->DC.Indent.x + window->DC.ColumnsOffset.x); + window->WorkRect.Max.x = window->Pos.x + offset_1 - column_padding; +} + +/* Advances to the next column of the active set, wrapping to column 0 (a new row) after the last one. Does nothing when the window skips items or no columns set is active. */ +void ImGui::NextColumn() +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems || window->DC.CurrentColumns == NULL) + return; + + ImGuiContext& g = *GImGui; + ImGuiOldColumns* columns = window->DC.CurrentColumns; + + if (columns->Count == 1) + { + window->DC.CursorPos.x = IM_FLOOR(window->Pos.x + window->DC.Indent.x + window->DC.ColumnsOffset.x); + IM_ASSERT(columns->Current == 0); + return; + } + + // Next column + if (++columns->Current == columns->Count) + columns->Current = 0; + + PopItemWidth(); + + // Optimization: avoid PopClipRect() + SetCurrentChannel() + PushClipRect() + // (which would needlessly attempt to update 
commands in the wrong channel, then pop or overwrite them), + ImGuiOldColumnData* column = &columns->Columns[columns->Current]; + SetWindowClipRectBeforeSetChannel(window, column->ClipRect); + columns->Splitter.SetCurrentChannel(window->DrawList, columns->Current + 1); + + const float column_padding = g.Style.ItemSpacing.x; + columns->LineMaxY = ImMax(columns->LineMaxY, window->DC.CursorPos.y); + if (columns->Current > 0) + { + // Columns 1+ ignore IndentX (by canceling it out) + // FIXME-COLUMNS: Unnecessary, could be locked? + window->DC.ColumnsOffset.x = GetColumnOffset(columns->Current) - window->DC.Indent.x + column_padding; + } + else + { + // New row/line: column 0 honor IndentX. + /* The new row starts at the lowest cursor Y reached by any column of the previous row. */ + window->DC.ColumnsOffset.x = ImMax(column_padding - window->WindowPadding.x, 0.0f); + columns->LineMinY = columns->LineMaxY; + } + window->DC.CursorPos.x = IM_FLOOR(window->Pos.x + window->DC.Indent.x + window->DC.ColumnsOffset.x); + window->DC.CursorPos.y = columns->LineMinY; + window->DC.CurrLineSize = ImVec2(0.0f, 0.0f); + window->DC.CurrLineTextBaseOffset = 0.0f; + + // FIXME-COLUMNS: Share code with BeginColumns() - move code on columns setup. 
+ float offset_0 = GetColumnOffset(columns->Current); + float offset_1 = GetColumnOffset(columns->Current + 1); + float width = offset_1 - offset_0; + PushItemWidth(width * 0.65f); + window->WorkRect.Max.x = window->Pos.x + offset_1 - column_padding; +} + +void ImGui::EndColumns() +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = GetCurrentWindow(); + ImGuiOldColumns* columns = window->DC.CurrentColumns; + IM_ASSERT(columns != NULL); + + PopItemWidth(); + if (columns->Count > 1) + { + PopClipRect(); + columns->Splitter.Merge(window->DrawList); + } + + const ImGuiOldColumnFlags flags = columns->Flags; + columns->LineMaxY = ImMax(columns->LineMaxY, window->DC.CursorPos.y); + window->DC.CursorPos.y = columns->LineMaxY; + if (!(flags & ImGuiOldColumnFlags_GrowParentContentsSize)) + window->DC.CursorMaxPos.x = columns->HostCursorMaxPosX; // Restore cursor max pos, as columns don't grow parent + + // Draw columns borders and handle resize + // The IsBeingResized flag ensure we preserve pre-resize columns width so back-and-forth are not lossy + /* is_being_resized stays false unless a border is being dragged this frame; it is written back to columns->IsBeingResized after the border pass. */ + bool is_being_resized = false; + if (!(flags & ImGuiOldColumnFlags_NoBorder) && !window->SkipItems) + { + // We clip Y boundaries CPU side because very long triangles are mishandled by some GPU drivers. 
+ const float y1 = ImMax(columns->HostCursorPosY, window->ClipRect.Min.y); + const float y2 = ImMin(window->DC.CursorPos.y, window->ClipRect.Max.y); + int dragging_column = -1; + for (int n = 1; n < columns->Count; n++) + { + ImGuiOldColumnData* column = &columns->Columns[n]; + float x = window->Pos.x + GetColumnOffset(n); + const ImGuiID column_id = columns->ID + ImGuiID(n); + const float column_hit_hw = COLUMNS_HIT_RECT_HALF_WIDTH; + const ImRect column_hit_rect(ImVec2(x - column_hit_hw, y1), ImVec2(x + column_hit_hw, y2)); + KeepAliveID(column_id); + if (IsClippedEx(column_hit_rect, column_id, false)) + continue; + + bool hovered = false, held = false; + if (!(flags & ImGuiOldColumnFlags_NoResize)) + { + ButtonBehavior(column_hit_rect, column_id, &hovered, &held); + if (hovered || held) + g.MouseCursor = ImGuiMouseCursor_ResizeEW; + if (held && !(column->Flags & ImGuiOldColumnFlags_NoResize)) + dragging_column = n; + } + + // Draw column + const ImU32 col = GetColorU32(held ? ImGuiCol_SeparatorActive : hovered ? ImGuiCol_SeparatorHovered : ImGuiCol_Separator); + const float xi = IM_FLOOR(x); + window->DrawList->AddLine(ImVec2(xi, y1 + 1.0f), ImVec2(xi, y2), col); + } + + // Apply dragging after drawing the column lines, so our rendered lines are in sync with how items were displayed during the frame. 
+ if (dragging_column != -1) + { + if (!columns->IsBeingResized) + for (int n = 0; n < columns->Count + 1; n++) + columns->Columns[n].OffsetNormBeforeResize = columns->Columns[n].OffsetNorm; + columns->IsBeingResized = is_being_resized = true; + float x = GetDraggedColumnOffset(columns, dragging_column); + SetColumnOffset(dragging_column, x); + } + } + columns->IsBeingResized = is_being_resized; + + window->WorkRect = window->ParentWorkRect; + window->ParentWorkRect = columns->HostBackupParentWorkRect; + window->DC.CurrentColumns = NULL; + window->DC.ColumnsOffset.x = 0.0f; + window->DC.CursorPos.x = IM_FLOOR(window->Pos.x + window->DC.Indent.x + window->DC.ColumnsOffset.x); +} + +/* Legacy entry point. Does nothing when the active columns set already has the requested count and flags; otherwise ends the active set (if any) and begins a new one unless columns_count is 1. 'border' == false maps to ImGuiOldColumnFlags_NoBorder. */ +void ImGui::Columns(int columns_count, const char* id, bool border) +{ + ImGuiWindow* window = GetCurrentWindow(); + IM_ASSERT(columns_count >= 1); + + ImGuiOldColumnFlags flags = (border ? 0 : ImGuiOldColumnFlags_NoBorder); + //flags |= ImGuiOldColumnFlags_NoPreserveWidths; // NB: Legacy behavior + ImGuiOldColumns* columns = window->DC.CurrentColumns; + if (columns != NULL && columns->Count == columns_count && columns->Flags == flags) + return; + + if (columns != NULL) + EndColumns(); + + if (columns_count != 1) + BeginColumns(id, columns_count, flags); +} + +//------------------------------------------------------------------------- + +#endif // #ifndef IMGUI_DISABLE diff --git a/cpp-projects/3d-engine/imgui/imgui_widgets.cpp b/cpp-projects/3d-engine/imgui/imgui_widgets.cpp new file mode 100644 index 0000000..b765701 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/imgui_widgets.cpp @@ -0,0 +1,7918 @@ +// dear imgui, v1.81 WIP +// (widgets code) + +/* + +Index of this file: + +// [SECTION] Forward Declarations +// [SECTION] Widgets: Text, etc. +// [SECTION] Widgets: Main (Button, Image, Checkbox, RadioButton, ProgressBar, Bullet, etc.) +// [SECTION] Widgets: Low-level Layout helpers (Spacing, Dummy, NewLine, Separator, etc.) 
+// [SECTION] Widgets: ComboBox +// [SECTION] Data Type and Data Formatting Helpers +// [SECTION] Widgets: DragScalar, DragFloat, DragInt, etc. +// [SECTION] Widgets: SliderScalar, SliderFloat, SliderInt, etc. +// [SECTION] Widgets: InputScalar, InputFloat, InputInt, etc. +// [SECTION] Widgets: InputText, InputTextMultiline +// [SECTION] Widgets: ColorEdit, ColorPicker, ColorButton, etc. +// [SECTION] Widgets: TreeNode, CollapsingHeader, etc. +// [SECTION] Widgets: Selectable +// [SECTION] Widgets: ListBox +// [SECTION] Widgets: PlotLines, PlotHistogram +// [SECTION] Widgets: Value helpers +// [SECTION] Widgets: MenuItem, BeginMenu, EndMenu, etc. +// [SECTION] Widgets: BeginTabBar, EndTabBar, etc. +// [SECTION] Widgets: BeginTabItem, EndTabItem, etc. +// [SECTION] Widgets: Columns, BeginColumns, EndColumns, etc. + +*/ + +#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS) +#define _CRT_SECURE_NO_WARNINGS +#endif + +#include "imgui.h" +#ifndef IMGUI_DISABLE + +#ifndef IMGUI_DEFINE_MATH_OPERATORS +#define IMGUI_DEFINE_MATH_OPERATORS +#endif +#include "imgui_internal.h" + +// System includes +#include // toupper +#if defined(_MSC_VER) && _MSC_VER <= 1500 // MSVC 2008 or earlier +#include // intptr_t +#else +#include // intptr_t +#endif + +//------------------------------------------------------------------------- +// Warnings +//------------------------------------------------------------------------- + +// Visual Studio warnings +#ifdef _MSC_VER +#pragma warning (disable: 4127) // condition expression is constant +#pragma warning (disable: 4996) // 'This function or variable may be unsafe': strcpy, strdup, sprintf, vsnprintf, sscanf, fopen +#if defined(_MSC_VER) && _MSC_VER >= 1922 // MSVC 2019 16.2 or later +#pragma warning (disable: 5054) // operator '|': deprecated between enumerations of different types +#endif +#endif + +// Clang/GCC warnings with -Weverything +#if defined(__clang__) +#if __has_warning("-Wunknown-warning-option") +#pragma clang diagnostic 
ignored "-Wunknown-warning-option" // warning: unknown warning group 'xxx' // not all warnings are known by all Clang versions and they tend to be rename-happy.. so ignoring warnings triggers new warnings on some configuration. Great! +#endif +#pragma clang diagnostic ignored "-Wunknown-pragmas" // warning: unknown warning group 'xxx' +#pragma clang diagnostic ignored "-Wold-style-cast" // warning: use of old-style cast // yes, they are more terse. +#pragma clang diagnostic ignored "-Wfloat-equal" // warning: comparing floating point with == or != is unsafe // storing and comparing against same constants (typically 0.0f) is ok. +#pragma clang diagnostic ignored "-Wformat-nonliteral" // warning: format string is not a string literal // passing non-literal to vsnformat(). yes, user passing incorrect format strings can crash the code. +#pragma clang diagnostic ignored "-Wsign-conversion" // warning: implicit conversion changes signedness +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" // warning: zero as null pointer constant // some standard header variations use #define NULL 0 +#pragma clang diagnostic ignored "-Wdouble-promotion" // warning: implicit conversion from 'float' to 'double' when passing argument to function // using printf() is a misery with this as C++ va_arg ellipsis changes float to double. 
+#pragma clang diagnostic ignored "-Wenum-enum-conversion" // warning: bitwise operation between different enumeration types ('XXXFlags_' and 'XXXFlagsPrivate_') +#pragma clang diagnostic ignored "-Wdeprecated-enum-enum-conversion"// warning: bitwise operation between different enumeration types ('XXXFlags_' and 'XXXFlagsPrivate_') is deprecated +#pragma clang diagnostic ignored "-Wimplicit-int-float-conversion" // warning: implicit conversion from 'xxx' to 'float' may lose precision +#elif defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wpragmas" // warning: unknown option after '#pragma GCC diagnostic' kind +#pragma GCC diagnostic ignored "-Wformat-nonliteral" // warning: format not a string literal, format string not checked +#pragma GCC diagnostic ignored "-Wclass-memaccess" // [__GNUC__ >= 8] warning: 'memset/memcpy' clearing/writing an object of type 'xxxx' with no trivial copy-assignment; use assignment or value-initialization instead +#endif + +//------------------------------------------------------------------------- +// Data +//------------------------------------------------------------------------- + +// Widgets +static const float DRAGDROP_HOLD_TO_OPEN_TIMER = 0.70f; // Time for drag-hold to activate items accepting the ImGuiButtonFlags_PressedOnDragDropHold button behavior. +static const float DRAG_MOUSE_THRESHOLD_FACTOR = 0.50f; // Multiplier for the default value of io.MouseDragThreshold to make DragFloat/DragInt react faster to mouse drags. 
+ +// Those MIN/MAX values are not define because we need to point to them +static const signed char IM_S8_MIN = -128; +static const signed char IM_S8_MAX = 127; +static const unsigned char IM_U8_MIN = 0; +static const unsigned char IM_U8_MAX = 0xFF; +static const signed short IM_S16_MIN = -32768; +static const signed short IM_S16_MAX = 32767; +static const unsigned short IM_U16_MIN = 0; +static const unsigned short IM_U16_MAX = 0xFFFF; +static const ImS32 IM_S32_MIN = INT_MIN; // (-2147483647 - 1), (0x80000000); +static const ImS32 IM_S32_MAX = INT_MAX; // (2147483647), (0x7FFFFFFF) +static const ImU32 IM_U32_MIN = 0; +static const ImU32 IM_U32_MAX = UINT_MAX; // (0xFFFFFFFF) +#ifdef LLONG_MIN +static const ImS64 IM_S64_MIN = LLONG_MIN; // (-9223372036854775807ll - 1ll); +static const ImS64 IM_S64_MAX = LLONG_MAX; // (9223372036854775807ll); +#else +static const ImS64 IM_S64_MIN = -9223372036854775807LL - 1; +static const ImS64 IM_S64_MAX = 9223372036854775807LL; +#endif +static const ImU64 IM_U64_MIN = 0; +#ifdef ULLONG_MAX +static const ImU64 IM_U64_MAX = ULLONG_MAX; // (0xFFFFFFFFFFFFFFFFull); +#else +static const ImU64 IM_U64_MAX = (2ULL * 9223372036854775807LL + 1); +#endif + +//------------------------------------------------------------------------- +// [SECTION] Forward Declarations +//------------------------------------------------------------------------- + +// For InputTextEx() +static bool InputTextFilterCharacter(unsigned int* p_char, ImGuiInputTextFlags flags, ImGuiInputTextCallback callback, void* user_data); +static int InputTextCalcTextLenAndLineCount(const char* text_begin, const char** out_text_end); +static ImVec2 InputTextCalcTextSizeW(const ImWchar* text_begin, const ImWchar* text_end, const ImWchar** remaining = NULL, ImVec2* out_offset = NULL, bool stop_on_new_line = false); + +//------------------------------------------------------------------------- +// [SECTION] Widgets: Text, etc. 
+//-------------------------------------------------------------------------
+// - TextEx() [Internal]
+// - TextUnformatted()
+// - Text()
+// - TextV()
+// - TextColored()
+// - TextColoredV()
+// - TextDisabled()
+// - TextDisabledV()
+// - TextWrapped()
+// - TextWrappedV()
+// - LabelText()
+// - LabelTextV()
+// - BulletText()
+// - BulletTextV()
+//-------------------------------------------------------------------------
+
+// Submit the text range [text, text_end) as an item at the current cursor position.
+// - 'text' must not be NULL. When 'text_end' is NULL the end is found with strlen().
+// - Text longer than 2000 bytes with wrapping disabled takes a line-by-line path that stops rendering at the first clipped line.
+// - Any other text is measured with CalcTextSize() and drawn with RenderTextWrapped().
+// - ImGuiTextFlags_NoWidthForLargeClippedText: in the long-text path, lines that are not rendered do not contribute to the item width.
+void ImGui::TextEx(const char* text, const char* text_end, ImGuiTextFlags flags)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return;
+
+    ImGuiContext& g = *GImGui;
+    IM_ASSERT(text != NULL);
+    const char* text_begin = text;
+    if (text_end == NULL)
+        text_end = text + strlen(text); // FIXME-OPT
+
+    const ImVec2 text_pos(window->DC.CursorPos.x, window->DC.CursorPos.y + window->DC.CurrLineTextBaseOffset);
+    const float wrap_pos_x = window->DC.TextWrapPos;
+    const bool wrap_enabled = (wrap_pos_x >= 0.0f);
+    if (text_end - text > 2000 && !wrap_enabled)
+    {
+        // Long text!
+        // Perform manual coarse clipping to optimize for long multi-line text
+        // - From this point we will only compute the width of lines that are visible. Optimization only available when word-wrapping is disabled.
+        // - We also don't vertically center the text within the line full height, which is unlikely to matter because we are likely the biggest and only item on the line.
+        // - We use memchr(), pay attention that well optimized versions of those str/mem functions are much faster than a casually written loop.
+        const char* line = text;
+        const float line_height = GetTextLineHeight();
+        ImVec2 text_size(0, 0);
+
+        // Lines to skip (can't skip when logging text)
+        ImVec2 pos = text_pos;
+        if (!g.LogEnabled)
+        {
+            // Number of whole lines that sit above the top of the clip rectangle.
+            int lines_skippable = (int)((window->ClipRect.Min.y - text_pos.y) / line_height);
+            if (lines_skippable > 0)
+            {
+                int lines_skipped = 0;
+                while (line < text_end && lines_skipped < lines_skippable)
+                {
+                    const char* line_end = (const char*)memchr(line, '\n', text_end - line);
+                    if (!line_end)
+                        line_end = text_end;
+                    if ((flags & ImGuiTextFlags_NoWidthForLargeClippedText) == 0)
+                        text_size.x = ImMax(text_size.x, CalcTextSize(line, line_end).x);
+                    line = line_end + 1;
+                    lines_skipped++;
+                }
+                pos.y += lines_skipped * line_height;
+            }
+        }
+
+        // Lines to render
+        if (line < text_end)
+        {
+            ImRect line_rect(pos, pos + ImVec2(FLT_MAX, line_height));
+            while (line < text_end)
+            {
+                // Stop at the first clipped line; the rest is only counted below.
+                if (IsClippedEx(line_rect, 0, false))
+                    break;
+
+                const char* line_end = (const char*)memchr(line, '\n', text_end - line);
+                if (!line_end)
+                    line_end = text_end;
+                text_size.x = ImMax(text_size.x, CalcTextSize(line, line_end).x);
+                RenderText(pos, line, line_end, false);
+                line = line_end + 1;
+                line_rect.Min.y += line_height;
+                line_rect.Max.y += line_height;
+                pos.y += line_height;
+            }
+
+            // Count remaining lines
+            int lines_skipped = 0;
+            while (line < text_end)
+            {
+                const char* line_end = (const char*)memchr(line, '\n', text_end - line);
+                if (!line_end)
+                    line_end = text_end;
+                if ((flags & ImGuiTextFlags_NoWidthForLargeClippedText) == 0)
+                    text_size.x = ImMax(text_size.x, CalcTextSize(line, line_end).x);
+                line = line_end + 1;
+                lines_skipped++;
+            }
+            pos.y += lines_skipped * line_height;
+        }
+        // Total height is the vertical distance walked over skipped, rendered and remaining lines.
+        text_size.y = (pos - text_pos).y;
+
+        ImRect bb(text_pos, text_pos + text_size);
+        ItemSize(text_size, 0.0f);
+        ItemAdd(bb, 0);
+    }
+    else
+    {
+        const float wrap_width = wrap_enabled ? CalcWrapWidthForPos(window->DC.CursorPos, wrap_pos_x) : 0.0f;
+        const ImVec2 text_size = CalcTextSize(text_begin, text_end, false, wrap_width);
+
+        ImRect bb(text_pos, text_pos + text_size);
+        ItemSize(text_size, 0.0f);
+        if (!ItemAdd(bb, 0))
+            return;
+
+        // Render (we don't hide text after ## in this end-user function)
+        RenderTextWrapped(bb.Min, text_begin, text_end, wrap_width);
+    }
+}
+
+// Display the raw range [text, text_end) without any format-string processing.
+void ImGui::TextUnformatted(const char* text, const char* text_end)
+{
+    TextEx(text, text_end, ImGuiTextFlags_NoWidthForLargeClippedText);
+}
+
+// printf-style text; variadic front-end for TextV().
+void ImGui::Text(const char* fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    TextV(fmt, args);
+    va_end(args);
+}
+
+// Format 'fmt'/'args' into the context's TempBuffer, then submit the result through TextEx().
+void ImGui::TextV(const char* fmt, va_list args)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return;
+
+    ImGuiContext& g = *GImGui;
+    const char* text_end = g.TempBuffer + ImFormatStringV(g.TempBuffer, IM_ARRAYSIZE(g.TempBuffer), fmt, args);
+    TextEx(g.TempBuffer, text_end, ImGuiTextFlags_NoWidthForLargeClippedText);
+}
+
+// printf-style text drawn with color 'col'; variadic front-end for TextColoredV().
+void ImGui::TextColored(const ImVec4& col, const char* fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    TextColoredV(col, fmt, args);
+    va_end(args);
+}
+
+// Push 'col' as ImGuiCol_Text for the duration of the text submission.
+// When 'fmt' is exactly "%s" the string argument is handed to TextEx() directly, bypassing TempBuffer formatting.
+void ImGui::TextColoredV(const ImVec4& col, const char* fmt, va_list args)
+{
+    PushStyleColor(ImGuiCol_Text, col);
+    if (fmt[0] == '%' && fmt[1] == 's' && fmt[2] == 0)
+        TextEx(va_arg(args, const char*), NULL, ImGuiTextFlags_NoWidthForLargeClippedText); // Skip formatting
+    else
+        TextV(fmt, args);
+    PopStyleColor();
+}
+
+// printf-style text drawn with the style's disabled text color; variadic front-end for TextDisabledV().
+void ImGui::TextDisabled(const char* fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    TextDisabledV(fmt, args);
+    va_end(args);
+}
+
+// Same as TextColoredV() using g.Style.Colors[ImGuiCol_TextDisabled] as the color.
+void ImGui::TextDisabledV(const char* fmt, va_list args)
+{
+    ImGuiContext& g = *GImGui;
+    PushStyleColor(ImGuiCol_Text, g.Style.Colors[ImGuiCol_TextDisabled]);
+    if (fmt[0] == '%' && fmt[1] == 's' && fmt[2] == 0)
+        TextEx(va_arg(args, const char*), NULL, ImGuiTextFlags_NoWidthForLargeClippedText); // Skip formatting
+    else
+        TextV(fmt, args);
+    PopStyleColor();
+}
+
+// printf-style text with word-wrapping enabled; variadic front-end for TextWrappedV().
+void ImGui::TextWrapped(const char* fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    TextWrappedV(fmt, args);
+    va_end(args);
+}
+
+// Submit text with wrapping enabled. A wrap position of 0.0f is pushed (and popped afterwards) only when
+// the window currently has no wrap position set (TextWrapPos < 0); an existing wrap position is kept.
+void ImGui::TextWrappedV(const char* fmt, va_list args)
+{
+    ImGuiContext& g = *GImGui;
+    bool need_backup = (g.CurrentWindow->DC.TextWrapPos < 0.0f);  // Keep existing wrap position if one is already set
+    if (need_backup)
+        PushTextWrapPos(0.0f);
+    if (fmt[0] == '%' && fmt[1] == 's' && fmt[2] == 0)
+        TextEx(va_arg(args, const char*), NULL, ImGuiTextFlags_NoWidthForLargeClippedText); // Skip formatting
+    else
+        TextV(fmt, args);
+    if (need_backup)
+        PopTextWrapPos();
+}
+
+// printf-style value followed by a label; variadic front-end for LabelTextV().
+void ImGui::LabelText(const char* label, const char* fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    LabelTextV(label, fmt, args);
+    va_end(args);
+}
+
+// Add a label+text combo aligned to other label+value widgets
+// The formatted value is clipped to a box of width CalcItemWidth(); the label, when non-empty, is drawn to its right
+// after style.ItemInnerSpacing.x.
+void ImGui::LabelTextV(const char* label, const char* fmt, va_list args)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return;
+
+    ImGuiContext& g = *GImGui;
+    const ImGuiStyle& style = g.Style;
+    const float w = CalcItemWidth();
+
+    const ImVec2 label_size = CalcTextSize(label, NULL, true);
+    const ImRect value_bb(window->DC.CursorPos, window->DC.CursorPos + ImVec2(w, label_size.y + style.FramePadding.y * 2));
+    const ImRect total_bb(window->DC.CursorPos, window->DC.CursorPos + ImVec2(w + (label_size.x > 0.0f ? style.ItemInnerSpacing.x : 0.0f), style.FramePadding.y * 2) + label_size);
+    ItemSize(total_bb, style.FramePadding.y);
+    if (!ItemAdd(total_bb, 0))
+        return;
+
+    // Render
+    const char* value_text_begin = &g.TempBuffer[0];
+    const char* value_text_end = value_text_begin + ImFormatStringV(g.TempBuffer, IM_ARRAYSIZE(g.TempBuffer), fmt, args);
+    RenderTextClipped(value_bb.Min, value_bb.Max, value_text_begin, value_text_end, NULL, ImVec2(0.0f, 0.5f));
+    if (label_size.x > 0.0f)
+        RenderText(ImVec2(value_bb.Max.x + style.ItemInnerSpacing.x, value_bb.Min.y + style.FramePadding.y), label);
+}
+
+// printf-style text preceded by a bullet; variadic front-end for BulletTextV().
+void ImGui::BulletText(const char* fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    BulletTextV(fmt, args);
+    va_end(args);
+}
+
+// Text with a little bullet aligned to the typical tree node.
+// The text is formatted into the context's TempBuffer; the bullet occupies a g.FontSize wide slot in front of it.
+void ImGui::BulletTextV(const char* fmt, va_list args)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return;
+
+    ImGuiContext& g = *GImGui;
+    const ImGuiStyle& style = g.Style;
+
+    const char* text_begin = g.TempBuffer;
+    const char* text_end = text_begin + ImFormatStringV(g.TempBuffer, IM_ARRAYSIZE(g.TempBuffer), fmt, args);
+    const ImVec2 label_size = CalcTextSize(text_begin, text_end, false);
+    const ImVec2 total_size = ImVec2(g.FontSize + (label_size.x > 0.0f ? (label_size.x + style.FramePadding.x * 2) : 0.0f), label_size.y);  // Empty text doesn't add padding
+    ImVec2 pos = window->DC.CursorPos;
+    pos.y += window->DC.CurrLineTextBaseOffset;
+    ItemSize(total_size, 0.0f);
+    const ImRect bb(pos, pos + total_size);
+    if (!ItemAdd(bb, 0))
+        return;
+
+    // Render
+    ImU32 text_col = GetColorU32(ImGuiCol_Text);
+    RenderBullet(window->DrawList, bb.Min + ImVec2(style.FramePadding.x + g.FontSize * 0.5f, g.FontSize * 0.5f), text_col);
+    RenderText(bb.Min + ImVec2(g.FontSize + style.FramePadding.x * 2, 0.0f), text_begin, text_end, false);
+}
+
+//-------------------------------------------------------------------------
+// [SECTION] Widgets: Main
+//-------------------------------------------------------------------------
+// - ButtonBehavior() [Internal]
+// - Button()
+// - SmallButton()
+// - InvisibleButton()
+// - ArrowButton()
+// - CloseButton() [Internal]
+// - CollapseButton() [Internal]
+// - GetWindowScrollbarID() [Internal]
+// - GetWindowScrollbarRect() [Internal]
+// - Scrollbar() [Internal]
+// - ScrollbarEx() [Internal]
+// - Image()
+// - ImageButton()
+// - Checkbox()
+// - CheckboxFlagsT() [Internal]
+// - CheckboxFlags()
+// - RadioButton()
+// - ProgressBar()
+// - Bullet()
+//-------------------------------------------------------------------------
+
+// The ButtonBehavior() function is key to many interactions and used by many/most widgets.
+// Because we handle so many cases (keyboard/gamepad navigation, drag and drop) and many specific behavior (via ImGuiButtonFlags_),
+// this code is a little complex.
+// By far the most common path is interacting with the Mouse using the default ImGuiButtonFlags_PressedOnClickRelease button behavior.
+// See the series of events below and the corresponding state reported by dear imgui:
+//------------------------------------------------------------------------------------------------------------------------------------------------
+// with PressedOnClickRelease:             return-value  IsItemHovered()  IsItemActive()  IsItemActivated()  IsItemDeactivated()  IsItemClicked()
+//   Frame N+0 (mouse is outside bb)        -             -                -               -                  -                    -
+//   Frame N+1 (mouse moves inside bb)      -             true             -               -                  -                    -
+//   Frame N+2 (mouse button is down)       -             true             true            true               -                    true
+//   Frame N+3 (mouse button is down)       -             true             true            -                  -                    -
+//   Frame N+4 (mouse moves outside bb)     -             -                true            -                  -                    -
+//   Frame N+5 (mouse moves inside bb)      -             true             true            -                  -                    -
+//   Frame N+6 (mouse button is released)   true          true             -               -                  true                 -
+//   Frame N+7 (mouse button is released)   -             true             -               -                  -                    -
+//   Frame N+8 (mouse moves outside bb)     -             -                -               -                  -                    -
+//------------------------------------------------------------------------------------------------------------------------------------------------
+// with PressedOnClick:                    return-value  IsItemHovered()  IsItemActive()  IsItemActivated()  IsItemDeactivated()  IsItemClicked()
+//   Frame N+2 (mouse button is down)       true          true             true            true               -                    true
+//   Frame N+3 (mouse button is down)       -             true             true            -                  -                    -
+//   Frame N+6 (mouse button is released)   -             true             -               -                  true                 -
+//   Frame N+7 (mouse button is released)   -             true             -               -                  -                    -
+//------------------------------------------------------------------------------------------------------------------------------------------------
+// with PressedOnRelease:                  return-value  IsItemHovered()  IsItemActive()  IsItemActivated()  IsItemDeactivated()  IsItemClicked()
+//   Frame N+2 (mouse button is down)       -             true             -               -                  -                    true
+//   Frame N+3 (mouse button is down)       -             true             -               -                  -                    -
+//   Frame N+6 (mouse button is released)   true          true             -               -                  -                    -
+//   Frame N+7 (mouse button is released)   -             true             -               -                  -                    -
+//------------------------------------------------------------------------------------------------------------------------------------------------
+// with PressedOnDoubleClick:              return-value  IsItemHovered()  IsItemActive()  IsItemActivated()  IsItemDeactivated()  IsItemClicked()
+//   Frame N+0 (mouse button is down)       -             true             -               -                  -                    true
+//   Frame N+1 (mouse button is down)       -             true             -               -                  -                    -
+//   Frame N+2 (mouse button is released)   -             true             -               -                  -                    -
+//   Frame N+3 (mouse button is released)   -             true             -               -                  -                    -
+//   Frame N+4 (mouse button is down)       true          true             true            true               -                    true
+//   Frame N+5 (mouse button is down)       -             true             true            -                  -                    -
+//   Frame N+6 (mouse button is released)   -             true             -               -                  true                 -
+//   Frame N+7 (mouse button is released)   -             true             -               -                  -                    -
+//------------------------------------------------------------------------------------------------------------------------------------------------
+// Note that some combinations are supported,
+// - PressedOnDragDropHold can generally be associated with any flag.
+// - PressedOnDoubleClick can be associated by PressedOnClickRelease/PressedOnRelease, in which case the second release event won't be reported.
+//------------------------------------------------------------------------------------------------------------------------------------------------
+// The behavior of the return-value changes when ImGuiButtonFlags_Repeat is set:
+//                                         Repeat+                  Repeat+           Repeat+             Repeat+
+//                                         PressedOnClickRelease    PressedOnClick    PressedOnRelease    PressedOnDoubleClick
+//-------------------------------------------------------------------------------------------------------------------------------------------------
+//   Frame N+0 (mouse button is down)       -                        true              -                   true
+//   ...                                    -                        -                 -                   -
+//   Frame N + RepeatDelay                  true                     true              -                   true
+//   ...                                    -                        -                 -                   -
+//   Frame N + RepeatDelay + RepeatRate*N   true                     true              -                   true
+//-------------------------------------------------------------------------------------------------------------------------------------------------
+
+// Core interaction logic for an item identified by 'id' occupying rectangle 'bb'.
+// - out_hovered / out_held: optional outputs (may be NULL) receiving the hovered and held states for this frame.
+// - flags: ImGuiButtonFlags_ selecting mouse buttons and press policy; the defaults are applied when the respective mask is empty.
+// - Returns true on the frame the item is considered pressed according to the tables above.
+// With ImGuiButtonFlags_Disabled the item reports nothing and gives up the active id if it was holding it.
+bool ImGui::ButtonBehavior(const ImRect& bb, ImGuiID id, bool* out_hovered, bool* out_held, ImGuiButtonFlags flags)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = GetCurrentWindow();
+
+    if (flags & ImGuiButtonFlags_Disabled)
+    {
+        if (out_hovered) *out_hovered = false;
+        if (out_held) *out_held = false;
+        if (g.ActiveId == id) ClearActiveID();
+        return false;
+    }
+
+    // Default only reacts to left mouse button
+    if ((flags & ImGuiButtonFlags_MouseButtonMask_) == 0)
+        flags |= ImGuiButtonFlags_MouseButtonDefault_;
+
+    // Default behavior requires click + release inside bounding box
+    if ((flags & ImGuiButtonFlags_PressedOnMask_) == 0)
+        flags |= ImGuiButtonFlags_PressedOnDefault_;
+
+    // With FlattenChildren, temporarily make this window the hovered window for the hover tests below (restored afterwards).
+    ImGuiWindow* backup_hovered_window = g.HoveredWindow;
+    const bool flatten_hovered_children = (flags & ImGuiButtonFlags_FlattenChildren) && g.HoveredRootWindow == window;
+    if (flatten_hovered_children)
+        g.HoveredWindow = window;
+
+#ifdef IMGUI_ENABLE_TEST_ENGINE
+    if (id != 0 && window->DC.LastItemId != id)
+        IMGUI_TEST_ENGINE_ITEM_ADD(bb, id);
+#endif
+
+    bool pressed = false;
+    bool hovered = ItemHoverable(bb, id);
+
+    // Drag source doesn't report as hovered
+    if (hovered && g.DragDropActive && g.DragDropPayload.SourceId == id && !(g.DragDropSourceFlags & ImGuiDragDropFlags_SourceNoDisableHover))
+        hovered = false;
+
+    // Special mode for Drag and Drop where holding button pressed for a long time while dragging another item triggers the button
+    if (g.DragDropActive && (flags & ImGuiButtonFlags_PressedOnDragDropHold) && !(g.DragDropSourceFlags & ImGuiDragDropFlags_SourceNoHoldToOpenOthers))
+        if (IsItemHovered(ImGuiHoveredFlags_AllowWhenBlockedByActiveItem))
+        {
+            hovered = true;
+            SetHoveredID(id);
+            if (CalcTypematicRepeatAmount(g.HoveredIdTimer + 0.0001f - g.IO.DeltaTime, g.HoveredIdTimer + 0.0001f, DRAGDROP_HOLD_TO_OPEN_TIMER, 0.00f))
+            {
+                pressed = true;
+                g.DragDropHoldJustPressedId = id;
+                FocusWindow(window);
+            }
+        }
+
+    if (flatten_hovered_children)
+        g.HoveredWindow = backup_hovered_window;
+
+    // AllowOverlap mode (rarely used) requires previous frame HoveredId to be null or to match. This allows using patterns where a later submitted widget overlaps a previous one.
+    if (hovered && (flags & ImGuiButtonFlags_AllowItemOverlap) && (g.HoveredIdPreviousFrame != id && g.HoveredIdPreviousFrame != 0))
+        hovered = false;
+
+    // Mouse handling
+    if (hovered)
+    {
+        if (!(flags & ImGuiButtonFlags_NoKeyModifiers) || (!g.IO.KeyCtrl && !g.IO.KeyShift && !g.IO.KeyAlt))
+        {
+            // Poll buttons
+            // (for each of clicked/released, the first matching enabled button wins in left, right, middle order)
+            int mouse_button_clicked = -1;
+            int mouse_button_released = -1;
+            if ((flags & ImGuiButtonFlags_MouseButtonLeft) && g.IO.MouseClicked[0])         { mouse_button_clicked = 0; }
+            else if ((flags & ImGuiButtonFlags_MouseButtonRight) && g.IO.MouseClicked[1])   { mouse_button_clicked = 1; }
+            else if ((flags & ImGuiButtonFlags_MouseButtonMiddle) && g.IO.MouseClicked[2])  { mouse_button_clicked = 2; }
+            if ((flags & ImGuiButtonFlags_MouseButtonLeft) && g.IO.MouseReleased[0])        { mouse_button_released = 0; }
+            else if ((flags & ImGuiButtonFlags_MouseButtonRight) && g.IO.MouseReleased[1])  { mouse_button_released = 1; }
+            else if ((flags & ImGuiButtonFlags_MouseButtonMiddle) && g.IO.MouseReleased[2]) { mouse_button_released = 2; }
+
+            if (mouse_button_clicked != -1 && g.ActiveId != id)
+            {
+                if (flags & (ImGuiButtonFlags_PressedOnClickRelease | ImGuiButtonFlags_PressedOnClickReleaseAnywhere))
+                {
+                    SetActiveID(id, window);
+                    g.ActiveIdMouseButton = mouse_button_clicked;
+                    if (!(flags & ImGuiButtonFlags_NoNavFocus))
+                        SetFocusID(id, window);
+                    FocusWindow(window);
+                }
+                if ((flags & ImGuiButtonFlags_PressedOnClick) || ((flags & ImGuiButtonFlags_PressedOnDoubleClick) && g.IO.MouseDoubleClicked[mouse_button_clicked]))
+                {
+                    pressed = true;
+                    if (flags & ImGuiButtonFlags_NoHoldingActiveId)
+                        ClearActiveID();
+                    else
+                        SetActiveID(id, window); // Hold on ID
+                    g.ActiveIdMouseButton = mouse_button_clicked;
+                    FocusWindow(window);
+                }
+            }
+            if ((flags & ImGuiButtonFlags_PressedOnRelease) && mouse_button_released != -1)
+            {
+                // Repeat mode trumps on release behavior
+                const bool has_repeated_at_least_once = (flags & ImGuiButtonFlags_Repeat) && g.IO.MouseDownDurationPrev[mouse_button_released] >= g.IO.KeyRepeatDelay;
+                if (!has_repeated_at_least_once)
+                    pressed = true;
+                ClearActiveID();
+            }
+
+            // 'Repeat' mode acts when held regardless of _PressedOn flags (see table above).
+            // Relies on repeat logic of IsMouseClicked() but we may as well do it ourselves if we end up exposing finer RepeatDelay/RepeatRate settings.
+            if (g.ActiveId == id && (flags & ImGuiButtonFlags_Repeat))
+                if (g.IO.MouseDownDuration[g.ActiveIdMouseButton] > 0.0f && IsMouseClicked(g.ActiveIdMouseButton, true))
+                    pressed = true;
+        }
+
+        if (pressed)
+            g.NavDisableHighlight = true;
+    }
+
+    // Gamepad/Keyboard navigation
+    // We report navigated item as hovered but we don't set g.HoveredId to not interfere with mouse.
+    if (g.NavId == id && !g.NavDisableHighlight && g.NavDisableMouseHover && (g.ActiveId == 0 || g.ActiveId == id || g.ActiveId == window->MoveId))
+        if (!(flags & ImGuiButtonFlags_NoHoveredOnFocus))
+            hovered = true;
+    if (g.NavActivateDownId == id)
+    {
+        bool nav_activated_by_code = (g.NavActivateId == id);
+        bool nav_activated_by_inputs = IsNavInputTest(ImGuiNavInput_Activate, (flags & ImGuiButtonFlags_Repeat) ? ImGuiInputReadMode_Repeat : ImGuiInputReadMode_Pressed);
+        if (nav_activated_by_code || nav_activated_by_inputs)
+            pressed = true;
+        if (nav_activated_by_code || nav_activated_by_inputs || g.ActiveId == id)
+        {
+            // Set active id so it can be queried by user via IsItemActive(), equivalent of holding the mouse button.
+            g.NavActivateId = id; // This is so SetActiveId assign a Nav source
+            SetActiveID(id, window);
+            if ((nav_activated_by_code || nav_activated_by_inputs) && !(flags & ImGuiButtonFlags_NoNavFocus))
+                SetFocusID(id, window);
+        }
+    }
+
+    // Process while held
+    bool held = false;
+    if (g.ActiveId == id)
+    {
+        if (g.ActiveIdSource == ImGuiInputSource_Mouse)
+        {
+            // Record where inside the item the click landed, on the activation frame only.
+            if (g.ActiveIdIsJustActivated)
+                g.ActiveIdClickOffset = g.IO.MousePos - bb.Min;
+
+            const int mouse_button = g.ActiveIdMouseButton;
+            IM_ASSERT(mouse_button >= 0 && mouse_button < ImGuiMouseButton_COUNT);
+            if (g.IO.MouseDown[mouse_button])
+            {
+                held = true;
+            }
+            else
+            {
+                bool release_in = hovered && (flags & ImGuiButtonFlags_PressedOnClickRelease) != 0;
+                bool release_anywhere = (flags & ImGuiButtonFlags_PressedOnClickReleaseAnywhere) != 0;
+                if ((release_in || release_anywhere) && !g.DragDropActive)
+                {
+                    // Report as pressed when releasing the mouse (this is the most common path)
+                    bool is_double_click_release = (flags & ImGuiButtonFlags_PressedOnDoubleClick) && g.IO.MouseDownWasDoubleClick[mouse_button];
+                    bool is_repeating_already = (flags & ImGuiButtonFlags_Repeat) && g.IO.MouseDownDurationPrev[mouse_button] >= g.IO.KeyRepeatDelay; // Repeat mode trumps
+                    if (!is_double_click_release && !is_repeating_already)
+                        pressed = true;
+                }
+                ClearActiveID();
+            }
+            if (!(flags & ImGuiButtonFlags_NoNavFocus))
+                g.NavDisableHighlight = true;
+        }
+        else if (g.ActiveIdSource == ImGuiInputSource_Nav)
+        {
+            // When activated using Nav, we hold on the ActiveID until activation button is released
+            if (g.NavActivateDownId != id)
+                ClearActiveID();
+        }
+        if (pressed)
+            g.ActiveIdHasBeenPressedBefore = true;
+    }
+
+    if (out_hovered) *out_hovered = hovered;
+    if (out_held) *out_held = held;
+
+    return pressed;
+}
+
+// Framed, labelled button.
+// - label: its ID is obtained with window->GetID(label); it is rendered with "##" hiding enabled for size computation.
+// - size_arg: passed to CalcItemSize() with the label size plus frame padding as the default size.
+// - flags: forwarded to ButtonBehavior(); ImGuiButtonFlags_Repeat is added when the window's item flags request button repeat.
+// Returns the pressed state reported by ButtonBehavior(), or false when the window skips items or the item is clipped.
+bool ImGui::ButtonEx(const char* label, const ImVec2& size_arg, ImGuiButtonFlags flags)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+
+    ImGuiContext& g = *GImGui;
+    const ImGuiStyle& style = g.Style;
+    const ImGuiID id = window->GetID(label);
+    const ImVec2 label_size = CalcTextSize(label, NULL, true);
+
+    ImVec2 pos = window->DC.CursorPos;
+    if ((flags & ImGuiButtonFlags_AlignTextBaseLine) && style.FramePadding.y < window->DC.CurrLineTextBaseOffset) // Try to vertically align buttons that are smaller/have no padding so that text baseline matches (bit hacky, since it shouldn't be a flag)
+        pos.y += window->DC.CurrLineTextBaseOffset - style.FramePadding.y;
+    ImVec2 size = CalcItemSize(size_arg, label_size.x + style.FramePadding.x * 2.0f, label_size.y + style.FramePadding.y * 2.0f);
+
+    const ImRect bb(pos, pos + size);
+    ItemSize(size, style.FramePadding.y);
+    if (!ItemAdd(bb, id))
+        return false;
+
+    if (window->DC.ItemFlags & ImGuiItemFlags_ButtonRepeat)
+        flags |= ImGuiButtonFlags_Repeat;
+    bool hovered, held;
+    bool pressed = ButtonBehavior(bb, id, &hovered, &held, flags);
+
+    // Render
+    const ImU32 col = GetColorU32((held && hovered) ? ImGuiCol_ButtonActive : hovered ? ImGuiCol_ButtonHovered : ImGuiCol_Button);
+    RenderNavHighlight(bb, id);
+    RenderFrame(bb.Min, bb.Max, col, true, style.FrameRounding);
+    RenderTextClipped(bb.Min + style.FramePadding, bb.Max - style.FramePadding, label, NULL, &label_size, style.ButtonTextAlign, &bb);
+
+    // Automatically close popups
+    //if (pressed && !(flags & ImGuiButtonFlags_DontClosePopups) && (window->Flags & ImGuiWindowFlags_Popup))
+    //    CloseCurrentPopup();
+
+    IMGUI_TEST_ENGINE_ITEM_INFO(id, label, window->DC.LastItemStatusFlags);
+    return pressed;
+}
+
+// Standard button: ButtonEx() with no extra flags.
+bool ImGui::Button(const char* label, const ImVec2& size_arg)
+{
+    return ButtonEx(label, size_arg, ImGuiButtonFlags_None);
+}
+
+// Small buttons fits within text without additional vertical spacing.
+bool ImGui::SmallButton(const char* label)
+{
+    // Submit a regular button with the vertical frame padding forced to zero, then put the style value back.
+    ImGuiStyle& style = GImGui->Style;
+    const float saved_padding_y = style.FramePadding.y;
+    style.FramePadding.y = 0.0f;
+    const bool was_pressed = ButtonEx(label, ImVec2(0, 0), ImGuiButtonFlags_AlignTextBaseLine);
+    style.FramePadding.y = saved_padding_y;
+    return was_pressed;
+}
+
+// Button behavior without any rendering.
+// Hint: push indices or pointers on the ID stack with ImGui::PushID()/PopID(); 'str_id' can then stay empty or
+// identical for every button instead of being built from a non-string id.
+bool ImGui::InvisibleButton(const char* str_id, const ImVec2& size_arg, ImGuiButtonFlags flags)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+
+    // A zero size is rejected: there is no label whose size could serve as a fallback, unlike Button().
+    IM_ASSERT(size_arg.x != 0.0f && size_arg.y != 0.0f);
+
+    const ImGuiID id = window->GetID(str_id);
+    const ImVec2 item_size = CalcItemSize(size_arg, 0.0f, 0.0f);
+    const ImVec2 top_left = window->DC.CursorPos;
+    const ImRect bb(top_left, top_left + item_size);
+    ItemSize(item_size);
+    if (!ItemAdd(bb, id))
+        return false;
+
+    // Hover/held states are computed but not needed since nothing is drawn.
+    bool is_hovered, is_held;
+    return ButtonBehavior(bb, id, &is_hovered, &is_held, flags);
+}
+
+// Square-framed button showing an arrow pointing towards 'dir', using an explicit size.
+bool ImGui::ArrowButtonEx(const char* str_id, ImGuiDir dir, ImVec2 size, ImGuiButtonFlags flags)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+
+    ImGuiContext& g = *GImGui;
+    const ImGuiID id = window->GetID(str_id);
+    const ImRect bb(window->DC.CursorPos, window->DC.CursorPos + size);
+
+    // Only buttons at least as tall as a regular frame use the frame padding as text baseline offset.
+    const float frame_height = GetFrameHeight();
+    float text_baseline_y = -1.0f;
+    if (size.y >= frame_height)
+        text_baseline_y = g.Style.FramePadding.y;
+    ItemSize(size, text_baseline_y);
+    if (!ItemAdd(bb, id))
+        return false;
+
+    if (window->DC.ItemFlags & ImGuiItemFlags_ButtonRepeat)
+        flags |= ImGuiButtonFlags_Repeat;
+
+    bool is_hovered, is_held;
+    const bool was_pressed = ButtonBehavior(bb, id, &is_hovered, &is_held, flags);
+
+    // Render: frame color follows the interaction state, the arrow is centered when the button is larger than the font.
+    ImGuiCol frame_col_idx = ImGuiCol_Button;
+    if (is_hovered)
+        frame_col_idx = is_held ? ImGuiCol_ButtonActive : ImGuiCol_ButtonHovered;
+    const ImU32 frame_col = GetColorU32(frame_col_idx);
+    const ImU32 arrow_col = GetColorU32(ImGuiCol_Text);
+    RenderNavHighlight(bb, id);
+    RenderFrame(bb.Min, bb.Max, frame_col, true, g.Style.FrameRounding);
+    const float arrow_off_x = ImMax(0.0f, (size.x - g.FontSize) * 0.5f);
+    const float arrow_off_y = ImMax(0.0f, (size.y - g.FontSize) * 0.5f);
+    RenderArrow(window->DrawList, bb.Min + ImVec2(arrow_off_x, arrow_off_y), arrow_col, dir);
+
+    return was_pressed;
+}
+
+// Arrow button sized as a square of the current frame height.
+bool ImGui::ArrowButton(const char* str_id, ImGuiDir dir)
+{
+    const float side = GetFrameHeight();
+    const ImVec2 square(side, side);
+    return ArrowButtonEx(str_id, dir, square, ImGuiButtonFlags_None);
+}
+
+// Button to close a window
+bool ImGui::CloseButton(ImGuiID id, const ImVec2& pos)//, float size)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+
+    // Interaction is deliberately processed even when the item is clipped, so that a mechanical
+    // Alt,Right,Validate sequence still closes a window. Regular buttons do not behave this way, but the
+    // user rarely notices because navigation tends to keep items visible.
+    const ImRect bb(pos, pos + ImVec2(g.FontSize, g.FontSize) + g.Style.FramePadding * 2.0f);
+    const bool item_visible = ItemAdd(bb, id);
+
+    bool is_hovered, is_held;
+    const bool was_pressed = ButtonBehavior(bb, id, &is_hovered, &is_held);
+    if (!item_visible)
+        return was_pressed;
+
+    // Render: background disc while hovered, then the two strokes of the cross.
+    const ImU32 disc_col = GetColorU32(is_held ? ImGuiCol_ButtonActive : ImGuiCol_ButtonHovered);
+    const ImVec2 disc_center = bb.GetCenter();
+    if (is_hovered)
+        window->DrawList->AddCircleFilled(disc_center, ImMax(2.0f, g.FontSize * 0.5f + 1.0f), disc_col, 12);
+
+    const float half_arm = g.FontSize * 0.5f * 0.7071f - 1.0f;
+    const ImU32 stroke_col = GetColorU32(ImGuiCol_Text);
+    const ImVec2 cross_center = disc_center - ImVec2(0.5f, 0.5f);
+    window->DrawList->AddLine(cross_center + ImVec2(+half_arm, +half_arm), cross_center + ImVec2(-half_arm, -half_arm), stroke_col, 1.0f);
+    window->DrawList->AddLine(cross_center + ImVec2(+half_arm, -half_arm), cross_center + ImVec2(-half_arm, +half_arm), stroke_col, 1.0f);
+
+    return was_pressed;
+}
+
+// Button toggling the collapsed state of a window; dragging it starts moving the window instead.
+bool ImGui::CollapseButton(ImGuiID id, const ImVec2& pos)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+
+    ImRect bb(pos, pos + ImVec2(g.FontSize, g.FontSize) + g.Style.FramePadding * 2.0f);
+    ItemAdd(bb, id);
+    bool is_hovered, is_held;
+    const bool was_pressed = ButtonBehavior(bb, id, &is_hovered, &is_held, ImGuiButtonFlags_None);
+
+    // Render: optional highlight disc, then an arrow whose direction reflects the collapsed state.
+    ImGuiCol disc_col_idx = ImGuiCol_Button;
+    if (is_hovered)
+        disc_col_idx = is_held ? ImGuiCol_ButtonActive : ImGuiCol_ButtonHovered;
+    const ImU32 disc_col = GetColorU32(disc_col_idx);
+    const ImU32 arrow_col = GetColorU32(ImGuiCol_Text);
+    const ImVec2 disc_center = bb.GetCenter();
+    if (is_hovered || is_held)
+        window->DrawList->AddCircleFilled(disc_center/*+ ImVec2(0.0f, -0.5f)*/, g.FontSize * 0.5f + 1.0f, disc_col, 12);
+    const ImGuiDir arrow_dir = window->Collapsed ? ImGuiDir_Right : ImGuiDir_Down;
+    RenderArrow(window->DrawList, bb.Min + g.Style.FramePadding, arrow_col, arrow_dir, 1.0f);
+
+    // Once the mouse has moved past the initial drag threshold, hand over to window moving.
+    if (IsItemActive() && IsMouseDragging(0))
+        StartMouseMovingWindow(window);
+
+    return was_pressed;
+}
+
+// ID of the window's scrollbar for the given axis (obtained without keeping the ID alive).
+ImGuiID ImGui::GetWindowScrollbarID(ImGuiWindow* window, ImGuiAxis axis)
+{
+    const char* scrollbar_name = "#SCROLLY";
+    if (axis == ImGuiAxis_X)
+        scrollbar_name = "#SCROLLX";
+    return window->GetIDNoKeepAlive(scrollbar_name);
+}
+
+// Return scrollbar rectangle, must only be called for corresponding axis if window->ScrollbarX/Y is set.
+ImRect ImGui::GetWindowScrollbarRect(ImGuiWindow* window, ImGuiAxis axis) +{ + const ImRect outer_rect = window->Rect(); + const ImRect inner_rect = window->InnerRect; + const float border_size = window->WindowBorderSize; + const float scrollbar_size = window->ScrollbarSizes[axis ^ 1]; // (ScrollbarSizes.x = width of Y scrollbar; ScrollbarSizes.y = height of X scrollbar) + IM_ASSERT(scrollbar_size > 0.0f); + if (axis == ImGuiAxis_X) + return ImRect(inner_rect.Min.x, ImMax(outer_rect.Min.y, outer_rect.Max.y - border_size - scrollbar_size), inner_rect.Max.x, outer_rect.Max.y); + else + return ImRect(ImMax(outer_rect.Min.x, outer_rect.Max.x - border_size - scrollbar_size), inner_rect.Min.y, outer_rect.Max.x, inner_rect.Max.y); +} + +void ImGui::Scrollbar(ImGuiAxis axis) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + + const ImGuiID id = GetWindowScrollbarID(window, axis); + KeepAliveID(id); + + // Calculate scrollbar bounding box + ImRect bb = GetWindowScrollbarRect(window, axis); + ImDrawCornerFlags rounding_corners = 0; + if (axis == ImGuiAxis_X) + { + rounding_corners |= ImDrawCornerFlags_BotLeft; + if (!window->ScrollbarY) + rounding_corners |= ImDrawCornerFlags_BotRight; + } + else + { + if ((window->Flags & ImGuiWindowFlags_NoTitleBar) && !(window->Flags & ImGuiWindowFlags_MenuBar)) + rounding_corners |= ImDrawCornerFlags_TopRight; + if (!window->ScrollbarX) + rounding_corners |= ImDrawCornerFlags_BotRight; + } + float size_avail = window->InnerRect.Max[axis] - window->InnerRect.Min[axis]; + float size_contents = window->ContentSize[axis] + window->WindowPadding[axis] * 2.0f; + ScrollbarEx(bb, id, axis, &window->Scroll[axis], size_avail, size_contents, rounding_corners); +} + +// Vertical/Horizontal scrollbar +// The entire piece of code below is rather confusing because: +// - We handle absolute seeking (when first clicking outside the grab) and relative manipulation (afterward or when clicking inside the grab) +// - We store values 
as normalized ratio and in a form that allows the window content to change while we are holding on a scrollbar +// - We handle both horizontal and vertical scrollbars, which makes the terminology not ideal. +// Still, the code should probably be made simpler.. +bool ImGui::ScrollbarEx(const ImRect& bb_frame, ImGuiID id, ImGuiAxis axis, float* p_scroll_v, float size_avail_v, float size_contents_v, ImDrawCornerFlags rounding_corners) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (window->SkipItems) + return false; + + const float bb_frame_width = bb_frame.GetWidth(); + const float bb_frame_height = bb_frame.GetHeight(); + if (bb_frame_width <= 0.0f || bb_frame_height <= 0.0f) + return false; + + // When we are too small, start hiding and disabling the grab (this reduce visual noise on very small window and facilitate using the window resize grab) + float alpha = 1.0f; + if ((axis == ImGuiAxis_Y) && bb_frame_height < g.FontSize + g.Style.FramePadding.y * 2.0f) + alpha = ImSaturate((bb_frame_height - g.FontSize) / (g.Style.FramePadding.y * 2.0f)); + if (alpha <= 0.0f) + return false; + + const ImGuiStyle& style = g.Style; + const bool allow_interaction = (alpha >= 1.0f); + + ImRect bb = bb_frame; + bb.Expand(ImVec2(-ImClamp(IM_FLOOR((bb_frame_width - 2.0f) * 0.5f), 0.0f, 3.0f), -ImClamp(IM_FLOOR((bb_frame_height - 2.0f) * 0.5f), 0.0f, 3.0f))); + + // V denote the main, longer axis of the scrollbar (= height for a vertical scrollbar) + const float scrollbar_size_v = (axis == ImGuiAxis_X) ? bb.GetWidth() : bb.GetHeight(); + + // Calculate the height of our grabbable box. It generally represent the amount visible (vs the total scrollable amount) + // But we maintain a minimum size in pixel to allow for the user to still aim inside. + IM_ASSERT(ImMax(size_contents_v, size_avail_v) > 0.0f); // Adding this assert to check if the ImMax(XXX,1.0f) is still needed. PLEASE CONTACT ME if this triggers. 
+ const float win_size_v = ImMax(ImMax(size_contents_v, size_avail_v), 1.0f); + const float grab_h_pixels = ImClamp(scrollbar_size_v * (size_avail_v / win_size_v), style.GrabMinSize, scrollbar_size_v); + const float grab_h_norm = grab_h_pixels / scrollbar_size_v; + + // Handle input right away. None of the code of Begin() is relying on scrolling position before calling Scrollbar(). + bool held = false; + bool hovered = false; + ButtonBehavior(bb, id, &hovered, &held, ImGuiButtonFlags_NoNavFocus); + + float scroll_max = ImMax(1.0f, size_contents_v - size_avail_v); + float scroll_ratio = ImSaturate(*p_scroll_v / scroll_max); + float grab_v_norm = scroll_ratio * (scrollbar_size_v - grab_h_pixels) / scrollbar_size_v; // Grab position in normalized space + if (held && allow_interaction && grab_h_norm < 1.0f) + { + float scrollbar_pos_v = bb.Min[axis]; + float mouse_pos_v = g.IO.MousePos[axis]; + + // Click position in scrollbar normalized space (0.0f->1.0f) + const float clicked_v_norm = ImSaturate((mouse_pos_v - scrollbar_pos_v) / scrollbar_size_v); + SetHoveredID(id); + + bool seek_absolute = false; + if (g.ActiveIdIsJustActivated) + { + // On initial click calculate the distance between mouse and the center of the grab + seek_absolute = (clicked_v_norm < grab_v_norm || clicked_v_norm > grab_v_norm + grab_h_norm); + if (seek_absolute) + g.ScrollbarClickDeltaToGrabCenter = 0.0f; + else + g.ScrollbarClickDeltaToGrabCenter = clicked_v_norm - grab_v_norm - grab_h_norm * 0.5f; + } + + // Apply scroll (p_scroll_v will generally point on one member of window->Scroll) + // It is ok to modify Scroll here because we are being called in Begin() after the calculation of ContentSize and before setting up our starting position + const float scroll_v_norm = ImSaturate((clicked_v_norm - g.ScrollbarClickDeltaToGrabCenter - grab_h_norm * 0.5f) / (1.0f - grab_h_norm)); + *p_scroll_v = IM_ROUND(scroll_v_norm * scroll_max);//(win_size_contents_v - win_size_v)); + + // Update values for 
rendering + scroll_ratio = ImSaturate(*p_scroll_v / scroll_max); + grab_v_norm = scroll_ratio * (scrollbar_size_v - grab_h_pixels) / scrollbar_size_v; + + // Update distance to grab now that we have seeked and saturated + if (seek_absolute) + g.ScrollbarClickDeltaToGrabCenter = clicked_v_norm - grab_v_norm - grab_h_norm * 0.5f; + } + + // Render + const ImU32 bg_col = GetColorU32(ImGuiCol_ScrollbarBg); + const ImU32 grab_col = GetColorU32(held ? ImGuiCol_ScrollbarGrabActive : hovered ? ImGuiCol_ScrollbarGrabHovered : ImGuiCol_ScrollbarGrab, alpha); + window->DrawList->AddRectFilled(bb_frame.Min, bb_frame.Max, bg_col, window->WindowRounding, rounding_corners); + ImRect grab_rect; + if (axis == ImGuiAxis_X) + grab_rect = ImRect(ImLerp(bb.Min.x, bb.Max.x, grab_v_norm), bb.Min.y, ImLerp(bb.Min.x, bb.Max.x, grab_v_norm) + grab_h_pixels, bb.Max.y); + else + grab_rect = ImRect(bb.Min.x, ImLerp(bb.Min.y, bb.Max.y, grab_v_norm), bb.Max.x, ImLerp(bb.Min.y, bb.Max.y, grab_v_norm) + grab_h_pixels); + window->DrawList->AddRectFilled(grab_rect.Min, grab_rect.Max, grab_col, style.ScrollbarRounding); + + return held; +} + +void ImGui::Image(ImTextureID user_texture_id, const ImVec2& size, const ImVec2& uv0, const ImVec2& uv1, const ImVec4& tint_col, const ImVec4& border_col) +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return; + + ImRect bb(window->DC.CursorPos, window->DC.CursorPos + size); + if (border_col.w > 0.0f) + bb.Max += ImVec2(2, 2); + ItemSize(bb); + if (!ItemAdd(bb, 0)) + return; + + if (border_col.w > 0.0f) + { + window->DrawList->AddRect(bb.Min, bb.Max, GetColorU32(border_col), 0.0f); + window->DrawList->AddImage(user_texture_id, bb.Min + ImVec2(1, 1), bb.Max - ImVec2(1, 1), uv0, uv1, GetColorU32(tint_col)); + } + else + { + window->DrawList->AddImage(user_texture_id, bb.Min, bb.Max, uv0, uv1, GetColorU32(tint_col)); + } +} + +// ImageButton() is flawed as 'id' is always derived from 'texture_id' (see #2464 #1390) +// We provide this 
internal helper to write your own variant while we figure out how to redesign the public ImageButton() API. +bool ImGui::ImageButtonEx(ImGuiID id, ImTextureID texture_id, const ImVec2& size, const ImVec2& uv0, const ImVec2& uv1, const ImVec2& padding, const ImVec4& bg_col, const ImVec4& tint_col) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return false; + + const ImRect bb(window->DC.CursorPos, window->DC.CursorPos + size + padding * 2); + ItemSize(bb); + if (!ItemAdd(bb, id)) + return false; + + bool hovered, held; + bool pressed = ButtonBehavior(bb, id, &hovered, &held); + + // Render + const ImU32 col = GetColorU32((held && hovered) ? ImGuiCol_ButtonActive : hovered ? ImGuiCol_ButtonHovered : ImGuiCol_Button); + RenderNavHighlight(bb, id); + RenderFrame(bb.Min, bb.Max, col, true, ImClamp((float)ImMin(padding.x, padding.y), 0.0f, g.Style.FrameRounding)); + if (bg_col.w > 0.0f) + window->DrawList->AddRectFilled(bb.Min + padding, bb.Max - padding, GetColorU32(bg_col)); + window->DrawList->AddImage(texture_id, bb.Min + padding, bb.Max - padding, uv0, uv1, GetColorU32(tint_col)); + + return pressed; +} + +// frame_padding < 0: uses FramePadding from style (default) +// frame_padding = 0: no framing +// frame_padding > 0: set framing size +bool ImGui::ImageButton(ImTextureID user_texture_id, const ImVec2& size, const ImVec2& uv0, const ImVec2& uv1, int frame_padding, const ImVec4& bg_col, const ImVec4& tint_col) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (window->SkipItems) + return false; + + // Default to using texture ID as ID. User can still push string/integer prefixes. + PushID((void*)(intptr_t)user_texture_id); + const ImGuiID id = window->GetID("#image"); + PopID(); + + const ImVec2 padding = (frame_padding >= 0) ? 
ImVec2((float)frame_padding, (float)frame_padding) : g.Style.FramePadding; + return ImageButtonEx(id, user_texture_id, size, uv0, uv1, padding, bg_col, tint_col); +} + +bool ImGui::Checkbox(const char* label, bool* v) +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return false; + + ImGuiContext& g = *GImGui; + const ImGuiStyle& style = g.Style; + const ImGuiID id = window->GetID(label); + const ImVec2 label_size = CalcTextSize(label, NULL, true); + + const float square_sz = GetFrameHeight(); + const ImVec2 pos = window->DC.CursorPos; + const ImRect total_bb(pos, pos + ImVec2(square_sz + (label_size.x > 0.0f ? style.ItemInnerSpacing.x + label_size.x : 0.0f), label_size.y + style.FramePadding.y * 2.0f)); + ItemSize(total_bb, style.FramePadding.y); + if (!ItemAdd(total_bb, id)) + { + IMGUI_TEST_ENGINE_ITEM_INFO(id, label, window->DC.ItemFlags | ImGuiItemStatusFlags_Checkable | (*v ? ImGuiItemStatusFlags_Checked : 0)); + return false; + } + + bool hovered, held; + bool pressed = ButtonBehavior(total_bb, id, &hovered, &held); + if (pressed) + { + *v = !(*v); + MarkItemEdited(id); + } + + const ImRect check_bb(pos, pos + ImVec2(square_sz, square_sz)); + RenderNavHighlight(total_bb, id); + RenderFrame(check_bb.Min, check_bb.Max, GetColorU32((held && hovered) ? ImGuiCol_FrameBgActive : hovered ? 
ImGuiCol_FrameBgHovered : ImGuiCol_FrameBg), true, style.FrameRounding); + ImU32 check_col = GetColorU32(ImGuiCol_CheckMark); + bool mixed_value = (window->DC.ItemFlags & ImGuiItemFlags_MixedValue) != 0; + if (mixed_value) + { + // Undocumented tristate/mixed/indeterminate checkbox (#2644) + // This may seem awkwardly designed because the aim is to make ImGuiItemFlags_MixedValue supported by all widgets (not just checkbox) + ImVec2 pad(ImMax(1.0f, IM_FLOOR(square_sz / 3.6f)), ImMax(1.0f, IM_FLOOR(square_sz / 3.6f))); + window->DrawList->AddRectFilled(check_bb.Min + pad, check_bb.Max - pad, check_col, style.FrameRounding); + } + else if (*v) + { + const float pad = ImMax(1.0f, IM_FLOOR(square_sz / 6.0f)); + RenderCheckMark(window->DrawList, check_bb.Min + ImVec2(pad, pad), check_col, square_sz - pad * 2.0f); + } + + if (g.LogEnabled) + LogRenderedText(&total_bb.Min, mixed_value ? "[~]" : *v ? "[x]" : "[ ]"); + if (label_size.x > 0.0f) + RenderText(ImVec2(check_bb.Max.x + style.ItemInnerSpacing.x, check_bb.Min.y + style.FramePadding.y), label); + + IMGUI_TEST_ENGINE_ITEM_INFO(id, label, window->DC.ItemFlags | ImGuiItemStatusFlags_Checkable | (*v ? 
ImGuiItemStatusFlags_Checked : 0)); + return pressed; +} + +template +bool ImGui::CheckboxFlagsT(const char* label, T* flags, T flags_value) +{ + bool all_on = (*flags & flags_value) == flags_value; + bool any_on = (*flags & flags_value) != 0; + bool pressed; + if (!all_on && any_on) + { + ImGuiWindow* window = GetCurrentWindow(); + ImGuiItemFlags backup_item_flags = window->DC.ItemFlags; + window->DC.ItemFlags |= ImGuiItemFlags_MixedValue; + pressed = Checkbox(label, &all_on); + window->DC.ItemFlags = backup_item_flags; + } + else + { + pressed = Checkbox(label, &all_on); + + } + if (pressed) + { + if (all_on) + *flags |= flags_value; + else + *flags &= ~flags_value; + } + return pressed; +} + +bool ImGui::CheckboxFlags(const char* label, int* flags, int flags_value) +{ + return CheckboxFlagsT(label, flags, flags_value); +} + +bool ImGui::CheckboxFlags(const char* label, unsigned int* flags, unsigned int flags_value) +{ + return CheckboxFlagsT(label, flags, flags_value); +} + +bool ImGui::CheckboxFlags(const char* label, ImS64* flags, ImS64 flags_value) +{ + return CheckboxFlagsT(label, flags, flags_value); +} + +bool ImGui::CheckboxFlags(const char* label, ImU64* flags, ImU64 flags_value) +{ + return CheckboxFlagsT(label, flags, flags_value); +} + +bool ImGui::RadioButton(const char* label, bool active) +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return false; + + ImGuiContext& g = *GImGui; + const ImGuiStyle& style = g.Style; + const ImGuiID id = window->GetID(label); + const ImVec2 label_size = CalcTextSize(label, NULL, true); + + const float square_sz = GetFrameHeight(); + const ImVec2 pos = window->DC.CursorPos; + const ImRect check_bb(pos, pos + ImVec2(square_sz, square_sz)); + const ImRect total_bb(pos, pos + ImVec2(square_sz + (label_size.x > 0.0f ? 
style.ItemInnerSpacing.x + label_size.x : 0.0f), label_size.y + style.FramePadding.y * 2.0f)); + ItemSize(total_bb, style.FramePadding.y); + if (!ItemAdd(total_bb, id)) + return false; + + ImVec2 center = check_bb.GetCenter(); + center.x = IM_ROUND(center.x); + center.y = IM_ROUND(center.y); + const float radius = (square_sz - 1.0f) * 0.5f; + + bool hovered, held; + bool pressed = ButtonBehavior(total_bb, id, &hovered, &held); + if (pressed) + MarkItemEdited(id); + + RenderNavHighlight(total_bb, id); + window->DrawList->AddCircleFilled(center, radius, GetColorU32((held && hovered) ? ImGuiCol_FrameBgActive : hovered ? ImGuiCol_FrameBgHovered : ImGuiCol_FrameBg), 16); + if (active) + { + const float pad = ImMax(1.0f, IM_FLOOR(square_sz / 6.0f)); + window->DrawList->AddCircleFilled(center, radius - pad, GetColorU32(ImGuiCol_CheckMark), 16); + } + + if (style.FrameBorderSize > 0.0f) + { + window->DrawList->AddCircle(center + ImVec2(1, 1), radius, GetColorU32(ImGuiCol_BorderShadow), 16, style.FrameBorderSize); + window->DrawList->AddCircle(center, radius, GetColorU32(ImGuiCol_Border), 16, style.FrameBorderSize); + } + + if (g.LogEnabled) + LogRenderedText(&total_bb.Min, active ? "(x)" : "( )"); + if (label_size.x > 0.0f) + RenderText(ImVec2(check_bb.Max.x + style.ItemInnerSpacing.x, check_bb.Min.y + style.FramePadding.y), label); + + IMGUI_TEST_ENGINE_ITEM_INFO(id, label, window->DC.ItemFlags); + return pressed; +} + +// FIXME: This would work nicely if it was a public template, e.g. 'template RadioButton(const char* label, T* v, T v_button)', but I'm not sure how we would expose it.. 
+bool ImGui::RadioButton(const char* label, int* v, int v_button) +{ + const bool pressed = RadioButton(label, *v == v_button); + if (pressed) + *v = v_button; + return pressed; +} + +// size_arg (for each axis) < 0.0f: align to end, 0.0f: auto, > 0.0f: specified size +void ImGui::ProgressBar(float fraction, const ImVec2& size_arg, const char* overlay) +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return; + + ImGuiContext& g = *GImGui; + const ImGuiStyle& style = g.Style; + + ImVec2 pos = window->DC.CursorPos; + ImVec2 size = CalcItemSize(size_arg, CalcItemWidth(), g.FontSize + style.FramePadding.y * 2.0f); + ImRect bb(pos, pos + size); + ItemSize(size, style.FramePadding.y); + if (!ItemAdd(bb, 0)) + return; + + // Render + fraction = ImSaturate(fraction); + RenderFrame(bb.Min, bb.Max, GetColorU32(ImGuiCol_FrameBg), true, style.FrameRounding); + bb.Expand(ImVec2(-style.FrameBorderSize, -style.FrameBorderSize)); + const ImVec2 fill_br = ImVec2(ImLerp(bb.Min.x, bb.Max.x, fraction), bb.Max.y); + RenderRectFilledRangeH(window->DrawList, bb, GetColorU32(ImGuiCol_PlotHistogram), 0.0f, fraction, style.FrameRounding); + + // Default displaying the fraction as percentage string, but user can override it + char overlay_buf[32]; + if (!overlay) + { + ImFormatString(overlay_buf, IM_ARRAYSIZE(overlay_buf), "%.0f%%", fraction * 100 + 0.01f); + overlay = overlay_buf; + } + + ImVec2 overlay_size = CalcTextSize(overlay, NULL); + if (overlay_size.x > 0.0f) + RenderTextClipped(ImVec2(ImClamp(fill_br.x + style.ItemSpacing.x, bb.Min.x, bb.Max.x - overlay_size.x - style.ItemInnerSpacing.x), bb.Min.y), bb.Max, overlay, NULL, &overlay_size, ImVec2(0.0f, 0.5f), &bb); +} + +void ImGui::Bullet() +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return; + + ImGuiContext& g = *GImGui; + const ImGuiStyle& style = g.Style; + const float line_height = ImMax(ImMin(window->DC.CurrLineSize.y, g.FontSize + g.Style.FramePadding.y * 2), g.FontSize); + 
const ImRect bb(window->DC.CursorPos, window->DC.CursorPos + ImVec2(g.FontSize, line_height)); + ItemSize(bb); + if (!ItemAdd(bb, 0)) + { + SameLine(0, style.FramePadding.x * 2); + return; + } + + // Render and stay on same line + ImU32 text_col = GetColorU32(ImGuiCol_Text); + RenderBullet(window->DrawList, bb.Min + ImVec2(style.FramePadding.x + g.FontSize * 0.5f, line_height * 0.5f), text_col); + SameLine(0, style.FramePadding.x * 2.0f); +} + +//------------------------------------------------------------------------- +// [SECTION] Widgets: Low-level Layout helpers +//------------------------------------------------------------------------- +// - Spacing() +// - Dummy() +// - NewLine() +// - AlignTextToFramePadding() +// - SeparatorEx() [Internal] +// - Separator() +// - SplitterBehavior() [Internal] +// - ShrinkWidths() [Internal] +//------------------------------------------------------------------------- + +void ImGui::Spacing() +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return; + ItemSize(ImVec2(0, 0)); +} + +void ImGui::Dummy(const ImVec2& size) +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return; + + const ImRect bb(window->DC.CursorPos, window->DC.CursorPos + size); + ItemSize(size); + ItemAdd(bb, 0); +} + +void ImGui::NewLine() +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return; + + ImGuiContext& g = *GImGui; + const ImGuiLayoutType backup_layout_type = window->DC.LayoutType; + window->DC.LayoutType = ImGuiLayoutType_Vertical; + if (window->DC.CurrLineSize.y > 0.0f) // In the event that we are on a line with items that is smaller that FontSize high, we will preserve its height. 
+ ItemSize(ImVec2(0, 0)); + else + ItemSize(ImVec2(0.0f, g.FontSize)); + window->DC.LayoutType = backup_layout_type; +} + +void ImGui::AlignTextToFramePadding() +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return; + + ImGuiContext& g = *GImGui; + window->DC.CurrLineSize.y = ImMax(window->DC.CurrLineSize.y, g.FontSize + g.Style.FramePadding.y * 2); + window->DC.CurrLineTextBaseOffset = ImMax(window->DC.CurrLineTextBaseOffset, g.Style.FramePadding.y); +} + +// Horizontal/vertical separating line +void ImGui::SeparatorEx(ImGuiSeparatorFlags flags) +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return; + + ImGuiContext& g = *GImGui; + IM_ASSERT(ImIsPowerOfTwo(flags & (ImGuiSeparatorFlags_Horizontal | ImGuiSeparatorFlags_Vertical))); // Check that only 1 option is selected + + float thickness_draw = 1.0f; + float thickness_layout = 0.0f; + if (flags & ImGuiSeparatorFlags_Vertical) + { + // Vertical separator, for menu bars (use current line height). Not exposed because it is misleading and it doesn't have an effect on regular layout. 
+ float y1 = window->DC.CursorPos.y; + float y2 = window->DC.CursorPos.y + window->DC.CurrLineSize.y; + const ImRect bb(ImVec2(window->DC.CursorPos.x, y1), ImVec2(window->DC.CursorPos.x + thickness_draw, y2)); + ItemSize(ImVec2(thickness_layout, 0.0f)); + if (!ItemAdd(bb, 0)) + return; + + // Draw + window->DrawList->AddLine(ImVec2(bb.Min.x, bb.Min.y), ImVec2(bb.Min.x, bb.Max.y), GetColorU32(ImGuiCol_Separator)); + if (g.LogEnabled) + LogText(" |"); + } + else if (flags & ImGuiSeparatorFlags_Horizontal) + { + // Horizontal Separator + float x1 = window->Pos.x; + float x2 = window->Pos.x + window->Size.x; + + // FIXME-WORKRECT: old hack (#205) until we decide of consistent behavior with WorkRect/Indent and Separator + if (g.GroupStack.Size > 0 && g.GroupStack.back().WindowID == window->ID) + x1 += window->DC.Indent.x; + + ImGuiOldColumns* columns = (flags & ImGuiSeparatorFlags_SpanAllColumns) ? window->DC.CurrentColumns : NULL; + if (columns) + PushColumnsBackground(); + + // We don't provide our width to the layout so that it doesn't get feed back into AutoFit + const ImRect bb(ImVec2(x1, window->DC.CursorPos.y), ImVec2(x2, window->DC.CursorPos.y + thickness_draw)); + ItemSize(ImVec2(0.0f, thickness_layout)); + const bool item_visible = ItemAdd(bb, 0); + if (item_visible) + { + // Draw + window->DrawList->AddLine(bb.Min, ImVec2(bb.Max.x, bb.Min.y), GetColorU32(ImGuiCol_Separator)); + if (g.LogEnabled) + LogRenderedText(&bb.Min, "--------------------------------"); + } + if (columns) + { + PopColumnsBackground(); + columns->LineMinY = window->DC.CursorPos.y; + } + } +} + +void ImGui::Separator() +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (window->SkipItems) + return; + + // Those flags should eventually be overridable by the user + ImGuiSeparatorFlags flags = (window->DC.LayoutType == ImGuiLayoutType_Horizontal) ? 
ImGuiSeparatorFlags_Vertical : ImGuiSeparatorFlags_Horizontal; + flags |= ImGuiSeparatorFlags_SpanAllColumns; + SeparatorEx(flags); +} + +// Using 'hover_visibility_delay' allows us to hide the highlight and mouse cursor for a short time, which can be convenient to reduce visual noise. +bool ImGui::SplitterBehavior(const ImRect& bb, ImGuiID id, ImGuiAxis axis, float* size1, float* size2, float min_size1, float min_size2, float hover_extend, float hover_visibility_delay) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + + const ImGuiItemFlags item_flags_backup = window->DC.ItemFlags; + window->DC.ItemFlags |= ImGuiItemFlags_NoNav | ImGuiItemFlags_NoNavDefaultFocus; + bool item_add = ItemAdd(bb, id); + window->DC.ItemFlags = item_flags_backup; + if (!item_add) + return false; + + bool hovered, held; + ImRect bb_interact = bb; + bb_interact.Expand(axis == ImGuiAxis_Y ? ImVec2(0.0f, hover_extend) : ImVec2(hover_extend, 0.0f)); + ButtonBehavior(bb_interact, id, &hovered, &held, ImGuiButtonFlags_FlattenChildren | ImGuiButtonFlags_AllowItemOverlap); + if (g.ActiveId != id) + SetItemAllowOverlap(); + + if (held || (g.HoveredId == id && g.HoveredIdPreviousFrame == id && g.HoveredIdTimer >= hover_visibility_delay)) + SetMouseCursor(axis == ImGuiAxis_Y ? ImGuiMouseCursor_ResizeNS : ImGuiMouseCursor_ResizeEW); + + ImRect bb_render = bb; + if (held) + { + ImVec2 mouse_delta_2d = g.IO.MousePos - g.ActiveIdClickOffset - bb_interact.Min; + float mouse_delta = (axis == ImGuiAxis_Y) ? 
mouse_delta_2d.y : mouse_delta_2d.x; + + // Minimum pane size + float size_1_maximum_delta = ImMax(0.0f, *size1 - min_size1); + float size_2_maximum_delta = ImMax(0.0f, *size2 - min_size2); + if (mouse_delta < -size_1_maximum_delta) + mouse_delta = -size_1_maximum_delta; + if (mouse_delta > size_2_maximum_delta) + mouse_delta = size_2_maximum_delta; + + // Apply resize + if (mouse_delta != 0.0f) + { + if (mouse_delta < 0.0f) + IM_ASSERT(*size1 + mouse_delta >= min_size1); + if (mouse_delta > 0.0f) + IM_ASSERT(*size2 - mouse_delta >= min_size2); + *size1 += mouse_delta; + *size2 -= mouse_delta; + bb_render.Translate((axis == ImGuiAxis_X) ? ImVec2(mouse_delta, 0.0f) : ImVec2(0.0f, mouse_delta)); + MarkItemEdited(id); + } + } + + // Render + const ImU32 col = GetColorU32(held ? ImGuiCol_SeparatorActive : (hovered && g.HoveredIdTimer >= hover_visibility_delay) ? ImGuiCol_SeparatorHovered : ImGuiCol_Separator); + window->DrawList->AddRectFilled(bb_render.Min, bb_render.Max, col, 0.0f); + + return held; +} + +static int IMGUI_CDECL ShrinkWidthItemComparer(const void* lhs, const void* rhs) +{ + const ImGuiShrinkWidthItem* a = (const ImGuiShrinkWidthItem*)lhs; + const ImGuiShrinkWidthItem* b = (const ImGuiShrinkWidthItem*)rhs; + if (int d = (int)(b->Width - a->Width)) + return d; + return (b->Index - a->Index); +} + +// Shrink excess width from a set of item, by removing width from the larger items first. +// Set items Width to -1.0f to disable shrinking this item. 
+void ImGui::ShrinkWidths(ImGuiShrinkWidthItem* items, int count, float width_excess) +{ + if (count == 1) + { + if (items[0].Width >= 0.0f) + items[0].Width = ImMax(items[0].Width - width_excess, 1.0f); + return; + } + ImQsort(items, (size_t)count, sizeof(ImGuiShrinkWidthItem), ShrinkWidthItemComparer); + int count_same_width = 1; + while (width_excess > 0.0f && count_same_width < count) + { + while (count_same_width < count && items[0].Width <= items[count_same_width].Width) + count_same_width++; + float max_width_to_remove_per_item = (count_same_width < count && items[count_same_width].Width >= 0.0f) ? (items[0].Width - items[count_same_width].Width) : (items[0].Width - 1.0f); + if (max_width_to_remove_per_item <= 0.0f) + break; + float width_to_remove_per_item = ImMin(width_excess / count_same_width, max_width_to_remove_per_item); + for (int item_n = 0; item_n < count_same_width; item_n++) + items[item_n].Width -= width_to_remove_per_item; + width_excess -= width_to_remove_per_item * count_same_width; + } + + // Round width and redistribute remainder left-to-right (could make it an option of the function?) + // Ensure that e.g. the right-most tab of a shrunk tab-bar always reaches exactly at the same distance from the right-most edge of the tab bar separator. 
+ width_excess = 0.0f; + for (int n = 0; n < count; n++) + { + float width_rounded = ImFloor(items[n].Width); + width_excess += items[n].Width - width_rounded; + items[n].Width = width_rounded; + } + if (width_excess > 0.0f) + for (int n = 0; n < count; n++) + if (items[n].Index < (int)(width_excess + 0.01f)) + items[n].Width += 1.0f; +} + +//------------------------------------------------------------------------- +// [SECTION] Widgets: ComboBox +//------------------------------------------------------------------------- +// - BeginCombo() +// - EndCombo() +// - Combo() +//------------------------------------------------------------------------- + +static float CalcMaxPopupHeightFromItemCount(int items_count) +{ + ImGuiContext& g = *GImGui; + if (items_count <= 0) + return FLT_MAX; + return (g.FontSize + g.Style.ItemSpacing.y) * items_count - g.Style.ItemSpacing.y + (g.Style.WindowPadding.y * 2); +} + +bool ImGui::BeginCombo(const char* label, const char* preview_value, ImGuiComboFlags flags) +{ + // Always consume the SetNextWindowSizeConstraint() call in our early return paths + ImGuiContext& g = *GImGui; + bool has_window_size_constraint = (g.NextWindowData.Flags & ImGuiNextWindowDataFlags_HasSizeConstraint) != 0; + g.NextWindowData.Flags &= ~ImGuiNextWindowDataFlags_HasSizeConstraint; + + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return false; + + IM_ASSERT((flags & (ImGuiComboFlags_NoArrowButton | ImGuiComboFlags_NoPreview)) != (ImGuiComboFlags_NoArrowButton | ImGuiComboFlags_NoPreview)); // Can't use both flags together + + const ImGuiStyle& style = g.Style; + const ImGuiID id = window->GetID(label); + + const float arrow_size = (flags & ImGuiComboFlags_NoArrowButton) ? 0.0f : GetFrameHeight(); + const ImVec2 label_size = CalcTextSize(label, NULL, true); + const float expected_w = CalcItemWidth(); + const float w = (flags & ImGuiComboFlags_NoPreview) ? 
arrow_size : expected_w; + const ImRect frame_bb(window->DC.CursorPos, window->DC.CursorPos + ImVec2(w, label_size.y + style.FramePadding.y * 2.0f)); + const ImRect total_bb(frame_bb.Min, frame_bb.Max + ImVec2(label_size.x > 0.0f ? style.ItemInnerSpacing.x + label_size.x : 0.0f, 0.0f)); + ItemSize(total_bb, style.FramePadding.y); + if (!ItemAdd(total_bb, id, &frame_bb)) + return false; + + bool hovered, held; + bool pressed = ButtonBehavior(frame_bb, id, &hovered, &held); + bool popup_open = IsPopupOpen(id, ImGuiPopupFlags_None); + + const ImU32 frame_col = GetColorU32(hovered ? ImGuiCol_FrameBgHovered : ImGuiCol_FrameBg); + const float value_x2 = ImMax(frame_bb.Min.x, frame_bb.Max.x - arrow_size); + RenderNavHighlight(frame_bb, id); + if (!(flags & ImGuiComboFlags_NoPreview)) + window->DrawList->AddRectFilled(frame_bb.Min, ImVec2(value_x2, frame_bb.Max.y), frame_col, style.FrameRounding, (flags & ImGuiComboFlags_NoArrowButton) ? ImDrawCornerFlags_All : ImDrawCornerFlags_Left); + if (!(flags & ImGuiComboFlags_NoArrowButton)) + { + ImU32 bg_col = GetColorU32((popup_open || hovered) ? ImGuiCol_ButtonHovered : ImGuiCol_Button); + ImU32 text_col = GetColorU32(ImGuiCol_Text); + window->DrawList->AddRectFilled(ImVec2(value_x2, frame_bb.Min.y), frame_bb.Max, bg_col, style.FrameRounding, (w <= arrow_size) ? 
ImDrawCornerFlags_All : ImDrawCornerFlags_Right); + if (value_x2 + arrow_size - style.FramePadding.x <= frame_bb.Max.x) + RenderArrow(window->DrawList, ImVec2(value_x2 + style.FramePadding.y, frame_bb.Min.y + style.FramePadding.y), text_col, ImGuiDir_Down, 1.0f); + } + RenderFrameBorder(frame_bb.Min, frame_bb.Max, style.FrameRounding); + if (preview_value != NULL && !(flags & ImGuiComboFlags_NoPreview)) + RenderTextClipped(frame_bb.Min + style.FramePadding, ImVec2(value_x2, frame_bb.Max.y), preview_value, NULL, NULL, ImVec2(0.0f, 0.0f)); + if (label_size.x > 0) + RenderText(ImVec2(frame_bb.Max.x + style.ItemInnerSpacing.x, frame_bb.Min.y + style.FramePadding.y), label); + + if ((pressed || g.NavActivateId == id) && !popup_open) + { + if (window->DC.NavLayerCurrent == 0) + window->NavLastIds[0] = id; + OpenPopupEx(id, ImGuiPopupFlags_None); + popup_open = true; + } + + if (!popup_open) + return false; + + if (has_window_size_constraint) + { + g.NextWindowData.Flags |= ImGuiNextWindowDataFlags_HasSizeConstraint; + g.NextWindowData.SizeConstraintRect.Min.x = ImMax(g.NextWindowData.SizeConstraintRect.Min.x, w); + } + else + { + if ((flags & ImGuiComboFlags_HeightMask_) == 0) + flags |= ImGuiComboFlags_HeightRegular; + IM_ASSERT(ImIsPowerOfTwo(flags & ImGuiComboFlags_HeightMask_)); // Only one + int popup_max_height_in_items = -1; + if (flags & ImGuiComboFlags_HeightRegular) popup_max_height_in_items = 8; + else if (flags & ImGuiComboFlags_HeightSmall) popup_max_height_in_items = 4; + else if (flags & ImGuiComboFlags_HeightLarge) popup_max_height_in_items = 20; + SetNextWindowSizeConstraints(ImVec2(w, 0.0f), ImVec2(FLT_MAX, CalcMaxPopupHeightFromItemCount(popup_max_height_in_items))); + } + + char name[16]; + ImFormatString(name, IM_ARRAYSIZE(name), "##Combo_%02d", g.BeginPopupStack.Size); // Recycle windows based on depth + + // Position the window given a custom constraint (peak into expected window size so we can position it) + // This might be easier to express with 
an hypothetical SetNextWindowPosConstraints() function. + if (ImGuiWindow* popup_window = FindWindowByName(name)) + if (popup_window->WasActive) + { + // Always override 'AutoPosLastDirection' to not leave a chance for a past value to affect us. + ImVec2 size_expected = CalcWindowNextAutoFitSize(popup_window); + if (flags & ImGuiComboFlags_PopupAlignLeft) + popup_window->AutoPosLastDirection = ImGuiDir_Left; // "Below, Toward Left" + else + popup_window->AutoPosLastDirection = ImGuiDir_Down; // "Below, Toward Right (default)" + ImRect r_outer = GetWindowAllowedExtentRect(popup_window); + ImVec2 pos = FindBestWindowPosForPopupEx(frame_bb.GetBL(), size_expected, &popup_window->AutoPosLastDirection, r_outer, frame_bb, ImGuiPopupPositionPolicy_ComboBox); + SetNextWindowPos(pos); + } + + // We don't use BeginPopupEx() solely because we have a custom name string, which we could make an argument to BeginPopupEx() + ImGuiWindowFlags window_flags = ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_Popup | ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoMove; + + // Horizontally align ourselves with the framed text + PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(style.FramePadding.x, style.WindowPadding.y)); + bool ret = Begin(name, NULL, window_flags); + PopStyleVar(); + if (!ret) + { + EndPopup(); + IM_ASSERT(0); // This should never happen as we tested for IsPopupOpen() above + return false; + } + return true; +} + +void ImGui::EndCombo() +{ + EndPopup(); +} + +// Getter for the old Combo() API: const char*[] +static bool Items_ArrayGetter(void* data, int idx, const char** out_text) +{ + const char* const* items = (const char* const*)data; + if (out_text) + *out_text = items[idx]; + return true; +} + +// Getter for the old Combo() API: "item1\0item2\0item3\0" +static bool Items_SingleStringGetter(void* data, int idx, const char** out_text) +{ + // FIXME-OPT: we could pre-compute the indices to fasten 
this. But only 1 active combo means the waste is limited. + const char* items_separated_by_zeros = (const char*)data; + int items_count = 0; + const char* p = items_separated_by_zeros; + while (*p) + { + if (idx == items_count) + break; + p += strlen(p) + 1; + items_count++; + } + if (!*p) + return false; + if (out_text) + *out_text = p; + return true; +} + +// Old API, prefer using BeginCombo() nowadays if you can. +bool ImGui::Combo(const char* label, int* current_item, bool (*items_getter)(void*, int, const char**), void* data, int items_count, int popup_max_height_in_items) +{ + ImGuiContext& g = *GImGui; + + // Call the getter to obtain the preview string which is a parameter to BeginCombo() + const char* preview_value = NULL; + if (*current_item >= 0 && *current_item < items_count) + items_getter(data, *current_item, &preview_value); + + // The old Combo() API exposed "popup_max_height_in_items". The new more general BeginCombo() API doesn't have/need it, but we emulate it here. + if (popup_max_height_in_items != -1 && !(g.NextWindowData.Flags & ImGuiNextWindowDataFlags_HasSizeConstraint)) + SetNextWindowSizeConstraints(ImVec2(0, 0), ImVec2(FLT_MAX, CalcMaxPopupHeightFromItemCount(popup_max_height_in_items))); + + if (!BeginCombo(label, preview_value, ImGuiComboFlags_None)) + return false; + + // Display items + // FIXME-OPT: Use clipper (but we need to disable it on the appearing frame to make sure our call to SetItemDefaultFocus() is processed) + bool value_changed = false; + for (int i = 0; i < items_count; i++) + { + PushID((void*)(intptr_t)i); + const bool item_selected = (i == *current_item); + const char* item_text; + if (!items_getter(data, i, &item_text)) + item_text = "*Unknown item*"; + if (Selectable(item_text, item_selected)) + { + value_changed = true; + *current_item = i; + } + if (item_selected) + SetItemDefaultFocus(); + PopID(); + } + + EndCombo(); + if (value_changed) + MarkItemEdited(g.CurrentWindow->DC.LastItemId); + + return 
value_changed; +} + +// Combo box helper allowing to pass an array of strings. +bool ImGui::Combo(const char* label, int* current_item, const char* const items[], int items_count, int height_in_items) +{ + const bool value_changed = Combo(label, current_item, Items_ArrayGetter, (void*)items, items_count, height_in_items); + return value_changed; +} + +// Combo box helper allowing to pass all items in a single string literal holding multiple zero-terminated items "item1\0item2\0" +bool ImGui::Combo(const char* label, int* current_item, const char* items_separated_by_zeros, int height_in_items) +{ + int items_count = 0; + const char* p = items_separated_by_zeros; // FIXME-OPT: Avoid computing this, or at least only when combo is open + while (*p) + { + p += strlen(p) + 1; + items_count++; + } + bool value_changed = Combo(label, current_item, Items_SingleStringGetter, (void*)items_separated_by_zeros, items_count, height_in_items); + return value_changed; +} + +//------------------------------------------------------------------------- +// [SECTION] Data Type and Data Formatting Helpers [Internal] +//------------------------------------------------------------------------- +// - PatchFormatStringFloatToInt() +// - DataTypeGetInfo() +// - DataTypeFormatString() +// - DataTypeApplyOp() +// - DataTypeApplyOpFromText() +// - DataTypeClamp() +// - GetMinimumStepAtDecimalPrecision +// - RoundScalarWithFormat<>() +//------------------------------------------------------------------------- + +static const ImGuiDataTypeInfo GDataTypeInfo[] = +{ + { sizeof(char), "S8", "%d", "%d" }, // ImGuiDataType_S8 + { sizeof(unsigned char), "U8", "%u", "%u" }, + { sizeof(short), "S16", "%d", "%d" }, // ImGuiDataType_S16 + { sizeof(unsigned short), "U16", "%u", "%u" }, + { sizeof(int), "S32", "%d", "%d" }, // ImGuiDataType_S32 + { sizeof(unsigned int), "U32", "%u", "%u" }, +#ifdef _MSC_VER + { sizeof(ImS64), "S64", "%I64d","%I64d" }, // ImGuiDataType_S64 + { sizeof(ImU64), "U64", 
"%I64u","%I64u" }, +#else + { sizeof(ImS64), "S64", "%lld", "%lld" }, // ImGuiDataType_S64 + { sizeof(ImU64), "U64", "%llu", "%llu" }, +#endif + { sizeof(float), "float", "%f", "%f" }, // ImGuiDataType_Float (float are promoted to double in va_arg) + { sizeof(double), "double","%f", "%lf" }, // ImGuiDataType_Double +}; +IM_STATIC_ASSERT(IM_ARRAYSIZE(GDataTypeInfo) == ImGuiDataType_COUNT); + +// FIXME-LEGACY: Prior to 1.61 our DragInt() function internally used floats and because of this the compile-time default value for format was "%.0f". +// Even though we changed the compile-time default, we expect users to have carried %f around, which would break the display of DragInt() calls. +// To honor backward compatibility we are rewriting the format string, unless IMGUI_DISABLE_OBSOLETE_FUNCTIONS is enabled. What could possibly go wrong?! +static const char* PatchFormatStringFloatToInt(const char* fmt) +{ + if (fmt[0] == '%' && fmt[1] == '.' && fmt[2] == '0' && fmt[3] == 'f' && fmt[4] == 0) // Fast legacy path for "%.0f" which is expected to be the most common case. + return "%d"; + const char* fmt_start = ImParseFormatFindStart(fmt); // Find % (if any, and ignore %%) + const char* fmt_end = ImParseFormatFindEnd(fmt_start); // Find end of format specifier, which itself is an exercise of confidence/recklessness (because snprintf is dependent on libc or user). + if (fmt_end > fmt_start && fmt_end[-1] == 'f') + { +#ifndef IMGUI_DISABLE_OBSOLETE_FUNCTIONS + if (fmt_start == fmt && fmt_end[0] == 0) + return "%d"; + ImGuiContext& g = *GImGui; + ImFormatString(g.TempBuffer, IM_ARRAYSIZE(g.TempBuffer), "%.*s%%d%s", (int)(fmt_start - fmt), fmt, fmt_end); // Honor leading and trailing decorations, but lose alignment/precision. + return g.TempBuffer; +#else + IM_ASSERT(0 && "DragInt(): Invalid format string!"); // Old versions used a default parameter of "%.0f", please replace with e.g. 
"%d" +#endif + } + return fmt; +} + +const ImGuiDataTypeInfo* ImGui::DataTypeGetInfo(ImGuiDataType data_type) +{ + IM_ASSERT(data_type >= 0 && data_type < ImGuiDataType_COUNT); + return &GDataTypeInfo[data_type]; +} + +int ImGui::DataTypeFormatString(char* buf, int buf_size, ImGuiDataType data_type, const void* p_data, const char* format) +{ + // Signedness doesn't matter when pushing integer arguments + if (data_type == ImGuiDataType_S32 || data_type == ImGuiDataType_U32) + return ImFormatString(buf, buf_size, format, *(const ImU32*)p_data); + if (data_type == ImGuiDataType_S64 || data_type == ImGuiDataType_U64) + return ImFormatString(buf, buf_size, format, *(const ImU64*)p_data); + if (data_type == ImGuiDataType_Float) + return ImFormatString(buf, buf_size, format, *(const float*)p_data); + if (data_type == ImGuiDataType_Double) + return ImFormatString(buf, buf_size, format, *(const double*)p_data); + if (data_type == ImGuiDataType_S8) + return ImFormatString(buf, buf_size, format, *(const ImS8*)p_data); + if (data_type == ImGuiDataType_U8) + return ImFormatString(buf, buf_size, format, *(const ImU8*)p_data); + if (data_type == ImGuiDataType_S16) + return ImFormatString(buf, buf_size, format, *(const ImS16*)p_data); + if (data_type == ImGuiDataType_U16) + return ImFormatString(buf, buf_size, format, *(const ImU16*)p_data); + IM_ASSERT(0); + return 0; +} + +void ImGui::DataTypeApplyOp(ImGuiDataType data_type, int op, void* output, const void* arg1, const void* arg2) +{ + IM_ASSERT(op == '+' || op == '-'); + switch (data_type) + { + case ImGuiDataType_S8: + if (op == '+') { *(ImS8*)output = ImAddClampOverflow(*(const ImS8*)arg1, *(const ImS8*)arg2, IM_S8_MIN, IM_S8_MAX); } + if (op == '-') { *(ImS8*)output = ImSubClampOverflow(*(const ImS8*)arg1, *(const ImS8*)arg2, IM_S8_MIN, IM_S8_MAX); } + return; + case ImGuiDataType_U8: + if (op == '+') { *(ImU8*)output = ImAddClampOverflow(*(const ImU8*)arg1, *(const ImU8*)arg2, IM_U8_MIN, IM_U8_MAX); } + if (op == '-') { 
*(ImU8*)output = ImSubClampOverflow(*(const ImU8*)arg1, *(const ImU8*)arg2, IM_U8_MIN, IM_U8_MAX); } + return; + case ImGuiDataType_S16: + if (op == '+') { *(ImS16*)output = ImAddClampOverflow(*(const ImS16*)arg1, *(const ImS16*)arg2, IM_S16_MIN, IM_S16_MAX); } + if (op == '-') { *(ImS16*)output = ImSubClampOverflow(*(const ImS16*)arg1, *(const ImS16*)arg2, IM_S16_MIN, IM_S16_MAX); } + return; + case ImGuiDataType_U16: + if (op == '+') { *(ImU16*)output = ImAddClampOverflow(*(const ImU16*)arg1, *(const ImU16*)arg2, IM_U16_MIN, IM_U16_MAX); } + if (op == '-') { *(ImU16*)output = ImSubClampOverflow(*(const ImU16*)arg1, *(const ImU16*)arg2, IM_U16_MIN, IM_U16_MAX); } + return; + case ImGuiDataType_S32: + if (op == '+') { *(ImS32*)output = ImAddClampOverflow(*(const ImS32*)arg1, *(const ImS32*)arg2, IM_S32_MIN, IM_S32_MAX); } + if (op == '-') { *(ImS32*)output = ImSubClampOverflow(*(const ImS32*)arg1, *(const ImS32*)arg2, IM_S32_MIN, IM_S32_MAX); } + return; + case ImGuiDataType_U32: + if (op == '+') { *(ImU32*)output = ImAddClampOverflow(*(const ImU32*)arg1, *(const ImU32*)arg2, IM_U32_MIN, IM_U32_MAX); } + if (op == '-') { *(ImU32*)output = ImSubClampOverflow(*(const ImU32*)arg1, *(const ImU32*)arg2, IM_U32_MIN, IM_U32_MAX); } + return; + case ImGuiDataType_S64: + if (op == '+') { *(ImS64*)output = ImAddClampOverflow(*(const ImS64*)arg1, *(const ImS64*)arg2, IM_S64_MIN, IM_S64_MAX); } + if (op == '-') { *(ImS64*)output = ImSubClampOverflow(*(const ImS64*)arg1, *(const ImS64*)arg2, IM_S64_MIN, IM_S64_MAX); } + return; + case ImGuiDataType_U64: + if (op == '+') { *(ImU64*)output = ImAddClampOverflow(*(const ImU64*)arg1, *(const ImU64*)arg2, IM_U64_MIN, IM_U64_MAX); } + if (op == '-') { *(ImU64*)output = ImSubClampOverflow(*(const ImU64*)arg1, *(const ImU64*)arg2, IM_U64_MIN, IM_U64_MAX); } + return; + case ImGuiDataType_Float: + if (op == '+') { *(float*)output = *(const float*)arg1 + *(const float*)arg2; } + if (op == '-') { *(float*)output = *(const float*)arg1 - 
*(const float*)arg2; } + return; + case ImGuiDataType_Double: + if (op == '+') { *(double*)output = *(const double*)arg1 + *(const double*)arg2; } + if (op == '-') { *(double*)output = *(const double*)arg1 - *(const double*)arg2; } + return; + case ImGuiDataType_COUNT: break; + } + IM_ASSERT(0); +} + +// User can input math operators (e.g. +100) to edit a numerical values. +// NB: This is _not_ a full expression evaluator. We should probably add one and replace this dumb mess.. +bool ImGui::DataTypeApplyOpFromText(const char* buf, const char* initial_value_buf, ImGuiDataType data_type, void* p_data, const char* format) +{ + while (ImCharIsBlankA(*buf)) + buf++; + + // We don't support '-' op because it would conflict with inputing negative value. + // Instead you can use +-100 to subtract from an existing value + char op = buf[0]; + if (op == '+' || op == '*' || op == '/') + { + buf++; + while (ImCharIsBlankA(*buf)) + buf++; + } + else + { + op = 0; + } + if (!buf[0]) + return false; + + // Copy the value in an opaque buffer so we can compare at the end of the function if it changed at all. + const ImGuiDataTypeInfo* type_info = DataTypeGetInfo(data_type); + ImGuiDataTypeTempStorage data_backup; + memcpy(&data_backup, p_data, type_info->Size); + + if (format == NULL) + format = type_info->ScanFmt; + + // FIXME-LEGACY: The aim is to remove those operators and write a proper expression evaluator at some point.. + int arg1i = 0; + if (data_type == ImGuiDataType_S32) + { + int* v = (int*)p_data; + int arg0i = *v; + float arg1f = 0.0f; + if (op && sscanf(initial_value_buf, format, &arg0i) < 1) + return false; + // Store operand in a float so we can use fractional value for multipliers (*1.1), but constant always parsed as integer so we can fit big integers (e.g. 
2000000003) past float precision + if (op == '+') { if (sscanf(buf, "%d", &arg1i)) *v = (int)(arg0i + arg1i); } // Add (use "+-" to subtract) + else if (op == '*') { if (sscanf(buf, "%f", &arg1f)) *v = (int)(arg0i * arg1f); } // Multiply + else if (op == '/') { if (sscanf(buf, "%f", &arg1f) && arg1f != 0.0f) *v = (int)(arg0i / arg1f); } // Divide + else { if (sscanf(buf, format, &arg1i) == 1) *v = arg1i; } // Assign constant + } + else if (data_type == ImGuiDataType_Float) + { + // For floats we have to ignore format with precision (e.g. "%.2f") because sscanf doesn't take them in + format = "%f"; + float* v = (float*)p_data; + float arg0f = *v, arg1f = 0.0f; + if (op && sscanf(initial_value_buf, format, &arg0f) < 1) + return false; + if (sscanf(buf, format, &arg1f) < 1) + return false; + if (op == '+') { *v = arg0f + arg1f; } // Add (use "+-" to subtract) + else if (op == '*') { *v = arg0f * arg1f; } // Multiply + else if (op == '/') { if (arg1f != 0.0f) *v = arg0f / arg1f; } // Divide + else { *v = arg1f; } // Assign constant + } + else if (data_type == ImGuiDataType_Double) + { + format = "%lf"; // scanf differentiate float/double unlike printf which forces everything to double because of ellipsis + double* v = (double*)p_data; + double arg0f = *v, arg1f = 0.0; + if (op && sscanf(initial_value_buf, format, &arg0f) < 1) + return false; + if (sscanf(buf, format, &arg1f) < 1) + return false; + if (op == '+') { *v = arg0f + arg1f; } // Add (use "+-" to subtract) + else if (op == '*') { *v = arg0f * arg1f; } // Multiply + else if (op == '/') { if (arg1f != 0.0f) *v = arg0f / arg1f; } // Divide + else { *v = arg1f; } // Assign constant + } + else if (data_type == ImGuiDataType_U32 || data_type == ImGuiDataType_S64 || data_type == ImGuiDataType_U64) + { + // All other types assign constant + // We don't bother handling support for legacy operators since they are a little too crappy. Instead we will later implement a proper expression evaluator in the future. 
+ sscanf(buf, format, p_data); + } + else + { + // Small types need a 32-bit buffer to receive the result from scanf() + int v32; + sscanf(buf, format, &v32); + if (data_type == ImGuiDataType_S8) + *(ImS8*)p_data = (ImS8)ImClamp(v32, (int)IM_S8_MIN, (int)IM_S8_MAX); + else if (data_type == ImGuiDataType_U8) + *(ImU8*)p_data = (ImU8)ImClamp(v32, (int)IM_U8_MIN, (int)IM_U8_MAX); + else if (data_type == ImGuiDataType_S16) + *(ImS16*)p_data = (ImS16)ImClamp(v32, (int)IM_S16_MIN, (int)IM_S16_MAX); + else if (data_type == ImGuiDataType_U16) + *(ImU16*)p_data = (ImU16)ImClamp(v32, (int)IM_U16_MIN, (int)IM_U16_MAX); + else + IM_ASSERT(0); + } + + return memcmp(&data_backup, p_data, type_info->Size) != 0; +} + +template +static int DataTypeCompareT(const T* lhs, const T* rhs) +{ + if (*lhs < *rhs) return -1; + if (*lhs > *rhs) return +1; + return 0; +} + +int ImGui::DataTypeCompare(ImGuiDataType data_type, const void* arg_1, const void* arg_2) +{ + switch (data_type) + { + case ImGuiDataType_S8: return DataTypeCompareT((const ImS8* )arg_1, (const ImS8* )arg_2); + case ImGuiDataType_U8: return DataTypeCompareT((const ImU8* )arg_1, (const ImU8* )arg_2); + case ImGuiDataType_S16: return DataTypeCompareT((const ImS16* )arg_1, (const ImS16* )arg_2); + case ImGuiDataType_U16: return DataTypeCompareT((const ImU16* )arg_1, (const ImU16* )arg_2); + case ImGuiDataType_S32: return DataTypeCompareT((const ImS32* )arg_1, (const ImS32* )arg_2); + case ImGuiDataType_U32: return DataTypeCompareT((const ImU32* )arg_1, (const ImU32* )arg_2); + case ImGuiDataType_S64: return DataTypeCompareT((const ImS64* )arg_1, (const ImS64* )arg_2); + case ImGuiDataType_U64: return DataTypeCompareT((const ImU64* )arg_1, (const ImU64* )arg_2); + case ImGuiDataType_Float: return DataTypeCompareT((const float* )arg_1, (const float* )arg_2); + case ImGuiDataType_Double: return DataTypeCompareT((const double*)arg_1, (const double*)arg_2); + case ImGuiDataType_COUNT: break; + } + IM_ASSERT(0); + return 0; +} + 
+template +static bool DataTypeClampT(T* v, const T* v_min, const T* v_max) +{ + // Clamp, both sides are optional, return true if modified + if (v_min && *v < *v_min) { *v = *v_min; return true; } + if (v_max && *v > *v_max) { *v = *v_max; return true; } + return false; +} + +bool ImGui::DataTypeClamp(ImGuiDataType data_type, void* p_data, const void* p_min, const void* p_max) +{ + switch (data_type) + { + case ImGuiDataType_S8: return DataTypeClampT((ImS8* )p_data, (const ImS8* )p_min, (const ImS8* )p_max); + case ImGuiDataType_U8: return DataTypeClampT((ImU8* )p_data, (const ImU8* )p_min, (const ImU8* )p_max); + case ImGuiDataType_S16: return DataTypeClampT((ImS16* )p_data, (const ImS16* )p_min, (const ImS16* )p_max); + case ImGuiDataType_U16: return DataTypeClampT((ImU16* )p_data, (const ImU16* )p_min, (const ImU16* )p_max); + case ImGuiDataType_S32: return DataTypeClampT((ImS32* )p_data, (const ImS32* )p_min, (const ImS32* )p_max); + case ImGuiDataType_U32: return DataTypeClampT((ImU32* )p_data, (const ImU32* )p_min, (const ImU32* )p_max); + case ImGuiDataType_S64: return DataTypeClampT((ImS64* )p_data, (const ImS64* )p_min, (const ImS64* )p_max); + case ImGuiDataType_U64: return DataTypeClampT((ImU64* )p_data, (const ImU64* )p_min, (const ImU64* )p_max); + case ImGuiDataType_Float: return DataTypeClampT((float* )p_data, (const float* )p_min, (const float* )p_max); + case ImGuiDataType_Double: return DataTypeClampT((double*)p_data, (const double*)p_min, (const double*)p_max); + case ImGuiDataType_COUNT: break; + } + IM_ASSERT(0); + return false; +} + +static float GetMinimumStepAtDecimalPrecision(int decimal_precision) +{ + static const float min_steps[10] = { 1.0f, 0.1f, 0.01f, 0.001f, 0.0001f, 0.00001f, 0.000001f, 0.0000001f, 0.00000001f, 0.000000001f }; + if (decimal_precision < 0) + return FLT_MIN; + return (decimal_precision < IM_ARRAYSIZE(min_steps)) ? 
min_steps[decimal_precision] : ImPow(10.0f, (float)-decimal_precision); +} + +template +static const char* ImAtoi(const char* src, TYPE* output) +{ + int negative = 0; + if (*src == '-') { negative = 1; src++; } + if (*src == '+') { src++; } + TYPE v = 0; + while (*src >= '0' && *src <= '9') + v = (v * 10) + (*src++ - '0'); + *output = negative ? -v : v; + return src; +} + +template +TYPE ImGui::RoundScalarWithFormatT(const char* format, ImGuiDataType data_type, TYPE v) +{ + const char* fmt_start = ImParseFormatFindStart(format); + if (fmt_start[0] != '%' || fmt_start[1] == '%') // Don't apply if the value is not visible in the format string + return v; + char v_str[64]; + ImFormatString(v_str, IM_ARRAYSIZE(v_str), fmt_start, v); + const char* p = v_str; + while (*p == ' ') + p++; + if (data_type == ImGuiDataType_Float || data_type == ImGuiDataType_Double) + v = (TYPE)ImAtof(p); + else + ImAtoi(p, (SIGNEDTYPE*)&v); + return v; +} + +//------------------------------------------------------------------------- +// [SECTION] Widgets: DragScalar, DragFloat, DragInt, etc. +//------------------------------------------------------------------------- +// - DragBehaviorT<>() [Internal] +// - DragBehavior() [Internal] +// - DragScalar() +// - DragScalarN() +// - DragFloat() +// - DragFloat2() +// - DragFloat3() +// - DragFloat4() +// - DragFloatRange2() +// - DragInt() +// - DragInt2() +// - DragInt3() +// - DragInt4() +// - DragIntRange2() +//------------------------------------------------------------------------- + +// This is called by DragBehavior() when the widget is active (held by mouse or being manipulated with Nav controls) +template +bool ImGui::DragBehaviorT(ImGuiDataType data_type, TYPE* v, float v_speed, const TYPE v_min, const TYPE v_max, const char* format, ImGuiSliderFlags flags) +{ + ImGuiContext& g = *GImGui; + const ImGuiAxis axis = (flags & ImGuiSliderFlags_Vertical) ? 
ImGuiAxis_Y : ImGuiAxis_X; + const bool is_decimal = (data_type == ImGuiDataType_Float) || (data_type == ImGuiDataType_Double); + const bool is_clamped = (v_min < v_max); + const bool is_logarithmic = (flags & ImGuiSliderFlags_Logarithmic) && is_decimal; + + // Default tweak speed + if (v_speed == 0.0f && is_clamped && (v_max - v_min < FLT_MAX)) + v_speed = (float)((v_max - v_min) * g.DragSpeedDefaultRatio); + + // Inputs accumulates into g.DragCurrentAccum, which is flushed into the current value as soon as it makes a difference with our precision settings + float adjust_delta = 0.0f; + if (g.ActiveIdSource == ImGuiInputSource_Mouse && IsMousePosValid() && IsMouseDragPastThreshold(0, g.IO.MouseDragThreshold * DRAG_MOUSE_THRESHOLD_FACTOR)) + { + adjust_delta = g.IO.MouseDelta[axis]; + if (g.IO.KeyAlt) + adjust_delta *= 1.0f / 100.0f; + if (g.IO.KeyShift) + adjust_delta *= 10.0f; + } + else if (g.ActiveIdSource == ImGuiInputSource_Nav) + { + int decimal_precision = is_decimal ? ImParseFormatPrecision(format, 3) : 0; + adjust_delta = GetNavInputAmount2d(ImGuiNavDirSourceFlags_Keyboard | ImGuiNavDirSourceFlags_PadDPad, ImGuiInputReadMode_RepeatFast, 1.0f / 10.0f, 10.0f)[axis]; + v_speed = ImMax(v_speed, GetMinimumStepAtDecimalPrecision(decimal_precision)); + } + adjust_delta *= v_speed; + + // For vertical drag we currently assume that Up=higher value (like we do with vertical sliders). This may become a parameter. + if (axis == ImGuiAxis_Y) + adjust_delta = -adjust_delta; + + // For logarithmic use our range is effectively 0..1 so scale the delta into that range + if (is_logarithmic && (v_max - v_min < FLT_MAX) && ((v_max - v_min) > 0.000001f)) // Epsilon to avoid /0 + adjust_delta /= (float)(v_max - v_min); + + // Clear current value on activation + // Avoid altering values and clamping when we are _already_ past the limits and heading in the same direction, so e.g. if range is 0..255, current value is 300 and we are pushing to the right side, keep the 300. 
+ bool is_just_activated = g.ActiveIdIsJustActivated; + bool is_already_past_limits_and_pushing_outward = is_clamped && ((*v >= v_max && adjust_delta > 0.0f) || (*v <= v_min && adjust_delta < 0.0f)); + if (is_just_activated || is_already_past_limits_and_pushing_outward) + { + g.DragCurrentAccum = 0.0f; + g.DragCurrentAccumDirty = false; + } + else if (adjust_delta != 0.0f) + { + g.DragCurrentAccum += adjust_delta; + g.DragCurrentAccumDirty = true; + } + + if (!g.DragCurrentAccumDirty) + return false; + + TYPE v_cur = *v; + FLOATTYPE v_old_ref_for_accum_remainder = (FLOATTYPE)0.0f; + + float logarithmic_zero_epsilon = 0.0f; // Only valid when is_logarithmic is true + const float zero_deadzone_halfsize = 0.0f; // Drag widgets have no deadzone (as it doesn't make sense) + if (is_logarithmic) + { + // When using logarithmic sliders, we need to clamp to avoid hitting zero, but our choice of clamp value greatly affects slider precision. We attempt to use the specified precision to estimate a good lower bound. + const int decimal_precision = is_decimal ? ImParseFormatPrecision(format, 3) : 1; + logarithmic_zero_epsilon = ImPow(0.1f, (float)decimal_precision); + + // Convert to parametric space, apply delta, convert back + float v_old_parametric = ScaleRatioFromValueT(data_type, v_cur, v_min, v_max, is_logarithmic, logarithmic_zero_epsilon, zero_deadzone_halfsize); + float v_new_parametric = v_old_parametric + g.DragCurrentAccum; + v_cur = ScaleValueFromRatioT(data_type, v_new_parametric, v_min, v_max, is_logarithmic, logarithmic_zero_epsilon, zero_deadzone_halfsize); + v_old_ref_for_accum_remainder = v_old_parametric; + } + else + { + v_cur += (SIGNEDTYPE)g.DragCurrentAccum; + } + + // Round to user desired precision based on format string + if (!(flags & ImGuiSliderFlags_NoRoundToFormat)) + v_cur = RoundScalarWithFormatT(format, data_type, v_cur); + + // Preserve remainder after rounding has been applied. This also allow slow tweaking of values. 
+ g.DragCurrentAccumDirty = false; + if (is_logarithmic) + { + // Convert to parametric space, apply delta, convert back + float v_new_parametric = ScaleRatioFromValueT(data_type, v_cur, v_min, v_max, is_logarithmic, logarithmic_zero_epsilon, zero_deadzone_halfsize); + g.DragCurrentAccum -= (float)(v_new_parametric - v_old_ref_for_accum_remainder); + } + else + { + g.DragCurrentAccum -= (float)((SIGNEDTYPE)v_cur - (SIGNEDTYPE)*v); + } + + // Lose zero sign for float/double + if (v_cur == (TYPE)-0) + v_cur = (TYPE)0; + + // Clamp values (+ handle overflow/wrap-around for integer types) + if (*v != v_cur && is_clamped) + { + if (v_cur < v_min || (v_cur > *v && adjust_delta < 0.0f && !is_decimal)) + v_cur = v_min; + if (v_cur > v_max || (v_cur < *v && adjust_delta > 0.0f && !is_decimal)) + v_cur = v_max; + } + + // Apply result + if (*v == v_cur) + return false; + *v = v_cur; + return true; +} + +bool ImGui::DragBehavior(ImGuiID id, ImGuiDataType data_type, void* p_v, float v_speed, const void* p_min, const void* p_max, const char* format, ImGuiSliderFlags flags) +{ + // Read imgui.cpp "API BREAKING CHANGES" section for 1.78 if you hit this assert. + IM_ASSERT((flags == 1 || (flags & ImGuiSliderFlags_InvalidMask_) == 0) && "Invalid ImGuiSliderFlags flags! Has the 'float power' argument been mistakenly cast to flags? Call function with ImGuiSliderFlags_Logarithmic flags instead."); + + ImGuiContext& g = *GImGui; + if (g.ActiveId == id) + { + if (g.ActiveIdSource == ImGuiInputSource_Mouse && !g.IO.MouseDown[0]) + ClearActiveID(); + else if (g.ActiveIdSource == ImGuiInputSource_Nav && g.NavActivatePressedId == id && !g.ActiveIdIsJustActivated) + ClearActiveID(); + } + if (g.ActiveId != id) + return false; + if ((g.CurrentWindow->DC.ItemFlags & ImGuiItemFlags_ReadOnly) || (flags & ImGuiSliderFlags_ReadOnly)) + return false; + + switch (data_type) + { + case ImGuiDataType_S8: { ImS32 v32 = (ImS32)*(ImS8*)p_v; bool r = DragBehaviorT(ImGuiDataType_S32, &v32, v_speed, p_min ? 
*(const ImS8*) p_min : IM_S8_MIN, p_max ? *(const ImS8*)p_max : IM_S8_MAX, format, flags); if (r) *(ImS8*)p_v = (ImS8)v32; return r; } + case ImGuiDataType_U8: { ImU32 v32 = (ImU32)*(ImU8*)p_v; bool r = DragBehaviorT(ImGuiDataType_U32, &v32, v_speed, p_min ? *(const ImU8*) p_min : IM_U8_MIN, p_max ? *(const ImU8*)p_max : IM_U8_MAX, format, flags); if (r) *(ImU8*)p_v = (ImU8)v32; return r; } + case ImGuiDataType_S16: { ImS32 v32 = (ImS32)*(ImS16*)p_v; bool r = DragBehaviorT(ImGuiDataType_S32, &v32, v_speed, p_min ? *(const ImS16*)p_min : IM_S16_MIN, p_max ? *(const ImS16*)p_max : IM_S16_MAX, format, flags); if (r) *(ImS16*)p_v = (ImS16)v32; return r; } + case ImGuiDataType_U16: { ImU32 v32 = (ImU32)*(ImU16*)p_v; bool r = DragBehaviorT(ImGuiDataType_U32, &v32, v_speed, p_min ? *(const ImU16*)p_min : IM_U16_MIN, p_max ? *(const ImU16*)p_max : IM_U16_MAX, format, flags); if (r) *(ImU16*)p_v = (ImU16)v32; return r; } + case ImGuiDataType_S32: return DragBehaviorT(data_type, (ImS32*)p_v, v_speed, p_min ? *(const ImS32* )p_min : IM_S32_MIN, p_max ? *(const ImS32* )p_max : IM_S32_MAX, format, flags); + case ImGuiDataType_U32: return DragBehaviorT(data_type, (ImU32*)p_v, v_speed, p_min ? *(const ImU32* )p_min : IM_U32_MIN, p_max ? *(const ImU32* )p_max : IM_U32_MAX, format, flags); + case ImGuiDataType_S64: return DragBehaviorT(data_type, (ImS64*)p_v, v_speed, p_min ? *(const ImS64* )p_min : IM_S64_MIN, p_max ? *(const ImS64* )p_max : IM_S64_MAX, format, flags); + case ImGuiDataType_U64: return DragBehaviorT(data_type, (ImU64*)p_v, v_speed, p_min ? *(const ImU64* )p_min : IM_U64_MIN, p_max ? *(const ImU64* )p_max : IM_U64_MAX, format, flags); + case ImGuiDataType_Float: return DragBehaviorT(data_type, (float*)p_v, v_speed, p_min ? *(const float* )p_min : -FLT_MAX, p_max ? *(const float* )p_max : FLT_MAX, format, flags); + case ImGuiDataType_Double: return DragBehaviorT(data_type, (double*)p_v, v_speed, p_min ? *(const double*)p_min : -DBL_MAX, p_max ? 
*(const double*)p_max : DBL_MAX, format, flags); + case ImGuiDataType_COUNT: break; + } + IM_ASSERT(0); + return false; +} + +// Note: p_data, p_min and p_max are _pointers_ to a memory address holding the data. For a Drag widget, p_min and p_max are optional. +// Read code of e.g. DragFloat(), DragInt() etc. or examples in 'Demo->Widgets->Data Types' to understand how to use this function directly. +bool ImGui::DragScalar(const char* label, ImGuiDataType data_type, void* p_data, float v_speed, const void* p_min, const void* p_max, const char* format, ImGuiSliderFlags flags) +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return false; + + ImGuiContext& g = *GImGui; + const ImGuiStyle& style = g.Style; + const ImGuiID id = window->GetID(label); + const float w = CalcItemWidth(); + const ImVec2 label_size = CalcTextSize(label, NULL, true); + const ImRect frame_bb(window->DC.CursorPos, window->DC.CursorPos + ImVec2(w, label_size.y + style.FramePadding.y * 2.0f)); + const ImRect total_bb(frame_bb.Min, frame_bb.Max + ImVec2(label_size.x > 0.0f ? style.ItemInnerSpacing.x + label_size.x : 0.0f, 0.0f)); + + ItemSize(total_bb, style.FramePadding.y); + if (!ItemAdd(total_bb, id, &frame_bb)) + return false; + + // Default format string when passing NULL + if (format == NULL) + format = DataTypeGetInfo(data_type)->PrintFmt; + else if (data_type == ImGuiDataType_S32 && strcmp(format, "%d") != 0) // (FIXME-LEGACY: Patch old "%.0f" format string to use "%d", read function more details.) 
+ format = PatchFormatStringFloatToInt(format); + + // Tabbing or CTRL-clicking on Drag turns it into an InputText + const bool hovered = ItemHoverable(frame_bb, id); + const bool temp_input_allowed = (flags & ImGuiSliderFlags_NoInput) == 0; + bool temp_input_is_active = temp_input_allowed && TempInputIsActive(id); + if (!temp_input_is_active) + { + const bool focus_requested = temp_input_allowed && FocusableItemRegister(window, id); + const bool clicked = (hovered && g.IO.MouseClicked[0]); + const bool double_clicked = (hovered && g.IO.MouseDoubleClicked[0]); + if (focus_requested || clicked || double_clicked || g.NavActivateId == id || g.NavInputId == id) + { + SetActiveID(id, window); + SetFocusID(id, window); + FocusWindow(window); + g.ActiveIdUsingNavDirMask = (1 << ImGuiDir_Left) | (1 << ImGuiDir_Right); + if (temp_input_allowed && (focus_requested || (clicked && g.IO.KeyCtrl) || double_clicked || g.NavInputId == id)) + { + temp_input_is_active = true; + FocusableItemUnregister(window); + } + } + // Experimental: simple click (without moving) turns Drag into an InputText + // FIXME: Currently polling ImGuiConfigFlags_IsTouchScreen, may either poll an hypothetical ImGuiBackendFlags_HasKeyboard and/or an explicit drag settings. + if (g.IO.ConfigDragClickToInputText && temp_input_allowed && !temp_input_is_active) + if (g.ActiveId == id && hovered && g.IO.MouseReleased[0] && !IsMouseDragPastThreshold(0, g.IO.MouseDragThreshold * DRAG_MOUSE_THRESHOLD_FACTOR)) + { + g.NavInputId = id; + temp_input_is_active = true; + FocusableItemUnregister(window); + } + } + + if (temp_input_is_active) + { + // Only clamp CTRL+Click input when ImGuiSliderFlags_AlwaysClamp is set + const bool is_clamp_input = (flags & ImGuiSliderFlags_AlwaysClamp) != 0 && (p_min == NULL || p_max == NULL || DataTypeCompare(data_type, p_min, p_max) < 0); + return TempInputScalar(frame_bb, id, label, data_type, p_data, format, is_clamp_input ? p_min : NULL, is_clamp_input ? 
p_max : NULL); + } + + // Draw frame + const ImU32 frame_col = GetColorU32(g.ActiveId == id ? ImGuiCol_FrameBgActive : g.HoveredId == id ? ImGuiCol_FrameBgHovered : ImGuiCol_FrameBg); + RenderNavHighlight(frame_bb, id); + RenderFrame(frame_bb.Min, frame_bb.Max, frame_col, true, style.FrameRounding); + + // Drag behavior + const bool value_changed = DragBehavior(id, data_type, p_data, v_speed, p_min, p_max, format, flags); + if (value_changed) + MarkItemEdited(id); + + // Display value using user-provided display format so user can add prefix/suffix/decorations to the value. + char value_buf[64]; + const char* value_buf_end = value_buf + DataTypeFormatString(value_buf, IM_ARRAYSIZE(value_buf), data_type, p_data, format); + RenderTextClipped(frame_bb.Min, frame_bb.Max, value_buf, value_buf_end, NULL, ImVec2(0.5f, 0.5f)); + + if (label_size.x > 0.0f) + RenderText(ImVec2(frame_bb.Max.x + style.ItemInnerSpacing.x, frame_bb.Min.y + style.FramePadding.y), label); + + IMGUI_TEST_ENGINE_ITEM_INFO(id, label, window->DC.ItemFlags); + return value_changed; +} + +bool ImGui::DragScalarN(const char* label, ImGuiDataType data_type, void* p_data, int components, float v_speed, const void* p_min, const void* p_max, const char* format, ImGuiSliderFlags flags) +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return false; + + ImGuiContext& g = *GImGui; + bool value_changed = false; + BeginGroup(); + PushID(label); + PushMultiItemsWidths(components, CalcItemWidth()); + size_t type_size = GDataTypeInfo[data_type].Size; + for (int i = 0; i < components; i++) + { + PushID(i); + if (i > 0) + SameLine(0, g.Style.ItemInnerSpacing.x); + value_changed |= DragScalar("", data_type, p_data, v_speed, p_min, p_max, format, flags); + PopID(); + PopItemWidth(); + p_data = (void*)((char*)p_data + type_size); + } + PopID(); + + const char* label_end = FindRenderedTextEnd(label); + if (label != label_end) + { + SameLine(0, g.Style.ItemInnerSpacing.x); + TextEx(label, 
label_end); + } + + EndGroup(); + return value_changed; +} + +bool ImGui::DragFloat(const char* label, float* v, float v_speed, float v_min, float v_max, const char* format, ImGuiSliderFlags flags) +{ + return DragScalar(label, ImGuiDataType_Float, v, v_speed, &v_min, &v_max, format, flags); +} + +bool ImGui::DragFloat2(const char* label, float v[2], float v_speed, float v_min, float v_max, const char* format, ImGuiSliderFlags flags) +{ + return DragScalarN(label, ImGuiDataType_Float, v, 2, v_speed, &v_min, &v_max, format, flags); +} + +bool ImGui::DragFloat3(const char* label, float v[3], float v_speed, float v_min, float v_max, const char* format, ImGuiSliderFlags flags) +{ + return DragScalarN(label, ImGuiDataType_Float, v, 3, v_speed, &v_min, &v_max, format, flags); +} + +bool ImGui::DragFloat4(const char* label, float v[4], float v_speed, float v_min, float v_max, const char* format, ImGuiSliderFlags flags) +{ + return DragScalarN(label, ImGuiDataType_Float, v, 4, v_speed, &v_min, &v_max, format, flags); +} + +// NB: You likely want to specify the ImGuiSliderFlags_AlwaysClamp when using this. +bool ImGui::DragFloatRange2(const char* label, float* v_current_min, float* v_current_max, float v_speed, float v_min, float v_max, const char* format, const char* format_max, ImGuiSliderFlags flags) +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return false; + + ImGuiContext& g = *GImGui; + PushID(label); + BeginGroup(); + PushMultiItemsWidths(2, CalcItemWidth()); + + float min_min = (v_min >= v_max) ? -FLT_MAX : v_min; + float min_max = (v_min >= v_max) ? *v_current_max : ImMin(v_max, *v_current_max); + ImGuiSliderFlags min_flags = flags | ((min_min == min_max) ? ImGuiSliderFlags_ReadOnly : 0); + bool value_changed = DragScalar("##min", ImGuiDataType_Float, v_current_min, v_speed, &min_min, &min_max, format, min_flags); + PopItemWidth(); + SameLine(0, g.Style.ItemInnerSpacing.x); + + float max_min = (v_min >= v_max) ? 
*v_current_min : ImMax(v_min, *v_current_min); + float max_max = (v_min >= v_max) ? FLT_MAX : v_max; + ImGuiSliderFlags max_flags = flags | ((max_min == max_max) ? ImGuiSliderFlags_ReadOnly : 0); + value_changed |= DragScalar("##max", ImGuiDataType_Float, v_current_max, v_speed, &max_min, &max_max, format_max ? format_max : format, max_flags); + PopItemWidth(); + SameLine(0, g.Style.ItemInnerSpacing.x); + + TextEx(label, FindRenderedTextEnd(label)); + EndGroup(); + PopID(); + return value_changed; +} + +// NB: v_speed is float to allow adjusting the drag speed with more precision +bool ImGui::DragInt(const char* label, int* v, float v_speed, int v_min, int v_max, const char* format, ImGuiSliderFlags flags) +{ + return DragScalar(label, ImGuiDataType_S32, v, v_speed, &v_min, &v_max, format, flags); +} + +bool ImGui::DragInt2(const char* label, int v[2], float v_speed, int v_min, int v_max, const char* format, ImGuiSliderFlags flags) +{ + return DragScalarN(label, ImGuiDataType_S32, v, 2, v_speed, &v_min, &v_max, format, flags); +} + +bool ImGui::DragInt3(const char* label, int v[3], float v_speed, int v_min, int v_max, const char* format, ImGuiSliderFlags flags) +{ + return DragScalarN(label, ImGuiDataType_S32, v, 3, v_speed, &v_min, &v_max, format, flags); +} + +bool ImGui::DragInt4(const char* label, int v[4], float v_speed, int v_min, int v_max, const char* format, ImGuiSliderFlags flags) +{ + return DragScalarN(label, ImGuiDataType_S32, v, 4, v_speed, &v_min, &v_max, format, flags); +} + +// NB: You likely want to specify the ImGuiSliderFlags_AlwaysClamp when using this. 
+bool ImGui::DragIntRange2(const char* label, int* v_current_min, int* v_current_max, float v_speed, int v_min, int v_max, const char* format, const char* format_max, ImGuiSliderFlags flags)
+{
+    // Two integer drags laid out on one line ("##min" editing *v_current_min, "##max" editing
+    // *v_current_max), followed by the label text.
+    // - v_min <  v_max: "##min" is limited to [v_min, min(v_max, *v_current_max)] and
+    //                   "##max" is limited to [max(v_min, *v_current_min), v_max].
+    // - v_min >= v_max: no outer bounds are applied; "##min" is limited to [INT_MIN, *v_current_max]
+    //                   and "##max" to [*v_current_min, INT_MAX].
+    // - A drag whose two limits are equal is passed ImGuiSliderFlags_ReadOnly in addition to 'flags'.
+    // - format_max may be NULL, in which case 'format' is used for both drags.
+    // Returns true when either drag reported a change, false when the window is skipping items.
+    ImGuiWindow* const current_window = GetCurrentWindow();
+    if (current_window->SkipItems)
+        return false;
+
+    ImGuiContext& ctx = *GImGui;
+    const bool has_no_outer_bounds = (v_min >= v_max);
+
+    PushID(label);
+    BeginGroup();
+    PushMultiItemsWidths(2, CalcItemWidth());
+
+    // Lower handle: its ceiling is the current upper value, read before any drag runs.
+    int lower_floor;
+    int lower_ceiling;
+    if (has_no_outer_bounds)
+    {
+        lower_floor = INT_MIN;
+        lower_ceiling = *v_current_max;
+    }
+    else
+    {
+        lower_floor = v_min;
+        lower_ceiling = ImMin(v_max, *v_current_max);
+    }
+    ImGuiSliderFlags lower_flags = flags;
+    if (lower_floor == lower_ceiling)
+        lower_flags |= ImGuiSliderFlags_ReadOnly;
+    bool any_edited = DragInt("##min", v_current_min, v_speed, lower_floor, lower_ceiling, format, lower_flags);
+    PopItemWidth();
+    SameLine(0, ctx.Style.ItemInnerSpacing.x);
+
+    // Upper handle: its floor is computed only now, so it sees whatever the first drag wrote to *v_current_min.
+    int upper_floor;
+    int upper_ceiling;
+    if (has_no_outer_bounds)
+    {
+        upper_floor = *v_current_min;
+        upper_ceiling = INT_MAX;
+    }
+    else
+    {
+        upper_floor = ImMax(v_min, *v_current_min);
+        upper_ceiling = v_max;
+    }
+    ImGuiSliderFlags upper_flags = flags;
+    if (upper_floor == upper_ceiling)
+        upper_flags |= ImGuiSliderFlags_ReadOnly;
+    const char* const upper_format = format_max ? format_max : format;
+    if (DragInt("##max", v_current_max, v_speed, upper_floor, upper_ceiling, upper_format, upper_flags))
+        any_edited = true;
+    PopItemWidth();
+    SameLine(0, ctx.Style.ItemInnerSpacing.x);
+
+    // Label text, drawn up to the end reported by FindRenderedTextEnd().
+    const char* const label_display_end = FindRenderedTextEnd(label);
+    TextEx(label, label_display_end);
+    EndGroup();
+    PopID();
+
+    return any_edited;
+}
+
+#ifndef IMGUI_DISABLE_OBSOLETE_FUNCTIONS
+
+// Obsolete versions with power parameter. See https://github.com/ocornut/imgui/issues/3361 for details.
+bool ImGui::DragScalar(const char* label, ImGuiDataType data_type, void* p_data, float v_speed, const void* p_min, const void* p_max, const char* format, float power)
+{
+    // Legacy entry point taking a 'float power' where the current API takes ImGuiSliderFlags.
+    // power == 1.0f forwards with ImGuiSliderFlags_None. Any other value trips the asserts below;
+    // if execution continues past them, the call is forwarded with ImGuiSliderFlags_Logarithmic added.
+    const bool power_curve_requested = (power != 1.0f);
+    if (power_curve_requested)
+    {
+        // Fails on purpose whenever this branch is reached; the string carries the migration hint.
+        IM_ASSERT(power == 1.0f && "Call function with ImGuiSliderFlags_Logarithmic flags instead of using the old 'float power' function!");
+        // A power curve needs both bounds to be known.
+        IM_ASSERT(p_min != NULL && p_max != NULL);
+    }
+
+    // Held in an ImGuiSliderFlags-typed local so the call below picks the flags-taking overload.
+    const ImGuiSliderFlags forwarded_flags = power_curve_requested ? (ImGuiSliderFlags_None | ImGuiSliderFlags_Logarithmic) : ImGuiSliderFlags_None;
+    return DragScalar(label, data_type, p_data, v_speed, p_min, p_max, format, forwarded_flags);
+}
+
+bool ImGui::DragScalarN(const char* label, ImGuiDataType data_type, void* p_data, int components, float v_speed, const void* p_min, const void* p_max, const char* format, float power)
+{
+    // Multi-component counterpart of the legacy 'float power' DragScalar(): same translation of
+    // 'power' into flags, then forwards to the flags-taking DragScalarN().
+    const bool power_curve_requested = (power != 1.0f);
+    if (power_curve_requested)
+    {
+        // Fails on purpose whenever this branch is reached; the string carries the migration hint.
+        IM_ASSERT(power == 1.0f && "Call function with ImGuiSliderFlags_Logarithmic flags instead of using the old 'float power' function!");
+        // A power curve needs both bounds to be known.
+        IM_ASSERT(p_min != NULL && p_max != NULL);
+    }
+
+    // Held in an ImGuiSliderFlags-typed local so the call below picks the flags-taking overload.
+    const ImGuiSliderFlags forwarded_flags = power_curve_requested ? (ImGuiSliderFlags_None | ImGuiSliderFlags_Logarithmic) : ImGuiSliderFlags_None;
+    return DragScalarN(label, data_type, p_data, components, v_speed, p_min, p_max, format, forwarded_flags);
+}
+
+#endif // IMGUI_DISABLE_OBSOLETE_FUNCTIONS
+
+//-------------------------------------------------------------------------
+// [SECTION] Widgets: SliderScalar, SliderFloat, SliderInt, etc.
+//------------------------------------------------------------------------- +// - ScaleRatioFromValueT<> [Internal] +// - ScaleValueFromRatioT<> [Internal] +// - SliderBehaviorT<>() [Internal] +// - SliderBehavior() [Internal] +// - SliderScalar() +// - SliderScalarN() +// - SliderFloat() +// - SliderFloat2() +// - SliderFloat3() +// - SliderFloat4() +// - SliderAngle() +// - SliderInt() +// - SliderInt2() +// - SliderInt3() +// - SliderInt4() +// - VSliderScalar() +// - VSliderFloat() +// - VSliderInt() +//------------------------------------------------------------------------- + +// Convert a value v in the output space of a slider into a parametric position on the slider itself (the logical opposite of ScaleValueFromRatioT) +template +float ImGui::ScaleRatioFromValueT(ImGuiDataType data_type, TYPE v, TYPE v_min, TYPE v_max, bool is_logarithmic, float logarithmic_zero_epsilon, float zero_deadzone_halfsize) +{ + if (v_min == v_max) + return 0.0f; + IM_UNUSED(data_type); + + const TYPE v_clamped = (v_min < v_max) ? ImClamp(v, v_min, v_max) : ImClamp(v, v_max, v_min); + if (is_logarithmic) + { + bool flipped = v_max < v_min; + + if (flipped) // Handle the case where the range is backwards + ImSwap(v_min, v_max); + + // Fudge min/max to avoid getting close to log(0) + FLOATTYPE v_min_fudged = (ImAbs((FLOATTYPE)v_min) < logarithmic_zero_epsilon) ? ((v_min < 0.0f) ? -logarithmic_zero_epsilon : logarithmic_zero_epsilon) : (FLOATTYPE)v_min; + FLOATTYPE v_max_fudged = (ImAbs((FLOATTYPE)v_max) < logarithmic_zero_epsilon) ? ((v_max < 0.0f) ? -logarithmic_zero_epsilon : logarithmic_zero_epsilon) : (FLOATTYPE)v_max; + + // Awkward special cases - we need ranges of the form (-100 .. 0) to convert to (-100 .. -epsilon), not (-100 .. 
epsilon) + if ((v_min == 0.0f) && (v_max < 0.0f)) + v_min_fudged = -logarithmic_zero_epsilon; + else if ((v_max == 0.0f) && (v_min < 0.0f)) + v_max_fudged = -logarithmic_zero_epsilon; + + float result; + + if (v_clamped <= v_min_fudged) + result = 0.0f; // Workaround for values that are in-range but below our fudge + else if (v_clamped >= v_max_fudged) + result = 1.0f; // Workaround for values that are in-range but above our fudge + else if ((v_min * v_max) < 0.0f) // Range crosses zero, so split into two portions + { + float zero_point_center = (-(float)v_min) / ((float)v_max - (float)v_min); // The zero point in parametric space. There's an argument we should take the logarithmic nature into account when calculating this, but for now this should do (and the most common case of a symmetrical range works fine) + float zero_point_snap_L = zero_point_center - zero_deadzone_halfsize; + float zero_point_snap_R = zero_point_center + zero_deadzone_halfsize; + if (v == 0.0f) + result = zero_point_center; // Special case for exactly zero + else if (v < 0.0f) + result = (1.0f - (float)(ImLog(-(FLOATTYPE)v_clamped / logarithmic_zero_epsilon) / ImLog(-v_min_fudged / logarithmic_zero_epsilon))) * zero_point_snap_L; + else + result = zero_point_snap_R + ((float)(ImLog((FLOATTYPE)v_clamped / logarithmic_zero_epsilon) / ImLog(v_max_fudged / logarithmic_zero_epsilon)) * (1.0f - zero_point_snap_R)); + } + else if ((v_min < 0.0f) || (v_max < 0.0f)) // Entirely negative slider + result = 1.0f - (float)(ImLog(-(FLOATTYPE)v_clamped / -v_max_fudged) / ImLog(-v_min_fudged / -v_max_fudged)); + else + result = (float)(ImLog((FLOATTYPE)v_clamped / v_min_fudged) / ImLog(v_max_fudged / v_min_fudged)); + + return flipped ? 
(1.0f - result) : result; + } + + // Linear slider + return (float)((FLOATTYPE)(SIGNEDTYPE)(v_clamped - v_min) / (FLOATTYPE)(SIGNEDTYPE)(v_max - v_min)); +} + +// Convert a parametric position on a slider into a value v in the output space (the logical opposite of ScaleRatioFromValueT) +template +TYPE ImGui::ScaleValueFromRatioT(ImGuiDataType data_type, float t, TYPE v_min, TYPE v_max, bool is_logarithmic, float logarithmic_zero_epsilon, float zero_deadzone_halfsize) +{ + if (v_min == v_max) + return (TYPE)0.0f; + const bool is_decimal = (data_type == ImGuiDataType_Float) || (data_type == ImGuiDataType_Double); + + TYPE result; + if (is_logarithmic) + { + // We special-case the extents because otherwise our fudging can lead to "mathematically correct" but non-intuitive behaviors like a fully-left slider not actually reaching the minimum value + if (t <= 0.0f) + result = v_min; + else if (t >= 1.0f) + result = v_max; + else + { + bool flipped = v_max < v_min; // Check if range is "backwards" + + // Fudge min/max to avoid getting silly results close to zero + FLOATTYPE v_min_fudged = (ImAbs((FLOATTYPE)v_min) < logarithmic_zero_epsilon) ? ((v_min < 0.0f) ? -logarithmic_zero_epsilon : logarithmic_zero_epsilon) : (FLOATTYPE)v_min; + FLOATTYPE v_max_fudged = (ImAbs((FLOATTYPE)v_max) < logarithmic_zero_epsilon) ? ((v_max < 0.0f) ? -logarithmic_zero_epsilon : logarithmic_zero_epsilon) : (FLOATTYPE)v_max; + + if (flipped) + ImSwap(v_min_fudged, v_max_fudged); + + // Awkward special case - we need ranges of the form (-100 .. 0) to convert to (-100 .. -epsilon), not (-100 .. epsilon) + if ((v_max == 0.0f) && (v_min < 0.0f)) + v_max_fudged = -logarithmic_zero_epsilon; + + float t_with_flip = flipped ? 
(1.0f - t) : t; // t, but flipped if necessary to account for us flipping the range + + if ((v_min * v_max) < 0.0f) // Range crosses zero, so we have to do this in two parts + { + float zero_point_center = (-(float)ImMin(v_min, v_max)) / ImAbs((float)v_max - (float)v_min); // The zero point in parametric space + float zero_point_snap_L = zero_point_center - zero_deadzone_halfsize; + float zero_point_snap_R = zero_point_center + zero_deadzone_halfsize; + if (t_with_flip >= zero_point_snap_L && t_with_flip <= zero_point_snap_R) + result = (TYPE)0.0f; // Special case to make getting exactly zero possible (the epsilon prevents it otherwise) + else if (t_with_flip < zero_point_center) + result = (TYPE)-(logarithmic_zero_epsilon * ImPow(-v_min_fudged / logarithmic_zero_epsilon, (FLOATTYPE)(1.0f - (t_with_flip / zero_point_snap_L)))); + else + result = (TYPE)(logarithmic_zero_epsilon * ImPow(v_max_fudged / logarithmic_zero_epsilon, (FLOATTYPE)((t_with_flip - zero_point_snap_R) / (1.0f - zero_point_snap_R)))); + } + else if ((v_min < 0.0f) || (v_max < 0.0f)) // Entirely negative slider + result = (TYPE)-(-v_max_fudged * ImPow(-v_min_fudged / -v_max_fudged, (FLOATTYPE)(1.0f - t_with_flip))); + else + result = (TYPE)(v_min_fudged * ImPow(v_max_fudged / v_min_fudged, (FLOATTYPE)t_with_flip)); + } + } + else + { + // Linear slider + if (is_decimal) + { + result = ImLerp(v_min, v_max, t); + } + else + { + // - For integer values we want the clicking position to match the grab box so we round above + // This code is carefully tuned to work with large values (e.g. high ranges of U64) while preserving this property.. + // - Not doing a *1.0 multiply at the end of a range as it tends to be lossy. While absolute aiming at a large s64/u64 + // range is going to be imprecise anyway, with this check we at least make the edge values matches expected limits. 
+ if (t < 1.0) + { + FLOATTYPE v_new_off_f = (SIGNEDTYPE)(v_max - v_min) * t; + result = (TYPE)((SIGNEDTYPE)v_min + (SIGNEDTYPE)(v_new_off_f + (FLOATTYPE)(v_min > v_max ? -0.5 : 0.5))); + } + else + { + result = v_max; + } + } + } + + return result; +} + +// FIXME: Move more of the code into SliderBehavior() +template +bool ImGui::SliderBehaviorT(const ImRect& bb, ImGuiID id, ImGuiDataType data_type, TYPE* v, const TYPE v_min, const TYPE v_max, const char* format, ImGuiSliderFlags flags, ImRect* out_grab_bb) +{ + ImGuiContext& g = *GImGui; + const ImGuiStyle& style = g.Style; + + const ImGuiAxis axis = (flags & ImGuiSliderFlags_Vertical) ? ImGuiAxis_Y : ImGuiAxis_X; + const bool is_decimal = (data_type == ImGuiDataType_Float) || (data_type == ImGuiDataType_Double); + const bool is_logarithmic = (flags & ImGuiSliderFlags_Logarithmic) && is_decimal; + + const float grab_padding = 2.0f; + const float slider_sz = (bb.Max[axis] - bb.Min[axis]) - grab_padding * 2.0f; + float grab_sz = style.GrabMinSize; + SIGNEDTYPE v_range = (v_min < v_max ? v_max - v_min : v_min - v_max); + if (!is_decimal && v_range >= 0) // v_range < 0 may happen on integer overflows + grab_sz = ImMax((float)(slider_sz / (v_range + 1)), style.GrabMinSize); // For integer sliders: if possible have the grab size represent 1 unit + grab_sz = ImMin(grab_sz, slider_sz); + const float slider_usable_sz = slider_sz - grab_sz; + const float slider_usable_pos_min = bb.Min[axis] + grab_padding + grab_sz * 0.5f; + const float slider_usable_pos_max = bb.Max[axis] - grab_padding - grab_sz * 0.5f; + + float logarithmic_zero_epsilon = 0.0f; // Only valid when is_logarithmic is true + float zero_deadzone_halfsize = 0.0f; // Only valid when is_logarithmic is true + if (is_logarithmic) + { + // When using logarithmic sliders, we need to clamp to avoid hitting zero, but our choice of clamp value greatly affects slider precision. We attempt to use the specified precision to estimate a good lower bound. 
+ const int decimal_precision = is_decimal ? ImParseFormatPrecision(format, 3) : 1; + logarithmic_zero_epsilon = ImPow(0.1f, (float)decimal_precision); + zero_deadzone_halfsize = (style.LogSliderDeadzone * 0.5f) / ImMax(slider_usable_sz, 1.0f); + } + + // Process interacting with the slider + bool value_changed = false; + if (g.ActiveId == id) + { + bool set_new_value = false; + float clicked_t = 0.0f; + if (g.ActiveIdSource == ImGuiInputSource_Mouse) + { + if (!g.IO.MouseDown[0]) + { + ClearActiveID(); + } + else + { + const float mouse_abs_pos = g.IO.MousePos[axis]; + clicked_t = (slider_usable_sz > 0.0f) ? ImClamp((mouse_abs_pos - slider_usable_pos_min) / slider_usable_sz, 0.0f, 1.0f) : 0.0f; + if (axis == ImGuiAxis_Y) + clicked_t = 1.0f - clicked_t; + set_new_value = true; + } + } + else if (g.ActiveIdSource == ImGuiInputSource_Nav) + { + if (g.ActiveIdIsJustActivated) + { + g.SliderCurrentAccum = 0.0f; // Reset any stored nav delta upon activation + g.SliderCurrentAccumDirty = false; + } + + const ImVec2 input_delta2 = GetNavInputAmount2d(ImGuiNavDirSourceFlags_Keyboard | ImGuiNavDirSourceFlags_PadDPad, ImGuiInputReadMode_RepeatFast, 0.0f, 0.0f); + float input_delta = (axis == ImGuiAxis_X) ? input_delta2.x : -input_delta2.y; + if (input_delta != 0.0f) + { + const int decimal_precision = is_decimal ? ImParseFormatPrecision(format, 3) : 0; + if (decimal_precision > 0) + { + input_delta /= 100.0f; // Gamepad/keyboard tweak speeds in % of slider bounds + if (IsNavInputDown(ImGuiNavInput_TweakSlow)) + input_delta /= 10.0f; + } + else + { + if ((v_range >= -100.0f && v_range <= 100.0f) || IsNavInputDown(ImGuiNavInput_TweakSlow)) + input_delta = ((input_delta < 0.0f) ? 
-1.0f : +1.0f) / (float)v_range; // Gamepad/keyboard tweak speeds in integer steps + else + input_delta /= 100.0f; + } + if (IsNavInputDown(ImGuiNavInput_TweakFast)) + input_delta *= 10.0f; + + g.SliderCurrentAccum += input_delta; + g.SliderCurrentAccumDirty = true; + } + + float delta = g.SliderCurrentAccum; + if (g.NavActivatePressedId == id && !g.ActiveIdIsJustActivated) + { + ClearActiveID(); + } + else if (g.SliderCurrentAccumDirty) + { + clicked_t = ScaleRatioFromValueT(data_type, *v, v_min, v_max, is_logarithmic, logarithmic_zero_epsilon, zero_deadzone_halfsize); + + if ((clicked_t >= 1.0f && delta > 0.0f) || (clicked_t <= 0.0f && delta < 0.0f)) // This is to avoid applying the saturation when already past the limits + { + set_new_value = false; + g.SliderCurrentAccum = 0.0f; // If pushing up against the limits, don't continue to accumulate + } + else + { + set_new_value = true; + float old_clicked_t = clicked_t; + clicked_t = ImSaturate(clicked_t + delta); + + // Calculate what our "new" clicked_t will be, and thus how far we actually moved the slider, and subtract this from the accumulator + TYPE v_new = ScaleValueFromRatioT(data_type, clicked_t, v_min, v_max, is_logarithmic, logarithmic_zero_epsilon, zero_deadzone_halfsize); + if (!(flags & ImGuiSliderFlags_NoRoundToFormat)) + v_new = RoundScalarWithFormatT(format, data_type, v_new); + float new_clicked_t = ScaleRatioFromValueT(data_type, v_new, v_min, v_max, is_logarithmic, logarithmic_zero_epsilon, zero_deadzone_halfsize); + + if (delta > 0) + g.SliderCurrentAccum -= ImMin(new_clicked_t - old_clicked_t, delta); + else + g.SliderCurrentAccum -= ImMax(new_clicked_t - old_clicked_t, delta); + } + + g.SliderCurrentAccumDirty = false; + } + } + + if (set_new_value) + { + TYPE v_new = ScaleValueFromRatioT(data_type, clicked_t, v_min, v_max, is_logarithmic, logarithmic_zero_epsilon, zero_deadzone_halfsize); + + // Round to user desired precision based on format string + if (!(flags & 
ImGuiSliderFlags_NoRoundToFormat)) + v_new = RoundScalarWithFormatT(format, data_type, v_new); + + // Apply result + if (*v != v_new) + { + *v = v_new; + value_changed = true; + } + } + } + + if (slider_sz < 1.0f) + { + *out_grab_bb = ImRect(bb.Min, bb.Min); + } + else + { + // Output grab position so it can be displayed by the caller + float grab_t = ScaleRatioFromValueT(data_type, *v, v_min, v_max, is_logarithmic, logarithmic_zero_epsilon, zero_deadzone_halfsize); + if (axis == ImGuiAxis_Y) + grab_t = 1.0f - grab_t; + const float grab_pos = ImLerp(slider_usable_pos_min, slider_usable_pos_max, grab_t); + if (axis == ImGuiAxis_X) + *out_grab_bb = ImRect(grab_pos - grab_sz * 0.5f, bb.Min.y + grab_padding, grab_pos + grab_sz * 0.5f, bb.Max.y - grab_padding); + else + *out_grab_bb = ImRect(bb.Min.x + grab_padding, grab_pos - grab_sz * 0.5f, bb.Max.x - grab_padding, grab_pos + grab_sz * 0.5f); + } + + return value_changed; +} + +// For 32-bit and larger types, slider bounds are limited to half the natural type range. +// So e.g. an integer Slider between INT_MAX-10 and INT_MAX will fail, but an integer Slider between INT_MAX/2-10 and INT_MAX/2 will be ok. +// It would be possible to lift that limitation with some work but it doesn't seem to be worth it for sliders. +bool ImGui::SliderBehavior(const ImRect& bb, ImGuiID id, ImGuiDataType data_type, void* p_v, const void* p_min, const void* p_max, const char* format, ImGuiSliderFlags flags, ImRect* out_grab_bb) +{ + // Read imgui.cpp "API BREAKING CHANGES" section for 1.78 if you hit this assert. + IM_ASSERT((flags == 1 || (flags & ImGuiSliderFlags_InvalidMask_) == 0) && "Invalid ImGuiSliderFlags flag! Has the 'float power' argument been mistakenly cast to flags? 
Call function with ImGuiSliderFlags_Logarithmic flags instead."); + + ImGuiContext& g = *GImGui; + if ((g.CurrentWindow->DC.ItemFlags & ImGuiItemFlags_ReadOnly) || (flags & ImGuiSliderFlags_ReadOnly)) + return false; + + switch (data_type) + { + case ImGuiDataType_S8: { ImS32 v32 = (ImS32)*(ImS8*)p_v; bool r = SliderBehaviorT(bb, id, ImGuiDataType_S32, &v32, *(const ImS8*)p_min, *(const ImS8*)p_max, format, flags, out_grab_bb); if (r) *(ImS8*)p_v = (ImS8)v32; return r; } + case ImGuiDataType_U8: { ImU32 v32 = (ImU32)*(ImU8*)p_v; bool r = SliderBehaviorT(bb, id, ImGuiDataType_U32, &v32, *(const ImU8*)p_min, *(const ImU8*)p_max, format, flags, out_grab_bb); if (r) *(ImU8*)p_v = (ImU8)v32; return r; } + case ImGuiDataType_S16: { ImS32 v32 = (ImS32)*(ImS16*)p_v; bool r = SliderBehaviorT(bb, id, ImGuiDataType_S32, &v32, *(const ImS16*)p_min, *(const ImS16*)p_max, format, flags, out_grab_bb); if (r) *(ImS16*)p_v = (ImS16)v32; return r; } + case ImGuiDataType_U16: { ImU32 v32 = (ImU32)*(ImU16*)p_v; bool r = SliderBehaviorT(bb, id, ImGuiDataType_U32, &v32, *(const ImU16*)p_min, *(const ImU16*)p_max, format, flags, out_grab_bb); if (r) *(ImU16*)p_v = (ImU16)v32; return r; } + case ImGuiDataType_S32: + IM_ASSERT(*(const ImS32*)p_min >= IM_S32_MIN / 2 && *(const ImS32*)p_max <= IM_S32_MAX / 2); + return SliderBehaviorT(bb, id, data_type, (ImS32*)p_v, *(const ImS32*)p_min, *(const ImS32*)p_max, format, flags, out_grab_bb); + case ImGuiDataType_U32: + IM_ASSERT(*(const ImU32*)p_max <= IM_U32_MAX / 2); + return SliderBehaviorT(bb, id, data_type, (ImU32*)p_v, *(const ImU32*)p_min, *(const ImU32*)p_max, format, flags, out_grab_bb); + case ImGuiDataType_S64: + IM_ASSERT(*(const ImS64*)p_min >= IM_S64_MIN / 2 && *(const ImS64*)p_max <= IM_S64_MAX / 2); + return SliderBehaviorT(bb, id, data_type, (ImS64*)p_v, *(const ImS64*)p_min, *(const ImS64*)p_max, format, flags, out_grab_bb); + case ImGuiDataType_U64: + IM_ASSERT(*(const ImU64*)p_max <= IM_U64_MAX / 2); + return 
SliderBehaviorT(bb, id, data_type, (ImU64*)p_v, *(const ImU64*)p_min, *(const ImU64*)p_max, format, flags, out_grab_bb); + case ImGuiDataType_Float: + IM_ASSERT(*(const float*)p_min >= -FLT_MAX / 2.0f && *(const float*)p_max <= FLT_MAX / 2.0f); + return SliderBehaviorT(bb, id, data_type, (float*)p_v, *(const float*)p_min, *(const float*)p_max, format, flags, out_grab_bb); + case ImGuiDataType_Double: + IM_ASSERT(*(const double*)p_min >= -DBL_MAX / 2.0f && *(const double*)p_max <= DBL_MAX / 2.0f); + return SliderBehaviorT(bb, id, data_type, (double*)p_v, *(const double*)p_min, *(const double*)p_max, format, flags, out_grab_bb); + case ImGuiDataType_COUNT: break; + } + IM_ASSERT(0); + return false; +} + +// Note: p_data, p_min and p_max are _pointers_ to a memory address holding the data. For a slider, they are all required. +// Read code of e.g. SliderFloat(), SliderInt() etc. or examples in 'Demo->Widgets->Data Types' to understand how to use this function directly. +bool ImGui::SliderScalar(const char* label, ImGuiDataType data_type, void* p_data, const void* p_min, const void* p_max, const char* format, ImGuiSliderFlags flags) +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return false; + + ImGuiContext& g = *GImGui; + const ImGuiStyle& style = g.Style; + const ImGuiID id = window->GetID(label); + const float w = CalcItemWidth(); + + const ImVec2 label_size = CalcTextSize(label, NULL, true); + const ImRect frame_bb(window->DC.CursorPos, window->DC.CursorPos + ImVec2(w, label_size.y + style.FramePadding.y * 2.0f)); + const ImRect total_bb(frame_bb.Min, frame_bb.Max + ImVec2(label_size.x > 0.0f ? 
style.ItemInnerSpacing.x + label_size.x : 0.0f, 0.0f)); + + ItemSize(total_bb, style.FramePadding.y); + if (!ItemAdd(total_bb, id, &frame_bb)) + return false; + + // Default format string when passing NULL + if (format == NULL) + format = DataTypeGetInfo(data_type)->PrintFmt; + else if (data_type == ImGuiDataType_S32 && strcmp(format, "%d") != 0) // (FIXME-LEGACY: Patch old "%.0f" format string to use "%d", read function more details.) + format = PatchFormatStringFloatToInt(format); + + // Tabbing or CTRL-clicking on Slider turns it into an input box + const bool hovered = ItemHoverable(frame_bb, id); + const bool temp_input_allowed = (flags & ImGuiSliderFlags_NoInput) == 0; + bool temp_input_is_active = temp_input_allowed && TempInputIsActive(id); + if (!temp_input_is_active) + { + const bool focus_requested = temp_input_allowed && FocusableItemRegister(window, id); + const bool clicked = (hovered && g.IO.MouseClicked[0]); + if (focus_requested || clicked || g.NavActivateId == id || g.NavInputId == id) + { + SetActiveID(id, window); + SetFocusID(id, window); + FocusWindow(window); + g.ActiveIdUsingNavDirMask |= (1 << ImGuiDir_Left) | (1 << ImGuiDir_Right); + if (temp_input_allowed && (focus_requested || (clicked && g.IO.KeyCtrl) || g.NavInputId == id)) + { + temp_input_is_active = true; + FocusableItemUnregister(window); + } + } + } + + if (temp_input_is_active) + { + // Only clamp CTRL+Click input when ImGuiSliderFlags_AlwaysClamp is set + const bool is_clamp_input = (flags & ImGuiSliderFlags_AlwaysClamp) != 0; + return TempInputScalar(frame_bb, id, label, data_type, p_data, format, is_clamp_input ? p_min : NULL, is_clamp_input ? p_max : NULL); + } + + // Draw frame + const ImU32 frame_col = GetColorU32(g.ActiveId == id ? ImGuiCol_FrameBgActive : g.HoveredId == id ? 
ImGuiCol_FrameBgHovered : ImGuiCol_FrameBg);
    RenderNavHighlight(frame_bb, id);
    RenderFrame(frame_bb.Min, frame_bb.Max, frame_col, true, g.Style.FrameRounding);

    // Slider behavior
    ImRect grab_bb;
    const bool value_changed = SliderBehavior(frame_bb, id, data_type, p_data, p_min, p_max, format, flags, &grab_bb);
    if (value_changed)
        MarkItemEdited(id);

    // Render grab
    // (skipped when SliderBehavior() reported a grab rectangle with no horizontal extent)
    if (grab_bb.Max.x > grab_bb.Min.x)
        window->DrawList->AddRectFilled(grab_bb.Min, grab_bb.Max, GetColorU32(g.ActiveId == id ? ImGuiCol_SliderGrabActive : ImGuiCol_SliderGrab), style.GrabRounding);

    // Display value using user-provided display format so user can add prefix/suffix/decorations to the value.
    char value_buf[64];
    const char* value_buf_end = value_buf + DataTypeFormatString(value_buf, IM_ARRAYSIZE(value_buf), data_type, p_data, format);
    RenderTextClipped(frame_bb.Min, frame_bb.Max, value_buf, value_buf_end, NULL, ImVec2(0.5f, 0.5f));

    // The label is drawn to the right of the frame, only when it has visible width.
    if (label_size.x > 0.0f)
        RenderText(ImVec2(frame_bb.Max.x + style.ItemInnerSpacing.x, frame_bb.Min.y + style.FramePadding.y), label);

    IMGUI_TEST_ENGINE_ITEM_INFO(id, label, window->DC.ItemFlags);
    return value_changed;
}

// Add multiple sliders on 1 line for compact edition of multiple components
// - 'v' points to 'components' consecutive values of 'data_type'; the pointer is advanced by the
//   type size (read from GDataTypeInfo) after each component.
// - 'v_min', 'v_max', 'format' and 'flags' are forwarded unchanged to every per-component SliderScalar().
// - Returns true when at least one component reported a change.
bool ImGui::SliderScalarN(const char* label, ImGuiDataType data_type, void* v, int components, const void* v_min, const void* v_max, const char* format, ImGuiSliderFlags flags)
{
    ImGuiWindow* window = GetCurrentWindow();
    if (window->SkipItems)
        return false;

    ImGuiContext& g = *GImGui;
    bool value_changed = false;
    BeginGroup();
    PushID(label);
    PushMultiItemsWidths(components, CalcItemWidth());
    size_t type_size = GDataTypeInfo[data_type].Size;
    for (int i = 0; i < components; i++)
    {
        // Each component is an unlabeled slider identified by its index inside the PushID(label) scope.
        PushID(i);
        if (i > 0)
            SameLine(0, g.Style.ItemInnerSpacing.x);
        value_changed |= SliderScalar("", data_type, v, v_min, v_max, format, flags);
        PopID();
        PopItemWidth();
        v = (void*)((char*)v + type_size);
    }
    PopID();

    // Draw the shared label once, after the last component, if it has any rendered text.
    const char* label_end = FindRenderedTextEnd(label);
    if (label != label_end)
    {
        SameLine(0, g.Style.ItemInnerSpacing.x);
        TextEx(label, label_end);
    }

    EndGroup();
    return value_changed;
}

// Single float slider: forwards to SliderScalar() with ImGuiDataType_Float, passing the bounds by address.
bool ImGui::SliderFloat(const char* label, float* v, float v_min, float v_max, const char* format, ImGuiSliderFlags flags)
{
    return SliderScalar(label, ImGuiDataType_Float, v, &v_min, &v_max, format, flags);
}

// 2-component float slider: forwards to SliderScalarN(); the same bounds apply to every component.
bool ImGui::SliderFloat2(const char* label, float v[2], float v_min, float v_max, const char* format, ImGuiSliderFlags flags)
{
    return SliderScalarN(label, ImGuiDataType_Float, v, 2, &v_min, &v_max, format, flags);
}

// 3-component float slider: forwards to SliderScalarN(); the same bounds apply to every component.
bool ImGui::SliderFloat3(const char* label, float v[3], float v_min, float v_max, const char* format, ImGuiSliderFlags flags)
{
    return SliderScalarN(label, ImGuiDataType_Float, v, 3, &v_min, &v_max, format, flags);
}

// 4-component float slider: forwards to SliderScalarN(); the same bounds apply to every component.
bool ImGui::SliderFloat4(const char* label, float v[4], float v_min, float v_max, const char* format, ImGuiSliderFlags flags)
{
    return SliderScalarN(label, ImGuiDataType_Float, v, 4, &v_min, &v_max, format, flags);
}

// Slider editing an angle stored in radians but displayed and bounded in degrees.
// - A NULL 'format' falls back to "%.0f deg".
// - '*v_rad' is converted to degrees, edited through SliderFloat(), then converted back and written
//   on every call, whether or not the slider reported a change.
bool ImGui::SliderAngle(const char* label, float* v_rad, float v_degrees_min, float v_degrees_max, const char* format, ImGuiSliderFlags flags)
{
    if (format == NULL)
        format = "%.0f deg";
    float v_deg = (*v_rad) * 360.0f / (2 * IM_PI);
    bool value_changed = SliderFloat(label, &v_deg, v_degrees_min, v_degrees_max, format, flags);
    *v_rad = v_deg * (2 * IM_PI) / 360.0f;
    return value_changed;
}

// Single int slider: forwards to SliderScalar() with ImGuiDataType_S32, passing the bounds by address.
bool ImGui::SliderInt(const char* label, int* v, int v_min, int v_max, const char* format, ImGuiSliderFlags flags)
{
    return SliderScalar(label, ImGuiDataType_S32, v, &v_min, &v_max, format, flags);
}

// 2-component int slider: forwards to SliderScalarN(); the same bounds apply to every component.
bool ImGui::SliderInt2(const char* label, int v[2], int v_min, int v_max, const char* format, ImGuiSliderFlags flags)
{
    return SliderScalarN(label, ImGuiDataType_S32, v, 2, &v_min, &v_max, format, flags);
}

// 3-component int slider: forwards to SliderScalarN(); the same bounds apply to every component.
bool ImGui::SliderInt3(const char* label, int v[3], int v_min, int v_max, const char* format, ImGuiSliderFlags flags)
{
    return SliderScalarN(label, ImGuiDataType_S32, v, 3, &v_min, &v_max, format, flags);
}

// 4-component int slider: forwards to SliderScalarN(); the same bounds apply to every component.
bool ImGui::SliderInt4(const char* label, int v[4], int v_min, int v_max, const char* format, ImGuiSliderFlags flags)
{
    return SliderScalarN(label, ImGuiDataType_S32, v, 4, &v_min, &v_max, format, flags);
}

// Vertical slider occupying a caller-provided 'size'.
// - The frame spans 'size' from the current cursor position; the label, when visible, is drawn to its right.
// - A NULL 'format' falls back to the data type's default print format.
// - SliderBehavior() is invoked with ImGuiSliderFlags_Vertical added to the caller's flags.
// - Returns true when the value was modified this frame.
bool ImGui::VSliderScalar(const char* label, const ImVec2& size, ImGuiDataType data_type, void* p_data, const void* p_min, const void* p_max, const char* format, ImGuiSliderFlags flags)
{
    ImGuiWindow* window = GetCurrentWindow();
    if (window->SkipItems)
        return false;

    ImGuiContext& g = *GImGui;
    const ImGuiStyle& style = g.Style;
    const ImGuiID id = window->GetID(label);

    // frame_bb is the interactive frame; bb additionally covers the label for layout purposes.
    const ImVec2 label_size = CalcTextSize(label, NULL, true);
    const ImRect frame_bb(window->DC.CursorPos, window->DC.CursorPos + size);
    const ImRect bb(frame_bb.Min, frame_bb.Max + ImVec2(label_size.x > 0.0f ? style.ItemInnerSpacing.x + label_size.x : 0.0f, 0.0f));

    ItemSize(bb, style.FramePadding.y);
    if (!ItemAdd(frame_bb, id))
        return false;

    // Default format string when passing NULL
    if (format == NULL)
        format = DataTypeGetInfo(data_type)->PrintFmt;
    else if (data_type == ImGuiDataType_S32 && strcmp(format, "%d") != 0) // (FIXME-LEGACY: Patch old "%.0f" format string to use "%d", read function more details.)
        format = PatchFormatStringFloatToInt(format);

    // Activate on left click while hovered, or on navigation activate/input requests targeting this id.
    const bool hovered = ItemHoverable(frame_bb, id);
    if ((hovered && g.IO.MouseClicked[0]) || g.NavActivateId == id || g.NavInputId == id)
    {
        SetActiveID(id, window);
        SetFocusID(id, window);
        FocusWindow(window);
        g.ActiveIdUsingNavDirMask |= (1 << ImGuiDir_Up) | (1 << ImGuiDir_Down);
    }

    // Draw frame
    const ImU32 frame_col = GetColorU32(g.ActiveId == id ? ImGuiCol_FrameBgActive : g.HoveredId == id ? ImGuiCol_FrameBgHovered : ImGuiCol_FrameBg);
    RenderNavHighlight(frame_bb, id);
    RenderFrame(frame_bb.Min, frame_bb.Max, frame_col, true, g.Style.FrameRounding);

    // Slider behavior
    ImRect grab_bb;
    const bool value_changed = SliderBehavior(frame_bb, id, data_type, p_data, p_min, p_max, format, flags | ImGuiSliderFlags_Vertical, &grab_bb);
    if (value_changed)
        MarkItemEdited(id);

    // Render grab
    // (skipped when SliderBehavior() reported a grab rectangle with no vertical extent)
    if (grab_bb.Max.y > grab_bb.Min.y)
        window->DrawList->AddRectFilled(grab_bb.Min, grab_bb.Max, GetColorU32(g.ActiveId == id ? ImGuiCol_SliderGrabActive : ImGuiCol_SliderGrab), style.GrabRounding);

    // Display value using user-provided display format so user can add prefix/suffix/decorations to the value.
    // For the vertical slider we allow centered text to overlap the frame padding
    char value_buf[64];
    const char* value_buf_end = value_buf + DataTypeFormatString(value_buf, IM_ARRAYSIZE(value_buf), data_type, p_data, format);
    RenderTextClipped(ImVec2(frame_bb.Min.x, frame_bb.Min.y + style.FramePadding.y), frame_bb.Max, value_buf, value_buf_end, NULL, ImVec2(0.5f, 0.0f));
    if (label_size.x > 0.0f)
        RenderText(ImVec2(frame_bb.Max.x + style.ItemInnerSpacing.x, frame_bb.Min.y + style.FramePadding.y), label);

    return value_changed;
}

// Vertical float slider: forwards to VSliderScalar() with ImGuiDataType_Float.
bool ImGui::VSliderFloat(const char* label, const ImVec2& size, float* v, float v_min, float v_max, const char* format, ImGuiSliderFlags flags)
{
    return VSliderScalar(label, size, ImGuiDataType_Float, v, &v_min, &v_max, format, flags);
}

// Vertical int slider: forwards to VSliderScalar() with ImGuiDataType_S32.
bool ImGui::VSliderInt(const char* label, const ImVec2& size, int* v, int v_min, int v_max, const char* format, ImGuiSliderFlags flags)
{
    return VSliderScalar(label, size, ImGuiDataType_S32, v, &v_min, &v_max, format, flags);
}

#ifndef IMGUI_DISABLE_OBSOLETE_FUNCTIONS

// Obsolete versions with power parameter. See https://github.com/ocornut/imgui/issues/3361 for details.
+bool ImGui::SliderScalar(const char* label, ImGuiDataType data_type, void* p_data, const void* p_min, const void* p_max, const char* format, float power) +{ + ImGuiSliderFlags slider_flags = ImGuiSliderFlags_None; + if (power != 1.0f) + { + IM_ASSERT(power == 1.0f && "Call function with ImGuiSliderFlags_Logarithmic flags instead of using the old 'float power' function!"); + slider_flags |= ImGuiSliderFlags_Logarithmic; // Fallback for non-asserting paths + } + return SliderScalar(label, data_type, p_data, p_min, p_max, format, slider_flags); +} + +bool ImGui::SliderScalarN(const char* label, ImGuiDataType data_type, void* v, int components, const void* v_min, const void* v_max, const char* format, float power) +{ + ImGuiSliderFlags slider_flags = ImGuiSliderFlags_None; + if (power != 1.0f) + { + IM_ASSERT(power == 1.0f && "Call function with ImGuiSliderFlags_Logarithmic flags instead of using the old 'float power' function!"); + slider_flags |= ImGuiSliderFlags_Logarithmic; // Fallback for non-asserting paths + } + return SliderScalarN(label, data_type, v, components, v_min, v_max, format, slider_flags); +} + +#endif // IMGUI_DISABLE_OBSOLETE_FUNCTIONS + +//------------------------------------------------------------------------- +// [SECTION] Widgets: InputScalar, InputFloat, InputInt, etc. 
+//------------------------------------------------------------------------- +// - ImParseFormatFindStart() [Internal] +// - ImParseFormatFindEnd() [Internal] +// - ImParseFormatTrimDecorations() [Internal] +// - ImParseFormatPrecision() [Internal] +// - TempInputTextScalar() [Internal] +// - InputScalar() +// - InputScalarN() +// - InputFloat() +// - InputFloat2() +// - InputFloat3() +// - InputFloat4() +// - InputInt() +// - InputInt2() +// - InputInt3() +// - InputInt4() +// - InputDouble() +//------------------------------------------------------------------------- + +// We don't use strchr() because our strings are usually very short and often start with '%' +const char* ImParseFormatFindStart(const char* fmt) +{ + while (char c = fmt[0]) + { + if (c == '%' && fmt[1] != '%') + return fmt; + else if (c == '%') + fmt++; + fmt++; + } + return fmt; +} + +const char* ImParseFormatFindEnd(const char* fmt) +{ + // Printf/scanf types modifiers: I/L/h/j/l/t/w/z. Other uppercase letters qualify as types aka end of the format. 
+ if (fmt[0] != '%') + return fmt; + const unsigned int ignored_uppercase_mask = (1 << ('I'-'A')) | (1 << ('L'-'A')); + const unsigned int ignored_lowercase_mask = (1 << ('h'-'a')) | (1 << ('j'-'a')) | (1 << ('l'-'a')) | (1 << ('t'-'a')) | (1 << ('w'-'a')) | (1 << ('z'-'a')); + for (char c; (c = *fmt) != 0; fmt++) + { + if (c >= 'A' && c <= 'Z' && ((1 << (c - 'A')) & ignored_uppercase_mask) == 0) + return fmt + 1; + if (c >= 'a' && c <= 'z' && ((1 << (c - 'a')) & ignored_lowercase_mask) == 0) + return fmt + 1; + } + return fmt; +} + +// Extract the format out of a format string with leading or trailing decorations +// fmt = "blah blah" -> return fmt +// fmt = "%.3f" -> return fmt +// fmt = "hello %.3f" -> return fmt + 6 +// fmt = "%.3f hello" -> return buf written with "%.3f" +const char* ImParseFormatTrimDecorations(const char* fmt, char* buf, size_t buf_size) +{ + const char* fmt_start = ImParseFormatFindStart(fmt); + if (fmt_start[0] != '%') + return fmt; + const char* fmt_end = ImParseFormatFindEnd(fmt_start); + if (fmt_end[0] == 0) // If we only have leading decoration, we don't need to copy the data. + return fmt_start; + ImStrncpy(buf, fmt_start, ImMin((size_t)(fmt_end - fmt_start) + 1, buf_size)); + return buf; +} + +// Parse display precision back from the display format string +// FIXME: This is still used by some navigation code path to infer a minimum tweak step, but we should aim to rework widgets so it isn't needed. 
+int ImParseFormatPrecision(const char* fmt, int default_precision) +{ + fmt = ImParseFormatFindStart(fmt); + if (fmt[0] != '%') + return default_precision; + fmt++; + while (*fmt >= '0' && *fmt <= '9') + fmt++; + int precision = INT_MAX; + if (*fmt == '.') + { + fmt = ImAtoi(fmt + 1, &precision); + if (precision < 0 || precision > 99) + precision = default_precision; + } + if (*fmt == 'e' || *fmt == 'E') // Maximum precision with scientific notation + precision = -1; + if ((*fmt == 'g' || *fmt == 'G') && precision == INT_MAX) + precision = -1; + return (precision == INT_MAX) ? default_precision : precision; +} + +// Create text input in place of another active widget (e.g. used when doing a CTRL+Click on drag/slider widgets) +// FIXME: Facilitate using this in variety of other situations. +bool ImGui::TempInputText(const ImRect& bb, ImGuiID id, const char* label, char* buf, int buf_size, ImGuiInputTextFlags flags) +{ + // On the first frame, g.TempInputTextId == 0, then on subsequent frames it becomes == id. + // We clear ActiveID on the first frame to allow the InputText() taking it back. + ImGuiContext& g = *GImGui; + const bool init = (g.TempInputId != id); + if (init) + ClearActiveID(); + + g.CurrentWindow->DC.CursorPos = bb.Min; + bool value_changed = InputTextEx(label, NULL, buf, buf_size, bb.GetSize(), flags); + if (init) + { + // First frame we started displaying the InputText widget, we expect it to take the active id. + IM_ASSERT(g.ActiveId == id); + g.TempInputId = g.ActiveId; + } + return value_changed; +} + +// Note that Drag/Slider functions are only forwarding the min/max values clamping values if the ImGuiSliderFlags_AlwaysClamp flag is set! +// This is intended: this way we allow CTRL+Click manual input to set a value out of bounds, for maximum flexibility. +// However this may not be ideal for all uses, as some user code may break on out of bound values. 
+bool ImGui::TempInputScalar(const ImRect& bb, ImGuiID id, const char* label, ImGuiDataType data_type, void* p_data, const char* format, const void* p_clamp_min, const void* p_clamp_max) +{ + ImGuiContext& g = *GImGui; + + char fmt_buf[32]; + char data_buf[32]; + format = ImParseFormatTrimDecorations(format, fmt_buf, IM_ARRAYSIZE(fmt_buf)); + DataTypeFormatString(data_buf, IM_ARRAYSIZE(data_buf), data_type, p_data, format); + ImStrTrimBlanks(data_buf); + + ImGuiInputTextFlags flags = ImGuiInputTextFlags_AutoSelectAll | ImGuiInputTextFlags_NoMarkEdited; + flags |= ((data_type == ImGuiDataType_Float || data_type == ImGuiDataType_Double) ? ImGuiInputTextFlags_CharsScientific : ImGuiInputTextFlags_CharsDecimal); + bool value_changed = false; + if (TempInputText(bb, id, label, data_buf, IM_ARRAYSIZE(data_buf), flags)) + { + // Backup old value + size_t data_type_size = DataTypeGetInfo(data_type)->Size; + ImGuiDataTypeTempStorage data_backup; + memcpy(&data_backup, p_data, data_type_size); + + // Apply new value (or operations) then clamp + DataTypeApplyOpFromText(data_buf, g.InputTextState.InitialTextA.Data, data_type, p_data, NULL); + if (p_clamp_min || p_clamp_max) + { + if (DataTypeCompare(data_type, p_clamp_min, p_clamp_max) > 0) + ImSwap(p_clamp_min, p_clamp_max); + DataTypeClamp(data_type, p_data, p_clamp_min, p_clamp_max); + } + + // Only mark as edited if new value is different + value_changed = memcmp(&data_backup, p_data, data_type_size) != 0; + if (value_changed) + MarkItemEdited(id); + } + return value_changed; +} + +// Note: p_data, p_step, p_step_fast are _pointers_ to a memory address holding the data. For an Input widget, p_step and p_step_fast are optional. +// Read code of e.g. InputFloat(), InputInt() etc. or examples in 'Demo->Widgets->Data Types' to understand how to use this function directly. 
// Text input editing a single scalar of type 'data_type' stored at 'p_data'.
// - A NULL 'format' falls back to the data type's default print format.
// - When 'p_step' is non-NULL, "-" and "+" buttons are added after the text field; holding Ctrl while
//   pressing them applies 'p_step_fast' instead (when it is non-NULL).
// - Returns true when the text edit applied a new value or a step button was pressed.
bool ImGui::InputScalar(const char* label, ImGuiDataType data_type, void* p_data, const void* p_step, const void* p_step_fast, const char* format, ImGuiInputTextFlags flags)
{
    ImGuiWindow* window = GetCurrentWindow();
    if (window->SkipItems)
        return false;

    ImGuiContext& g = *GImGui;
    ImGuiStyle& style = g.Style;

    if (format == NULL)
        format = DataTypeGetInfo(data_type)->PrintFmt;

    // Print the current value into a local buffer which the text field edits.
    char buf[64];
    DataTypeFormatString(buf, IM_ARRAYSIZE(buf), data_type, p_data, format);

    bool value_changed = false;
    // Default to the decimal character filter unless the caller asked for hexadecimal or scientific input.
    if ((flags & (ImGuiInputTextFlags_CharsHexadecimal | ImGuiInputTextFlags_CharsScientific)) == 0)
        flags |= ImGuiInputTextFlags_CharsDecimal;
    flags |= ImGuiInputTextFlags_AutoSelectAll;
    flags |= ImGuiInputTextFlags_NoMarkEdited; // We call MarkItemEdited() ourselves by comparing the actual data rather than the string.

    if (p_step != NULL)
    {
        const float button_size = GetFrameHeight();

        BeginGroup(); // The only purpose of the group here is to allow the caller to query item data e.g. IsItemActive()
        PushID(label);
        // Reserve room for the two step buttons and their spacing; the text field never shrinks below 1 pixel.
        SetNextItemWidth(ImMax(1.0f, CalcItemWidth() - (button_size + style.ItemInnerSpacing.x) * 2));
        if (InputText("", buf, IM_ARRAYSIZE(buf), flags)) // PushId(label) + "" gives us the expected ID from outside point of view
            value_changed = DataTypeApplyOpFromText(buf, g.InputTextState.InitialTextA.Data, data_type, p_data, format);

        // Step buttons
        // (the horizontal frame padding is temporarily set to the vertical one, and restored below)
        const ImVec2 backup_frame_padding = style.FramePadding;
        style.FramePadding.x = style.FramePadding.y;
        ImGuiButtonFlags button_flags = ImGuiButtonFlags_Repeat | ImGuiButtonFlags_DontClosePopups;
        if (flags & ImGuiInputTextFlags_ReadOnly)
            button_flags |= ImGuiButtonFlags_Disabled;
        SameLine(0, style.ItemInnerSpacing.x);
        if (ButtonEx("-", ImVec2(button_size, button_size), button_flags))
        {
            DataTypeApplyOp(data_type, '-', p_data, p_data, g.IO.KeyCtrl && p_step_fast ? p_step_fast : p_step);
            value_changed = true;
        }
        SameLine(0, style.ItemInnerSpacing.x);
        if (ButtonEx("+", ImVec2(button_size, button_size), button_flags))
        {
            DataTypeApplyOp(data_type, '+', p_data, p_data, g.IO.KeyCtrl && p_step_fast ? p_step_fast : p_step);
            value_changed = true;
        }

        // Draw the label after the buttons, if it has any rendered text.
        const char* label_end = FindRenderedTextEnd(label);
        if (label != label_end)
        {
            SameLine(0, style.ItemInnerSpacing.x);
            TextEx(label, label_end);
        }
        style.FramePadding = backup_frame_padding;

        PopID();
        EndGroup();
    }
    else
    {
        // No step buttons: a plain labeled text field.
        if (InputText(label, buf, IM_ARRAYSIZE(buf), flags))
            value_changed = DataTypeApplyOpFromText(buf, g.InputTextState.InitialTextA.Data, data_type, p_data, format);
    }
    if (value_changed)
        MarkItemEdited(window->DC.LastItemId);

    return value_changed;
}

// Multi-component variant of InputScalar(): lays out 'components' unlabeled inputs on one line, then the label.
// - 'p_data' points to 'components' consecutive values; the pointer is advanced by the type size
//   (read from GDataTypeInfo) after each component.
// - Returns true when at least one component reported a change.
bool ImGui::InputScalarN(const char* label, ImGuiDataType data_type, void* p_data, int components, const void* p_step, const void* p_step_fast, const char* format, ImGuiInputTextFlags flags)
{
    ImGuiWindow* window = GetCurrentWindow();
    if (window->SkipItems)
        return false;

    ImGuiContext& g = *GImGui;
    bool value_changed = false;
    BeginGroup();
    PushID(label);
    PushMultiItemsWidths(components, CalcItemWidth());
    size_t type_size = GDataTypeInfo[data_type].Size;
    for (int i = 0; i < components; i++)
    {
        // Each component is identified by its index inside the PushID(label) scope.
        PushID(i);
        if (i > 0)
            SameLine(0, g.Style.ItemInnerSpacing.x);
        value_changed |= InputScalar("", data_type, p_data, p_step, p_step_fast, format, flags);
        PopID();
        PopItemWidth();
        p_data = (void*)((char*)p_data + type_size);
    }
    PopID();

    const char* label_end = FindRenderedTextEnd(label);
    if (label != label_end)
    {
        SameLine(0.0f, g.Style.ItemInnerSpacing.x);
        TextEx(label, label_end);
    }

    EndGroup();
    return value_changed;
}

// Float input. The scientific character filter is always enabled.
// A 'step' / 'step_fast' that is not strictly positive is forwarded as NULL (i.e. "not provided").
bool ImGui::InputFloat(const char* label, float* v, float step, float step_fast, const char* format, ImGuiInputTextFlags flags)
{
    flags |= ImGuiInputTextFlags_CharsScientific;
    return InputScalar(label, ImGuiDataType_Float, (void*)v, (void*)(step > 0.0f ? &step : NULL), (void*)(step_fast > 0.0f ? &step_fast : NULL), format, flags);
}

// 2-component float input, without step buttons.
bool ImGui::InputFloat2(const char* label, float v[2], const char* format, ImGuiInputTextFlags flags)
{
    return InputScalarN(label, ImGuiDataType_Float, v, 2, NULL, NULL, format, flags);
}

// 3-component float input, without step buttons.
bool ImGui::InputFloat3(const char* label, float v[3], const char* format, ImGuiInputTextFlags flags)
{
    return InputScalarN(label, ImGuiDataType_Float, v, 3, NULL, NULL, format, flags);
}

// 4-component float input, without step buttons.
bool ImGui::InputFloat4(const char* label, float v[4], const char* format, ImGuiInputTextFlags flags)
{
    return InputScalarN(label, ImGuiDataType_Float, v, 4, NULL, NULL, format, flags);
}

// Int input. The display format is "%08X" when ImGuiInputTextFlags_CharsHexadecimal is set, "%d" otherwise.
// A 'step' / 'step_fast' that is not strictly positive is forwarded as NULL (i.e. "not provided").
bool ImGui::InputInt(const char* label, int* v, int step, int step_fast, ImGuiInputTextFlags flags)
{
    // Hexadecimal input provided as a convenience but the flag name is awkward. Typically you'd use InputText() to parse your own data, if you want to handle prefixes.
    const char* format = (flags & ImGuiInputTextFlags_CharsHexadecimal) ? "%08X" : "%d";
    return InputScalar(label, ImGuiDataType_S32, (void*)v, (void*)(step > 0 ? &step : NULL), (void*)(step_fast > 0 ? &step_fast : NULL), format, flags);
}

// 2-component int input using the "%d" format, without step buttons.
bool ImGui::InputInt2(const char* label, int v[2], ImGuiInputTextFlags flags)
{
    return InputScalarN(label, ImGuiDataType_S32, v, 2, NULL, NULL, "%d", flags);
}

// 3-component int input using the "%d" format, without step buttons.
bool ImGui::InputInt3(const char* label, int v[3], ImGuiInputTextFlags flags)
{
    return InputScalarN(label, ImGuiDataType_S32, v, 3, NULL, NULL, "%d", flags);
}

// 4-component int input using the "%d" format, without step buttons.
bool ImGui::InputInt4(const char* label, int v[4], ImGuiInputTextFlags flags)
{
    return InputScalarN(label, ImGuiDataType_S32, v, 4, NULL, NULL, "%d", flags);
}

// Double input. The scientific character filter is always enabled.
// A 'step' / 'step_fast' that is not strictly positive is forwarded as NULL (i.e. "not provided").
bool ImGui::InputDouble(const char* label, double* v, double step, double step_fast, const char* format, ImGuiInputTextFlags flags)
{
    flags |= ImGuiInputTextFlags_CharsScientific;
    return InputScalar(label, ImGuiDataType_Double, (void*)v, (void*)(step > 0.0 ? &step : NULL), (void*)(step_fast > 0.0 ? &step_fast : NULL), format, flags);
}

//-------------------------------------------------------------------------
// [SECTION] Widgets: InputText, InputTextMultiline, InputTextWithHint
//-------------------------------------------------------------------------
// - InputText()
// - InputTextWithHint()
// - InputTextMultiline()
// - InputTextEx() [Internal]
//-------------------------------------------------------------------------

// Single-line text input. Forwards to InputTextEx() with no hint and a zero size.
bool ImGui::InputText(const char* label, char* buf, size_t buf_size, ImGuiInputTextFlags flags, ImGuiInputTextCallback callback, void* user_data)
{
    IM_ASSERT(!(flags & ImGuiInputTextFlags_Multiline)); // call InputTextMultiline()
    return InputTextEx(label, NULL, buf, (int)buf_size, ImVec2(0, 0), flags, callback, user_data);
}

// Multi-line text input. Forwards to InputTextEx() with ImGuiInputTextFlags_Multiline added and the caller's size.
bool ImGui::InputTextMultiline(const char* label, char* buf, size_t buf_size, const ImVec2& size, ImGuiInputTextFlags flags, ImGuiInputTextCallback callback, void* user_data)
{
    return InputTextEx(label, NULL, buf, (int)buf_size, size, flags | ImGuiInputTextFlags_Multiline, callback, user_data);
}

// Single-line text input which forwards a 'hint' string to InputTextEx().
bool ImGui::InputTextWithHint(const char* label, const char* hint, char* buf, size_t buf_size, ImGuiInputTextFlags flags, ImGuiInputTextCallback callback, void* user_data)
{
    IM_ASSERT(!(flags & ImGuiInputTextFlags_Multiline)); // call InputTextMultiline()
    return InputTextEx(label, hint, buf, (int)buf_size, ImVec2(0, 0), flags, callback, user_data);
}

// Walk a zero-terminated string, counting lines and reporting where the text ends.
// - '*out_text_end' receives a pointer to the zero-terminator.
// - NOTE(review): after the 's--' below, 's' addresses the zero-terminator, so the "not '\n' / not '\r'" test
//   always passes and the result is the number of '\n' plus one. Confirm whether that is intended.
static int InputTextCalcTextLenAndLineCount(const char* text_begin, const char** out_text_end)
{
    int line_count = 0;
    const char* s = text_begin;
    while (char c = *s++) // We are only matching for \n so we can ignore UTF-8 decoding
        if (c == '\n')
            line_count++;
    s--;
    if (s[0] != '\n' && s[0] != '\r')
        line_count++;
    *out_text_end = s;
    return line_count;
}

// Measure wide-character text in [text_begin, text_end) using the current font and font size.
// - Returns the size of the measured text: x is the widest line, y the accumulated line heights.
// - 'remaining' (optional) receives the position where measuring stopped.
// - 'out_offset' (optional) receives the width of the last line and the y position below it.
// - 'stop_on_new_line' ends the measurement right after the first '\n'.
// - '\r' characters contribute no width.
static ImVec2 InputTextCalcTextSizeW(const ImWchar* text_begin, const ImWchar* text_end, const ImWchar** remaining, ImVec2* out_offset, bool stop_on_new_line)
{
    ImGuiContext& g = *GImGui;
    ImFont* font = g.Font;
    const float line_height = g.FontSize;
    const float scale = line_height / font->FontSize;

    ImVec2 text_size = ImVec2(0, 0);
    float line_width = 0.0f;

    const ImWchar* s = text_begin;
    while (s < text_end)
    {
        unsigned int c = (unsigned int)(*s++);
        if (c == '\n')
        {
            // End of a line: fold its width into the total and start a new one.
            text_size.x = ImMax(text_size.x, line_width);
            text_size.y += line_height;
            line_width = 0.0f;
            if (stop_on_new_line)
                break;
            continue;
        }
        if (c == '\r')
            continue;

        const float char_width = font->GetCharAdvance((ImWchar)c) * scale;
        line_width += char_width;
    }

    if (text_size.x < line_width)
        text_size.x = line_width;

    if (out_offset)
        *out_offset = ImVec2(line_width, text_size.y + line_height); // offset allow for the possibility of sitting after a trailing \n

    if (line_width > 0 || text_size.y == 0.0f) // whereas size.y will ignore the trailing \n
        text_size.y += line_height;

    if (remaining)
        *remaining = s;

    return text_size;
}

// Wrapper for stb_textedit.h to edit text (our wrapper is for: statically sized buffer, single-line, wchar characters. InputText converts between UTF-8 and wchar)
namespace ImStb
{

// Accessors and helpers on the wide-character buffer, named after the STB_TEXTEDIT_* hooks.
// Keys at or above 0x200000 (the STB_TEXTEDIT_K_* range defined below) map to no text.
static int STB_TEXTEDIT_STRINGLEN(const STB_TEXTEDIT_STRING* obj) { return obj->CurLenW; }
static ImWchar STB_TEXTEDIT_GETCHAR(const STB_TEXTEDIT_STRING* obj, int idx) { return obj->TextW[idx]; }
static float STB_TEXTEDIT_GETWIDTH(STB_TEXTEDIT_STRING* obj, int line_start_idx, int char_idx) { ImWchar c = obj->TextW[line_start_idx + char_idx]; if (c == '\n') return STB_TEXTEDIT_GETWIDTH_NEWLINE; ImGuiContext& g = *GImGui; return g.Font->GetCharAdvance(c) * (g.FontSize / g.Font->FontSize); }
static int STB_TEXTEDIT_KEYTOTEXT(int key) { return key >= 0x200000 ? 0 : key; }
static ImWchar STB_TEXTEDIT_NEWLINE = '\n';
// Fill 'r' with the metrics of the row starting at 'line_start_idx', measured up to and including the first '\n'.
static void STB_TEXTEDIT_LAYOUTROW(StbTexteditRow* r, STB_TEXTEDIT_STRING* obj, int line_start_idx)
{
    const ImWchar* text = obj->TextW.Data;
    const ImWchar* text_remaining = NULL;
    const ImVec2 size = InputTextCalcTextSizeW(text + line_start_idx, text + obj->CurLenW, &text_remaining, NULL, true);
    r->x0 = 0.0f;
    r->x1 = size.x;
    r->baseline_y_delta = size.y;
    r->ymin = 0.0f;
    r->ymax = size.y;
    r->num_chars = (int)(text_remaining - (text + line_start_idx));
}

// Word navigation helpers.
// - A separator is a blank character or one of , ; ( ) { } [ ] |
// - "Boundary from right": a non-separator preceded by a separator. "Boundary from left": a separator preceded by a non-separator.
// - Index 0 always counts as a boundary.
// - Moving right stops on a boundary-from-left on Apple platforms and on a boundary-from-right elsewhere.
static bool is_separator(unsigned int c) { return ImCharIsBlankW(c) || c==',' || c==';' || c=='(' || c==')' || c=='{' || c=='}' || c=='[' || c==']' || c=='|'; }
static int is_word_boundary_from_right(STB_TEXTEDIT_STRING* obj, int idx) { return idx > 0 ? (is_separator(obj->TextW[idx - 1]) && !is_separator(obj->TextW[idx]) ) : 1; }
static int STB_TEXTEDIT_MOVEWORDLEFT_IMPL(STB_TEXTEDIT_STRING* obj, int idx) { idx--; while (idx >= 0 && !is_word_boundary_from_right(obj, idx)) idx--; return idx < 0 ? 0 : idx; }
#ifdef __APPLE__ // FIXME: Move setting to IO structure
static int is_word_boundary_from_left(STB_TEXTEDIT_STRING* obj, int idx) { return idx > 0 ? (!is_separator(obj->TextW[idx - 1]) && is_separator(obj->TextW[idx]) ) : 1; }
static int STB_TEXTEDIT_MOVEWORDRIGHT_IMPL(STB_TEXTEDIT_STRING* obj, int idx) { idx++; int len = obj->CurLenW; while (idx < len && !is_word_boundary_from_left(obj, idx)) idx++; return idx > len ? len : idx; }
#else
static int STB_TEXTEDIT_MOVEWORDRIGHT_IMPL(STB_TEXTEDIT_STRING* obj, int idx) { idx++; int len = obj->CurLenW; while (idx < len && !is_word_boundary_from_right(obj, idx)) idx++; return idx > len ? len : idx; }
#endif
#define STB_TEXTEDIT_MOVEWORDLEFT STB_TEXTEDIT_MOVEWORDLEFT_IMPL // They need to be #define for stb_textedit.h
#define STB_TEXTEDIT_MOVEWORDRIGHT STB_TEXTEDIT_MOVEWORDRIGHT_IMPL

// Remove 'n' wide characters starting at 'pos', shifting the remainder of the text down
// and keeping both the UTF-8 and the wide-character lengths up to date.
static void STB_TEXTEDIT_DELETECHARS(STB_TEXTEDIT_STRING* obj, int pos, int n)
{
    ImWchar* dst = obj->TextW.Data + pos;

    // We maintain our buffer length in both UTF-8 and wchar formats
    obj->Edited = true;
    obj->CurLenA -= ImTextCountUtf8BytesFromStr(dst, dst + n);
    obj->CurLenW -= n;

    // Offset remaining text (FIXME-OPT: Use memmove)
    const ImWchar* src = obj->TextW.Data + pos + n;
    while (ImWchar c = *src++)
        *dst++ = c;
    *dst = '\0';
}

// Insert 'new_text_len' wide characters at 'pos'.
// - Returns false, leaving the text untouched, when the buffer is not resizable (no
//   ImGuiInputTextFlags_CallbackResize) and either the UTF-8 capacity or the wide buffer size would be exceeded.
// - With a resizable buffer, the wide buffer is grown as needed.
static bool STB_TEXTEDIT_INSERTCHARS(STB_TEXTEDIT_STRING* obj, int pos, const ImWchar* new_text, int new_text_len)
{
    const bool is_resizable = (obj->UserFlags & ImGuiInputTextFlags_CallbackResize) != 0;
    const int text_len = obj->CurLenW;
    IM_ASSERT(pos <= text_len);

    // Capacity check against the UTF-8 side (+1 for the zero-terminator).
    const int new_text_len_utf8 = ImTextCountUtf8BytesFromStr(new_text, new_text + new_text_len);
    if (!is_resizable && (new_text_len_utf8 + obj->CurLenA + 1 > obj->BufCapacityA))
        return false;

    // Grow internal buffer if needed
    if (new_text_len + text_len + 1 > obj->TextW.Size)
    {
        if (!is_resizable)
            return false;
        IM_ASSERT(text_len < obj->TextW.Size);
        obj->TextW.resize(text_len + ImClamp(new_text_len * 4, 32, ImMax(256, new_text_len)) + 1);
    }

    // Open a gap at 'pos' (unless appending), then copy the new characters into it.
    ImWchar* text = obj->TextW.Data;
    if (pos != text_len)
        memmove(text + pos + new_text_len, text + pos, (size_t)(text_len - pos) * sizeof(ImWchar));
    memcpy(text + pos, new_text, (size_t)new_text_len * sizeof(ImWchar));

    obj->Edited = true;
    obj->CurLenW += new_text_len;
    obj->CurLenA += new_text_len_utf8;
    obj->TextW[obj->CurLenW] = '\0';

    return true;
}

// We don't use an enum so we can build even with conflicting symbols (if another user of stb_textedit.h leak their STB_TEXTEDIT_K_* symbols)
#define STB_TEXTEDIT_K_LEFT 0x200000 // keyboard input to move cursor left
#define STB_TEXTEDIT_K_RIGHT 0x200001 // keyboard input to move cursor right
#define STB_TEXTEDIT_K_UP 0x200002 // keyboard input to move cursor up
#define STB_TEXTEDIT_K_DOWN 0x200003 // keyboard input to move cursor down
#define STB_TEXTEDIT_K_LINESTART 0x200004 // keyboard input to move cursor to start of line
#define STB_TEXTEDIT_K_LINEEND 0x200005 // keyboard input to move cursor to end of line
#define STB_TEXTEDIT_K_TEXTSTART 0x200006 // keyboard input to move cursor to start of text
#define STB_TEXTEDIT_K_TEXTEND 0x200007 // keyboard input to move cursor to end of text
#define STB_TEXTEDIT_K_DELETE 0x200008 // keyboard input to delete selection or character under cursor
#define STB_TEXTEDIT_K_BACKSPACE 0x200009 // keyboard input to delete selection or character left of cursor
#define STB_TEXTEDIT_K_UNDO 0x20000A // keyboard input to perform undo
#define STB_TEXTEDIT_K_REDO 0x20000B // keyboard input to perform redo
#define STB_TEXTEDIT_K_WORDLEFT 0x20000C // keyboard input to move cursor left one word
#define STB_TEXTEDIT_K_WORDRIGHT 0x20000D // keyboard input to move cursor right one word
#define STB_TEXTEDIT_K_PGUP 0x20000E // keyboard input to move cursor up a page
#define STB_TEXTEDIT_K_PGDOWN 0x20000F // keyboard input to move cursor down a page
#define STB_TEXTEDIT_K_SHIFT 0x400000

#define STB_TEXTEDIT_IMPLEMENTATION
#include "imstb_textedit.h"

// stb_textedit internally allows for a single undo record to do addition and deletion, but somehow, calling
// the stb_textedit_paste() function creates two separate records, so we perform it manually. (FIXME: Report to nothings/stb?)
// Replaces the whole content of 'str' with 'text': one undo record is created, the old text is deleted,
// then the new text is inserted and the cursor is placed after it.
static void stb_textedit_replace(STB_TEXTEDIT_STRING* str, STB_TexteditState* state, const STB_TEXTEDIT_CHARTYPE* text, int text_len)
{
    stb_text_makeundo_replace(str, state, 0, str->CurLenW, text_len);
    ImStb::STB_TEXTEDIT_DELETECHARS(str, 0, str->CurLenW);
    if (text_len <= 0)
        return;
    if (ImStb::STB_TEXTEDIT_INSERTCHARS(str, 0, text, text_len))
    {
        state->cursor = text_len;
        state->has_preferred_x = 0;
        return;
    }
    IM_ASSERT(0); // Failed to insert character, normally shouldn't happen because of how we currently use stb_textedit_replace()
}

} // namespace ImStb

// Feed one key to stb_textedit, then request that the view follows the cursor and restart the cursor animation.
void ImGuiInputTextState::OnKeyPressed(int key)
{
    stb_textedit_key(this, &Stb, key);
    CursorFollow = true;
    CursorAnimReset();
}

// Zero-initialize every field of the callback data.
ImGuiInputTextCallbackData::ImGuiInputTextCallbackData()
{
    memset(this, 0, sizeof(*this));
}

// Public API to manipulate UTF-8 text
// We expose UTF-8 to the user (unlike the STB_TEXTEDIT_* functions which are manipulating wchar)
// FIXME: The existence of this rarely exercised code path is a bit of a nuisance.
// Remove 'bytes_count' bytes starting at byte offset 'pos' from 'Buf'.
// - A cursor located after the removed range is shifted back; a cursor inside it moves to 'pos'.
// - The selection is collapsed onto the cursor and the buffer is flagged dirty.
void ImGuiInputTextCallbackData::DeleteChars(int pos, int bytes_count)
{
    IM_ASSERT(pos + bytes_count <= BufTextLen);
    char* dst = Buf + pos;
    const char* src = Buf + pos + bytes_count;
    while (char c = *src++)
        *dst++ = c;
    *dst = '\0';

    if (CursorPos >= pos + bytes_count)
        CursorPos -= bytes_count;
    else if (CursorPos >= pos)
        CursorPos = pos;
    SelectionStart = SelectionEnd = CursorPos;
    BufDirty = true;
    BufTextLen -= bytes_count;
}

// Insert UTF-8 text at byte offset 'pos' of 'Buf'.
// - 'new_text_end' may be NULL, in which case 'new_text' is treated as zero-terminated.
// - When the result would not fit in 'BufSize': without ImGuiInputTextFlags_CallbackResize the call does
//   nothing; with it, the active input text state's UTF-8 buffer is grown and 'Buf' / 'BufSize' are updated.
// - A cursor at or after 'pos' is shifted forward; the selection is collapsed onto the cursor and the
//   buffer is flagged dirty.
void ImGuiInputTextCallbackData::InsertChars(int pos, const char* new_text, const char* new_text_end)
{
    const bool is_resizable = (Flags & ImGuiInputTextFlags_CallbackResize) != 0;
    const int new_text_len = new_text_end ? (int)(new_text_end - new_text) : (int)strlen(new_text);
    if (new_text_len + BufTextLen >= BufSize)
    {
        if (!is_resizable)
            return;

        // Contrary to STB_TEXTEDIT_INSERTCHARS() this is working in the UTF8 buffer, hence the mildly similar code (until we remove the U16 buffer altogether!)
        ImGuiContext& g = *GImGui;
        ImGuiInputTextState* edit_state = &g.InputTextState;
        IM_ASSERT(edit_state->ID != 0 && g.ActiveId == edit_state->ID);
        IM_ASSERT(Buf == edit_state->TextA.Data);
        int new_buf_size = BufTextLen + ImClamp(new_text_len * 4, 32, ImMax(256, new_text_len)) + 1;
        edit_state->TextA.reserve(new_buf_size + 1);
        Buf = edit_state->TextA.Data;
        BufSize = edit_state->BufCapacityA = new_buf_size;
    }

    // Open a gap at 'pos' (unless appending), copy the new bytes in, and re-terminate the string.
    if (BufTextLen != pos)
        memmove(Buf + pos + new_text_len, Buf + pos, (size_t)(BufTextLen - pos));
    memcpy(Buf + pos, new_text, (size_t)new_text_len * sizeof(char));
    Buf[BufTextLen + new_text_len] = '\0';

    if (CursorPos >= pos)
        CursorPos += new_text_len;
    SelectionStart = SelectionEnd = CursorPos;
    BufDirty = true;
    BufTextLen += new_text_len;
}

// Return false to discard a character.
// - '*p_char' is the candidate character; it may be rewritten (uppercase conversion, or by the user callback).
// - Filters are applied in order: control characters, ASCII delete (127), the private Unicode range,
//   codepoints above IM_UNICODE_CODEPOINT_MAX, the named character-set flags, then the optional user callback.
static bool InputTextFilterCharacter(unsigned int* p_char, ImGuiInputTextFlags flags, ImGuiInputTextCallback callback, void* user_data)
{
    unsigned int c = *p_char;

    // Filter non-printable (NB: isprint is unreliable! see #2467)
    // Only '\n' (multiline inputs) and '\t' (ImGuiInputTextFlags_AllowTabInput) may pass.
    if (c < 0x20)
    {
        bool pass = false;
        pass |= (c == '\n' && (flags & ImGuiInputTextFlags_Multiline));
        pass |= (c == '\t' && (flags & ImGuiInputTextFlags_AllowTabInput));
        if (!pass)
            return false;
    }

    // We ignore Ascii representation of delete (emitted from Backspace on OSX, see #2578, #2817)
    if (c == 127)
        return false;

    // Filter private Unicode range. GLFW on OSX seems to send private characters for special keys like arrow keys (FIXME)
    if (c >= 0xE000 && c <= 0xF8FF)
        return false;

    // Filter Unicode ranges we are not handling in this build.
    if (c > IM_UNICODE_CODEPOINT_MAX)
        return false;

    // Generic named filters
    if (flags & (ImGuiInputTextFlags_CharsDecimal | ImGuiInputTextFlags_CharsHexadecimal | ImGuiInputTextFlags_CharsUppercase | ImGuiInputTextFlags_CharsNoBlank | ImGuiInputTextFlags_CharsScientific))
    {
        // The libc allows overriding locale, with e.g. 'setlocale(LC_NUMERIC, "de_DE.UTF-8");' which affect the output/input of printf/scanf.
        // The standard mandate that programs starts in the "C" locale where the decimal point is '.'.
        // We don't really intend to provide widespread support for it, but out of empathy for people stuck with using odd API, we support the bare minimum aka overriding the decimal point.
        // Change the default decimal_point with:
        //   ImGui::GetCurrentContext()->PlatformLocaleDecimalPoint = *localeconv()->decimal_point;
        ImGuiContext& g = *GImGui;
        const unsigned c_decimal_point = (unsigned int)g.PlatformLocaleDecimalPoint;

        // Allow 0-9 . - + * /
        if (flags & ImGuiInputTextFlags_CharsDecimal)
            if (!(c >= '0' && c <= '9') && (c != c_decimal_point) && (c != '-') && (c != '+') && (c != '*') && (c != '/'))
                return false;

        // Allow 0-9 . - + * / e E
        if (flags & ImGuiInputTextFlags_CharsScientific)
            if (!(c >= '0' && c <= '9') && (c != c_decimal_point) && (c != '-') && (c != '+') && (c != '*') && (c != '/') && (c != 'e') && (c != 'E'))
                return false;

        // Allow 0-9 a-f A-F
        if (flags & ImGuiInputTextFlags_CharsHexadecimal)
            if (!(c >= '0' && c <= '9') && !(c >= 'a' && c <= 'f') && !(c >= 'A' && c <= 'F'))
                return false;

        // Turn a-z into A-Z
        if (flags & ImGuiInputTextFlags_CharsUppercase)
            if (c >= 'a' && c <= 'z')
                *p_char = (c += (unsigned int)('A' - 'a'));

        if (flags & ImGuiInputTextFlags_CharsNoBlank)
            if (ImCharIsBlankW(c))
                return false;
    }

    // Custom callback filter
    // The callback may replace the character through EventChar; a non-zero return value or a zero EventChar discards it.
    if (flags & ImGuiInputTextFlags_CallbackCharFilter)
    {
        ImGuiInputTextCallbackData callback_data;
        memset(&callback_data, 0, sizeof(ImGuiInputTextCallbackData));
        callback_data.EventFlag = ImGuiInputTextFlags_CallbackCharFilter;
        callback_data.EventChar = (ImWchar)c;
        callback_data.Flags = flags;
        callback_data.UserData = user_data;
        if (callback(&callback_data) != 0)
            return false;
        *p_char = callback_data.EventChar;
        if (!callback_data.EventChar)
            return false;
    }

    return true;
}

// Edit a string of text
// - buf_size account for the zero-terminator, so a buf_size of 6 can hold "Hello" but not "Hello!".
//   This is so we can easily call InputText() on static arrays using ARRAYSIZE() and to match
//   Note that in std::string world, capacity() would omit 1 byte used by the zero-terminator.
// - When active, hold on a privately held copy of the text (and apply back to 'buf'). So changing 'buf' while the InputText is active has no effect.
// - If you want to use ImGui::InputText() with std::string, see misc/cpp/imgui_stdlib.h
// (FIXME: Rather confusing and messy function, among the worse part of our codebase, expecting to rewrite a V2 at some point.. Partly because we are
//  doing UTF8 > U16 > UTF8 conversions on the go to easily interface with stb_textedit.
Ideally should stay in UTF-8 all the time. See https://github.com/nothings/stb/issues/188) +bool ImGui::InputTextEx(const char* label, const char* hint, char* buf, int buf_size, const ImVec2& size_arg, ImGuiInputTextFlags flags, ImGuiInputTextCallback callback, void* callback_user_data) +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return false; + + IM_ASSERT(buf != NULL && buf_size >= 0); + IM_ASSERT(!((flags & ImGuiInputTextFlags_CallbackHistory) && (flags & ImGuiInputTextFlags_Multiline))); // Can't use both together (they both use up/down keys) + IM_ASSERT(!((flags & ImGuiInputTextFlags_CallbackCompletion) && (flags & ImGuiInputTextFlags_AllowTabInput))); // Can't use both together (they both use tab key) + + ImGuiContext& g = *GImGui; + ImGuiIO& io = g.IO; + const ImGuiStyle& style = g.Style; + + const bool RENDER_SELECTION_WHEN_INACTIVE = false; + const bool is_multiline = (flags & ImGuiInputTextFlags_Multiline) != 0; + const bool is_readonly = (flags & ImGuiInputTextFlags_ReadOnly) != 0; + const bool is_password = (flags & ImGuiInputTextFlags_Password) != 0; + const bool is_undoable = (flags & ImGuiInputTextFlags_NoUndoRedo) == 0; + const bool is_resizable = (flags & ImGuiInputTextFlags_CallbackResize) != 0; + if (is_resizable) + IM_ASSERT(callback != NULL); // Must provide a callback if you set the ImGuiInputTextFlags_CallbackResize flag! + + if (is_multiline) // Open group before calling GetID() because groups tracks id created within their scope, + BeginGroup(); + const ImGuiID id = window->GetID(label); + const ImVec2 label_size = CalcTextSize(label, NULL, true); + const ImVec2 frame_size = CalcItemSize(size_arg, CalcItemWidth(), (is_multiline ? g.FontSize * 8.0f : label_size.y) + style.FramePadding.y * 2.0f); // Arbitrary default of 8 lines high for multi-line + const ImVec2 total_size = ImVec2(frame_size.x + (label_size.x > 0.0f ? 
style.ItemInnerSpacing.x + label_size.x : 0.0f), frame_size.y); + + const ImRect frame_bb(window->DC.CursorPos, window->DC.CursorPos + frame_size); + const ImRect total_bb(frame_bb.Min, frame_bb.Min + total_size); + + ImGuiWindow* draw_window = window; + ImVec2 inner_size = frame_size; + if (is_multiline) + { + if (!ItemAdd(total_bb, id, &frame_bb)) + { + ItemSize(total_bb, style.FramePadding.y); + EndGroup(); + return false; + } + + // We reproduce the contents of BeginChildFrame() in order to provide 'label' so our window internal data are easier to read/debug. + PushStyleColor(ImGuiCol_ChildBg, style.Colors[ImGuiCol_FrameBg]); + PushStyleVar(ImGuiStyleVar_ChildRounding, style.FrameRounding); + PushStyleVar(ImGuiStyleVar_ChildBorderSize, style.FrameBorderSize); + PushStyleVar(ImGuiStyleVar_WindowPadding, style.FramePadding); + bool child_visible = BeginChildEx(label, id, frame_bb.GetSize(), true, ImGuiWindowFlags_NoMove | ImGuiWindowFlags_AlwaysUseWindowPadding); + PopStyleVar(3); + PopStyleColor(); + if (!child_visible) + { + EndChild(); + EndGroup(); + return false; + } + draw_window = g.CurrentWindow; // Child window + draw_window->DC.NavLayerActiveMaskNext |= (1 << draw_window->DC.NavLayerCurrent); // This is to ensure that EndChild() will display a navigation highlight so we can "enter" into it. + inner_size.x -= draw_window->ScrollbarSizes.x; + } + else + { + ItemSize(total_bb, style.FramePadding.y); + if (!ItemAdd(total_bb, id, &frame_bb)) + return false; + } + const bool hovered = ItemHoverable(frame_bb, id); + if (hovered) + g.MouseCursor = ImGuiMouseCursor_TextInput; + + // We are only allowed to access the state if we are already the active widget. 
+ ImGuiInputTextState* state = GetInputTextState(id); + + const bool focus_requested = FocusableItemRegister(window, id); + const bool focus_requested_by_code = focus_requested && (g.FocusRequestCurrWindow == window && g.FocusRequestCurrCounterRegular == window->DC.FocusCounterRegular); + const bool focus_requested_by_tab = focus_requested && !focus_requested_by_code; + + const bool user_clicked = hovered && io.MouseClicked[0]; + const bool user_nav_input_start = (g.ActiveId != id) && ((g.NavInputId == id) || (g.NavActivateId == id && g.NavInputSource == ImGuiInputSource_NavKeyboard)); + const bool user_scroll_finish = is_multiline && state != NULL && g.ActiveId == 0 && g.ActiveIdPreviousFrame == GetWindowScrollbarID(draw_window, ImGuiAxis_Y); + const bool user_scroll_active = is_multiline && state != NULL && g.ActiveId == GetWindowScrollbarID(draw_window, ImGuiAxis_Y); + + bool clear_active_id = false; + bool select_all = (g.ActiveId != id) && ((flags & ImGuiInputTextFlags_AutoSelectAll) != 0 || user_nav_input_start) && (!is_multiline); + + float scroll_y = is_multiline ? draw_window->Scroll.y : FLT_MAX; + + const bool init_changed_specs = (state != NULL && state->Stb.single_line != !is_multiline); + const bool init_make_active = (focus_requested || user_clicked || user_scroll_finish || user_nav_input_start); + const bool init_state = (init_make_active || user_scroll_active); + if ((init_state && g.ActiveId != id) || init_changed_specs) + { + // Access state even if we don't own it yet. + state = &g.InputTextState; + state->CursorAnimReset(); + + // Take a copy of the initial buffer value (both in original UTF-8 format and converted to wchar) + // From the moment we focused we are ignoring the content of 'buf' (unless we are in read-only mode) + const int buf_len = (int)strlen(buf); + state->InitialTextA.resize(buf_len + 1); // UTF-8. we use +1 to make sure that .Data is always pointing to at least an empty string. 
+ memcpy(state->InitialTextA.Data, buf, buf_len + 1); + + // Start edition + const char* buf_end = NULL; + state->TextW.resize(buf_size + 1); // wchar count <= UTF-8 count. we use +1 to make sure that .Data is always pointing to at least an empty string. + state->TextA.resize(0); + state->TextAIsValid = false; // TextA is not valid yet (we will display buf until then) + state->CurLenW = ImTextStrFromUtf8(state->TextW.Data, buf_size, buf, NULL, &buf_end); + state->CurLenA = (int)(buf_end - buf); // We can't get the result from ImStrncpy() above because it is not UTF-8 aware. Here we'll cut off malformed UTF-8. + + // Preserve cursor position and undo/redo stack if we come back to same widget + // FIXME: For non-readonly widgets we might be able to require that TextAIsValid && TextA == buf ? (untested) and discard undo stack if user buffer has changed. + const bool recycle_state = (state->ID == id && !init_changed_specs); + if (recycle_state) + { + // Recycle existing cursor/selection/undo stack but clamp position + // Note a single mouse click will override the cursor/position immediately by calling stb_textedit_click handler. 
+ state->CursorClamp(); + } + else + { + state->ID = id; + state->ScrollX = 0.0f; + stb_textedit_initialize_state(&state->Stb, !is_multiline); + if (!is_multiline && focus_requested_by_code) + select_all = true; + } + if (flags & ImGuiInputTextFlags_AlwaysInsertMode) + state->Stb.insert_mode = 1; + if (!is_multiline && (focus_requested_by_tab || (user_clicked && io.KeyCtrl))) + select_all = true; + } + + if (g.ActiveId != id && init_make_active) + { + IM_ASSERT(state && state->ID == id); + SetActiveID(id, window); + SetFocusID(id, window); + FocusWindow(window); + + // Declare our inputs + IM_ASSERT(ImGuiNavInput_COUNT < 32); + g.ActiveIdUsingNavDirMask |= (1 << ImGuiDir_Left) | (1 << ImGuiDir_Right); + if (is_multiline || (flags & ImGuiInputTextFlags_CallbackHistory)) + g.ActiveIdUsingNavDirMask |= (1 << ImGuiDir_Up) | (1 << ImGuiDir_Down); + g.ActiveIdUsingNavInputMask |= (1 << ImGuiNavInput_Cancel); + g.ActiveIdUsingKeyInputMask |= ((ImU64)1 << ImGuiKey_Home) | ((ImU64)1 << ImGuiKey_End); + if (is_multiline) + g.ActiveIdUsingKeyInputMask |= ((ImU64)1 << ImGuiKey_PageUp) | ((ImU64)1 << ImGuiKey_PageDown); + if (flags & (ImGuiInputTextFlags_CallbackCompletion | ImGuiInputTextFlags_AllowTabInput)) // Disable keyboard tabbing out as we will use the \t character. + g.ActiveIdUsingKeyInputMask |= ((ImU64)1 << ImGuiKey_Tab); + } + + // We have an edge case if ActiveId was set through another widget (e.g. 
widget being swapped), clear id immediately (don't wait until the end of the function) + if (g.ActiveId == id && state == NULL) + ClearActiveID(); + + // Release focus when we click outside + if (g.ActiveId == id && io.MouseClicked[0] && !init_state && !init_make_active) //-V560 + clear_active_id = true; + + // Lock the decision of whether we are going to take the path displaying the cursor or selection + const bool render_cursor = (g.ActiveId == id) || (state && user_scroll_active); + bool render_selection = state && state->HasSelection() && (RENDER_SELECTION_WHEN_INACTIVE || render_cursor); + bool value_changed = false; + bool enter_pressed = false; + + // When read-only we always use the live data passed to the function + // FIXME-OPT: Because our selection/cursor code currently needs the wide text we need to convert it when active, which is not ideal :( + if (is_readonly && state != NULL && (render_cursor || render_selection)) + { + const char* buf_end = NULL; + state->TextW.resize(buf_size + 1); + state->CurLenW = ImTextStrFromUtf8(state->TextW.Data, state->TextW.Size, buf, NULL, &buf_end); + state->CurLenA = (int)(buf_end - buf); + state->CursorClamp(); + render_selection &= state->HasSelection(); + } + + // Select the buffer to render. + const bool buf_display_from_state = (render_cursor || render_selection || g.ActiveId == id) && !is_readonly && state && state->TextAIsValid; + const bool is_displaying_hint = (hint != NULL && (buf_display_from_state ? 
state->TextA.Data : buf)[0] == 0); + + // Password pushes a temporary font with only a fallback glyph + if (is_password && !is_displaying_hint) + { + const ImFontGlyph* glyph = g.Font->FindGlyph('*'); + ImFont* password_font = &g.InputTextPasswordFont; + password_font->FontSize = g.Font->FontSize; + password_font->Scale = g.Font->Scale; + password_font->Ascent = g.Font->Ascent; + password_font->Descent = g.Font->Descent; + password_font->ContainerAtlas = g.Font->ContainerAtlas; + password_font->FallbackGlyph = glyph; + password_font->FallbackAdvanceX = glyph->AdvanceX; + IM_ASSERT(password_font->Glyphs.empty() && password_font->IndexAdvanceX.empty() && password_font->IndexLookup.empty()); + PushFont(password_font); + } + + // Process mouse inputs and character inputs + int backup_current_text_length = 0; + if (g.ActiveId == id) + { + IM_ASSERT(state != NULL); + backup_current_text_length = state->CurLenA; + state->Edited = false; + state->BufCapacityA = buf_size; + state->UserFlags = flags; + state->UserCallback = callback; + state->UserCallbackData = callback_user_data; + + // Although we are active we don't prevent mouse from hovering other elements unless we are interacting right now with the widget. + // Down the line we should have a cleaner library-wide concept of Selected vs Active. + g.ActiveIdAllowOverlap = !io.MouseDown[0]; + g.WantTextInputNextFrame = 1; + + // Edit in progress + const float mouse_x = (io.MousePos.x - frame_bb.Min.x - style.FramePadding.x) + state->ScrollX; + const float mouse_y = (is_multiline ? 
(io.MousePos.y - draw_window->DC.CursorPos.y - style.FramePadding.y) : (g.FontSize * 0.5f)); + + const bool is_osx = io.ConfigMacOSXBehaviors; + if (select_all || (hovered && !is_osx && io.MouseDoubleClicked[0])) + { + state->SelectAll(); + state->SelectedAllMouseLock = true; + } + else if (hovered && is_osx && io.MouseDoubleClicked[0]) + { + // Double-click select a word only, OS X style (by simulating keystrokes) + state->OnKeyPressed(STB_TEXTEDIT_K_WORDLEFT); + state->OnKeyPressed(STB_TEXTEDIT_K_WORDRIGHT | STB_TEXTEDIT_K_SHIFT); + } + else if (io.MouseClicked[0] && !state->SelectedAllMouseLock) + { + if (hovered) + { + stb_textedit_click(state, &state->Stb, mouse_x, mouse_y); + state->CursorAnimReset(); + } + } + else if (io.MouseDown[0] && !state->SelectedAllMouseLock && (io.MouseDelta.x != 0.0f || io.MouseDelta.y != 0.0f)) + { + stb_textedit_drag(state, &state->Stb, mouse_x, mouse_y); + state->CursorAnimReset(); + state->CursorFollow = true; + } + if (state->SelectedAllMouseLock && !io.MouseDown[0]) + state->SelectedAllMouseLock = false; + + // It is ill-defined whether the backend needs to send a \t character when pressing the TAB keys. + // Win32 and GLFW naturally do it but not SDL. + const bool ignore_char_inputs = (io.KeyCtrl && !io.KeyAlt) || (is_osx && io.KeySuper); + if ((flags & ImGuiInputTextFlags_AllowTabInput) && IsKeyPressedMap(ImGuiKey_Tab) && !ignore_char_inputs && !io.KeyShift && !is_readonly) + if (!io.InputQueueCharacters.contains('\t')) + { + unsigned int c = '\t'; // Insert TAB + if (InputTextFilterCharacter(&c, flags, callback, callback_user_data)) + state->OnKeyPressed((int)c); + } + + // Process regular text input (before we check for Return because using some IME will effectively send a Return?) + // We ignore CTRL inputs, but need to allow ALT+CTRL as some keyboards (e.g. German) use AltGR (which _is_ Alt+Ctrl) to input certain characters. 
+ if (io.InputQueueCharacters.Size > 0) + { + if (!ignore_char_inputs && !is_readonly && !user_nav_input_start) + for (int n = 0; n < io.InputQueueCharacters.Size; n++) + { + // Insert character if they pass filtering + unsigned int c = (unsigned int)io.InputQueueCharacters[n]; + if (c == '\t' && io.KeyShift) + continue; + if (InputTextFilterCharacter(&c, flags, callback, callback_user_data)) + state->OnKeyPressed((int)c); + } + + // Consume characters + io.InputQueueCharacters.resize(0); + } + } + + // Process other shortcuts/key-presses + bool cancel_edit = false; + if (g.ActiveId == id && !g.ActiveIdIsJustActivated && !clear_active_id) + { + IM_ASSERT(state != NULL); + IM_ASSERT(io.KeyMods == GetMergedKeyModFlags() && "Mismatching io.KeyCtrl/io.KeyShift/io.KeyAlt/io.KeySuper vs io.KeyMods"); // We rarely do this check, but if anything let's do it here. + + const int row_count_per_page = ImMax((int)((inner_size.y - style.FramePadding.y) / g.FontSize), 1); + state->Stb.row_count_per_page = row_count_per_page; + + const int k_mask = (io.KeyShift ? STB_TEXTEDIT_K_SHIFT : 0); + const bool is_osx = io.ConfigMacOSXBehaviors; + const bool is_osx_shift_shortcut = is_osx && (io.KeyMods == (ImGuiKeyModFlags_Super | ImGuiKeyModFlags_Shift)); + const bool is_wordmove_key_down = is_osx ? io.KeyAlt : io.KeyCtrl; // OS X style: Text editing cursor movement using Alt instead of Ctrl + const bool is_startend_key_down = is_osx && io.KeySuper && !io.KeyCtrl && !io.KeyAlt; // OS X style: Line/Text Start and End using Cmd+Arrows instead of Home/End + const bool is_ctrl_key_only = (io.KeyMods == ImGuiKeyModFlags_Ctrl); + const bool is_shift_key_only = (io.KeyMods == ImGuiKeyModFlags_Shift); + const bool is_shortcut_key = g.IO.ConfigMacOSXBehaviors ? 
(io.KeyMods == ImGuiKeyModFlags_Super) : (io.KeyMods == ImGuiKeyModFlags_Ctrl); + + const bool is_cut = ((is_shortcut_key && IsKeyPressedMap(ImGuiKey_X)) || (is_shift_key_only && IsKeyPressedMap(ImGuiKey_Delete))) && !is_readonly && !is_password && (!is_multiline || state->HasSelection()); + const bool is_copy = ((is_shortcut_key && IsKeyPressedMap(ImGuiKey_C)) || (is_ctrl_key_only && IsKeyPressedMap(ImGuiKey_Insert))) && !is_password && (!is_multiline || state->HasSelection()); + const bool is_paste = ((is_shortcut_key && IsKeyPressedMap(ImGuiKey_V)) || (is_shift_key_only && IsKeyPressedMap(ImGuiKey_Insert))) && !is_readonly; + const bool is_undo = ((is_shortcut_key && IsKeyPressedMap(ImGuiKey_Z)) && !is_readonly && is_undoable); + const bool is_redo = ((is_shortcut_key && IsKeyPressedMap(ImGuiKey_Y)) || (is_osx_shift_shortcut && IsKeyPressedMap(ImGuiKey_Z))) && !is_readonly && is_undoable; + + if (IsKeyPressedMap(ImGuiKey_LeftArrow)) { state->OnKeyPressed((is_startend_key_down ? STB_TEXTEDIT_K_LINESTART : is_wordmove_key_down ? STB_TEXTEDIT_K_WORDLEFT : STB_TEXTEDIT_K_LEFT) | k_mask); } + else if (IsKeyPressedMap(ImGuiKey_RightArrow)) { state->OnKeyPressed((is_startend_key_down ? STB_TEXTEDIT_K_LINEEND : is_wordmove_key_down ? STB_TEXTEDIT_K_WORDRIGHT : STB_TEXTEDIT_K_RIGHT) | k_mask); } + else if (IsKeyPressedMap(ImGuiKey_UpArrow) && is_multiline) { if (io.KeyCtrl) SetScrollY(draw_window, ImMax(draw_window->Scroll.y - g.FontSize, 0.0f)); else state->OnKeyPressed((is_startend_key_down ? STB_TEXTEDIT_K_TEXTSTART : STB_TEXTEDIT_K_UP) | k_mask); } + else if (IsKeyPressedMap(ImGuiKey_DownArrow) && is_multiline) { if (io.KeyCtrl) SetScrollY(draw_window, ImMin(draw_window->Scroll.y + g.FontSize, GetScrollMaxY())); else state->OnKeyPressed((is_startend_key_down ? 
STB_TEXTEDIT_K_TEXTEND : STB_TEXTEDIT_K_DOWN) | k_mask); } + else if (IsKeyPressedMap(ImGuiKey_PageUp) && is_multiline) { state->OnKeyPressed(STB_TEXTEDIT_K_PGUP | k_mask); scroll_y -= row_count_per_page * g.FontSize; } + else if (IsKeyPressedMap(ImGuiKey_PageDown) && is_multiline) { state->OnKeyPressed(STB_TEXTEDIT_K_PGDOWN | k_mask); scroll_y += row_count_per_page * g.FontSize; } + else if (IsKeyPressedMap(ImGuiKey_Home)) { state->OnKeyPressed(io.KeyCtrl ? STB_TEXTEDIT_K_TEXTSTART | k_mask : STB_TEXTEDIT_K_LINESTART | k_mask); } + else if (IsKeyPressedMap(ImGuiKey_End)) { state->OnKeyPressed(io.KeyCtrl ? STB_TEXTEDIT_K_TEXTEND | k_mask : STB_TEXTEDIT_K_LINEEND | k_mask); } + else if (IsKeyPressedMap(ImGuiKey_Delete) && !is_readonly) { state->OnKeyPressed(STB_TEXTEDIT_K_DELETE | k_mask); } + else if (IsKeyPressedMap(ImGuiKey_Backspace) && !is_readonly) + { + if (!state->HasSelection()) + { + if (is_wordmove_key_down) + state->OnKeyPressed(STB_TEXTEDIT_K_WORDLEFT | STB_TEXTEDIT_K_SHIFT); + else if (is_osx && io.KeySuper && !io.KeyAlt && !io.KeyCtrl) + state->OnKeyPressed(STB_TEXTEDIT_K_LINESTART | STB_TEXTEDIT_K_SHIFT); + } + state->OnKeyPressed(STB_TEXTEDIT_K_BACKSPACE | k_mask); + } + else if (IsKeyPressedMap(ImGuiKey_Enter) || IsKeyPressedMap(ImGuiKey_KeyPadEnter)) + { + bool ctrl_enter_for_new_line = (flags & ImGuiInputTextFlags_CtrlEnterForNewLine) != 0; + if (!is_multiline || (ctrl_enter_for_new_line && !io.KeyCtrl) || (!ctrl_enter_for_new_line && io.KeyCtrl)) + { + enter_pressed = clear_active_id = true; + } + else if (!is_readonly) + { + unsigned int c = '\n'; // Insert new line + if (InputTextFilterCharacter(&c, flags, callback, callback_user_data)) + state->OnKeyPressed((int)c); + } + } + else if (IsKeyPressedMap(ImGuiKey_Escape)) + { + clear_active_id = cancel_edit = true; + } + else if (is_undo || is_redo) + { + state->OnKeyPressed(is_undo ? 
STB_TEXTEDIT_K_UNDO : STB_TEXTEDIT_K_REDO); + state->ClearSelection(); + } + else if (is_shortcut_key && IsKeyPressedMap(ImGuiKey_A)) + { + state->SelectAll(); + state->CursorFollow = true; + } + else if (is_cut || is_copy) + { + // Cut, Copy + if (io.SetClipboardTextFn) + { + const int ib = state->HasSelection() ? ImMin(state->Stb.select_start, state->Stb.select_end) : 0; + const int ie = state->HasSelection() ? ImMax(state->Stb.select_start, state->Stb.select_end) : state->CurLenW; + const int clipboard_data_len = ImTextCountUtf8BytesFromStr(state->TextW.Data + ib, state->TextW.Data + ie) + 1; + char* clipboard_data = (char*)IM_ALLOC(clipboard_data_len * sizeof(char)); + ImTextStrToUtf8(clipboard_data, clipboard_data_len, state->TextW.Data + ib, state->TextW.Data + ie); + SetClipboardText(clipboard_data); + MemFree(clipboard_data); + } + if (is_cut) + { + if (!state->HasSelection()) + state->SelectAll(); + state->CursorFollow = true; + stb_textedit_cut(state, &state->Stb); + } + } + else if (is_paste) + { + if (const char* clipboard = GetClipboardText()) + { + // Filter pasted buffer + const int clipboard_len = (int)strlen(clipboard); + ImWchar* clipboard_filtered = (ImWchar*)IM_ALLOC((clipboard_len + 1) * sizeof(ImWchar)); + int clipboard_filtered_len = 0; + for (const char* s = clipboard; *s; ) + { + unsigned int c; + s += ImTextCharFromUtf8(&c, s, NULL); + if (c == 0) + break; + if (!InputTextFilterCharacter(&c, flags, callback, callback_user_data)) + continue; + clipboard_filtered[clipboard_filtered_len++] = (ImWchar)c; + } + clipboard_filtered[clipboard_filtered_len] = 0; + if (clipboard_filtered_len > 0) // If everything was filtered, ignore the pasting operation + { + stb_textedit_paste(state, &state->Stb, clipboard_filtered, clipboard_filtered_len); + state->CursorFollow = true; + } + MemFree(clipboard_filtered); + } + } + + // Update render selection flag after events have been handled, so selection highlight can be displayed during the same frame. 
+ render_selection |= state->HasSelection() && (RENDER_SELECTION_WHEN_INACTIVE || render_cursor); + } + + // Process callbacks and apply result back to user's buffer. + if (g.ActiveId == id) + { + IM_ASSERT(state != NULL); + const char* apply_new_text = NULL; + int apply_new_text_length = 0; + if (cancel_edit) + { + // Restore initial value. Only return true if restoring to the initial value changes the current buffer contents. + if (!is_readonly && strcmp(buf, state->InitialTextA.Data) != 0) + { + // Push records into the undo stack so we can CTRL+Z the revert operation itself + apply_new_text = state->InitialTextA.Data; + apply_new_text_length = state->InitialTextA.Size - 1; + ImVector w_text; + if (apply_new_text_length > 0) + { + w_text.resize(ImTextCountCharsFromUtf8(apply_new_text, apply_new_text + apply_new_text_length) + 1); + ImTextStrFromUtf8(w_text.Data, w_text.Size, apply_new_text, apply_new_text + apply_new_text_length); + } + stb_textedit_replace(state, &state->Stb, w_text.Data, (apply_new_text_length > 0) ? (w_text.Size - 1) : 0); + } + } + + // When using 'ImGuiInputTextFlags_EnterReturnsTrue' as a special case we reapply the live buffer back to the input buffer before clearing ActiveId, even though strictly speaking it wasn't modified on this frame. + // If we didn't do that, code like InputInt() with ImGuiInputTextFlags_EnterReturnsTrue would fail. + // This also allows the user to use InputText() with ImGuiInputTextFlags_EnterReturnsTrue without maintaining any user-side storage (please note that if you use this property along ImGuiInputTextFlags_CallbackResize you can end up with your temporary string object unnecessarily allocating once a frame, either store your string data, either if you don't then don't use ImGuiInputTextFlags_CallbackResize). 
+ bool apply_edit_back_to_user_buffer = !cancel_edit || (enter_pressed && (flags & ImGuiInputTextFlags_EnterReturnsTrue) != 0); + if (apply_edit_back_to_user_buffer) + { + // Apply new value immediately - copy modified buffer back + // Note that as soon as the input box is active, the in-widget value gets priority over any underlying modification of the input buffer + // FIXME: We actually always render 'buf' when calling DrawList->AddText, making the comment above incorrect. + // FIXME-OPT: CPU waste to do this every time the widget is active, should mark dirty state from the stb_textedit callbacks. + if (!is_readonly) + { + state->TextAIsValid = true; + state->TextA.resize(state->TextW.Size * 4 + 1); + ImTextStrToUtf8(state->TextA.Data, state->TextA.Size, state->TextW.Data, NULL); + } + + // User callback + if ((flags & (ImGuiInputTextFlags_CallbackCompletion | ImGuiInputTextFlags_CallbackHistory | ImGuiInputTextFlags_CallbackEdit | ImGuiInputTextFlags_CallbackAlways)) != 0) + { + IM_ASSERT(callback != NULL); + + // The reason we specify the usage semantic (Completion/History) is that Completion needs to disable keyboard TABBING at the moment. 
+ ImGuiInputTextFlags event_flag = 0; + ImGuiKey event_key = ImGuiKey_COUNT; + if ((flags & ImGuiInputTextFlags_CallbackCompletion) != 0 && IsKeyPressedMap(ImGuiKey_Tab)) + { + event_flag = ImGuiInputTextFlags_CallbackCompletion; + event_key = ImGuiKey_Tab; + } + else if ((flags & ImGuiInputTextFlags_CallbackHistory) != 0 && IsKeyPressedMap(ImGuiKey_UpArrow)) + { + event_flag = ImGuiInputTextFlags_CallbackHistory; + event_key = ImGuiKey_UpArrow; + } + else if ((flags & ImGuiInputTextFlags_CallbackHistory) != 0 && IsKeyPressedMap(ImGuiKey_DownArrow)) + { + event_flag = ImGuiInputTextFlags_CallbackHistory; + event_key = ImGuiKey_DownArrow; + } + else if ((flags & ImGuiInputTextFlags_CallbackEdit) && state->Edited) + { + event_flag = ImGuiInputTextFlags_CallbackEdit; + } + else if (flags & ImGuiInputTextFlags_CallbackAlways) + { + event_flag = ImGuiInputTextFlags_CallbackAlways; + } + + if (event_flag) + { + ImGuiInputTextCallbackData callback_data; + memset(&callback_data, 0, sizeof(ImGuiInputTextCallbackData)); + callback_data.EventFlag = event_flag; + callback_data.Flags = flags; + callback_data.UserData = callback_user_data; + + callback_data.EventKey = event_key; + callback_data.Buf = state->TextA.Data; + callback_data.BufTextLen = state->CurLenA; + callback_data.BufSize = state->BufCapacityA; + callback_data.BufDirty = false; + + // We have to convert from wchar-positions to UTF-8-positions, which can be pretty slow (an incentive to ditch the ImWchar buffer, see https://github.com/nothings/stb/issues/188) + ImWchar* text = state->TextW.Data; + const int utf8_cursor_pos = callback_data.CursorPos = ImTextCountUtf8BytesFromStr(text, text + state->Stb.cursor); + const int utf8_selection_start = callback_data.SelectionStart = ImTextCountUtf8BytesFromStr(text, text + state->Stb.select_start); + const int utf8_selection_end = callback_data.SelectionEnd = ImTextCountUtf8BytesFromStr(text, text + state->Stb.select_end); + + // Call user code + callback(&callback_data); + 
+ // Read back what user may have modified + IM_ASSERT(callback_data.Buf == state->TextA.Data); // Invalid to modify those fields + IM_ASSERT(callback_data.BufSize == state->BufCapacityA); + IM_ASSERT(callback_data.Flags == flags); + const bool buf_dirty = callback_data.BufDirty; + if (callback_data.CursorPos != utf8_cursor_pos || buf_dirty) { state->Stb.cursor = ImTextCountCharsFromUtf8(callback_data.Buf, callback_data.Buf + callback_data.CursorPos); state->CursorFollow = true; } + if (callback_data.SelectionStart != utf8_selection_start || buf_dirty) { state->Stb.select_start = (callback_data.SelectionStart == callback_data.CursorPos) ? state->Stb.cursor : ImTextCountCharsFromUtf8(callback_data.Buf, callback_data.Buf + callback_data.SelectionStart); } + if (callback_data.SelectionEnd != utf8_selection_end || buf_dirty) { state->Stb.select_end = (callback_data.SelectionEnd == callback_data.SelectionStart) ? state->Stb.select_start : ImTextCountCharsFromUtf8(callback_data.Buf, callback_data.Buf + callback_data.SelectionEnd); } + if (buf_dirty) + { + IM_ASSERT(callback_data.BufTextLen == (int)strlen(callback_data.Buf)); // You need to maintain BufTextLen if you change the text! 
+ if (callback_data.BufTextLen > backup_current_text_length && is_resizable) + state->TextW.resize(state->TextW.Size + (callback_data.BufTextLen - backup_current_text_length)); + state->CurLenW = ImTextStrFromUtf8(state->TextW.Data, state->TextW.Size, callback_data.Buf, NULL); + state->CurLenA = callback_data.BufTextLen; // Assume correct length and valid UTF-8 from user, saves us an extra strlen() + state->CursorAnimReset(); + } + } + } + + // Will copy result string if modified + if (!is_readonly && strcmp(state->TextA.Data, buf) != 0) + { + apply_new_text = state->TextA.Data; + apply_new_text_length = state->CurLenA; + } + } + + // Copy result to user buffer + if (apply_new_text) + { + // We cannot test for 'backup_current_text_length != apply_new_text_length' here because we have no guarantee that the size + // of our owned buffer matches the size of the string object held by the user, and by design we allow InputText() to be used + // without any storage on user's side. + IM_ASSERT(apply_new_text_length >= 0); + if (is_resizable) + { + ImGuiInputTextCallbackData callback_data; + callback_data.EventFlag = ImGuiInputTextFlags_CallbackResize; + callback_data.Flags = flags; + callback_data.Buf = buf; + callback_data.BufTextLen = apply_new_text_length; + callback_data.BufSize = ImMax(buf_size, apply_new_text_length + 1); + callback_data.UserData = callback_user_data; + callback(&callback_data); + buf = callback_data.Buf; + buf_size = callback_data.BufSize; + apply_new_text_length = ImMin(callback_data.BufTextLen, buf_size - 1); + IM_ASSERT(apply_new_text_length <= buf_size); + } + //IMGUI_DEBUG_LOG("InputText(\"%s\"): apply_new_text length %d\n", label, apply_new_text_length); + + // If the underlying buffer resize was denied or not carried to the next frame, apply_new_text_length+1 may be >= buf_size. 
+ ImStrncpy(buf, apply_new_text, ImMin(apply_new_text_length + 1, buf_size)); + value_changed = true; + } + + // Clear temporary user storage + state->UserFlags = 0; + state->UserCallback = NULL; + state->UserCallbackData = NULL; + } + + // Release active ID at the end of the function (so e.g. pressing Return still does a final application of the value) + if (clear_active_id && g.ActiveId == id) + ClearActiveID(); + + // Render frame + if (!is_multiline) + { + RenderNavHighlight(frame_bb, id); + RenderFrame(frame_bb.Min, frame_bb.Max, GetColorU32(ImGuiCol_FrameBg), true, style.FrameRounding); + } + + const ImVec4 clip_rect(frame_bb.Min.x, frame_bb.Min.y, frame_bb.Min.x + inner_size.x, frame_bb.Min.y + inner_size.y); // Not using frame_bb.Max because we have adjusted size + ImVec2 draw_pos = is_multiline ? draw_window->DC.CursorPos : frame_bb.Min + style.FramePadding; + ImVec2 text_size(0.0f, 0.0f); + + // Set upper limit of single-line InputTextEx() at 2 million characters strings. The current pathological worst case is a long line + // without any carriage return, which would makes ImFont::RenderText() reserve too many vertices and probably crash. Avoid it altogether. + // Note that we only use this limit on single-line InputText(), so a pathologically large line on a InputTextMultiline() would still crash. + const int buf_display_max_length = 2 * 1024 * 1024; + const char* buf_display = buf_display_from_state ? state->TextA.Data : buf; //-V595 + const char* buf_display_end = NULL; // We have specialized paths below for setting the length + if (is_displaying_hint) + { + buf_display = hint; + buf_display_end = hint + strlen(hint); + } + + // Render text. We currently only render selection when the widget is active or while scrolling. + // FIXME: We could remove the '&& render_cursor' to keep rendering selection when inactive. 
+ if (render_cursor || render_selection) + { + IM_ASSERT(state != NULL); + if (!is_displaying_hint) + buf_display_end = buf_display + state->CurLenA; + + // Render text (with cursor and selection) + // This is going to be messy. We need to: + // - Display the text (this alone can be more easily clipped) + // - Handle scrolling, highlight selection, display cursor (those all requires some form of 1d->2d cursor position calculation) + // - Measure text height (for scrollbar) + // We are attempting to do most of that in **one main pass** to minimize the computation cost (non-negligible for large amount of text) + 2nd pass for selection rendering (we could merge them by an extra refactoring effort) + // FIXME: This should occur on buf_display but we'd need to maintain cursor/select_start/select_end for UTF-8. + const ImWchar* text_begin = state->TextW.Data; + ImVec2 cursor_offset, select_start_offset; + + { + // Find lines numbers straddling 'cursor' (slot 0) and 'select_start' (slot 1) positions. + const ImWchar* searches_input_ptr[2] = { NULL, NULL }; + int searches_result_line_no[2] = { -1000, -1000 }; + int searches_remaining = 0; + if (render_cursor) + { + searches_input_ptr[0] = text_begin + state->Stb.cursor; + searches_result_line_no[0] = -1; + searches_remaining++; + } + if (render_selection) + { + searches_input_ptr[1] = text_begin + ImMin(state->Stb.select_start, state->Stb.select_end); + searches_result_line_no[1] = -1; + searches_remaining++; + } + + // Iterate all lines to find our line numbers + // In multi-line mode, we never exit the loop until all lines are counted, so add one extra to the searches_remaining counter. + searches_remaining += is_multiline ? 
1 : 0; + int line_count = 0; + //for (const ImWchar* s = text_begin; (s = (const ImWchar*)wcschr((const wchar_t*)s, (wchar_t)'\n')) != NULL; s++) // FIXME-OPT: Could use this when wchar_t are 16-bit + for (const ImWchar* s = text_begin; *s != 0; s++) + if (*s == '\n') + { + line_count++; + if (searches_result_line_no[0] == -1 && s >= searches_input_ptr[0]) { searches_result_line_no[0] = line_count; if (--searches_remaining <= 0) break; } + if (searches_result_line_no[1] == -1 && s >= searches_input_ptr[1]) { searches_result_line_no[1] = line_count; if (--searches_remaining <= 0) break; } + } + line_count++; + if (searches_result_line_no[0] == -1) + searches_result_line_no[0] = line_count; + if (searches_result_line_no[1] == -1) + searches_result_line_no[1] = line_count; + + // Calculate 2d position by finding the beginning of the line and measuring distance + cursor_offset.x = InputTextCalcTextSizeW(ImStrbolW(searches_input_ptr[0], text_begin), searches_input_ptr[0]).x; + cursor_offset.y = searches_result_line_no[0] * g.FontSize; + if (searches_result_line_no[1] >= 0) + { + select_start_offset.x = InputTextCalcTextSizeW(ImStrbolW(searches_input_ptr[1], text_begin), searches_input_ptr[1]).x; + select_start_offset.y = searches_result_line_no[1] * g.FontSize; + } + + // Store text height (note that we haven't calculated text width at all, see GitHub issues #383, #1224) + if (is_multiline) + text_size = ImVec2(inner_size.x, line_count * g.FontSize); + } + + // Scroll + if (render_cursor && state->CursorFollow) + { + // Horizontal scroll in chunks of quarter width + if (!(flags & ImGuiInputTextFlags_NoHorizontalScroll)) + { + const float scroll_increment_x = inner_size.x * 0.25f; + if (cursor_offset.x < state->ScrollX) + state->ScrollX = IM_FLOOR(ImMax(0.0f, cursor_offset.x - scroll_increment_x)); + else if (cursor_offset.x - inner_size.x >= state->ScrollX) + state->ScrollX = IM_FLOOR(cursor_offset.x - inner_size.x + scroll_increment_x); + } + else + { + state->ScrollX 
= 0.0f; + } + + // Vertical scroll + if (is_multiline) + { + // Test if cursor is vertically visible + if (cursor_offset.y - g.FontSize < scroll_y) + scroll_y = ImMax(0.0f, cursor_offset.y - g.FontSize); + else if (cursor_offset.y - inner_size.y >= scroll_y) + scroll_y = cursor_offset.y - inner_size.y + style.FramePadding.y * 2.0f; + const float scroll_max_y = ImMax((text_size.y + style.FramePadding.y * 2.0f) - inner_size.y, 0.0f); + scroll_y = ImClamp(scroll_y, 0.0f, scroll_max_y); + draw_pos.y += (draw_window->Scroll.y - scroll_y); // Manipulate cursor pos immediately avoid a frame of lag + draw_window->Scroll.y = scroll_y; + } + + state->CursorFollow = false; + } + + // Draw selection + const ImVec2 draw_scroll = ImVec2(state->ScrollX, 0.0f); + if (render_selection) + { + const ImWchar* text_selected_begin = text_begin + ImMin(state->Stb.select_start, state->Stb.select_end); + const ImWchar* text_selected_end = text_begin + ImMax(state->Stb.select_start, state->Stb.select_end); + + ImU32 bg_color = GetColorU32(ImGuiCol_TextSelectedBg, render_cursor ? 1.0f : 0.6f); // FIXME: current code flow mandate that render_cursor is always true here, we are leaving the transparent one for tests. + float bg_offy_up = is_multiline ? 0.0f : -1.0f; // FIXME: those offsets should be part of the style? they don't play so well with multi-line selection. + float bg_offy_dn = is_multiline ? 0.0f : 2.0f; + ImVec2 rect_pos = draw_pos + select_start_offset - draw_scroll; + for (const ImWchar* p = text_selected_begin; p < text_selected_end; ) + { + if (rect_pos.y > clip_rect.w + g.FontSize) + break; + if (rect_pos.y < clip_rect.y) + { + //p = (const ImWchar*)wmemchr((const wchar_t*)p, '\n', text_selected_end - p); // FIXME-OPT: Could use this when wchar_t are 16-bit + //p = p ? 
p + 1 : text_selected_end; + while (p < text_selected_end) + if (*p++ == '\n') + break; + } + else + { + ImVec2 rect_size = InputTextCalcTextSizeW(p, text_selected_end, &p, NULL, true); + if (rect_size.x <= 0.0f) rect_size.x = IM_FLOOR(g.Font->GetCharAdvance((ImWchar)' ') * 0.50f); // So we can see selected empty lines + ImRect rect(rect_pos + ImVec2(0.0f, bg_offy_up - g.FontSize), rect_pos + ImVec2(rect_size.x, bg_offy_dn)); + rect.ClipWith(clip_rect); + if (rect.Overlaps(clip_rect)) + draw_window->DrawList->AddRectFilled(rect.Min, rect.Max, bg_color); + } + rect_pos.x = draw_pos.x - draw_scroll.x; + rect_pos.y += g.FontSize; + } + } + + // We test for 'buf_display_max_length' as a way to avoid some pathological cases (e.g. single-line 1 MB string) which would make ImDrawList crash. + if (is_multiline || (buf_display_end - buf_display) < buf_display_max_length) + { + ImU32 col = GetColorU32(is_displaying_hint ? ImGuiCol_TextDisabled : ImGuiCol_Text); + draw_window->DrawList->AddText(g.Font, g.FontSize, draw_pos - draw_scroll, col, buf_display, buf_display_end, 0.0f, is_multiline ? NULL : &clip_rect); + } + + // Draw blinking cursor + if (render_cursor) + { + state->CursorAnim += io.DeltaTime; + bool cursor_is_visible = (!g.IO.ConfigInputTextCursorBlink) || (state->CursorAnim <= 0.0f) || ImFmod(state->CursorAnim, 1.20f) <= 0.80f; + ImVec2 cursor_screen_pos = draw_pos + cursor_offset - draw_scroll; + ImRect cursor_screen_rect(cursor_screen_pos.x, cursor_screen_pos.y - g.FontSize + 0.5f, cursor_screen_pos.x + 1.0f, cursor_screen_pos.y - 1.5f); + if (cursor_is_visible && cursor_screen_rect.Overlaps(clip_rect)) + draw_window->DrawList->AddLine(cursor_screen_rect.Min, cursor_screen_rect.GetBL(), GetColorU32(ImGuiCol_Text)); + + // Notify OS of text input position for advanced IME (-1 x offset so that Windows IME can cover our cursor. Bit of an extra nicety.) 
+ if (!is_readonly) + g.PlatformImePos = ImVec2(cursor_screen_pos.x - 1.0f, cursor_screen_pos.y - g.FontSize); + } + } + else + { + // Render text only (no selection, no cursor) + if (is_multiline) + text_size = ImVec2(inner_size.x, InputTextCalcTextLenAndLineCount(buf_display, &buf_display_end) * g.FontSize); // We don't need width + else if (!is_displaying_hint && g.ActiveId == id) + buf_display_end = buf_display + state->CurLenA; + else if (!is_displaying_hint) + buf_display_end = buf_display + strlen(buf_display); + + if (is_multiline || (buf_display_end - buf_display) < buf_display_max_length) + { + ImU32 col = GetColorU32(is_displaying_hint ? ImGuiCol_TextDisabled : ImGuiCol_Text); + draw_window->DrawList->AddText(g.Font, g.FontSize, draw_pos, col, buf_display, buf_display_end, 0.0f, is_multiline ? NULL : &clip_rect); + } + } + + if (is_password && !is_displaying_hint) + PopFont(); + + if (is_multiline) + { + Dummy(text_size); + EndChild(); + EndGroup(); + } + + // Log as text + if (g.LogEnabled && (!is_password || is_displaying_hint)) + LogRenderedText(&draw_pos, buf_display, buf_display_end); + + if (label_size.x > 0) + RenderText(ImVec2(frame_bb.Max.x + style.ItemInnerSpacing.x, frame_bb.Min.y + style.FramePadding.y), label); + + if (value_changed && !(flags & ImGuiInputTextFlags_NoMarkEdited)) + MarkItemEdited(id); + + IMGUI_TEST_ENGINE_ITEM_INFO(id, label, window->DC.ItemFlags); + if ((flags & ImGuiInputTextFlags_EnterReturnsTrue) != 0) + return enter_pressed; + else + return value_changed; +} + +//------------------------------------------------------------------------- +// [SECTION] Widgets: ColorEdit, ColorPicker, ColorButton, etc. 
+//-------------------------------------------------------------------------
+// - ColorEdit3()
+// - ColorEdit4()
+// - ColorPicker3()
+// - RenderColorRectWithAlphaCheckerboard() [Internal]
+// - ColorPicker4()
+// - ColorButton()
+// - SetColorEditOptions()
+// - ColorTooltip() [Internal]
+// - ColorEditOptionsPopup() [Internal]
+// - ColorPickerOptionsPopup() [Internal]
+//-------------------------------------------------------------------------
+
+// Three-component color editor: forwards to ColorEdit4() with ImGuiColorEditFlags_NoAlpha added
+// and returns its result unchanged.
+bool ImGui::ColorEdit3(const char* label, float col[3], ImGuiColorEditFlags flags)
+{
+    return ColorEdit4(label, col, flags | ImGuiColorEditFlags_NoAlpha);
+}
+
+// Edit colors components (each component in 0.0f..1.0f range).
+// See enum ImGuiColorEditFlags_ for available options. e.g. Only access 3 floats if ImGuiColorEditFlags_NoAlpha flag is set.
+// With typical options: Left-click on color square to open color picker. Right-click to open option menu. CTRL-Click over input fields to edit them and TAB to go to next item.
+// - label: text shown next to the widget (unless ImGuiColorEditFlags_NoLabel); also pushed as the ID scope for the sub-items.
+// - col:   color edited in place. Read as HSV when ImGuiColorEditFlags_InputHSV is set, as RGB when ImGuiColorEditFlags_InputRGB is set.
+// - flags: per-call options; display/data-type/picker/input selections left unset are filled in from g.ColorEditOptions.
+// Returns true when the value changed during this call (input fields, picker popup, or an accepted drag-drop payload).
+// Returns false immediately when the current window has SkipItems set.
+bool ImGui::ColorEdit4(const char* label, float col[4], ImGuiColorEditFlags flags)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+
+    ImGuiContext& g = *GImGui;
+    const ImGuiStyle& style = g.Style;
+    const float square_sz = GetFrameHeight();
+    const float w_full = CalcItemWidth();
+    // Width reserved for the small preview button (zero when the preview is disabled); the inputs get the remainder.
+    const float w_button = (flags & ImGuiColorEditFlags_NoSmallPreview) ? 0.0f : (square_sz + style.ItemInnerSpacing.x);
+    const float w_inputs = w_full - w_button;
+    const char* label_display_end = FindRenderedTextEnd(label);
+    g.NextItemData.ClearFlags();
+
+    BeginGroup();
+    PushID(label);
+
+    // If we're not showing any slider there's no point in doing any HSV conversions
+    // (flags_untouched keeps the caller's original flags: the picker popup below forwards from those, not from the adjusted 'flags'.)
+    const ImGuiColorEditFlags flags_untouched = flags;
+    if (flags & ImGuiColorEditFlags_NoInputs)
+        flags = (flags & (~ImGuiColorEditFlags__DisplayMask)) | ImGuiColorEditFlags_DisplayRGB | ImGuiColorEditFlags_NoOptions;
+
+    // Context menu: display and modify options (before defaults are applied)
+    if (!(flags & ImGuiColorEditFlags_NoOptions))
+        ColorEditOptionsPopup(col, flags);
+
+    // Read stored options
+    // Each mask group is only taken from g.ColorEditOptions when the caller left that group empty;
+    // stored bits outside the four mask groups are always OR'ed in.
+    if (!(flags & ImGuiColorEditFlags__DisplayMask))
+        flags |= (g.ColorEditOptions & ImGuiColorEditFlags__DisplayMask);
+    if (!(flags & ImGuiColorEditFlags__DataTypeMask))
+        flags |= (g.ColorEditOptions & ImGuiColorEditFlags__DataTypeMask);
+    if (!(flags & ImGuiColorEditFlags__PickerMask))
+        flags |= (g.ColorEditOptions & ImGuiColorEditFlags__PickerMask);
+    if (!(flags & ImGuiColorEditFlags__InputMask))
+        flags |= (g.ColorEditOptions & ImGuiColorEditFlags__InputMask);
+    flags |= (g.ColorEditOptions & ~(ImGuiColorEditFlags__DisplayMask | ImGuiColorEditFlags__DataTypeMask | ImGuiColorEditFlags__PickerMask | ImGuiColorEditFlags__InputMask));
+    IM_ASSERT(ImIsPowerOfTwo(flags & ImGuiColorEditFlags__DisplayMask)); // Check that only 1 is selected
+    IM_ASSERT(ImIsPowerOfTwo(flags & ImGuiColorEditFlags__InputMask)); // Check that only 1 is selected
+
+    const bool alpha = (flags & ImGuiColorEditFlags_NoAlpha) == 0;
+    const bool hdr = (flags & ImGuiColorEditFlags_HDR) != 0;
+    const int components = alpha ? 4 : 3;
+
+    // Convert to the formats we need
+    // f[] is a working copy in the *display* color space; col[3] is not read when alpha is disabled.
+    float f[4] = { col[0], col[1], col[2], alpha ? col[3] : 1.0f };
+    if ((flags & ImGuiColorEditFlags_InputHSV) && (flags & ImGuiColorEditFlags_DisplayRGB))
+        ColorConvertHSVtoRGB(f[0], f[1], f[2], f[0], f[1], f[2]);
+    else if ((flags & ImGuiColorEditFlags_InputRGB) && (flags & ImGuiColorEditFlags_DisplayHSV))
+    {
+        // Hue is lost when converting from greyscale rgb (saturation=0). Restore it.
+        // The restore only applies when 'col' still equals the color this widget last wrote (g.ColorEditLastColor).
+        ColorConvertRGBtoHSV(f[0], f[1], f[2], f[0], f[1], f[2]);
+        if (memcmp(g.ColorEditLastColor, col, sizeof(float) * 3) == 0)
+        {
+            if (f[1] == 0)
+                f[0] = g.ColorEditLastHue;
+            if (f[2] == 0)
+                f[1] = g.ColorEditLastSat;
+        }
+    }
+    // Integer copy of the working components, edited by the DragInt and hexadecimal paths below.
+    int i[4] = { IM_F32_TO_INT8_UNBOUND(f[0]), IM_F32_TO_INT8_UNBOUND(f[1]), IM_F32_TO_INT8_UNBOUND(f[2]), IM_F32_TO_INT8_UNBOUND(f[3]) };
+
+    bool value_changed = false;
+    // Set when the edit went through the DragFloat path, so "Convert back" keeps f[] instead of rebuilding it from i[].
+    bool value_changed_as_float = false;
+
+    const ImVec2 pos = window->DC.CursorPos;
+    // When the color button sits on the left, the inputs start after it.
+    const float inputs_offset_x = (style.ColorButtonPosition == ImGuiDir_Left) ? w_button : 0.0f;
+    window->DC.CursorPos.x = pos.x + inputs_offset_x;
+
+    if ((flags & (ImGuiColorEditFlags_DisplayRGB | ImGuiColorEditFlags_DisplayHSV)) != 0 && (flags & ImGuiColorEditFlags_NoInputs) == 0)
+    {
+        // RGB/HSV 0..255 Sliders
+        // All items but the last share w_item_one; the last one takes whatever width remains.
+        const float w_item_one = ImMax(1.0f, IM_FLOOR((w_inputs - (style.ItemInnerSpacing.x) * (components - 1)) / (float)components));
+        const float w_item_last = ImMax(1.0f, IM_FLOOR(w_inputs - (w_item_one + style.ItemInnerSpacing.x) * (components - 1)));
+
+        // Drop the "R:"/"H:" style prefixes when a single item is not wider than a sample prefixed value.
+        const bool hide_prefix = (w_item_one <= CalcTextSize((flags & ImGuiColorEditFlags_Float) ? "M:0.000" : "M:000").x);
+        static const char* ids[4] = { "##X", "##Y", "##Z", "##W" };
+        static const char* fmt_table_int[3][4] =
+        {
+            { "%3d", "%3d", "%3d", "%3d" }, // Short display
+            { "R:%3d", "G:%3d", "B:%3d", "A:%3d" }, // Long display for RGBA
+            { "H:%3d", "S:%3d", "V:%3d", "A:%3d" } // Long display for HSVA
+        };
+        static const char* fmt_table_float[3][4] =
+        {
+            { "%0.3f", "%0.3f", "%0.3f", "%0.3f" }, // Short display
+            { "R:%0.3f", "G:%0.3f", "B:%0.3f", "A:%0.3f" }, // Long display for RGBA
+            { "H:%0.3f", "S:%0.3f", "V:%0.3f", "A:%0.3f" } // Long display for HSVA
+        };
+        // Row index into the format tables: 0 = short, 1 = RGBA prefixes, 2 = HSVA prefixes.
+        const int fmt_idx = hide_prefix ? 0 : (flags & ImGuiColorEditFlags_DisplayHSV) ? 2 : 1;
+
+        for (int n = 0; n < components; n++)
+        {
+            if (n > 0)
+                SameLine(0, style.ItemInnerSpacing.x);
+            SetNextItemWidth((n + 1 < components) ? w_item_one : w_item_last);
+
+            // FIXME: When ImGuiColorEditFlags_HDR flag is passed HS values snap in weird ways when SV values go below 0.
+            // With HDR set, min and max passed to the drag widget are both zero.
+            if (flags & ImGuiColorEditFlags_Float)
+            {
+                value_changed |= DragFloat(ids[n], &f[n], 1.0f / 255.0f, 0.0f, hdr ? 0.0f : 1.0f, fmt_table_float[fmt_idx][n]);
+                value_changed_as_float |= value_changed;
+            }
+            else
+            {
+                value_changed |= DragInt(ids[n], &i[n], 1.0f, 0, hdr ? 0 : 255, fmt_table_int[fmt_idx][n]);
+            }
+            if (!(flags & ImGuiColorEditFlags_NoOptions))
+                OpenPopupOnItemClick("context");
+        }
+    }
+    else if ((flags & ImGuiColorEditFlags_DisplayHex) != 0 && (flags & ImGuiColorEditFlags_NoInputs) == 0)
+    {
+        // RGB Hexadecimal Input
+        // The text is formatted from i[] clamped to 0..255, as "#RRGGBB" or "#RRGGBBAA".
+        char buf[64];
+        if (alpha)
+            ImFormatString(buf, IM_ARRAYSIZE(buf), "#%02X%02X%02X%02X", ImClamp(i[0], 0, 255), ImClamp(i[1], 0, 255), ImClamp(i[2], 0, 255), ImClamp(i[3], 0, 255));
+        else
+            ImFormatString(buf, IM_ARRAYSIZE(buf), "#%02X%02X%02X", ImClamp(i[0], 0, 255), ImClamp(i[1], 0, 255), ImClamp(i[2], 0, 255));
+        SetNextItemWidth(w_inputs);
+        if (InputText("##Text", buf, IM_ARRAYSIZE(buf), ImGuiInputTextFlags_CharsHexadecimal | ImGuiInputTextFlags_CharsUppercase))
+        {
+            value_changed = true;
+            // Skip any leading '#' and blank characters before parsing.
+            char* p = buf;
+            while (*p == '#' || ImCharIsBlankA(*p))
+                p++;
+            // Components that sscanf() does not manage to parse stay at 0.
+            i[0] = i[1] = i[2] = i[3] = 0;
+            if (alpha)
+                sscanf(p, "%02X%02X%02X%02X", (unsigned int*)&i[0], (unsigned int*)&i[1], (unsigned int*)&i[2], (unsigned int*)&i[3]); // Treat as unsigned (%X is unsigned)
+            else
+                sscanf(p, "%02X%02X%02X", (unsigned int*)&i[0], (unsigned int*)&i[1], (unsigned int*)&i[2]);
+        }
+        if (!(flags & ImGuiColorEditFlags_NoOptions))
+            OpenPopupOnItemClick("context");
+    }
+
+    // Non-NULL only while the "picker" popup is open this frame.
+    ImGuiWindow* picker_active_window = NULL;
+    if (!(flags & ImGuiColorEditFlags_NoSmallPreview))
+    {
+        const float button_offset_x = ((flags & ImGuiColorEditFlags_NoInputs) || (style.ColorButtonPosition == ImGuiDir_Left)) ? 0.0f : w_inputs + style.ItemInnerSpacing.x;
+        window->DC.CursorPos = ImVec2(pos.x + button_offset_x, pos.y);
+
+        const ImVec4 col_v4(col[0], col[1], col[2], alpha ? col[3] : 1.0f);
+        if (ColorButton("##ColorButton", col_v4, flags))
+        {
+            if (!(flags & ImGuiColorEditFlags_NoPicker))
+            {
+                // Store current color and open a picker
+                g.ColorPickerRef = col_v4;
+                OpenPopup("picker");
+                SetNextWindowPos(window->DC.LastItemRect.GetBL() + ImVec2(-1, style.ItemSpacing.y));
+            }
+        }
+        if (!(flags & ImGuiColorEditFlags_NoOptions))
+            OpenPopupOnItemClick("context");
+
+        if (BeginPopup("picker"))
+        {
+            picker_active_window = g.CurrentWindow;
+            if (label != label_display_end)
+            {
+                TextEx(label, label_display_end);
+                Spacing();
+            }
+            // The picker receives the caller's original flags (filtered), plus every display mode, no label and half alpha preview.
+            ImGuiColorEditFlags picker_flags_to_forward = ImGuiColorEditFlags__DataTypeMask | ImGuiColorEditFlags__PickerMask | ImGuiColorEditFlags__InputMask | ImGuiColorEditFlags_HDR | ImGuiColorEditFlags_NoAlpha | ImGuiColorEditFlags_AlphaBar;
+            ImGuiColorEditFlags picker_flags = (flags_untouched & picker_flags_to_forward) | ImGuiColorEditFlags__DisplayMask | ImGuiColorEditFlags_NoLabel | ImGuiColorEditFlags_AlphaPreviewHalf;
+            SetNextItemWidth(square_sz * 12.0f); // Use 256 + bar sizes?
+            // The picker edits 'col' directly; g.ColorPickerRef (stored when the popup was opened) is passed as its reference color.
+            value_changed |= ColorPicker4("##picker", col, picker_flags, &g.ColorPickerRef.x);
+            EndPopup();
+        }
+    }
+
+    if (label != label_display_end && !(flags & ImGuiColorEditFlags_NoLabel))
+    {
+        const float text_offset_x = (flags & ImGuiColorEditFlags_NoInputs) ? w_button : w_full + style.ItemInnerSpacing.x;
+        window->DC.CursorPos = ImVec2(pos.x + text_offset_x, pos.y + style.FramePadding.y);
+        TextEx(label, label_display_end);
+    }
+
+    // Convert back
+    // Skipped while the picker popup is open: ColorPicker4() above was handed 'col' itself.
+    if (value_changed && picker_active_window == NULL)
+    {
+        if (!value_changed_as_float)
+            for (int n = 0; n < 4; n++)
+                f[n] = i[n] / 255.0f;
+        if ((flags & ImGuiColorEditFlags_DisplayHSV) && (flags & ImGuiColorEditFlags_InputRGB))
+        {
+            // Remember the edited hue/saturation and the resulting RGB so the restore step near the top of this function can use them on a later call.
+            g.ColorEditLastHue = f[0];
+            g.ColorEditLastSat = f[1];
+            ColorConvertHSVtoRGB(f[0], f[1], f[2], f[0], f[1], f[2]);
+            memcpy(g.ColorEditLastColor, f, sizeof(float) * 3);
+        }
+        if ((flags & ImGuiColorEditFlags_DisplayRGB) && (flags & ImGuiColorEditFlags_InputHSV))
+            ColorConvertRGBtoHSV(f[0], f[1], f[2], f[0], f[1], f[2]);
+
+        col[0] = f[0];
+        col[1] = f[1];
+        col[2] = f[2];
+        if (alpha)
+            col[3] = f[3];
+    }
+
+    PopID();
+    EndGroup();
+
+    // Drag and Drop Target
+    // NB: The flag test is merely an optional micro-optimization, BeginDragDropTarget() does the same test.
+    if ((window->DC.LastItemStatusFlags & ImGuiItemStatusFlags_HoveredRect) && !(flags & ImGuiColorEditFlags_NoDragDrop) && BeginDragDropTarget())
+    {
+        bool accepted_drag_drop = false;
+        if (const ImGuiPayload* payload = AcceptDragDropPayload(IMGUI_PAYLOAD_TYPE_COLOR_3F))
+        {
+            memcpy((float*)col, payload->Data, sizeof(float) * 3); // Preserve alpha if any //-V512
+            value_changed = accepted_drag_drop = true;
+        }
+        if (const ImGuiPayload* payload = AcceptDragDropPayload(IMGUI_PAYLOAD_TYPE_COLOR_4F))
+        {
+            // Copies 3 or 4 floats depending on whether alpha is enabled for this widget.
+            memcpy((float*)col, payload->Data, sizeof(float) * components);
+            value_changed = accepted_drag_drop = true;
+        }
+
+        // Drag-drop payloads are always RGB
+        if (accepted_drag_drop && (flags & ImGuiColorEditFlags_InputHSV))
+            ColorConvertRGBtoHSV(col[0], col[1], col[2], col[0], col[1], col[2]);
+        EndDragDropTarget();
+    }
+
+    // When picker is being actively used, use its active id so IsItemActive() will function on ColorEdit4().
+    if (picker_active_window && g.ActiveId != 0 && g.ActiveIdWindow == picker_active_window)
+        window->DC.LastItemId = g.ActiveId;
+
+    if (value_changed)
+        MarkItemEdited(window->DC.LastItemId);
+
+    return value_changed;
+}
+
+// Three-component color picker: copies 'col' into a temporary 4-float array with alpha set to 1.0f,
+// calls ColorPicker4() with ImGuiColorEditFlags_NoAlpha added, and writes the three components back
+// only when ColorPicker4() returns true. Returns that same result.
+bool ImGui::ColorPicker3(const char* label, float col[3], ImGuiColorEditFlags flags)
+{
+    float col4[4] = { col[0], col[1], col[2], 1.0f };
+    if (!ColorPicker4(label, col4, flags | ImGuiColorEditFlags_NoAlpha))
+        return false;
+    col[0] = col4[0]; col[1] = col4[1]; col[2] = col4[2];
+    return true;
+}
+
+// Helper for ColorPicker4()
+// Issues four RenderArrowPointingAt() calls at height pos.y: a right-facing pair positioned from pos.x and a
+// left-facing pair positioned from pos.x + bar_w. In each pair a slightly larger black arrow is submitted first,
+// then a white one of size half_sz. 'alpha' is converted to 8 bits and used as the alpha of both colors.
+static void RenderArrowsForVerticalBar(ImDrawList* draw_list, ImVec2 pos, ImVec2 half_sz, float bar_w, float alpha)
+{
+    ImU32 alpha8 = IM_F32_TO_INT8_SAT(alpha);
+    ImGui::RenderArrowPointingAt(draw_list, ImVec2(pos.x + half_sz.x + 1, pos.y), ImVec2(half_sz.x + 2, half_sz.y + 1), ImGuiDir_Right, IM_COL32(0,0,0,alpha8));
+    ImGui::RenderArrowPointingAt(draw_list, ImVec2(pos.x + half_sz.x, pos.y), half_sz, ImGuiDir_Right, IM_COL32(255,255,255,alpha8));
+    ImGui::RenderArrowPointingAt(draw_list, ImVec2(pos.x + bar_w - half_sz.x - 1, pos.y), ImVec2(half_sz.x + 2, half_sz.y + 1), ImGuiDir_Left, IM_COL32(0,0,0,alpha8));
+    ImGui::RenderArrowPointingAt(draw_list, ImVec2(pos.x + bar_w - half_sz.x, pos.y), half_sz, ImGuiDir_Left, IM_COL32(255,255,255,alpha8));
+}
+
+// Note: ColorPicker4() only accesses 3 floats if ImGuiColorEditFlags_NoAlpha flag is set.
+// (In C++ the 'float col[4]' notation for a function argument is equivalent to 'float* col', we only specify a size to facilitate understanding of the code.)
+// FIXME: we adjust the big color square height based on item width, which may cause a flickering feedback loop (if automatic height makes a vertical scrollbar appears, affecting automatic width..)
+// FIXME: this is trying to be aware of style.Alpha but not fully correct. 
Also, the color wheel will have overlapping glitches with (style.Alpha < 1.0) +bool ImGui::ColorPicker4(const char* label, float col[4], ImGuiColorEditFlags flags, const float* ref_col) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return false; + + ImDrawList* draw_list = window->DrawList; + ImGuiStyle& style = g.Style; + ImGuiIO& io = g.IO; + + const float width = CalcItemWidth(); + g.NextItemData.ClearFlags(); + + PushID(label); + BeginGroup(); + + if (!(flags & ImGuiColorEditFlags_NoSidePreview)) + flags |= ImGuiColorEditFlags_NoSmallPreview; + + // Context menu: display and store options. + if (!(flags & ImGuiColorEditFlags_NoOptions)) + ColorPickerOptionsPopup(col, flags); + + // Read stored options + if (!(flags & ImGuiColorEditFlags__PickerMask)) + flags |= ((g.ColorEditOptions & ImGuiColorEditFlags__PickerMask) ? g.ColorEditOptions : ImGuiColorEditFlags__OptionsDefault) & ImGuiColorEditFlags__PickerMask; + if (!(flags & ImGuiColorEditFlags__InputMask)) + flags |= ((g.ColorEditOptions & ImGuiColorEditFlags__InputMask) ? g.ColorEditOptions : ImGuiColorEditFlags__OptionsDefault) & ImGuiColorEditFlags__InputMask; + IM_ASSERT(ImIsPowerOfTwo(flags & ImGuiColorEditFlags__PickerMask)); // Check that only 1 is selected + IM_ASSERT(ImIsPowerOfTwo(flags & ImGuiColorEditFlags__InputMask)); // Check that only 1 is selected + if (!(flags & ImGuiColorEditFlags_NoOptions)) + flags |= (g.ColorEditOptions & ImGuiColorEditFlags_AlphaBar); + + // Setup + int components = (flags & ImGuiColorEditFlags_NoAlpha) ? 3 : 4; + bool alpha_bar = (flags & ImGuiColorEditFlags_AlphaBar) && !(flags & ImGuiColorEditFlags_NoAlpha); + ImVec2 picker_pos = window->DC.CursorPos; + float square_sz = GetFrameHeight(); + float bars_width = square_sz; // Arbitrary smallish width of Hue/Alpha picking bars + float sv_picker_size = ImMax(bars_width * 1, width - (alpha_bar ? 
2 : 1) * (bars_width + style.ItemInnerSpacing.x)); // Saturation/Value picking box + float bar0_pos_x = picker_pos.x + sv_picker_size + style.ItemInnerSpacing.x; + float bar1_pos_x = bar0_pos_x + bars_width + style.ItemInnerSpacing.x; + float bars_triangles_half_sz = IM_FLOOR(bars_width * 0.20f); + + float backup_initial_col[4]; + memcpy(backup_initial_col, col, components * sizeof(float)); + + float wheel_thickness = sv_picker_size * 0.08f; + float wheel_r_outer = sv_picker_size * 0.50f; + float wheel_r_inner = wheel_r_outer - wheel_thickness; + ImVec2 wheel_center(picker_pos.x + (sv_picker_size + bars_width)*0.5f, picker_pos.y + sv_picker_size * 0.5f); + + // Note: the triangle is displayed rotated with triangle_pa pointing to Hue, but most coordinates stays unrotated for logic. + float triangle_r = wheel_r_inner - (int)(sv_picker_size * 0.027f); + ImVec2 triangle_pa = ImVec2(triangle_r, 0.0f); // Hue point. + ImVec2 triangle_pb = ImVec2(triangle_r * -0.5f, triangle_r * -0.866025f); // Black point. + ImVec2 triangle_pc = ImVec2(triangle_r * -0.5f, triangle_r * +0.866025f); // White point. + + float H = col[0], S = col[1], V = col[2]; + float R = col[0], G = col[1], B = col[2]; + if (flags & ImGuiColorEditFlags_InputRGB) + { + // Hue is lost when converting from greyscale rgb (saturation=0). Restore it. 
+ ColorConvertRGBtoHSV(R, G, B, H, S, V); + if (memcmp(g.ColorEditLastColor, col, sizeof(float) * 3) == 0) + { + if (S == 0) + H = g.ColorEditLastHue; + if (V == 0) + S = g.ColorEditLastSat; + } + } + else if (flags & ImGuiColorEditFlags_InputHSV) + { + ColorConvertHSVtoRGB(H, S, V, R, G, B); + } + + bool value_changed = false, value_changed_h = false, value_changed_sv = false; + + PushItemFlag(ImGuiItemFlags_NoNav, true); + if (flags & ImGuiColorEditFlags_PickerHueWheel) + { + // Hue wheel + SV triangle logic + InvisibleButton("hsv", ImVec2(sv_picker_size + style.ItemInnerSpacing.x + bars_width, sv_picker_size)); + if (IsItemActive()) + { + ImVec2 initial_off = g.IO.MouseClickedPos[0] - wheel_center; + ImVec2 current_off = g.IO.MousePos - wheel_center; + float initial_dist2 = ImLengthSqr(initial_off); + if (initial_dist2 >= (wheel_r_inner - 1) * (wheel_r_inner - 1) && initial_dist2 <= (wheel_r_outer + 1) * (wheel_r_outer + 1)) + { + // Interactive with Hue wheel + H = ImAtan2(current_off.y, current_off.x) / IM_PI * 0.5f; + if (H < 0.0f) + H += 1.0f; + value_changed = value_changed_h = true; + } + float cos_hue_angle = ImCos(-H * 2.0f * IM_PI); + float sin_hue_angle = ImSin(-H * 2.0f * IM_PI); + if (ImTriangleContainsPoint(triangle_pa, triangle_pb, triangle_pc, ImRotate(initial_off, cos_hue_angle, sin_hue_angle))) + { + // Interacting with SV triangle + ImVec2 current_off_unrotated = ImRotate(current_off, cos_hue_angle, sin_hue_angle); + if (!ImTriangleContainsPoint(triangle_pa, triangle_pb, triangle_pc, current_off_unrotated)) + current_off_unrotated = ImTriangleClosestPoint(triangle_pa, triangle_pb, triangle_pc, current_off_unrotated); + float uu, vv, ww; + ImTriangleBarycentricCoords(triangle_pa, triangle_pb, triangle_pc, current_off_unrotated, uu, vv, ww); + V = ImClamp(1.0f - vv, 0.0001f, 1.0f); + S = ImClamp(uu / V, 0.0001f, 1.0f); + value_changed = value_changed_sv = true; + } + } + if (!(flags & ImGuiColorEditFlags_NoOptions)) + 
OpenPopupOnItemClick("context"); + } + else if (flags & ImGuiColorEditFlags_PickerHueBar) + { + // SV rectangle logic + InvisibleButton("sv", ImVec2(sv_picker_size, sv_picker_size)); + if (IsItemActive()) + { + S = ImSaturate((io.MousePos.x - picker_pos.x) / (sv_picker_size - 1)); + V = 1.0f - ImSaturate((io.MousePos.y - picker_pos.y) / (sv_picker_size - 1)); + value_changed = value_changed_sv = true; + } + if (!(flags & ImGuiColorEditFlags_NoOptions)) + OpenPopupOnItemClick("context"); + + // Hue bar logic + SetCursorScreenPos(ImVec2(bar0_pos_x, picker_pos.y)); + InvisibleButton("hue", ImVec2(bars_width, sv_picker_size)); + if (IsItemActive()) + { + H = ImSaturate((io.MousePos.y - picker_pos.y) / (sv_picker_size - 1)); + value_changed = value_changed_h = true; + } + } + + // Alpha bar logic + if (alpha_bar) + { + SetCursorScreenPos(ImVec2(bar1_pos_x, picker_pos.y)); + InvisibleButton("alpha", ImVec2(bars_width, sv_picker_size)); + if (IsItemActive()) + { + col[3] = 1.0f - ImSaturate((io.MousePos.y - picker_pos.y) / (sv_picker_size - 1)); + value_changed = true; + } + } + PopItemFlag(); // ImGuiItemFlags_NoNav + + if (!(flags & ImGuiColorEditFlags_NoSidePreview)) + { + SameLine(0, style.ItemInnerSpacing.x); + BeginGroup(); + } + + if (!(flags & ImGuiColorEditFlags_NoLabel)) + { + const char* label_display_end = FindRenderedTextEnd(label); + if (label != label_display_end) + { + if ((flags & ImGuiColorEditFlags_NoSidePreview)) + SameLine(0, style.ItemInnerSpacing.x); + TextEx(label, label_display_end); + } + } + + if (!(flags & ImGuiColorEditFlags_NoSidePreview)) + { + PushItemFlag(ImGuiItemFlags_NoNavDefaultFocus, true); + ImVec4 col_v4(col[0], col[1], col[2], (flags & ImGuiColorEditFlags_NoAlpha) ? 
1.0f : col[3]); + if ((flags & ImGuiColorEditFlags_NoLabel)) + Text("Current"); + + ImGuiColorEditFlags sub_flags_to_forward = ImGuiColorEditFlags__InputMask | ImGuiColorEditFlags_HDR | ImGuiColorEditFlags_AlphaPreview | ImGuiColorEditFlags_AlphaPreviewHalf | ImGuiColorEditFlags_NoTooltip; + ColorButton("##current", col_v4, (flags & sub_flags_to_forward), ImVec2(square_sz * 3, square_sz * 2)); + if (ref_col != NULL) + { + Text("Original"); + ImVec4 ref_col_v4(ref_col[0], ref_col[1], ref_col[2], (flags & ImGuiColorEditFlags_NoAlpha) ? 1.0f : ref_col[3]); + if (ColorButton("##original", ref_col_v4, (flags & sub_flags_to_forward), ImVec2(square_sz * 3, square_sz * 2))) + { + memcpy(col, ref_col, components * sizeof(float)); + value_changed = true; + } + } + PopItemFlag(); + EndGroup(); + } + + // Convert back color to RGB + if (value_changed_h || value_changed_sv) + { + if (flags & ImGuiColorEditFlags_InputRGB) + { + ColorConvertHSVtoRGB(H >= 1.0f ? H - 10 * 1e-6f : H, S > 0.0f ? S : 10 * 1e-6f, V > 0.0f ? V : 1e-6f, col[0], col[1], col[2]); + g.ColorEditLastHue = H; + g.ColorEditLastSat = S; + memcpy(g.ColorEditLastColor, col, sizeof(float) * 3); + } + else if (flags & ImGuiColorEditFlags_InputHSV) + { + col[0] = H; + col[1] = S; + col[2] = V; + } + } + + // R,G,B and H,S,V slider color editor + bool value_changed_fix_hue_wrap = false; + if ((flags & ImGuiColorEditFlags_NoInputs) == 0) + { + PushItemWidth((alpha_bar ? 
bar1_pos_x : bar0_pos_x) + bars_width - picker_pos.x); + ImGuiColorEditFlags sub_flags_to_forward = ImGuiColorEditFlags__DataTypeMask | ImGuiColorEditFlags__InputMask | ImGuiColorEditFlags_HDR | ImGuiColorEditFlags_NoAlpha | ImGuiColorEditFlags_NoOptions | ImGuiColorEditFlags_NoSmallPreview | ImGuiColorEditFlags_AlphaPreview | ImGuiColorEditFlags_AlphaPreviewHalf; + ImGuiColorEditFlags sub_flags = (flags & sub_flags_to_forward) | ImGuiColorEditFlags_NoPicker; + if (flags & ImGuiColorEditFlags_DisplayRGB || (flags & ImGuiColorEditFlags__DisplayMask) == 0) + if (ColorEdit4("##rgb", col, sub_flags | ImGuiColorEditFlags_DisplayRGB)) + { + // FIXME: Hackily differentiating using the DragInt (ActiveId != 0 && !ActiveIdAllowOverlap) vs. using the InputText or DropTarget. + // For the later we don't want to run the hue-wrap canceling code. If you are well versed in HSV picker please provide your input! (See #2050) + value_changed_fix_hue_wrap = (g.ActiveId != 0 && !g.ActiveIdAllowOverlap); + value_changed = true; + } + if (flags & ImGuiColorEditFlags_DisplayHSV || (flags & ImGuiColorEditFlags__DisplayMask) == 0) + value_changed |= ColorEdit4("##hsv", col, sub_flags | ImGuiColorEditFlags_DisplayHSV); + if (flags & ImGuiColorEditFlags_DisplayHex || (flags & ImGuiColorEditFlags__DisplayMask) == 0) + value_changed |= ColorEdit4("##hex", col, sub_flags | ImGuiColorEditFlags_DisplayHex); + PopItemWidth(); + } + + // Try to cancel hue wrap (after ColorEdit4 call), if any + if (value_changed_fix_hue_wrap && (flags & ImGuiColorEditFlags_InputRGB)) + { + float new_H, new_S, new_V; + ColorConvertRGBtoHSV(col[0], col[1], col[2], new_H, new_S, new_V); + if (new_H <= 0 && H > 0) + { + if (new_V <= 0 && V != new_V) + ColorConvertHSVtoRGB(H, S, new_V <= 0 ? V * 0.5f : new_V, col[0], col[1], col[2]); + else if (new_S <= 0) + ColorConvertHSVtoRGB(H, new_S <= 0 ? 
S * 0.5f : new_S, new_V, col[0], col[1], col[2]); + } + } + + if (value_changed) + { + if (flags & ImGuiColorEditFlags_InputRGB) + { + R = col[0]; + G = col[1]; + B = col[2]; + ColorConvertRGBtoHSV(R, G, B, H, S, V); + if (memcmp(g.ColorEditLastColor, col, sizeof(float) * 3) == 0) // Fix local Hue as display below will use it immediately. + { + if (S == 0) + H = g.ColorEditLastHue; + if (V == 0) + S = g.ColorEditLastSat; + } + } + else if (flags & ImGuiColorEditFlags_InputHSV) + { + H = col[0]; + S = col[1]; + V = col[2]; + ColorConvertHSVtoRGB(H, S, V, R, G, B); + } + } + + const int style_alpha8 = IM_F32_TO_INT8_SAT(style.Alpha); + const ImU32 col_black = IM_COL32(0,0,0,style_alpha8); + const ImU32 col_white = IM_COL32(255,255,255,style_alpha8); + const ImU32 col_midgrey = IM_COL32(128,128,128,style_alpha8); + const ImU32 col_hues[6 + 1] = { IM_COL32(255,0,0,style_alpha8), IM_COL32(255,255,0,style_alpha8), IM_COL32(0,255,0,style_alpha8), IM_COL32(0,255,255,style_alpha8), IM_COL32(0,0,255,style_alpha8), IM_COL32(255,0,255,style_alpha8), IM_COL32(255,0,0,style_alpha8) }; + + ImVec4 hue_color_f(1, 1, 1, style.Alpha); ColorConvertHSVtoRGB(H, 1, 1, hue_color_f.x, hue_color_f.y, hue_color_f.z); + ImU32 hue_color32 = ColorConvertFloat4ToU32(hue_color_f); + ImU32 user_col32_striped_of_alpha = ColorConvertFloat4ToU32(ImVec4(R, G, B, style.Alpha)); // Important: this is still including the main rendering/style alpha!! + + ImVec2 sv_cursor_pos; + + if (flags & ImGuiColorEditFlags_PickerHueWheel) + { + // Render Hue Wheel + const float aeps = 0.5f / wheel_r_outer; // Half a pixel arc length in radians (2pi cancels out). 
+ const int segment_per_arc = ImMax(4, (int)wheel_r_outer / 12); + for (int n = 0; n < 6; n++) + { + const float a0 = (n) /6.0f * 2.0f * IM_PI - aeps; + const float a1 = (n+1.0f)/6.0f * 2.0f * IM_PI + aeps; + const int vert_start_idx = draw_list->VtxBuffer.Size; + draw_list->PathArcTo(wheel_center, (wheel_r_inner + wheel_r_outer)*0.5f, a0, a1, segment_per_arc); + draw_list->PathStroke(col_white, false, wheel_thickness); + const int vert_end_idx = draw_list->VtxBuffer.Size; + + // Paint colors over existing vertices + ImVec2 gradient_p0(wheel_center.x + ImCos(a0) * wheel_r_inner, wheel_center.y + ImSin(a0) * wheel_r_inner); + ImVec2 gradient_p1(wheel_center.x + ImCos(a1) * wheel_r_inner, wheel_center.y + ImSin(a1) * wheel_r_inner); + ShadeVertsLinearColorGradientKeepAlpha(draw_list, vert_start_idx, vert_end_idx, gradient_p0, gradient_p1, col_hues[n], col_hues[n + 1]); + } + + // Render Cursor + preview on Hue Wheel + float cos_hue_angle = ImCos(H * 2.0f * IM_PI); + float sin_hue_angle = ImSin(H * 2.0f * IM_PI); + ImVec2 hue_cursor_pos(wheel_center.x + cos_hue_angle * (wheel_r_inner + wheel_r_outer) * 0.5f, wheel_center.y + sin_hue_angle * (wheel_r_inner + wheel_r_outer) * 0.5f); + float hue_cursor_rad = value_changed_h ? 
wheel_thickness * 0.65f : wheel_thickness * 0.55f; + int hue_cursor_segments = ImClamp((int)(hue_cursor_rad / 1.4f), 9, 32); + draw_list->AddCircleFilled(hue_cursor_pos, hue_cursor_rad, hue_color32, hue_cursor_segments); + draw_list->AddCircle(hue_cursor_pos, hue_cursor_rad + 1, col_midgrey, hue_cursor_segments); + draw_list->AddCircle(hue_cursor_pos, hue_cursor_rad, col_white, hue_cursor_segments); + + // Render SV triangle (rotated according to hue) + ImVec2 tra = wheel_center + ImRotate(triangle_pa, cos_hue_angle, sin_hue_angle); + ImVec2 trb = wheel_center + ImRotate(triangle_pb, cos_hue_angle, sin_hue_angle); + ImVec2 trc = wheel_center + ImRotate(triangle_pc, cos_hue_angle, sin_hue_angle); + ImVec2 uv_white = GetFontTexUvWhitePixel(); + draw_list->PrimReserve(6, 6); + draw_list->PrimVtx(tra, uv_white, hue_color32); + draw_list->PrimVtx(trb, uv_white, hue_color32); + draw_list->PrimVtx(trc, uv_white, col_white); + draw_list->PrimVtx(tra, uv_white, 0); + draw_list->PrimVtx(trb, uv_white, col_black); + draw_list->PrimVtx(trc, uv_white, 0); + draw_list->AddTriangle(tra, trb, trc, col_midgrey, 1.5f); + sv_cursor_pos = ImLerp(ImLerp(trc, tra, ImSaturate(S)), trb, ImSaturate(1 - V)); + } + else if (flags & ImGuiColorEditFlags_PickerHueBar) + { + // Render SV Square + draw_list->AddRectFilledMultiColor(picker_pos, picker_pos + ImVec2(sv_picker_size, sv_picker_size), col_white, hue_color32, hue_color32, col_white); + draw_list->AddRectFilledMultiColor(picker_pos, picker_pos + ImVec2(sv_picker_size, sv_picker_size), 0, 0, col_black, col_black); + RenderFrameBorder(picker_pos, picker_pos + ImVec2(sv_picker_size, sv_picker_size), 0.0f); + sv_cursor_pos.x = ImClamp(IM_ROUND(picker_pos.x + ImSaturate(S) * sv_picker_size), picker_pos.x + 2, picker_pos.x + sv_picker_size - 2); // Sneakily prevent the circle to stick out too much + sv_cursor_pos.y = ImClamp(IM_ROUND(picker_pos.y + ImSaturate(1 - V) * sv_picker_size), picker_pos.y + 2, picker_pos.y + sv_picker_size - 2); + + 
// Render Hue Bar + for (int i = 0; i < 6; ++i) + draw_list->AddRectFilledMultiColor(ImVec2(bar0_pos_x, picker_pos.y + i * (sv_picker_size / 6)), ImVec2(bar0_pos_x + bars_width, picker_pos.y + (i + 1) * (sv_picker_size / 6)), col_hues[i], col_hues[i], col_hues[i + 1], col_hues[i + 1]); + float bar0_line_y = IM_ROUND(picker_pos.y + H * sv_picker_size); + RenderFrameBorder(ImVec2(bar0_pos_x, picker_pos.y), ImVec2(bar0_pos_x + bars_width, picker_pos.y + sv_picker_size), 0.0f); + RenderArrowsForVerticalBar(draw_list, ImVec2(bar0_pos_x - 1, bar0_line_y), ImVec2(bars_triangles_half_sz + 1, bars_triangles_half_sz), bars_width + 2.0f, style.Alpha); + } + + // Render cursor/preview circle (clamp S/V within 0..1 range because floating points colors may lead HSV values to be out of range) + float sv_cursor_rad = value_changed_sv ? 10.0f : 6.0f; + draw_list->AddCircleFilled(sv_cursor_pos, sv_cursor_rad, user_col32_striped_of_alpha, 12); + draw_list->AddCircle(sv_cursor_pos, sv_cursor_rad + 1, col_midgrey, 12); + draw_list->AddCircle(sv_cursor_pos, sv_cursor_rad, col_white, 12); + + // Render alpha bar + if (alpha_bar) + { + float alpha = ImSaturate(col[3]); + ImRect bar1_bb(bar1_pos_x, picker_pos.y, bar1_pos_x + bars_width, picker_pos.y + sv_picker_size); + RenderColorRectWithAlphaCheckerboard(draw_list, bar1_bb.Min, bar1_bb.Max, 0, bar1_bb.GetWidth() / 2.0f, ImVec2(0.0f, 0.0f)); + draw_list->AddRectFilledMultiColor(bar1_bb.Min, bar1_bb.Max, user_col32_striped_of_alpha, user_col32_striped_of_alpha, user_col32_striped_of_alpha & ~IM_COL32_A_MASK, user_col32_striped_of_alpha & ~IM_COL32_A_MASK); + float bar1_line_y = IM_ROUND(picker_pos.y + (1.0f - alpha) * sv_picker_size); + RenderFrameBorder(bar1_bb.Min, bar1_bb.Max, 0.0f); + RenderArrowsForVerticalBar(draw_list, ImVec2(bar1_pos_x - 1, bar1_line_y), ImVec2(bars_triangles_half_sz + 1, bars_triangles_half_sz), bars_width + 2.0f, style.Alpha); + } + + EndGroup(); + + if (value_changed && memcmp(backup_initial_col, col, components 
* sizeof(float)) == 0)
+        value_changed = false;
+    if (value_changed)
+        MarkItemEdited(window->DC.LastItemId);
+
+    PopID();
+
+    return value_changed;
+}
+
+// A little color square. Return true when clicked.
+// FIXME: May want to display/ignore the alpha component in the color display? Yet show it in the tooltip.
+// 'desc_id' is not called 'label' because we don't display it next to the button, but only in the tooltip.
+// Note that 'col' may be encoded in HSV if ImGuiColorEditFlags_InputHSV is set.
+// - desc_id: used to derive the item ID (window->GetID) and forwarded to ColorTooltip() as the tooltip text.
+// - col:     color to display; a local copy is converted from HSV to RGB for rendering when ImGuiColorEditFlags_InputHSV is set.
+// - flags:   ImGuiColorEditFlags; when _NoAlpha is set the two alpha-preview flags are cleared before rendering.
+// - size:    button size; a component equal to 0.0f is replaced by GetFrameHeight().
+// Returns false early when the window is skipping items or when ItemAdd() rejects the item; otherwise returns the
+// 'pressed' result of ButtonBehavior().
+bool ImGui::ColorButton(const char* desc_id, const ImVec4& col, ImGuiColorEditFlags flags, ImVec2 size)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+
+    ImGuiContext& g = *GImGui;
+    const ImGuiID id = window->GetID(desc_id);
+    // Zero-sized axes fall back to the frame height
+    float default_size = GetFrameHeight();
+    if (size.x == 0.0f)
+        size.x = default_size;
+    if (size.y == 0.0f)
+        size.y = default_size;
+    const ImRect bb(window->DC.CursorPos, window->DC.CursorPos + size);
+    ItemSize(bb, (size.y >= default_size) ? g.Style.FramePadding.y : 0.0f);
+    if (!ItemAdd(bb, id))
+        return false;
+
+    bool hovered, held;
+    bool pressed = ButtonBehavior(bb, id, &hovered, &held);
+
+    // _NoAlpha takes precedence over both alpha preview modes
+    if (flags & ImGuiColorEditFlags_NoAlpha)
+        flags &= ~(ImGuiColorEditFlags_AlphaPreview | ImGuiColorEditFlags_AlphaPreviewHalf);
+
+    // Work on an RGB copy; 'col' itself is left in the caller's encoding (it is what gets passed to the tooltip below)
+    ImVec4 col_rgb = col;
+    if (flags & ImGuiColorEditFlags_InputHSV)
+        ColorConvertHSVtoRGB(col_rgb.x, col_rgb.y, col_rgb.z, col_rgb.x, col_rgb.y, col_rgb.z);
+
+    ImVec4 col_rgb_without_alpha(col_rgb.x, col_rgb.y, col_rgb.z, 1.0f);
+    float grid_step = ImMin(size.x, size.y) / 2.99f;
+    float rounding = ImMin(g.Style.FrameRounding, grid_step * 0.5f);
+    ImRect bb_inner = bb;
+    float off = 0.0f;
+    if ((flags & ImGuiColorEditFlags_NoBorder) == 0)
+    {
+        off = -0.75f; // The border (using Col_FrameBg) tends to look off when color is near-opaque and rounding is enabled. This offset seemed like a good middle ground to reduce those artifacts.
+        bb_inner.Expand(off);
+    }
+    if ((flags & ImGuiColorEditFlags_AlphaPreviewHalf) && col_rgb.w < 1.0f)
+    {
+        // Split preview: color with alpha over a checkerboard first, then the left half (up to mid_x) drawn with alpha forced to 1.0
+        float mid_x = IM_ROUND((bb_inner.Min.x + bb_inner.Max.x) * 0.5f);
+        RenderColorRectWithAlphaCheckerboard(window->DrawList, ImVec2(bb_inner.Min.x + grid_step, bb_inner.Min.y), bb_inner.Max, GetColorU32(col_rgb), grid_step, ImVec2(-grid_step + off, off), rounding, ImDrawCornerFlags_TopRight | ImDrawCornerFlags_BotRight);
+        window->DrawList->AddRectFilled(bb_inner.Min, ImVec2(mid_x, bb_inner.Max.y), GetColorU32(col_rgb_without_alpha), rounding, ImDrawCornerFlags_TopLeft | ImDrawCornerFlags_BotLeft);
+    }
+    else
+    {
+        // Because GetColorU32() multiplies by the global style Alpha and we don't want to display a checkerboard if the source code had no alpha
+        ImVec4 col_source = (flags & ImGuiColorEditFlags_AlphaPreview) ? col_rgb : col_rgb_without_alpha;
+        if (col_source.w < 1.0f)
+            RenderColorRectWithAlphaCheckerboard(window->DrawList, bb_inner.Min, bb_inner.Max, GetColorU32(col_source), grid_step, ImVec2(off, off), rounding);
+        else
+            window->DrawList->AddRectFilled(bb_inner.Min, bb_inner.Max, GetColorU32(col_source), rounding, ImDrawCornerFlags_All);
+    }
+    RenderNavHighlight(bb, id);
+    if ((flags & ImGuiColorEditFlags_NoBorder) == 0)
+    {
+        if (g.Style.FrameBorderSize > 0.0f)
+            RenderFrameBorder(bb.Min, bb.Max, rounding);
+        else
+            window->DrawList->AddRect(bb.Min, bb.Max, GetColorU32(ImGuiCol_FrameBg), rounding); // Color button are often in need of some sort of border
+    }
+
+    // Drag and Drop Source
+    // NB: The ActiveId test is merely an optional micro-optimization, BeginDragDropSource() does the same test.
+    if (g.ActiveId == id && !(flags & ImGuiColorEditFlags_NoDragDrop) && BeginDragDropSource())
+    {
+        // Payload is the RGB copy: 3 floats with _NoAlpha, 4 floats otherwise
+        if (flags & ImGuiColorEditFlags_NoAlpha)
+            SetDragDropPayload(IMGUI_PAYLOAD_TYPE_COLOR_3F, &col_rgb, sizeof(float) * 3, ImGuiCond_Once);
+        else
+            SetDragDropPayload(IMGUI_PAYLOAD_TYPE_COLOR_4F, &col_rgb, sizeof(float) * 4, ImGuiCond_Once);
+        // Drag preview: the same button (default size) followed by the text "Color"
+        ColorButton(desc_id, col, flags);
+        SameLine();
+        TextEx("Color");
+        EndDragDropSource();
+    }
+
+    // Tooltip
+    // Receives the original (possibly HSV-encoded) 'col' together with the input/alpha related flags only.
+    if (!(flags & ImGuiColorEditFlags_NoTooltip) && hovered)
+        ColorTooltip(desc_id, &col.x, flags & (ImGuiColorEditFlags__InputMask | ImGuiColorEditFlags_NoAlpha | ImGuiColorEditFlags_AlphaPreview | ImGuiColorEditFlags_AlphaPreviewHalf));
+
+    return pressed;
+}
+
+// Initialize/override default color options
+// For each of the Display / DataType / Picker / Input groups: if 'flags' has no bit set within that group's mask,
+// the bits of ImGuiColorEditFlags__OptionsDefault belonging to that group are OR-ed in.
+// Each group is then asserted to hold a power-of-two value, and the result is stored in g.ColorEditOptions.
+void ImGui::SetColorEditOptions(ImGuiColorEditFlags flags)
+{
+    ImGuiContext& g = *GImGui;
+    if ((flags & ImGuiColorEditFlags__DisplayMask) == 0)
+        flags |= ImGuiColorEditFlags__OptionsDefault & ImGuiColorEditFlags__DisplayMask;
+    if ((flags & ImGuiColorEditFlags__DataTypeMask) == 0)
+        flags |= ImGuiColorEditFlags__OptionsDefault & ImGuiColorEditFlags__DataTypeMask;
+    if ((flags & ImGuiColorEditFlags__PickerMask) == 0)
+        flags |= ImGuiColorEditFlags__OptionsDefault & ImGuiColorEditFlags__PickerMask;
+    if ((flags & ImGuiColorEditFlags__InputMask) == 0)
+        flags |= ImGuiColorEditFlags__OptionsDefault & ImGuiColorEditFlags__InputMask;
+    IM_ASSERT(ImIsPowerOfTwo(flags & ImGuiColorEditFlags__DisplayMask)); // Check only 1 option is selected
+    IM_ASSERT(ImIsPowerOfTwo(flags & ImGuiColorEditFlags__DataTypeMask)); // Check only 1 option is selected
+    IM_ASSERT(ImIsPowerOfTwo(flags & ImGuiColorEditFlags__PickerMask)); // Check only 1 option is selected
+    IM_ASSERT(ImIsPowerOfTwo(flags & ImGuiColorEditFlags__InputMask)); // Check only 1 option is selected
+    g.ColorEditOptions = flags;
+}
+
+// Note: only access 3 floats if ImGuiColorEditFlags_NoAlpha flag is set.
+// Emits a tooltip (BeginTooltipEx() .. EndTooltip()) describing a color:
+// - 'text' up to FindRenderedTextEnd(), followed by a separator, only when that range is non-empty ('text' may be NULL).
+// - a preview ColorButton() (with _NoTooltip), then the component values formatted as text on the same line.
+// 'col' is printed as RGB when _InputRGB is set or when no bit of _InputMask is set, and as HSV when _InputHSV is set.
+void ImGui::ColorTooltip(const char* text, const float* col, ImGuiColorEditFlags flags)
+{
+    ImGuiContext& g = *GImGui;
+
+    BeginTooltipEx(0, ImGuiTooltipFlags_OverridePreviousTooltip);
+    const char* text_end = text ? FindRenderedTextEnd(text, NULL) : text;
+    if (text_end > text)
+    {
+        TextEx(text, text_end);
+        Separator();
+    }
+
+    // Square preview sized from the font size and frame padding; alpha is forced to 1.0f / 255 under _NoAlpha so col[3] is never read
+    ImVec2 sz(g.FontSize * 3 + g.Style.FramePadding.y * 2, g.FontSize * 3 + g.Style.FramePadding.y * 2);
+    ImVec4 cf(col[0], col[1], col[2], (flags & ImGuiColorEditFlags_NoAlpha) ? 1.0f : col[3]);
+    int cr = IM_F32_TO_INT8_SAT(col[0]), cg = IM_F32_TO_INT8_SAT(col[1]), cb = IM_F32_TO_INT8_SAT(col[2]), ca = (flags & ImGuiColorEditFlags_NoAlpha) ? 255 : IM_F32_TO_INT8_SAT(col[3]);
+    ColorButton("##preview", cf, (flags & (ImGuiColorEditFlags__InputMask | ImGuiColorEditFlags_NoAlpha | ImGuiColorEditFlags_AlphaPreview | ImGuiColorEditFlags_AlphaPreviewHalf)) | ImGuiColorEditFlags_NoTooltip, sz);
+    SameLine();
+    if ((flags & ImGuiColorEditFlags_InputRGB) || !(flags & ImGuiColorEditFlags__InputMask))
+    {
+        // RGB: hex string, 8-bit integer components, then the raw floats
+        if (flags & ImGuiColorEditFlags_NoAlpha)
+            Text("#%02X%02X%02X\nR: %d, G: %d, B: %d\n(%.3f, %.3f, %.3f)", cr, cg, cb, cr, cg, cb, col[0], col[1], col[2]);
+        else
+            Text("#%02X%02X%02X%02X\nR:%d, G:%d, B:%d, A:%d\n(%.3f, %.3f, %.3f, %.3f)", cr, cg, cb, ca, cr, cg, cb, ca, col[0], col[1], col[2], col[3]);
+    }
+    else if (flags & ImGuiColorEditFlags_InputHSV)
+    {
+        // HSV: raw floats only
+        if (flags & ImGuiColorEditFlags_NoAlpha)
+            Text("H: %.3f, S: %.3f, V: %.3f", col[0], col[1], col[2]);
+        else
+            Text("H: %.3f, S: %.3f, V: %.3f, A: %.3f", col[0], col[1], col[2], col[3]);
+    }
+    EndTooltip();
+}
+
+// Contents of the "context" popup that edits g.ColorEditOptions.
+// Display-format radio buttons are offered only if 'flags' has no bit of _DisplayMask set, and data-type radio buttons
+// only if it has no bit of _DataTypeMask set; when neither is offered the function returns without calling BeginPopup().
+// A nested "Copy" popup lists 'col' in several text formats; selecting an entry passes it to SetClipboardText().
+// The (possibly modified) options are written back to g.ColorEditOptions before the popup is closed with EndPopup().
+void ImGui::ColorEditOptionsPopup(const float* col, ImGuiColorEditFlags flags)
+{
+    bool allow_opt_inputs = !(flags & ImGuiColorEditFlags__DisplayMask);
+    bool allow_opt_datatype = !(flags & ImGuiColorEditFlags__DataTypeMask);
+    if ((!allow_opt_inputs && !allow_opt_datatype) || !BeginPopup("context"))
+        return;
+    ImGuiContext& g = *GImGui;
+    ImGuiColorEditFlags opts = g.ColorEditOptions;
+    if (allow_opt_inputs)
+    {
+        // Each radio button replaces the whole _DisplayMask group with its own bit
+        if (RadioButton("RGB", (opts & ImGuiColorEditFlags_DisplayRGB) != 0)) opts = (opts & ~ImGuiColorEditFlags__DisplayMask) | ImGuiColorEditFlags_DisplayRGB;
+        if (RadioButton("HSV", (opts & ImGuiColorEditFlags_DisplayHSV) != 0)) opts = (opts & ~ImGuiColorEditFlags__DisplayMask) | ImGuiColorEditFlags_DisplayHSV;
+        if (RadioButton("Hex", (opts & ImGuiColorEditFlags_DisplayHex) != 0)) opts = (opts & ~ImGuiColorEditFlags__DisplayMask) | ImGuiColorEditFlags_DisplayHex;
+    }
+    if (allow_opt_datatype)
+    {
+        if (allow_opt_inputs) Separator();
+        if (RadioButton("0..255", (opts & ImGuiColorEditFlags_Uint8) != 0)) opts = (opts & ~ImGuiColorEditFlags__DataTypeMask) | ImGuiColorEditFlags_Uint8;
+        if (RadioButton("0.00..1.00", (opts & ImGuiColorEditFlags_Float) != 0)) opts = (opts & ~ImGuiColorEditFlags__DataTypeMask) | ImGuiColorEditFlags_Float;
+    }
+
+    // At least one of the two flags is true here (see the early return above)
+    if (allow_opt_inputs || allow_opt_datatype)
+        Separator();
+    if (Button("Copy as..", ImVec2(-1, 0)))
+        OpenPopup("Copy");
+    if (BeginPopup("Copy"))
+    {
+        int cr = IM_F32_TO_INT8_SAT(col[0]), cg = IM_F32_TO_INT8_SAT(col[1]), cb = IM_F32_TO_INT8_SAT(col[2]), ca = (flags & ImGuiColorEditFlags_NoAlpha) ? 255 : IM_F32_TO_INT8_SAT(col[3]);
+        // 'buf' is reused for every entry: float tuple, integer tuple, #RRGGBB and (unless _NoAlpha) #RRGGBBAA
+        char buf[64];
+        ImFormatString(buf, IM_ARRAYSIZE(buf), "(%.3ff, %.3ff, %.3ff, %.3ff)", col[0], col[1], col[2], (flags & ImGuiColorEditFlags_NoAlpha) ? 1.0f : col[3]);
+        if (Selectable(buf))
+            SetClipboardText(buf);
+        ImFormatString(buf, IM_ARRAYSIZE(buf), "(%d,%d,%d,%d)", cr, cg, cb, ca);
+        if (Selectable(buf))
+            SetClipboardText(buf);
+        ImFormatString(buf, IM_ARRAYSIZE(buf), "#%02X%02X%02X", cr, cg, cb);
+        if (Selectable(buf))
+            SetClipboardText(buf);
+        if (!(flags & ImGuiColorEditFlags_NoAlpha))
+        {
+            ImFormatString(buf, IM_ARRAYSIZE(buf), "#%02X%02X%02X%02X", cr, cg, cb, ca);
+            if (Selectable(buf))
+                SetClipboardText(buf);
+        }
+        EndPopup();
+    }
+
+    g.ColorEditOptions = opts;
+    EndPopup();
+}
+
+// Contents of the "context" popup that edits the picker-related bits of g.ColorEditOptions.
+// The picker-type choice is offered only if 'flags' has no bit of _PickerMask set; the "Alpha Bar" checkbox only if
+// neither _NoAlpha nor _AlphaBar is set in 'flags'. When neither is offered the function returns without calling BeginPopup().
+// 'ref_col' is only read: each thumbnail picker operates on a local copy of it.
+void ImGui::ColorPickerOptionsPopup(const float* ref_col, ImGuiColorEditFlags flags)
+{
+    bool allow_opt_picker = !(flags & ImGuiColorEditFlags__PickerMask);
+    bool allow_opt_alpha_bar = !(flags & ImGuiColorEditFlags_NoAlpha) && !(flags & ImGuiColorEditFlags_AlphaBar);
+    if ((!allow_opt_picker && !allow_opt_alpha_bar) || !BeginPopup("context"))
+        return;
+    ImGuiContext& g = *GImGui;
+    if (allow_opt_picker)
+    {
+        ImVec2 picker_size(g.FontSize * 8, ImMax(g.FontSize * 8 - (GetFrameHeight() + g.Style.ItemInnerSpacing.x), 1.0f)); // FIXME: Picker size copied from main picker function
+        PushItemWidth(picker_size.x);
+        for (int picker_type = 0; picker_type < 2; picker_type++)
+        {
+            // Draw small/thumbnail version of each picker type (over an invisible button for selection)
+            if (picker_type > 0) Separator();
+            PushID(picker_type);
+            ImGuiColorEditFlags picker_flags = ImGuiColorEditFlags_NoInputs | ImGuiColorEditFlags_NoOptions | ImGuiColorEditFlags_NoLabel | ImGuiColorEditFlags_NoSidePreview | (flags & ImGuiColorEditFlags_NoAlpha);
+            if (picker_type == 0) picker_flags |= ImGuiColorEditFlags_PickerHueBar;
+            if (picker_type == 1) picker_flags |= ImGuiColorEditFlags_PickerHueWheel;
+            // Remember the cursor so the picker below is submitted at the same position as the selectable
+            ImVec2 backup_pos = GetCursorScreenPos();
+            if (Selectable("##selectable", false, 0, picker_size)) // By default, Selectable() is closing popup
+                g.ColorEditOptions = (g.ColorEditOptions & ~ImGuiColorEditFlags__PickerMask) | (picker_flags & ImGuiColorEditFlags__PickerMask);
+            SetCursorScreenPos(backup_pos);
+            // Copy 3 or 4 floats (depending on _NoAlpha) so the preview picker cannot write to 'ref_col'
+            ImVec4 previewing_ref_col;
+            memcpy(&previewing_ref_col, ref_col, sizeof(float) * ((picker_flags & ImGuiColorEditFlags_NoAlpha) ? 3 : 4));
+            ColorPicker4("##previewing_picker", &previewing_ref_col.x, picker_flags);
+            PopID();
+        }
+        PopItemWidth();
+    }
+    if (allow_opt_alpha_bar)
+    {
+        if (allow_opt_picker) Separator();
+        CheckboxFlags("Alpha Bar", &g.ColorEditOptions, ImGuiColorEditFlags_AlphaBar);
+    }
+    EndPopup();
+}
+
+//-------------------------------------------------------------------------
+// [SECTION] Widgets: TreeNode, CollapsingHeader, etc.
+//-------------------------------------------------------------------------
+// - TreeNode()
+// - TreeNodeV()
+// - TreeNodeEx()
+// - TreeNodeExV()
+// - TreeNodeBehavior() [Internal]
+// - TreePush()
+// - TreePop()
+// - GetTreeNodeToLabelSpacing()
+// - SetNextItemOpen()
+// - CollapsingHeader()
+//-------------------------------------------------------------------------
+
+// printf-style variant: the ID comes from 'str_id', the label from 'fmt' and the variadic arguments. Forwards to TreeNodeExV() with flags == 0.
+bool ImGui::TreeNode(const char* str_id, const char* fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    bool is_open = TreeNodeExV(str_id, 0, fmt, args);
+    va_end(args);
+    return is_open;
+}
+
+// Same as above but the ID is derived from a pointer ('ptr_id') instead of a string.
+bool ImGui::TreeNode(const void* ptr_id, const char* fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    bool is_open = TreeNodeExV(ptr_id, 0, fmt, args);
+    va_end(args);
+    return is_open;
+}
+
+// 'label' is used both for the ID (window->GetID) and as the label passed to TreeNodeBehavior(); no flags.
+bool ImGui::TreeNode(const char* label)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+    return TreeNodeBehavior(window->GetID(label), 0, label, NULL);
+}
+
+// va_list variants of TreeNode(): forward to TreeNodeExV() with flags == 0.
+bool ImGui::TreeNodeV(const char* str_id, const char* fmt, va_list args)
+{
+    return TreeNodeExV(str_id, 0, fmt, args);
+}
+
+bool ImGui::TreeNodeV(const void* ptr_id, const char* fmt, va_list args)
+{
+    return TreeNodeExV(ptr_id, 0, fmt, args);
+}
+
+// Same as TreeNode(label) with caller-provided 'flags'.
+bool ImGui::TreeNodeEx(const char* label, ImGuiTreeNodeFlags flags)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+
+    return TreeNodeBehavior(window->GetID(label), flags, label, NULL);
+}
+
+// printf-style variants with caller-provided 'flags'; the ID comes from 'str_id' / 'ptr_id'.
+bool ImGui::TreeNodeEx(const char* str_id, ImGuiTreeNodeFlags flags, const char* fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    bool is_open = TreeNodeExV(str_id, flags, fmt, args);
+    va_end(args);
+    return is_open;
+}
+
+bool ImGui::TreeNodeEx(const void* ptr_id, ImGuiTreeNodeFlags flags, const char* fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    bool is_open = TreeNodeExV(ptr_id, flags, fmt, args);
+    va_end(args);
+    return is_open;
+}
+
+// Formats the label into g.TempBuffer, then calls TreeNodeBehavior() with the ID of 'str_id'.
+// 'label_end' is g.TempBuffer offset by the value returned from ImFormatStringV().
+bool ImGui::TreeNodeExV(const char* str_id, ImGuiTreeNodeFlags flags, const char* fmt, va_list args)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+
+    ImGuiContext& g = *GImGui;
+    const char* label_end = g.TempBuffer + ImFormatStringV(g.TempBuffer, IM_ARRAYSIZE(g.TempBuffer), fmt, args);
+    return TreeNodeBehavior(window->GetID(str_id), flags, g.TempBuffer, label_end);
+}
+
+// Pointer-ID counterpart of the function above.
+bool ImGui::TreeNodeExV(const void* ptr_id, ImGuiTreeNodeFlags flags, const char* fmt, va_list args)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+
+    ImGuiContext& g = *GImGui;
+    const char* label_end = g.TempBuffer + ImFormatStringV(g.TempBuffer, IM_ARRAYSIZE(g.TempBuffer), fmt, args);
+    return TreeNodeBehavior(window->GetID(ptr_id), flags, g.TempBuffer, label_end);
+}
+
+// Resolve the open/closed state of the node identified by 'id'.
+// - Leaf nodes are always reported open.
+// - If g.NextItemData carries an open request (ImGuiNextItemDataFlags_HasOpen): with ImGuiCond_Always the requested value
+//   is used and stored; otherwise it is used and stored only when the storage has no entry for 'id' yet.
+// - Otherwise the stored value is read, defaulting to open when ImGuiTreeNodeFlags_DefaultOpen is set.
+// - While logging is enabled the result may be forced to true (see the last test in the body).
+bool ImGui::TreeNodeBehaviorIsOpen(ImGuiID id, ImGuiTreeNodeFlags flags)
+{
+    if (flags & ImGuiTreeNodeFlags_Leaf)
+        return true;
+
+    // We only write to the tree storage if the user clicks (or explicitly use the SetNextItemOpen function)
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    ImGuiStorage* storage = window->DC.StateStorage;
+
+    bool is_open;
+    if (g.NextItemData.Flags & ImGuiNextItemDataFlags_HasOpen)
+    {
+        if (g.NextItemData.OpenCond & ImGuiCond_Always)
+        {
+            is_open = g.NextItemData.OpenVal;
+            storage->SetInt(id, is_open);
+        }
+        else
+        {
+            // We treat ImGuiCond_Once and ImGuiCond_FirstUseEver the same because tree node state are not saved persistently.
+            // -1 is the "no entry yet" sentinel passed as the default to GetInt()
+            const int stored_value = storage->GetInt(id, -1);
+            if (stored_value == -1)
+            {
+                is_open = g.NextItemData.OpenVal;
+                storage->SetInt(id, is_open);
+            }
+            else
+            {
+                is_open = stored_value != 0;
+            }
+        }
+    }
+    else
+    {
+        is_open = storage->GetInt(id, (flags & ImGuiTreeNodeFlags_DefaultOpen) ? 1 : 0) != 0;
+    }
+
+    // When logging is enabled, we automatically expand tree nodes (but *NOT* collapsing headers.. seems like sensible behavior).
+    // NB- If we are above max depth we still allow manually opened nodes to be logged.
+    if (g.LogEnabled && !(flags & ImGuiTreeNodeFlags_NoAutoOpenOnLog) && (window->DC.TreeDepth - g.LogDepthRef) < g.LogDepthToExpand)
+        is_open = true;
+
+    return is_open;
+}
+
+// Core implementation shared by the TreeNode*() and CollapsingHeader() entry points.
+// - id:        item ID, also used as the storage key for the open state and pushed on the ID stack when the node is open.
+// - flags:     ImGuiTreeNodeFlags controlling layout, interaction and rendering.
+// - label:     text to display; label_end may be NULL, in which case FindRenderedTextEnd(label) is used.
+// Steps: compute the frame/interaction rectangles, submit the item (ItemSize/ItemAdd), run ButtonBehavior(), toggle and
+// store the open state if requested, render, then TreePushOverrideID(id) when open unless _NoTreePushOnOpen is set.
+// Returns the (possibly just toggled) open state; returns false immediately when the window is skipping items.
+bool ImGui::TreeNodeBehavior(ImGuiID id, ImGuiTreeNodeFlags flags, const char* label, const char* label_end)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+
+    ImGuiContext& g = *GImGui;
+    const ImGuiStyle& style = g.Style;
+    const bool display_frame = (flags & ImGuiTreeNodeFlags_Framed) != 0;
+    // Full frame padding for framed nodes or when _FramePadding is requested; otherwise the vertical padding is limited by the current line's text base offset
+    const ImVec2 padding = (display_frame || (flags & ImGuiTreeNodeFlags_FramePadding)) ? style.FramePadding : ImVec2(style.FramePadding.x, ImMin(window->DC.CurrLineTextBaseOffset, style.FramePadding.y));
+
+    if (!label_end)
+        label_end = FindRenderedTextEnd(label);
+    const ImVec2 label_size = CalcTextSize(label, label_end, false);
+
+    // We vertically grow up to the current line height, capped at the typical widget height (and never below the padded label height).
+    const float frame_height = ImMax(ImMin(window->DC.CurrLineSize.y, g.FontSize + style.FramePadding.y * 2), label_size.y + padding.y * 2);
+    ImRect frame_bb;
+    frame_bb.Min.x = (flags & ImGuiTreeNodeFlags_SpanFullWidth) ? window->WorkRect.Min.x : window->DC.CursorPos.x;
+    frame_bb.Min.y = window->DC.CursorPos.y;
+    frame_bb.Max.x = window->WorkRect.Max.x;
+    frame_bb.Max.y = window->DC.CursorPos.y + frame_height;
+    if (display_frame)
+    {
+        // Framed header expand a little outside the default padding, to the edge of InnerClipRect
+        // (FIXME: May remove this at some point and make InnerClipRect align with WindowPadding.x instead of WindowPadding.x*0.5f)
+        frame_bb.Min.x -= IM_FLOOR(window->WindowPadding.x * 0.5f - 1.0f);
+        frame_bb.Max.x += IM_FLOOR(window->WindowPadding.x * 0.5f);
+    }
+
+    const float text_offset_x = g.FontSize + (display_frame ? padding.x * 3 : padding.x * 2); // Collapser arrow width + Spacing
+    const float text_offset_y = ImMax(padding.y, window->DC.CurrLineTextBaseOffset); // Latch before ItemSize changes it
+    const float text_width = g.FontSize + (label_size.x > 0.0f ? label_size.x + padding.x * 2 : 0.0f); // Include collapser
+    ImVec2 text_pos(window->DC.CursorPos.x + text_offset_x, window->DC.CursorPos.y + text_offset_y);
+    ItemSize(ImVec2(text_width, frame_height), padding.y);
+
+    // For regular tree nodes, we arbitrarily allow clicking up to 2 ItemSpacing past the label
+    ImRect interact_bb = frame_bb;
+    if (!display_frame && (flags & (ImGuiTreeNodeFlags_SpanAvailWidth | ImGuiTreeNodeFlags_SpanFullWidth)) == 0)
+        interact_bb.Max.x = frame_bb.Min.x + text_width + style.ItemSpacing.x * 2.0f;
+
+    // Store a flag for the current depth to tell if we will allow closing this node when navigating one of its child.
+    // For this purpose we essentially compare if g.NavIdIsAlive went from 0 to 1 between TreeNode() and TreePop().
+    // This currently only supports 32 levels of depth and we are fine with (1 << Depth) overflowing into a zero.
+    const bool is_leaf = (flags & ImGuiTreeNodeFlags_Leaf) != 0;
+    bool is_open = TreeNodeBehaviorIsOpen(id, flags);
+    if (is_open && !g.NavIdIsAlive && (flags & ImGuiTreeNodeFlags_NavLeftJumpsBackHere) && !(flags & ImGuiTreeNodeFlags_NoTreePushOnOpen))
+        window->DC.TreeJumpToParentOnPopMask |= (1 << window->DC.TreeDepth);
+
+    // The item is submitted with the interaction rectangle; the (possibly wider) frame rectangle is recorded as its display rect
+    bool item_add = ItemAdd(interact_bb, id);
+    window->DC.LastItemStatusFlags |= ImGuiItemStatusFlags_HasDisplayRect;
+    window->DC.LastItemDisplayRect = frame_bb;
+
+    if (!item_add)
+    {
+        // Item rejected by ItemAdd(): skip interaction and rendering but still push the tree level so the caller's TreePop() stays balanced
+        if (is_open && !(flags & ImGuiTreeNodeFlags_NoTreePushOnOpen))
+            TreePushOverrideID(id);
+        IMGUI_TEST_ENGINE_ITEM_INFO(window->DC.LastItemId, label, window->DC.ItemFlags | (is_leaf ? 0 : ImGuiItemStatusFlags_Openable) | (is_open ? ImGuiItemStatusFlags_Opened : 0));
+        return is_open;
+    }
+
+    // NOTE(review): initialised from a TreeNode flag constant although the variable holds button flags; presumably both "None" values are 0 - confirm.
+    ImGuiButtonFlags button_flags = ImGuiTreeNodeFlags_None;
+    if (flags & ImGuiTreeNodeFlags_AllowItemOverlap)
+        button_flags |= ImGuiButtonFlags_AllowItemOverlap;
+    if (!is_leaf)
+        button_flags |= ImGuiButtonFlags_PressedOnDragDropHold;
+
+    // We allow clicking on the arrow section with keyboard modifiers held, in order to easily
+    // allow browsing a tree while preserving selection with code implementing multi-selection patterns.
+    // When clicking on the rest of the tree node we always disallow keyboard modifiers.
+    const float arrow_hit_x1 = (text_pos.x - text_offset_x) - style.TouchExtraPadding.x;
+    const float arrow_hit_x2 = (text_pos.x - text_offset_x) + (g.FontSize + padding.x * 2.0f) + style.TouchExtraPadding.x;
+    const bool is_mouse_x_over_arrow = (g.IO.MousePos.x >= arrow_hit_x1 && g.IO.MousePos.x < arrow_hit_x2);
+    if (window != g.HoveredWindow || !is_mouse_x_over_arrow)
+        button_flags |= ImGuiButtonFlags_NoKeyModifiers;
+
+    // Open behaviors can be altered with the _OpenOnArrow and _OpenOnDoubleClick flags.
+    // Some alteration have subtle effects (e.g. toggle on MouseUp vs MouseDown events) due to requirements for multi-selection and drag and drop support.
+    // - Single-click on label = Toggle on MouseUp (default, when _OpenOnArrow=0)
+    // - Single-click on arrow = Toggle on MouseDown (when _OpenOnArrow=0)
+    // - Single-click on arrow = Toggle on MouseDown (when _OpenOnArrow=1)
+    // - Double-click on label = Toggle on MouseDoubleClick (when _OpenOnDoubleClick=1)
+    // - Double-click on arrow = Toggle on MouseDoubleClick (when _OpenOnDoubleClick=1 and _OpenOnArrow=0)
+    // It is rather standard that arrow click react on Down rather than Up.
+    // We set ImGuiButtonFlags_PressedOnClickRelease on OpenOnDoubleClick because we want the item to be active on the initial MouseDown in order for drag and drop to work.
+    if (is_mouse_x_over_arrow)
+        button_flags |= ImGuiButtonFlags_PressedOnClick;
+    else if (flags & ImGuiTreeNodeFlags_OpenOnDoubleClick)
+        button_flags |= ImGuiButtonFlags_PressedOnClickRelease | ImGuiButtonFlags_PressedOnDoubleClick;
+    else
+        button_flags |= ImGuiButtonFlags_PressedOnClickRelease;
+
+    bool selected = (flags & ImGuiTreeNodeFlags_Selected) != 0;
+    const bool was_selected = selected;
+
+    bool hovered, held;
+    bool pressed = ButtonBehavior(interact_bb, id, &hovered, &held, button_flags);
+    bool toggled = false;
+    if (!is_leaf)
+    {
+        if (pressed && g.DragDropHoldJustPressedId != id)
+        {
+            // Regular press: toggles unless restricted by _OpenOnArrow / _OpenOnDoubleClick (nav activation always toggles)
+            if ((flags & (ImGuiTreeNodeFlags_OpenOnArrow | ImGuiTreeNodeFlags_OpenOnDoubleClick)) == 0 || (g.NavActivateId == id))
+                toggled = true;
+            if (flags & ImGuiTreeNodeFlags_OpenOnArrow)
+                toggled |= is_mouse_x_over_arrow && !g.NavDisableMouseHover; // Lightweight equivalent of IsMouseHoveringRect() since ButtonBehavior() already did the job
+            if ((flags & ImGuiTreeNodeFlags_OpenOnDoubleClick) && g.IO.MouseDoubleClicked[0])
+                toggled = true;
+        }
+        else if (pressed && g.DragDropHoldJustPressedId == id)
+        {
+            IM_ASSERT(button_flags & ImGuiButtonFlags_PressedOnDragDropHold);
+            if (!is_open) // When using Drag and Drop "hold to open" we keep the node highlighted after opening, but never close it again.
+                toggled = true;
+        }
+
+        // Keyboard/gamepad navigation: Left closes an open node, Right opens a closed one; the move request is consumed in both cases
+        if (g.NavId == id && g.NavMoveRequest && g.NavMoveDir == ImGuiDir_Left && is_open)
+        {
+            toggled = true;
+            NavMoveRequestCancel();
+        }
+        if (g.NavId == id && g.NavMoveRequest && g.NavMoveDir == ImGuiDir_Right && !is_open) // If there's something upcoming on the line we may want to give it the priority?
+        {
+            toggled = true;
+            NavMoveRequestCancel();
+        }
+
+        if (toggled)
+        {
+            // Flip, persist in the window state storage and flag the item as toggled
+            is_open = !is_open;
+            window->DC.StateStorage->SetInt(id, is_open);
+            window->DC.LastItemStatusFlags |= ImGuiItemStatusFlags_ToggledOpen;
+        }
+    }
+    if (flags & ImGuiTreeNodeFlags_AllowItemOverlap)
+        SetItemAllowOverlap();
+
+    // In this branch, TreeNodeBehavior() cannot toggle the selection so this will never trigger.
+    if (selected != was_selected) //-V547
+        window->DC.LastItemStatusFlags |= ImGuiItemStatusFlags_ToggledSelection;
+
+    // Render
+    const ImU32 text_col = GetColorU32(ImGuiCol_Text);
+    ImGuiNavHighlightFlags nav_highlight_flags = ImGuiNavHighlightFlags_TypeThin;
+    if (display_frame)
+    {
+        // Framed type
+        const ImU32 bg_col = GetColorU32((held && hovered) ? ImGuiCol_HeaderActive : hovered ? ImGuiCol_HeaderHovered : ImGuiCol_Header);
+        RenderFrame(frame_bb.Min, frame_bb.Max, bg_col, true, style.FrameRounding);
+        RenderNavHighlight(frame_bb, id, nav_highlight_flags);
+        if (flags & ImGuiTreeNodeFlags_Bullet)
+            RenderBullet(window->DrawList, ImVec2(text_pos.x - text_offset_x * 0.60f, text_pos.y + g.FontSize * 0.5f), text_col);
+        else if (!is_leaf)
+            RenderArrow(window->DrawList, ImVec2(text_pos.x - text_offset_x + padding.x, text_pos.y), text_col, is_open ? ImGuiDir_Down : ImGuiDir_Right, 1.0f);
+        else // Leaf without bullet, left-adjusted text
+            text_pos.x -= text_offset_x;
+        // Shrink the text clipping rectangle to leave room on the right (flag set by CollapsingHeader() when it adds a close button)
+        if (flags & ImGuiTreeNodeFlags_ClipLabelForTrailingButton)
+            frame_bb.Max.x -= g.FontSize + style.FramePadding.x;
+        if (g.LogEnabled)
+        {
+            // NB: '##' is normally used to hide text (as a library-wide feature), so we need to specify the text range to make sure the ## aren't stripped out here.
+            const char log_prefix[] = "\n##";
+            const char log_suffix[] = "##";
+            LogRenderedText(&text_pos, log_prefix, log_prefix + 3);
+            RenderTextClipped(text_pos, frame_bb.Max, label, label_end, &label_size);
+            LogRenderedText(&text_pos, log_suffix, log_suffix + 2);
+        }
+        else
+        {
+            RenderTextClipped(text_pos, frame_bb.Max, label, label_end, &label_size);
+        }
+    }
+    else
+    {
+        // Unframed type for tree nodes
+        if (hovered || selected)
+        {
+            const ImU32 bg_col = GetColorU32((held && hovered) ? ImGuiCol_HeaderActive : hovered ? ImGuiCol_HeaderHovered : ImGuiCol_Header);
+            RenderFrame(frame_bb.Min, frame_bb.Max, bg_col, false);
+            RenderNavHighlight(frame_bb, id, nav_highlight_flags);
+        }
+        if (flags & ImGuiTreeNodeFlags_Bullet)
+            RenderBullet(window->DrawList, ImVec2(text_pos.x - text_offset_x * 0.5f, text_pos.y + g.FontSize * 0.5f), text_col);
+        else if (!is_leaf)
+            RenderArrow(window->DrawList, ImVec2(text_pos.x - text_offset_x + padding.x, text_pos.y + g.FontSize * 0.15f), text_col, is_open ? ImGuiDir_Down : ImGuiDir_Right, 0.70f);
+        if (g.LogEnabled)
+            LogRenderedText(&text_pos, ">");
+        RenderText(text_pos, label, label_end, false);
+    }
+
+    if (is_open && !(flags & ImGuiTreeNodeFlags_NoTreePushOnOpen))
+        TreePushOverrideID(id);
+    IMGUI_TEST_ENGINE_ITEM_INFO(id, label, window->DC.ItemFlags | (is_leaf ? 0 : ImGuiItemStatusFlags_Openable) | (is_open ? ImGuiItemStatusFlags_Opened : 0));
+    return is_open;
+}
+
+// Indent, increase the tree depth and push an ID derived from 'str_id' (or from "#TreePush" when str_id is NULL).
+void ImGui::TreePush(const char* str_id)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    Indent();
+    window->DC.TreeDepth++;
+    PushID(str_id ? str_id : "#TreePush");
+}
+
+// Pointer-ID counterpart: when 'ptr_id' is NULL the address of the "#TreePush" literal is used as the pointer ID.
+void ImGui::TreePush(const void* ptr_id)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    Indent();
+    window->DC.TreeDepth++;
+    PushID(ptr_id ? ptr_id : (const void*)"#TreePush");
+}
+
+// Same as TreePush() but pushes the given 'id' on the window ID stack as-is (no hashing through PushID()).
+void ImGui::TreePushOverrideID(ImGuiID id)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    Indent();
+    window->DC.TreeDepth++;
+    window->IDStack.push_back(id);
+}
+
+// Undo a TreePush*(): unindent, decrease the tree depth, handle the "Left arrow jumps to parent" navigation request
+// for this depth, clear the jump-mask bits at and above this depth, then pop the ID.
+void ImGui::TreePop()
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+    Unindent();
+
+    window->DC.TreeDepth--;
+    ImU32 tree_depth_mask = (1 << window->DC.TreeDepth);
+
+    // Handle Left arrow to move to parent tree node (when ImGuiTreeNodeFlags_NavLeftJumpsBackHere is enabled)
+    if (g.NavMoveDir == ImGuiDir_Left && g.NavWindow == window && NavMoveRequestButNoResultYet())
+        if (g.NavIdIsAlive && (window->DC.TreeJumpToParentOnPopMask & tree_depth_mask))
+        {
+            SetNavID(window->IDStack.back(), g.NavLayer, 0);
+            NavMoveRequestCancel();
+        }
+    // Keep only the bits of shallower depths
+    window->DC.TreeJumpToParentOnPopMask &= tree_depth_mask - 1;
+
+    IM_ASSERT(window->IDStack.Size > 1); // There should always be 1 element in the IDStack (pushed during window creation). If this triggers you called TreePop/PopID too much.
+    PopID();
+}
+
+// Horizontal distance preceding label when using TreeNode() or Bullet()
+// Computed as the font size plus twice the horizontal frame padding.
+float ImGui::GetTreeNodeToLabelSpacing()
+{
+    ImGuiContext& g = *GImGui;
+    return g.FontSize + (g.Style.FramePadding.x * 2.0f);
+}
+
+// Set next TreeNode/CollapsingHeader open state.
+// Does nothing while the current window is skipping items. A 'cond' of 0 is treated as ImGuiCond_Always.
+void ImGui::SetNextItemOpen(bool is_open, ImGuiCond cond)
+{
+    ImGuiContext& g = *GImGui;
+    if (g.CurrentWindow->SkipItems)
+        return;
+    g.NextItemData.Flags |= ImGuiNextItemDataFlags_HasOpen;
+    g.NextItemData.OpenVal = is_open;
+    g.NextItemData.OpenCond = cond ? cond : ImGuiCond_Always;
+}
+
+// CollapsingHeader returns true when opened but do not indent nor push into the ID stack (because of the ImGuiTreeNodeFlags_NoTreePushOnOpen flag).
+// This is basically the same as calling TreeNodeEx(label, ImGuiTreeNodeFlags_CollapsingHeader). You can remove the _NoTreePushOnOpen flag if you want behavior closer to normal TreeNode().
+// Returns false without submitting anything when the current window has SkipItems set; otherwise returns the result of TreeNodeBehavior().
+bool ImGui::CollapsingHeader(const char* label, ImGuiTreeNodeFlags flags)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+
+    return TreeNodeBehavior(window->GetID(label), flags | ImGuiTreeNodeFlags_CollapsingHeader, label);
+}
+
+// p_visible == NULL : regular collapsing header
+// p_visible != NULL && *p_visible == true : show a small close button on the corner of the header, clicking the button will set *p_visible = false
+// p_visible != NULL && *p_visible == false : do not show the header at all
+// Do not mistake this with the Open state of the header itself, which you can adjust with SetNextItemOpen() or ImGuiTreeNodeFlags_DefaultOpen.
+// Returns the open state reported by TreeNodeBehavior(), or false when the window skips items or the header is hidden.
+bool ImGui::CollapsingHeader(const char* label, bool* p_visible, ImGuiTreeNodeFlags flags)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+
+    if (p_visible && !*p_visible)
+        return false;
+
+    ImGuiID id = window->GetID(label);
+    flags |= ImGuiTreeNodeFlags_CollapsingHeader;
+    // With a close button: allow the button to overlap the header and shorten the label clip area (see ClipLabelForTrailingButton in TreeNodeBehavior()).
+    if (p_visible)
+        flags |= ImGuiTreeNodeFlags_AllowItemOverlap | ImGuiTreeNodeFlags_ClipLabelForTrailingButton;
+    bool is_open = TreeNodeBehavior(id, flags, label);
+    if (p_visible != NULL)
+    {
+        // Create a small overlapping close button
+        // FIXME: We can evolve this into user accessible helpers to add extra buttons on title bars, headers, etc.
+        // FIXME: CloseButton can overlap into text, need find a way to clip the text somehow.
+        ImGuiContext& g = *GImGui;
+        // Backup taken here and restored after CloseButton(), presumably so the header stays the "last item" for the caller - confirm against ImGuiLastItemDataBackup.
+        ImGuiLastItemDataBackup last_item_backup;
+        float button_size = g.FontSize;
+        // Right-align the button inside the header rectangle, but never left of the header's own left edge.
+        float button_x = ImMax(window->DC.LastItemRect.Min.x, window->DC.LastItemRect.Max.x - g.Style.FramePadding.x * 2.0f - button_size);
+        float button_y = window->DC.LastItemRect.Min.y;
+        ImGuiID close_button_id = GetIDWithSeed("#CLOSE", NULL, id);
+        if (CloseButton(close_button_id, ImVec2(button_x, button_y)))
+            *p_visible = false;
+        last_item_backup.Restore();
+    }
+
+    return is_open;
+}
+
+//-------------------------------------------------------------------------
+// [SECTION] Widgets: Selectable
+//-------------------------------------------------------------------------
+// - Selectable()
+//-------------------------------------------------------------------------
+
+// Tip: pass a non-visible label (e.g. "##hello") then you can use the space to draw other text or image.
+// But you need to make sure the ID is unique, e.g. enclose calls in PushID/PopID or use ##unique_id.
+// With this scheme, ImGuiSelectableFlags_SpanAllColumns and ImGuiSelectableFlags_AllowItemOverlap are also frequently used flags.
+// FIXME: Selectable() with (size.x == 0.0f) and (SelectableTextAlign.x > 0.0f) followed by SameLine() is currently not supported.
+//
+// - label:    text to render; also hashed with window->GetID() to form the item ID.
+// - selected: whether to draw the item highlighted (forced to false when ImGuiSelectableFlags_Disabled is set).
+// - flags:    ImGuiSelectableFlags_ bits, translated below into ImGuiButtonFlags_ for ButtonBehavior().
+// - size_arg: a zero component means "use the label size" on that axis; a zero x additionally stretches to the available width.
+// Returns true when ButtonBehavior() reports a press. Returns false early when the window skips items or ItemAdd() rejects the item.
+// This overload never modifies the caller's selection state; see the bool* overload for toggling.
+bool ImGui::Selectable(const char* label, bool selected, ImGuiSelectableFlags flags, const ImVec2& size_arg)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+
+    ImGuiContext& g = *GImGui;
+    const ImGuiStyle& style = g.Style;
+
+    // Submit label or explicit size to ItemSize(), whereas ItemAdd() will submit a larger/spanning rectangle.
+    ImGuiID id = window->GetID(label);
+    ImVec2 label_size = CalcTextSize(label, NULL, true);
+    ImVec2 size(size_arg.x != 0.0f ? size_arg.x : label_size.x, size_arg.y != 0.0f ? size_arg.y : label_size.y);
+    ImVec2 pos = window->DC.CursorPos;
+    pos.y += window->DC.CurrLineTextBaseOffset;
+    ItemSize(size, 0.0f);
+
+    // Fill horizontal space
+    // We don't support (size < 0.0f) in Selectable() because the ItemSpacing extension would make explicitly right-aligned sizes not visibly match other widgets.
+    const bool span_all_columns = (flags & ImGuiSelectableFlags_SpanAllColumns) != 0;
+    const float min_x = span_all_columns ? window->ParentWorkRect.Min.x : pos.x;
+    const float max_x = span_all_columns ? window->ParentWorkRect.Max.x : window->WorkRect.Max.x;
+    if (size_arg.x == 0.0f || (flags & ImGuiSelectableFlags_SpanAvailWidth))
+        size.x = ImMax(label_size.x, max_x - min_x);
+
+    // Text stays at the submission position, but bounding box may be extended on both sides
+    const ImVec2 text_min = pos;
+    const ImVec2 text_max(min_x + size.x, pos.y + size.y);
+
+    // Selectables are meant to be tightly packed together with no click-gap, so we extend their box to cover spacing between selectable.
+    ImRect bb(min_x, pos.y, text_max.x, text_max.y);
+    if ((flags & ImGuiSelectableFlags_NoPadWithHalfSpacing) == 0)
+    {
+        // The floored half goes to the left/top side and the remainder to the right/bottom, so the total extension equals the full spacing.
+        const float spacing_x = span_all_columns ? 0.0f : style.ItemSpacing.x;
+        const float spacing_y = style.ItemSpacing.y;
+        const float spacing_L = IM_FLOOR(spacing_x * 0.50f);
+        const float spacing_U = IM_FLOOR(spacing_y * 0.50f);
+        bb.Min.x -= spacing_L;
+        bb.Min.y -= spacing_U;
+        bb.Max.x += (spacing_x - spacing_L);
+        bb.Max.y += (spacing_y - spacing_U);
+    }
+    //if (g.IO.KeyCtrl) { GetForegroundDrawList()->AddRect(bb.Min, bb.Max, IM_COL32(0, 255, 0, 255)); }
+
+    // Modify ClipRect for the ItemAdd(), faster than doing a PushColumnsBackground/PushTableBackground for every Selectable..
+    const float backup_clip_rect_min_x = window->ClipRect.Min.x;
+    const float backup_clip_rect_max_x = window->ClipRect.Max.x;
+    if (span_all_columns)
+    {
+        window->ClipRect.Min.x = window->ParentWorkRect.Min.x;
+        window->ClipRect.Max.x = window->ParentWorkRect.Max.x;
+    }
+
+    bool item_add;
+    if (flags & ImGuiSelectableFlags_Disabled)
+    {
+        // Temporarily mark the item as disabled for the duration of ItemAdd() only; the previous flags are restored right after.
+        ImGuiItemFlags backup_item_flags = window->DC.ItemFlags;
+        window->DC.ItemFlags |= ImGuiItemFlags_Disabled | ImGuiItemFlags_NoNavDefaultFocus;
+        item_add = ItemAdd(bb, id);
+        window->DC.ItemFlags = backup_item_flags;
+    }
+    else
+    {
+        item_add = ItemAdd(bb, id);
+    }
+
+    // Restore the horizontal clip range before any early return so the override above never leaks out of this function.
+    if (span_all_columns)
+    {
+        window->ClipRect.Min.x = backup_clip_rect_min_x;
+        window->ClipRect.Max.x = backup_clip_rect_max_x;
+    }
+
+    if (!item_add)
+        return false;
+
+    // FIXME: We can standardize the behavior of those two, we could also keep the fast path of override ClipRect + full push on render only,
+    // which would be advantageous since most selectable are not selected.
+    if (span_all_columns && window->DC.CurrentColumns)
+        PushColumnsBackground();
+    else if (span_all_columns && g.CurrentTable)
+        TablePushBackgroundChannel();
+
+    // We use NoHoldingActiveID on menus so user can click and _hold_ on a menu then drag to browse child entries
+    ImGuiButtonFlags button_flags = 0;
+    if (flags & ImGuiSelectableFlags_NoHoldingActiveID) { button_flags |= ImGuiButtonFlags_NoHoldingActiveId; }
+    if (flags & ImGuiSelectableFlags_SelectOnClick) { button_flags |= ImGuiButtonFlags_PressedOnClick; }
+    if (flags & ImGuiSelectableFlags_SelectOnRelease) { button_flags |= ImGuiButtonFlags_PressedOnRelease; }
+    if (flags & ImGuiSelectableFlags_Disabled) { button_flags |= ImGuiButtonFlags_Disabled; }
+    if (flags & ImGuiSelectableFlags_AllowDoubleClick) { button_flags |= ImGuiButtonFlags_PressedOnClickRelease | ImGuiButtonFlags_PressedOnDoubleClick; }
+    if (flags & ImGuiSelectableFlags_AllowItemOverlap) { button_flags |= ImGuiButtonFlags_AllowItemOverlap; }
+
+    // A disabled selectable is never drawn as selected.
+    if (flags & ImGuiSelectableFlags_Disabled)
+        selected = false;
+
+    const bool was_selected = selected;
+    bool hovered, held;
+    bool pressed = ButtonBehavior(bb, id, &hovered, &held, button_flags);
+
+    // Update NavId when clicking or when Hovering (this doesn't happen on most widgets), so navigation can be resumed with gamepad/keyboard
+    if (pressed || (hovered && (flags & ImGuiSelectableFlags_SetNavIdOnHover)))
+    {
+        if (!g.NavDisableMouseHover && g.NavWindow == window && g.NavLayer == window->DC.NavLayerCurrent)
+        {
+            g.NavDisableHighlight = true;
+            SetNavID(id, window->DC.NavLayerCurrent, window->DC.NavFocusScopeIdCurrent);
+        }
+    }
+    if (pressed)
+        MarkItemEdited(id);
+
+    if (flags & ImGuiSelectableFlags_AllowItemOverlap)
+        SetItemAllowOverlap();
+
+    // In this branch, Selectable() cannot toggle the selection so this will never trigger.
+    if (selected != was_selected) //-V547
+        window->DC.LastItemStatusFlags |= ImGuiItemStatusFlags_ToggledSelection;
+
+    // Render
+    if (held && (flags & ImGuiSelectableFlags_DrawHoveredWhenHeld))
+        hovered = true;
+    if (hovered || selected)
+    {
+        const ImU32 col = GetColorU32((held && hovered) ? ImGuiCol_HeaderActive : hovered ? ImGuiCol_HeaderHovered : ImGuiCol_Header);
+        RenderFrame(bb.Min, bb.Max, col, false, 0.0f);
+        RenderNavHighlight(bb, id, ImGuiNavHighlightFlags_TypeThin | ImGuiNavHighlightFlags_NoRounding);
+    }
+
+    // Must mirror the Push*Background() calls above (same conditions).
+    if (span_all_columns && window->DC.CurrentColumns)
+        PopColumnsBackground();
+    else if (span_all_columns && g.CurrentTable)
+        TablePopBackgroundChannel();
+
+    if (flags & ImGuiSelectableFlags_Disabled) PushStyleColor(ImGuiCol_Text, style.Colors[ImGuiCol_TextDisabled]);
+    RenderTextClipped(text_min, text_max, label, NULL, &label_size, style.SelectableTextAlign, &bb);
+    if (flags & ImGuiSelectableFlags_Disabled) PopStyleColor();
+
+    // Automatically close popups
+    if (pressed && (window->Flags & ImGuiWindowFlags_Popup) && !(flags & ImGuiSelectableFlags_DontClosePopups) && !(window->DC.ItemFlags & ImGuiItemFlags_SelectableDontClosePopup))
+        CloseCurrentPopup();
+
+    IMGUI_TEST_ENGINE_ITEM_INFO(id, label, window->DC.ItemFlags);
+    return pressed;
+}
+
+// Convenience overload: draws the selectable using *p_selected and flips *p_selected when it is pressed.
+// - p_selected: dereferenced unconditionally, so it must not be NULL.
+// Returns true when the item was pressed (i.e. when *p_selected was toggled).
+bool ImGui::Selectable(const char* label, bool* p_selected, ImGuiSelectableFlags flags, const ImVec2& size_arg)
+{
+    if (Selectable(label, *p_selected, flags, size_arg))
+    {
+        *p_selected = !*p_selected;
+        return true;
+    }
+    return false;
+}
+
+//-------------------------------------------------------------------------
+// [SECTION] Widgets: ListBox
+//-------------------------------------------------------------------------
+// - ListBox()
+// - ListBoxHeader()
+// - ListBoxFooter()
+//-------------------------------------------------------------------------
+// FIXME: This is an old API. We should redesign some of it, rename ListBoxHeader->BeginListBox, ListBoxFooter->EndListBox
+// and promote using them over existing ListBox() functions, similarly to change with combo boxes.
+//-------------------------------------------------------------------------
+
+// FIXME: In principle this function should be called BeginListBox(). We should rename it after re-evaluating if we want to keep the same signature.
+// Helper to calculate the size of a listbox and display a label on the right.
+// Tip: To have a list filling the entire window width, PushItemWidth(-1) and pass an non-visible label e.g. "##empty"
+// - label:    rendered to the right of the frame when it has a visible width; also used for the child frame ID.
+// - size_arg: forwarded to CalcItemSize() together with the default width/height computed below.
+// Returns true when the child frame was begun; the caller must then call ListBoxFooter().
+// Returns false (and begins nothing) when the window skips items or the box is not visible.
+bool ImGui::ListBoxHeader(const char* label, const ImVec2& size_arg)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+
+    const ImGuiStyle& style = g.Style;
+    const ImGuiID id = GetID(label);
+    const ImVec2 label_size = CalcTextSize(label, NULL, true);
+
+    // Size default to hold ~7.4 items. Fractional number of items helps seeing that we can scroll down/up without looking at scrollbar.
+    ImVec2 size = CalcItemSize(size_arg, CalcItemWidth(), GetTextLineHeightWithSpacing() * 7.4f + style.ItemSpacing.y);
+    ImVec2 frame_size = ImVec2(size.x, ImMax(size.y, label_size.y));
+    ImRect frame_bb(window->DC.CursorPos, window->DC.CursorPos + frame_size);
+    ImRect bb(frame_bb.Min, frame_bb.Max + ImVec2(label_size.x > 0.0f ? style.ItemInnerSpacing.x + label_size.x : 0.0f, 0.0f));
+    window->DC.LastItemRect = bb; // Forward storage for ListBoxFooter.. dodgy.
+    g.NextItemData.ClearFlags();
+
+    if (!IsRectVisible(bb.Min, bb.Max))
+    {
+        // Not visible: still declare the item so layout advances, but do not open the child frame.
+        ItemSize(bb.GetSize(), style.FramePadding.y);
+        ItemAdd(bb, 0, &frame_bb);
+        return false;
+    }
+
+    BeginGroup();
+    if (label_size.x > 0)
+        RenderText(ImVec2(frame_bb.Max.x + style.ItemInnerSpacing.x, frame_bb.Min.y + style.FramePadding.y), label);
+
+    BeginChildFrame(id, frame_bb.GetSize());
+    return true;
+}
+
+// FIXME: In principle this function should be called BeginListBox(). We should rename it after re-evaluating if we want to keep the same signature.
+// Overload that derives the frame height from an item count, then forwards to ListBoxHeader(label, size).
+// - items_count:     total number of items the caller intends to submit.
+// - height_in_items: number of rows to show; a negative value means "min(items_count, 7)".
+bool ImGui::ListBoxHeader(const char* label, int items_count, int height_in_items)
+{
+    // Size default to hold ~7.25 items.
+    // We add +25% worth of item height to allow the user to see at a glance if there are more items up/down, without looking at the scrollbar.
+    // We don't add this extra bit if items_count <= height_in_items. It is slightly dodgy, because it means a dynamic list of items will make the widget resize occasionally when it crosses that size.
+    // I am expecting that someone will come and complain about this behavior in a remote future, then we can advise on a better solution.
+    if (height_in_items < 0)
+        height_in_items = ImMin(items_count, 7);
+    const ImGuiStyle& style = GetStyle();
+    float height_in_items_f = (height_in_items < items_count) ? (height_in_items + 0.25f) : (height_in_items + 0.00f);
+
+    // We include ItemSpacing.y so that a list sized for the exact number of items doesn't make a scrollbar appears. We could also enforce that by passing a flag to BeginChild().
+    // NOTE(review): the expression below adds style.FramePadding.y * 2; the ItemSpacing.y mentioned above presumably enters through GetTextLineHeightWithSpacing() - confirm.
+    ImVec2 size;
+    size.x = 0.0f;
+    size.y = ImFloor(GetTextLineHeightWithSpacing() * height_in_items_f + style.FramePadding.y * 2.0f);
+    return ListBoxHeader(label, size);
+}
+
+// FIXME: In principle this function should be called EndListBox(). We should rename it after re-evaluating if we want to keep the same signature.
+// Closes the child frame opened by ListBoxHeader() and re-declares the item using the rectangle ListBoxHeader() stored in the parent's LastItemRect.
+// Only call this when ListBoxHeader() returned true (asserted below through the ChildWindow flag).
+void ImGui::ListBoxFooter()
+{
+    ImGuiWindow * window = GetCurrentWindow();
+    IM_ASSERT((window->Flags & ImGuiWindowFlags_ChildWindow) && "Mismatched ListBoxHeader/ListBoxFooter calls. Did you test the return value of ListBoxHeader()?");
+    ImGuiWindow* parent_window = window->ParentWindow;
+    const ImRect bb = parent_window->DC.LastItemRect;
+    const ImGuiStyle& style = GetStyle();
+
+    EndChildFrame();
+
+    // Redeclare item size so that it includes the label (we have stored the full size in LastItemRect)
+    // We call SameLine() to restore DC.CurrentLine* data
+    SameLine();
+    parent_window->DC.CursorPos = bb.Min;
+    ItemSize(bb, style.FramePadding.y);
+    EndGroup();
+}
+
+// ListBox over a plain array of C strings; forwards to the getter-based overload using Items_ArrayGetter.
+// Returns true when the selection stored in *current_item changed.
+bool ImGui::ListBox(const char* label, int* current_item, const char* const items[], int items_count, int height_items)
+{
+    const bool value_changed = ListBox(label, current_item, Items_ArrayGetter, (void*)items, items_count, height_items);
+    return value_changed;
+}
+
+// ListBox whose item texts come from a callback.
+// - current_item: in/out index of the selected item; written when the user clicks another entry.
+// - items_getter: called as items_getter(data, index, &text); when it returns false the placeholder "*Unknown item*" is shown.
+// - data:         opaque pointer handed back to items_getter.
+// Returns true when *current_item was changed this call; false when nothing changed or the list box is not visible.
+bool ImGui::ListBox(const char* label, int* current_item, bool (*items_getter)(void*, int, const char**), void* data, int items_count, int height_in_items)
+{
+    if (!ListBoxHeader(label, items_count, height_in_items))
+        return false;
+
+    // Assume all items have even height (= 1 line of text). If you need items of different or variable sizes you can create a custom version of ListBox() in your code without using the clipper.
+    ImGuiContext& g = *GImGui;
+    bool value_changed = false;
+    ImGuiListClipper clipper;
+    clipper.Begin(items_count, GetTextLineHeightWithSpacing()); // We know exactly our line height here so we pass it as a minor optimization, but generally you don't need to.
+    while (clipper.Step())
+        for (int i = clipper.DisplayStart; i < clipper.DisplayEnd; i++)
+        {
+            const bool item_selected = (i == *current_item);
+            const char* item_text;
+            if (!items_getter(data, i, &item_text))
+                item_text = "*Unknown item*";
+
+            // Scope each row with its index so identical texts still get distinct IDs.
+            PushID(i);
+            if (Selectable(item_text, item_selected))
+            {
+                *current_item = i;
+                value_changed = true;
+            }
+            if (item_selected)
+                SetItemDefaultFocus();
+            PopID();
+        }
+    ListBoxFooter();
+    if (value_changed)
+        MarkItemEdited(g.CurrentWindow->DC.LastItemId);
+
+    return value_changed;
+}
+
+//-------------------------------------------------------------------------
+// [SECTION] Widgets: PlotLines, PlotHistogram
+//-------------------------------------------------------------------------
+// - PlotEx() [Internal]
+// - PlotLines()
+// - PlotHistogram()
+//-------------------------------------------------------------------------
+// Plot/Graph widgets are not very good.
+// Consider writing your own, or using a third-party one, see:
+// - ImPlot https://github.com/epezent/implot
+// - others https://github.com/ocornut/imgui/wiki/Useful-Widgets
+//-------------------------------------------------------------------------
+
+// Shared implementation of PlotLines() / PlotHistogram().
+// - plot_type:            ImGuiPlotType_Lines needs at least 2 values, ImGuiPlotType_Histogram at least 1; with fewer only the frame, overlay and label are drawn.
+// - values_getter/data:   callback returning the value at a given index; 'data' is passed through untouched.
+// - values_offset:        added to every index before wrapping with '% values_count'.
+//                         NOTE(review): a negative offset would yield a negative index after '%' - presumably callers pass >= 0; confirm.
+// - overlay_text:         optional text drawn centered at the top of the frame (may be NULL).
+// - scale_min/scale_max:  FLT_MAX means "compute from the data" (NaN values are ignored while scanning).
+// - frame_size:           a zero component is replaced by CalcItemWidth() / label height plus frame padding.
+// Returns the index of the hovered value, or -1 when nothing is hovered, the window skips items, or the item is clipped.
+int ImGui::PlotEx(ImGuiPlotType plot_type, const char* label, float (*values_getter)(void* data, int idx), void* data, int values_count, int values_offset, const char* overlay_text, float scale_min, float scale_max, ImVec2 frame_size)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return -1;
+
+    const ImGuiStyle& style = g.Style;
+    const ImGuiID id = window->GetID(label);
+
+    const ImVec2 label_size = CalcTextSize(label, NULL, true);
+    if (frame_size.x == 0.0f)
+        frame_size.x = CalcItemWidth();
+    if (frame_size.y == 0.0f)
+        frame_size.y = label_size.y + (style.FramePadding.y * 2);
+
+    const ImRect frame_bb(window->DC.CursorPos, window->DC.CursorPos + frame_size);
+    const ImRect inner_bb(frame_bb.Min + style.FramePadding, frame_bb.Max - style.FramePadding);
+    const ImRect total_bb(frame_bb.Min, frame_bb.Max + ImVec2(label_size.x > 0.0f ? style.ItemInnerSpacing.x + label_size.x : 0.0f, 0));
+    ItemSize(total_bb, style.FramePadding.y);
+    if (!ItemAdd(total_bb, 0, &frame_bb))
+        return -1;
+    const bool hovered = ItemHoverable(frame_bb, id);
+
+    // Determine scale from values if not specified
+    if (scale_min == FLT_MAX || scale_max == FLT_MAX)
+    {
+        float v_min = FLT_MAX;
+        float v_max = -FLT_MAX;
+        for (int i = 0; i < values_count; i++)
+        {
+            const float v = values_getter(data, i);
+            if (v != v) // Ignore NaN values
+                continue;
+            v_min = ImMin(v_min, v);
+            v_max = ImMax(v_max, v);
+        }
+        if (scale_min == FLT_MAX)
+            scale_min = v_min;
+        if (scale_max == FLT_MAX)
+            scale_max = v_max;
+    }
+
+    RenderFrame(frame_bb.Min, frame_bb.Max, GetColorU32(ImGuiCol_FrameBg), true, style.FrameRounding);
+
+    const int values_count_min = (plot_type == ImGuiPlotType_Lines) ? 2 : 1;
+    int idx_hovered = -1;
+    if (values_count >= values_count_min)
+    {
+        // res_w: number of segments/bars actually drawn (at most one per horizontal pixel); item_count: number of segments/bars in the data.
+        int res_w = ImMin((int)frame_size.x, values_count) + ((plot_type == ImGuiPlotType_Lines) ? -1 : 0);
+        int item_count = values_count + ((plot_type == ImGuiPlotType_Lines) ? -1 : 0);
+
+        // Tooltip on hover
+        if (hovered && inner_bb.Contains(g.IO.MousePos))
+        {
+            // t is clamped strictly below 1 so that v_idx stays < item_count.
+            const float t = ImClamp((g.IO.MousePos.x - inner_bb.Min.x) / (inner_bb.Max.x - inner_bb.Min.x), 0.0f, 0.9999f);
+            const int v_idx = (int)(t * item_count);
+            IM_ASSERT(v_idx >= 0 && v_idx < values_count);
+
+            const float v0 = values_getter(data, (v_idx + values_offset) % values_count);
+            const float v1 = values_getter(data, (v_idx + 1 + values_offset) % values_count);
+            if (plot_type == ImGuiPlotType_Lines)
+                SetTooltip("%d: %8.4g\n%d: %8.4g", v_idx, v0, v_idx + 1, v1);
+            else if (plot_type == ImGuiPlotType_Histogram)
+                SetTooltip("%d: %8.4g", v_idx, v0);
+            idx_hovered = v_idx;
+        }
+
+        const float t_step = 1.0f / (float)res_w;
+        // A flat range maps every value to the same normalized height instead of dividing by zero.
+        const float inv_scale = (scale_min == scale_max) ? 0.0f : (1.0f / (scale_max - scale_min));
+
+        float v0 = values_getter(data, (0 + values_offset) % values_count);
+        float t0 = 0.0f;
+        ImVec2 tp0 = ImVec2( t0, 1.0f - ImSaturate((v0 - scale_min) * inv_scale) ); // Point in the normalized space of our target rectangle
+        float histogram_zero_line_t = (scale_min * scale_max < 0.0f) ? (-scale_min * inv_scale) : (scale_min < 0.0f ? 0.0f : 1.0f); // Where does the zero line stands
+
+        const ImU32 col_base = GetColorU32((plot_type == ImGuiPlotType_Lines) ? ImGuiCol_PlotLines : ImGuiCol_PlotHistogram);
+        const ImU32 col_hovered = GetColorU32((plot_type == ImGuiPlotType_Lines) ? ImGuiCol_PlotLinesHovered : ImGuiCol_PlotHistogramHovered);
+
+        for (int n = 0; n < res_w; n++)
+        {
+            const float t1 = t0 + t_step;
+            const int v1_idx = (int)(t0 * item_count + 0.5f);
+            IM_ASSERT(v1_idx >= 0 && v1_idx < values_count);
+            const float v1 = values_getter(data, (v1_idx + values_offset + 1) % values_count);
+            const ImVec2 tp1 = ImVec2( t1, 1.0f - ImSaturate((v1 - scale_min) * inv_scale) );
+
+            // NB: Draw calls are merged together by the DrawList system. Still, we should render our batch are lower level to save a bit of CPU.
+            ImVec2 pos0 = ImLerp(inner_bb.Min, inner_bb.Max, tp0);
+            ImVec2 pos1 = ImLerp(inner_bb.Min, inner_bb.Max, (plot_type == ImGuiPlotType_Lines) ? tp1 : ImVec2(tp1.x, histogram_zero_line_t));
+            if (plot_type == ImGuiPlotType_Lines)
+            {
+                window->DrawList->AddLine(pos0, pos1, idx_hovered == v1_idx ? col_hovered : col_base);
+            }
+            else if (plot_type == ImGuiPlotType_Histogram)
+            {
+                // Leave a 1 pixel gap between bars when they are at least 2 pixels wide.
+                if (pos1.x >= pos0.x + 2.0f)
+                    pos1.x -= 1.0f;
+                window->DrawList->AddRectFilled(pos0, pos1, idx_hovered == v1_idx ? col_hovered : col_base);
+            }
+
+            t0 = t1;
+            tp0 = tp1;
+        }
+    }
+
+    // Text overlay
+    if (overlay_text)
+        RenderTextClipped(ImVec2(frame_bb.Min.x, frame_bb.Min.y + style.FramePadding.y), frame_bb.Max, overlay_text, NULL, NULL, ImVec2(0.5f, 0.0f));
+
+    if (label_size.x > 0.0f)
+        RenderText(ImVec2(frame_bb.Max.x + style.ItemInnerSpacing.x, inner_bb.Min.y), label);
+
+    // Return hovered index or -1 if none are hovered.
+    // This is currently not exposed in the public API because we need a larger redesign of the whole thing, but in the short-term we are making it available in PlotEx().
+    return idx_hovered;
+}
+
+// Payload handed to Plot_ArrayGetter(): base pointer of the float data and the distance in bytes between two consecutive values.
+struct ImGuiPlotArrayGetterData
+{
+    const float* Values;
+    int Stride;
+
+    ImGuiPlotArrayGetterData(const float* values, int stride) { Values = values; Stride = stride; }
+};
+
+// values_getter used by the array overloads: reads the float located idx * Stride bytes after Values.
+static float Plot_ArrayGetter(void* data, int idx)
+{
+    ImGuiPlotArrayGetterData* plot_data = (ImGuiPlotArrayGetterData*)data;
+    const float v = *(const float*)(const void*)((const unsigned char*)plot_data->Values + (size_t)idx * plot_data->Stride);
+    return v;
+}
+
+// Line plot over a float array; 'stride' is the byte distance between consecutive values.
+void ImGui::PlotLines(const char* label, const float* values, int values_count, int values_offset, const char* overlay_text, float scale_min, float scale_max, ImVec2 graph_size, int stride)
+{
+    ImGuiPlotArrayGetterData data(values, stride);
+    PlotEx(ImGuiPlotType_Lines, label, &Plot_ArrayGetter, (void*)&data, values_count, values_offset, overlay_text, scale_min, scale_max, graph_size);
+}
+
+// Line plot whose values come from a user callback. The hovered index returned by PlotEx() is discarded.
+void ImGui::PlotLines(const char* label, float (*values_getter)(void* data, int idx), void* data, int values_count, int values_offset, const char* overlay_text, float scale_min, float scale_max, ImVec2 graph_size)
+{
+    PlotEx(ImGuiPlotType_Lines, label, values_getter, data, values_count, values_offset, overlay_text, scale_min, scale_max, graph_size);
+}
+
+// Histogram over a float array; 'stride' is the byte distance between consecutive values.
+void ImGui::PlotHistogram(const char* label, const float* values, int values_count, int values_offset, const char* overlay_text, float scale_min, float scale_max, ImVec2 graph_size, int stride)
+{
+    ImGuiPlotArrayGetterData data(values, stride);
+    PlotEx(ImGuiPlotType_Histogram, label, &Plot_ArrayGetter, (void*)&data, values_count, values_offset, overlay_text, scale_min, scale_max, graph_size);
+}
+
+// Histogram whose values come from a user callback. The hovered index returned by PlotEx() is discarded.
+void ImGui::PlotHistogram(const char* label, float (*values_getter)(void* data, int idx), void* data, int values_count, int values_offset, const char* overlay_text, float scale_min, float scale_max, ImVec2 graph_size)
+{
+    PlotEx(ImGuiPlotType_Histogram, label, values_getter, data, values_count, values_offset, overlay_text, scale_min, scale_max, graph_size);
+}
+
+//-------------------------------------------------------------------------
+// [SECTION] Widgets: Value helpers
+// Those are not very useful, legacy API.
+//-------------------------------------------------------------------------
+// - Value()
+//-------------------------------------------------------------------------
+
+// Displays "prefix: true" or "prefix: false".
+void ImGui::Value(const char* prefix, bool b)
+{
+    Text("%s: %s", prefix, (b ? "true" : "false"));
+}
+
+// Displays "prefix: <signed decimal>".
+void ImGui::Value(const char* prefix, int v)
+{
+    Text("%s: %d", prefix, v);
+}
+
+// Displays "prefix: <unsigned decimal>".
+// Uses %u: passing an unsigned int to %d is a conversion/argument mismatch and typically shows values above INT_MAX as negative.
+void ImGui::Value(const char* prefix, unsigned int v)
+{
+    Text("%s: %u", prefix, v);
+}
+
+// Displays "prefix: <float>".
+// - float_format: optional printf-style format for the value alone (e.g. "%.1f"); NULL selects "%.3f".
+//   The combined format is built in a 64-byte buffer through ImFormatString(), so an overly long float_format
+//   is presumably truncated - confirm against ImFormatString().
+void ImGui::Value(const char* prefix, float v, const char* float_format)
+{
+    if (float_format)
+    {
+        char fmt[64];
+        // "%%s" becomes a literal "%s" in fmt, so the prefix is substituted by the Text() call below.
+        ImFormatString(fmt, IM_ARRAYSIZE(fmt), "%%s: %s", float_format);
+        Text(fmt, prefix, v);
+    }
+    else
+    {
+        Text("%s: %.3f", prefix, v);
+    }
+}
+
+//-------------------------------------------------------------------------
+// [SECTION] MenuItem, BeginMenu, EndMenu, etc.
+//-------------------------------------------------------------------------
+// - ImGuiMenuColumns [Internal]
+// - BeginMenuBar()
+// - EndMenuBar()
+// - BeginMainMenuBar()
+// - EndMainMenuBar()
+// - BeginMenu()
+// - EndMenu()
+// - MenuItem()
+//-------------------------------------------------------------------------
+
+// Internal layout helper for menu columns.
+// Update(): turn the widths declared since the previous call (NextWidths) into column start positions (Pos) and a total Width,
+// then reset NextWidths for the next round of DeclColumns() calls.
+// - count:   must equal the number of columns (asserted, otherwise unused).
+// - spacing: gap inserted before every non-empty column except the first.
+// - clear:   when true, the pending widths are discarded before the layout is computed.
+void ImGuiMenuColumns::Update(int count, float spacing, bool clear)
+{
+    IM_ASSERT(count == IM_ARRAYSIZE(Pos));
+    IM_UNUSED(count);
+    Spacing = spacing;
+    NextWidth = 0.0f;
+    Width = 0.0f;
+    if (clear)
+        memset(NextWidths, 0, sizeof(NextWidths));
+    for (int column_n = 0; column_n < IM_ARRAYSIZE(Pos); column_n++)
+    {
+        // Consume the pending width of this column.
+        const float column_w = NextWidths[column_n];
+        NextWidths[column_n] = 0.0f;
+
+        // Spacing goes in front of the column, and only when the column is actually used.
+        if (column_n > 0 && column_w > 0.0f)
+            Width += Spacing;
+        Pos[column_n] = IM_FLOOR(Width);
+        Width += column_w;
+    }
+}
+
+// DeclColumns(): declare the widths one item needs in each of the three columns. Each pending width only ever grows.
+// Returns the larger of the current Width and the width the pending declarations would require.
+float ImGuiMenuColumns::DeclColumns(float w0, float w1, float w2) // not using va_arg because they promote float to double
+{
+    NextWidths[0] = ImMax(NextWidths[0], w0);
+    NextWidths[1] = ImMax(NextWidths[1], w1);
+    NextWidths[2] = ImMax(NextWidths[2], w2);
+
+    NextWidth = 0.0f;
+    for (int column_n = 0; column_n < IM_ARRAYSIZE(Pos); column_n++)
+    {
+        const float column_w = NextWidths[column_n];
+        const float column_spacing = (column_n > 0 && column_w > 0.0f) ? Spacing : 0.0f;
+        NextWidth += column_w + column_spacing;
+    }
+    return ImMax(Width, NextWidth);
+}
+
+// CalcExtraSpace(): width left over once the columns are laid out inside avail_w, never negative.
+float ImGuiMenuColumns::CalcExtraSpace(float avail_w) const
+{
+    const float remaining_w = avail_w - Width;
+    return ImMax(0.0f, remaining_w);
+}
+
+// FIXME: Provided a rectangle perhaps e.g. a BeginMenuBarEx() could be used anywhere..
+// Currently the main responsibility of this function being to setup clip-rect + horizontal layout + menu navigation layer.
+// Ideally we also want this to be responsible for claiming space out of the main window scrolling rectangle, in which case ImGuiWindowFlags_MenuBar will become unnecessary.
+// Then later the same system could be used for multiple menu-bars, scrollbars, side-bars.
+// Start appending to the current window's menu bar.
+// Returns false (and changes nothing) when the window skips items or was not created with ImGuiWindowFlags_MenuBar.
+// On success it opens a group, pushes the "##menubar" ID and a clip rect, and switches the window to horizontal layout
+// on the menu nav layer; EndMenuBar() undoes each of those steps.
+bool ImGui::BeginMenuBar()
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+    if (!(window->Flags & ImGuiWindowFlags_MenuBar))
+        return false;
+
+    IM_ASSERT(!window->DC.MenuBarAppending);
+    BeginGroup(); // Backup position on layer 0 // FIXME: Misleading to use a group for that backup/restore
+    PushID("##menubar");
+
+    // We don't clip with current window clipping rectangle as it is already set to the area below. However we clip with window full rect.
+    // We remove 1 worth of rounding to Max.x to that text in long menus and small windows don't tend to display over the lower-right rounded area, which looks particularly glitchy.
+    ImRect bar_rect = window->MenuBarRect();
+    ImRect clip_rect(IM_ROUND(bar_rect.Min.x + window->WindowBorderSize), IM_ROUND(bar_rect.Min.y + window->WindowBorderSize), IM_ROUND(ImMax(bar_rect.Min.x, bar_rect.Max.x - ImMax(window->WindowRounding, window->WindowBorderSize))), IM_ROUND(bar_rect.Max.y));
+    clip_rect.ClipWith(window->OuterRectClipped);
+    PushClipRect(clip_rect.Min, clip_rect.Max, false);
+
+    // We overwrite CursorMaxPos because BeginGroup sets it to CursorPos (essentially the .EmitItem hack in EndMenuBar() would need something analogous here, maybe a BeginGroupEx() with flags).
+    window->DC.CursorPos = window->DC.CursorMaxPos = ImVec2(bar_rect.Min.x + window->DC.MenuBarOffset.x, bar_rect.Min.y + window->DC.MenuBarOffset.y);
+    window->DC.LayoutType = ImGuiLayoutType_Horizontal;
+    window->DC.NavLayerCurrent = ImGuiNavLayer_Menu;
+    window->DC.MenuBarAppending = true;
+    AlignTextToFramePadding();
+    return true;
+}
+
+// Finish appending to the menu bar started by BeginMenuBar().
+// Does nothing when the window skips items. Otherwise it first handles a pending left/right nav request coming from one of
+// this bar's child menus, then pops the clip rect / ID / group and restores vertical layout on the main nav layer.
+void ImGui::EndMenuBar()
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return;
+    ImGuiContext& g = *GImGui;
+
+    // Nav: When a move request within one of our child menu failed, capture the request to navigate among our siblings.
+    if (NavMoveRequestButNoResultYet() && (g.NavMoveDir == ImGuiDir_Left || g.NavMoveDir == ImGuiDir_Right) && (g.NavWindow->Flags & ImGuiWindowFlags_ChildMenu))
+    {
+        // Walk up the chain of child menus to the one whose parent is no longer a child menu.
+        ImGuiWindow* nav_earliest_child = g.NavWindow;
+        while (nav_earliest_child->ParentWindow && (nav_earliest_child->ParentWindow->Flags & ImGuiWindowFlags_ChildMenu))
+            nav_earliest_child = nav_earliest_child->ParentWindow;
+        if (nav_earliest_child->ParentWindow == window && nav_earliest_child->DC.ParentLayoutType == ImGuiLayoutType_Horizontal && g.NavMoveRequestForward == ImGuiNavForward_None)
+        {
+            // To do so we claim focus back, restore NavId and then process the movement request for yet another frame.
+            // This involve a one-frame delay which isn't very problematic in this situation. We could remove it by scoring in advance for multiple window (probably not worth the hassle/cost)
+            const ImGuiNavLayer layer = ImGuiNavLayer_Menu;
+            IM_ASSERT(window->DC.NavLayerActiveMaskNext & (1 << layer)); // Sanity check
+            FocusWindow(window);
+            SetNavIDWithRectRel(window->NavLastIds[layer], layer, 0, window->NavRectRel[layer]);
+            g.NavLayer = layer;
+            g.NavDisableHighlight = true; // Hide highlight for the current frame so we don't see the intermediary selection.
+            g.NavMoveRequestForward = ImGuiNavForward_ForwardQueued;
+            NavMoveRequestCancel();
+        }
+    }
+
+    IM_ASSERT(window->Flags & ImGuiWindowFlags_MenuBar);
+    IM_ASSERT(window->DC.MenuBarAppending);
+    PopClipRect();
+    PopID();
+    window->DC.MenuBarOffset.x = window->DC.CursorPos.x - window->MenuBarRect().Min.x; // Save horizontal position so next append can reuse it. This is kinda equivalent to a per-layer CursorPos.
+    // Clear EmitItem on the group opened by BeginMenuBar() before closing it (the ".EmitItem hack" mentioned in BeginMenuBar()).
+    g.GroupStack.back().EmitItem = false;
+    EndGroup(); // Restore position on layer 0
+    window->DC.LayoutType = ImGuiLayoutType_Vertical;
+    window->DC.NavLayerCurrent = ImGuiNavLayer_Main;
+    window->DC.MenuBarAppending = false;
+}
+
+// For the main menu bar, which cannot be moved, we honor g.Style.DisplaySafeAreaPadding to ensure text can be visible on a TV set.
+// Creates the "##MainMenuBar" window at position (0,0) with the display width, then begins its menu bar.
+// Returns true when both Begin() and BeginMenuBar() succeeded; only then must the caller call EndMainMenuBar().
+// On failure End() is called here, so the caller must not call EndMainMenuBar().
+bool ImGui::BeginMainMenuBar()
+{
+    ImGuiContext& g = *GImGui;
+    g.NextWindowData.MenuBarOffsetMinVal = ImVec2(g.Style.DisplaySafeAreaPadding.x, ImMax(g.Style.DisplaySafeAreaPadding.y - g.Style.FramePadding.y, 0.0f));
+    SetNextWindowPos(ImVec2(0.0f, 0.0f));
+    SetNextWindowSize(ImVec2(g.IO.DisplaySize.x, g.NextWindowData.MenuBarOffsetMinVal.y + g.FontBaseSize + g.Style.FramePadding.y));
+    PushStyleVar(ImGuiStyleVar_WindowRounding, 0.0f);
+    PushStyleVar(ImGuiStyleVar_WindowMinSize, ImVec2(0, 0));
+    ImGuiWindowFlags window_flags = ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_MenuBar;
+    bool is_open = Begin("##MainMenuBar", NULL, window_flags) && BeginMenuBar();
+    PopStyleVar(2);
+    // Reset the offset so it does not carry over to windows created later.
+    g.NextWindowData.MenuBarOffsetMinVal = ImVec2(0.0f, 0.0f);
+    if (!is_open)
+    {
+        End();
+        return false;
+    }
+    return true; //-V1020
+}
+
+// Counterpart of a successful BeginMainMenuBar(): ends the menu bar, optionally hands focus to another window, then ends the window.
+void ImGui::EndMainMenuBar()
+{
+    EndMenuBar();
+
+    // When the user has left the menu layer (typically: closed menus through activation of an item), we restore focus to the previous window
+    // FIXME: With this strategy we won't be able to restore a NULL focus.
+    ImGuiContext& g = *GImGui;
+    if (g.CurrentWindow == g.NavWindow && g.NavLayer == ImGuiNavLayer_Main && !g.NavAnyRequest)
+        FocusTopMostWindowUnderOne(g.NavWindow, NULL);
+
+    End();
+}
+
+bool ImGui::BeginMenu(const char* label, bool enabled)
+{
+    ImGuiWindow* window = GetCurrentWindow();
+    if (window->SkipItems)
+        return false;
+
+    ImGuiContext& g = *GImGui;
+    const ImGuiStyle& style = g.Style;
+    const ImGuiID id = window->GetID(label);
+    bool menu_is_open = IsPopupOpen(id, ImGuiPopupFlags_None);
+
+    // Sub-menus are ChildWindow so that mouse can be hovering across them (otherwise top-most popup menu would steal focus and not allow hovering on parent menu)
+    ImGuiWindowFlags flags = ImGuiWindowFlags_ChildMenu | ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoNavFocus;
+    if (window->Flags & (ImGuiWindowFlags_Popup | ImGuiWindowFlags_ChildMenu))
+        flags |= ImGuiWindowFlags_ChildWindow;
+
+    // If a menu with same the ID was already submitted, we will append to it, matching the behavior of Begin().
+    // We are relying on a O(N) search - so O(N log N) over the frame - which seems like the most efficient for the expected small amount of BeginMenu() calls per frame.
+    // If somehow this is ever becoming a problem we can switch to use e.g. ImGuiStorage mapping key to last frame used.
+    if (g.MenusIdSubmittedThisFrame.contains(id))
+    {
+        if (menu_is_open)
+            menu_is_open = BeginPopupEx(id, flags); // menu_is_open can be 'false' when the popup is completely clipped (e.g. zero size display)
+        else
+            g.NextWindowData.ClearFlags(); // we behave like Begin() and need to consume those values
+        return menu_is_open;
+    }
+
+    // Tag menu as used. Next time BeginMenu() with same ID is called it will append to existing menu
+    g.MenusIdSubmittedThisFrame.push_back(id);
+
+    ImVec2 label_size = CalcTextSize(label, NULL, true);
+    bool pressed;
+    bool menuset_is_open = !(window->Flags & ImGuiWindowFlags_Popup) && (g.OpenPopupStack.Size > g.BeginPopupStack.Size && g.OpenPopupStack[g.BeginPopupStack.Size].OpenParentId == window->IDStack.back());
+    ImGuiWindow* backed_nav_window = g.NavWindow;
+    if (menuset_is_open)
+        g.NavWindow = window; // Odd hack to allow hovering across menus of a same menu-set (otherwise we wouldn't be able to hover parent)
+
+    // The reference position stored in popup_pos will be used by Begin() to find a suitable position for the child menu,
+    // However the final position is going to be different! It is chosen by FindBestWindowPosForPopup().
+    // e.g. Menus tend to overlap each other horizontally to amplify relative Z-ordering.
+    ImVec2 popup_pos, pos = window->DC.CursorPos;
+    if (window->DC.LayoutType == ImGuiLayoutType_Horizontal)
+    {
+        // Menu inside an horizontal menu bar
+        // Selectable extend their highlight by half ItemSpacing in each direction.
+        // For ChildMenu, the popup position will be overwritten by the call to FindBestWindowPosForPopup() in Begin()
+        popup_pos = ImVec2(pos.x - 1.0f - IM_FLOOR(style.ItemSpacing.x * 0.5f), pos.y - style.FramePadding.y + window->MenuBarHeight());
+        window->DC.CursorPos.x += IM_FLOOR(style.ItemSpacing.x * 0.5f);
+        PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(style.ItemSpacing.x * 2.0f, style.ItemSpacing.y));
+        float w = label_size.x;
+        pressed = Selectable(label, menu_is_open, ImGuiSelectableFlags_NoHoldingActiveID | ImGuiSelectableFlags_SelectOnClick | ImGuiSelectableFlags_DontClosePopups | (!enabled ? ImGuiSelectableFlags_Disabled : 0), ImVec2(w, 0.0f));
+        PopStyleVar();
+        window->DC.CursorPos.x += IM_FLOOR(style.ItemSpacing.x * (-1.0f + 0.5f)); // -1 spacing to compensate the spacing added when Selectable() did a SameLine(). It would also work to call SameLine() ourselves after the PopStyleVar().
+    }
+    else
+    {
+        // Menu inside a menu
+        // (In a typical menu window where all items are BeginMenu() or MenuItem() calls, extra_w will always be 0.0f.
+        // Only when they are other items sticking out we're going to add spacing, yet only register minimum width into the layout system.
+        popup_pos = ImVec2(pos.x, pos.y - style.WindowPadding.y);
+        float min_w = window->DC.MenuColumns.DeclColumns(label_size.x, 0.0f, IM_FLOOR(g.FontSize * 1.20f)); // Feedback to next frame
+        float extra_w = ImMax(0.0f, GetContentRegionAvail().x - min_w);
+        pressed = Selectable(label, menu_is_open, ImGuiSelectableFlags_NoHoldingActiveID | ImGuiSelectableFlags_SelectOnClick | ImGuiSelectableFlags_DontClosePopups | ImGuiSelectableFlags_SpanAvailWidth | (!enabled ? ImGuiSelectableFlags_Disabled : 0), ImVec2(min_w, 0.0f));
+        ImU32 text_col = GetColorU32(enabled ? ImGuiCol_Text : ImGuiCol_TextDisabled);
+        RenderArrow(window->DrawList, pos + ImVec2(window->DC.MenuColumns.Pos[2] + extra_w + g.FontSize * 0.30f, 0.0f), text_col, ImGuiDir_Right);
+    }
+
+    const bool hovered = enabled && ItemHoverable(window->DC.LastItemRect, id);
+    if (menuset_is_open)
+        g.NavWindow = backed_nav_window;
+
+    bool want_open = false;
+    bool want_close = false;
+    if (window->DC.LayoutType == ImGuiLayoutType_Vertical) // (window->Flags & (ImGuiWindowFlags_Popup|ImGuiWindowFlags_ChildMenu))
+    {
+        // Close menu when not hovering it anymore unless we are moving roughly in the direction of the menu
+        // Implement http://bjk5.com/post/44698559168/breaking-down-amazons-mega-dropdown to avoid using timers, so menus feels more reactive.
+        bool moving_toward_other_child_menu = false;
+
+        ImGuiWindow* child_menu_window = (g.BeginPopupStack.Size < g.OpenPopupStack.Size && g.OpenPopupStack[g.BeginPopupStack.Size].SourceWindow == window) ? 
g.OpenPopupStack[g.BeginPopupStack.Size].Window : NULL; + if (g.HoveredWindow == window && child_menu_window != NULL && !(window->Flags & ImGuiWindowFlags_MenuBar)) + { + // FIXME-DPI: Values should be derived from a master "scale" factor. + ImRect next_window_rect = child_menu_window->Rect(); + ImVec2 ta = g.IO.MousePos - g.IO.MouseDelta; + ImVec2 tb = (window->Pos.x < child_menu_window->Pos.x) ? next_window_rect.GetTL() : next_window_rect.GetTR(); + ImVec2 tc = (window->Pos.x < child_menu_window->Pos.x) ? next_window_rect.GetBL() : next_window_rect.GetBR(); + float extra = ImClamp(ImFabs(ta.x - tb.x) * 0.30f, 5.0f, 30.0f); // add a bit of extra slack. + ta.x += (window->Pos.x < child_menu_window->Pos.x) ? -0.5f : +0.5f; // to avoid numerical issues + tb.y = ta.y + ImMax((tb.y - extra) - ta.y, -100.0f); // triangle is maximum 200 high to limit the slope and the bias toward large sub-menus // FIXME: Multiply by fb_scale? + tc.y = ta.y + ImMin((tc.y + extra) - ta.y, +100.0f); + moving_toward_other_child_menu = ImTriangleContainsPoint(ta, tb, tc, g.IO.MousePos); + //GetForegroundDrawList()->AddTriangleFilled(ta, tb, tc, moving_within_opened_triangle ? 
IM_COL32(0,128,0,128) : IM_COL32(128,0,0,128)); // [DEBUG] + } + if (menu_is_open && !hovered && g.HoveredWindow == window && g.HoveredIdPreviousFrame != 0 && g.HoveredIdPreviousFrame != id && !moving_toward_other_child_menu) + want_close = true; + + if (!menu_is_open && hovered && pressed) // Click to open + want_open = true; + else if (!menu_is_open && hovered && !moving_toward_other_child_menu) // Hover to open + want_open = true; + + if (g.NavActivateId == id) + { + want_close = menu_is_open; + want_open = !menu_is_open; + } + if (g.NavId == id && g.NavMoveRequest && g.NavMoveDir == ImGuiDir_Right) // Nav-Right to open + { + want_open = true; + NavMoveRequestCancel(); + } + } + else + { + // Menu bar + if (menu_is_open && pressed && menuset_is_open) // Click an open menu again to close it + { + want_close = true; + want_open = menu_is_open = false; + } + else if (pressed || (hovered && menuset_is_open && !menu_is_open)) // First click to open, then hover to open others + { + want_open = true; + } + else if (g.NavId == id && g.NavMoveRequest && g.NavMoveDir == ImGuiDir_Down) // Nav-Down to open + { + want_open = true; + NavMoveRequestCancel(); + } + } + + if (!enabled) // explicitly close if an open menu becomes disabled, facilitate users code a lot in pattern such as 'if (BeginMenu("options", has_object)) { ..use object.. }' + want_close = true; + if (want_close && IsPopupOpen(id, ImGuiPopupFlags_None)) + ClosePopupToLevel(g.BeginPopupStack.Size, true); + + IMGUI_TEST_ENGINE_ITEM_INFO(id, label, window->DC.ItemFlags | ImGuiItemStatusFlags_Openable | (menu_is_open ? ImGuiItemStatusFlags_Opened : 0)); + + if (!menu_is_open && want_open && g.OpenPopupStack.Size > g.BeginPopupStack.Size) + { + // Don't recycle same menu level in the same frame, first close the other menu and yield for a frame. 
+ OpenPopup(label); + return false; + } + + menu_is_open |= want_open; + if (want_open) + OpenPopup(label); + + if (menu_is_open) + { + SetNextWindowPos(popup_pos, ImGuiCond_Always); + menu_is_open = BeginPopupEx(id, flags); // menu_is_open can be 'false' when the popup is completely clipped (e.g. zero size display) + } + else + { + g.NextWindowData.ClearFlags(); // We behave like Begin() and need to consume those values + } + + return menu_is_open; +} + +void ImGui::EndMenu() +{ + // Nav: When a left move request _within our child menu_ failed, close ourselves (the _parent_ menu). + // A menu doesn't close itself because EndMenuBar() wants the catch the last Left<>Right inputs. + // However, it means that with the current code, a BeginMenu() from outside another menu or a menu-bar won't be closable with the Left direction. + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (g.NavWindow && g.NavWindow->ParentWindow == window && g.NavMoveDir == ImGuiDir_Left && NavMoveRequestButNoResultYet() && window->DC.LayoutType == ImGuiLayoutType_Vertical) + { + ClosePopupToLevel(g.BeginPopupStack.Size, true); + NavMoveRequestCancel(); + } + + EndPopup(); +} + +bool ImGui::MenuItem(const char* label, const char* shortcut, bool selected, bool enabled) +{ + ImGuiWindow* window = GetCurrentWindow(); + if (window->SkipItems) + return false; + + ImGuiContext& g = *GImGui; + ImGuiStyle& style = g.Style; + ImVec2 pos = window->DC.CursorPos; + ImVec2 label_size = CalcTextSize(label, NULL, true); + + // We've been using the equivalent of ImGuiSelectableFlags_SetNavIdOnHover on all Selectable() since early Nav system days (commit 43ee5d73), + // but I am unsure whether this should be kept at all. For now moved it to be an opt-in feature used by menus only. + ImGuiSelectableFlags flags = ImGuiSelectableFlags_SelectOnRelease | ImGuiSelectableFlags_SetNavIdOnHover | (enabled ? 
0 : ImGuiSelectableFlags_Disabled); + bool pressed; + if (window->DC.LayoutType == ImGuiLayoutType_Horizontal) + { + // Mimic the exact layout spacing of BeginMenu() to allow MenuItem() inside a menu bar, which is a little misleading but may be useful + // Note that in this situation we render neither the shortcut neither the selected tick mark + float w = label_size.x; + window->DC.CursorPos.x += IM_FLOOR(style.ItemSpacing.x * 0.5f); + PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(style.ItemSpacing.x * 2.0f, style.ItemSpacing.y)); + pressed = Selectable(label, false, flags, ImVec2(w, 0.0f)); + PopStyleVar(); + window->DC.CursorPos.x += IM_FLOOR(style.ItemSpacing.x * (-1.0f + 0.5f)); // -1 spacing to compensate the spacing added when Selectable() did a SameLine(). It would also work to call SameLine() ourselves after the PopStyleVar(). + } + else + { + // Menu item inside a vertical menu + // (In a typical menu window where all items are BeginMenu() or MenuItem() calls, extra_w will always be 0.0f. + // Only when they are other items sticking out we're going to add spacing, yet only register minimum width into the layout system. + float shortcut_w = shortcut ? CalcTextSize(shortcut, NULL).x : 0.0f; + float min_w = window->DC.MenuColumns.DeclColumns(label_size.x, shortcut_w, IM_FLOOR(g.FontSize * 1.20f)); // Feedback for next frame + float extra_w = ImMax(0.0f, GetContentRegionAvail().x - min_w); + pressed = Selectable(label, false, flags | ImGuiSelectableFlags_SpanAvailWidth, ImVec2(min_w, 0.0f)); + if (shortcut_w > 0.0f) + { + PushStyleColor(ImGuiCol_Text, g.Style.Colors[ImGuiCol_TextDisabled]); + RenderText(pos + ImVec2(window->DC.MenuColumns.Pos[1] + extra_w, 0.0f), shortcut, NULL, false); + PopStyleColor(); + } + if (selected) + RenderCheckMark(window->DrawList, pos + ImVec2(window->DC.MenuColumns.Pos[2] + extra_w + g.FontSize * 0.40f, g.FontSize * 0.134f * 0.5f), GetColorU32(enabled ? 
ImGuiCol_Text : ImGuiCol_TextDisabled), g.FontSize * 0.866f); + } + + IMGUI_TEST_ENGINE_ITEM_INFO(window->DC.LastItemId, label, window->DC.ItemFlags | ImGuiItemStatusFlags_Checkable | (selected ? ImGuiItemStatusFlags_Checked : 0)); + return pressed; +} + +bool ImGui::MenuItem(const char* label, const char* shortcut, bool* p_selected, bool enabled) +{ + if (MenuItem(label, shortcut, p_selected ? *p_selected : false, enabled)) + { + if (p_selected) + *p_selected = !*p_selected; + return true; + } + return false; +} + +//------------------------------------------------------------------------- +// [SECTION] Widgets: BeginTabBar, EndTabBar, etc. +//------------------------------------------------------------------------- +// - BeginTabBar() +// - BeginTabBarEx() [Internal] +// - EndTabBar() +// - TabBarLayout() [Internal] +// - TabBarCalcTabID() [Internal] +// - TabBarCalcMaxTabWidth() [Internal] +// - TabBarFindTabById() [Internal] +// - TabBarRemoveTab() [Internal] +// - TabBarCloseTab() [Internal] +// - TabBarScrollClamp() [Internal] +// - TabBarScrollToTab() [Internal] +// - TabBarQueueChangeTabOrder() [Internal] +// - TabBarScrollingButtons() [Internal] +// - TabBarTabListPopupButton() [Internal] +//------------------------------------------------------------------------- + +struct ImGuiTabBarSection +{ + int TabCount; // Number of tabs in this section. + float Width; // Sum of width of tabs in this section (after shrinking down) + float Spacing; // Horizontal spacing at the end of the section. 
+ + ImGuiTabBarSection() { memset(this, 0, sizeof(*this)); } +}; + +namespace ImGui +{ + static void TabBarLayout(ImGuiTabBar* tab_bar); + static ImU32 TabBarCalcTabID(ImGuiTabBar* tab_bar, const char* label); + static float TabBarCalcMaxTabWidth(); + static float TabBarScrollClamp(ImGuiTabBar* tab_bar, float scrolling); + static void TabBarScrollToTab(ImGuiTabBar* tab_bar, ImGuiTabItem* tab, ImGuiTabBarSection* sections); + static ImGuiTabItem* TabBarScrollingButtons(ImGuiTabBar* tab_bar); + static ImGuiTabItem* TabBarTabListPopupButton(ImGuiTabBar* tab_bar); +} + +ImGuiTabBar::ImGuiTabBar() +{ + memset(this, 0, sizeof(*this)); + CurrFrameVisible = PrevFrameVisible = -1; + LastTabItemIdx = -1; +} + +static int IMGUI_CDECL TabItemComparerBySection(const void* lhs, const void* rhs) +{ + const ImGuiTabItem* a = (const ImGuiTabItem*)lhs; + const ImGuiTabItem* b = (const ImGuiTabItem*)rhs; + const int a_section = (a->Flags & ImGuiTabItemFlags_Leading) ? 0 : (a->Flags & ImGuiTabItemFlags_Trailing) ? 2 : 1; + const int b_section = (b->Flags & ImGuiTabItemFlags_Leading) ? 0 : (b->Flags & ImGuiTabItemFlags_Trailing) ? 2 : 1; + if (a_section != b_section) + return a_section - b_section; + return (int)(a->IndexDuringLayout - b->IndexDuringLayout); +} + +static int IMGUI_CDECL TabItemComparerByBeginOrder(const void* lhs, const void* rhs) +{ + const ImGuiTabItem* a = (const ImGuiTabItem*)lhs; + const ImGuiTabItem* b = (const ImGuiTabItem*)rhs; + return (int)(a->BeginOrder - b->BeginOrder); +} + +static ImGuiTabBar* GetTabBarFromTabBarRef(const ImGuiPtrOrIndex& ref) +{ + ImGuiContext& g = *GImGui; + return ref.Ptr ? 
(ImGuiTabBar*)ref.Ptr : g.TabBars.GetByIndex(ref.Index); +} + +static ImGuiPtrOrIndex GetTabBarRefFromTabBar(ImGuiTabBar* tab_bar) +{ + ImGuiContext& g = *GImGui; + if (g.TabBars.Contains(tab_bar)) + return ImGuiPtrOrIndex(g.TabBars.GetIndex(tab_bar)); + return ImGuiPtrOrIndex(tab_bar); +} + +bool ImGui::BeginTabBar(const char* str_id, ImGuiTabBarFlags flags) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (window->SkipItems) + return false; + + ImGuiID id = window->GetID(str_id); + ImGuiTabBar* tab_bar = g.TabBars.GetOrAddByKey(id); + ImRect tab_bar_bb = ImRect(window->DC.CursorPos.x, window->DC.CursorPos.y, window->WorkRect.Max.x, window->DC.CursorPos.y + g.FontSize + g.Style.FramePadding.y * 2); + tab_bar->ID = id; + return BeginTabBarEx(tab_bar, tab_bar_bb, flags | ImGuiTabBarFlags_IsFocused); +} + +bool ImGui::BeginTabBarEx(ImGuiTabBar* tab_bar, const ImRect& tab_bar_bb, ImGuiTabBarFlags flags) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (window->SkipItems) + return false; + + if ((flags & ImGuiTabBarFlags_DockNode) == 0) + PushOverrideID(tab_bar->ID); + + // Add to stack + g.CurrentTabBarStack.push_back(GetTabBarRefFromTabBar(tab_bar)); + g.CurrentTabBar = tab_bar; + + // Append with multiple BeginTabBar()/EndTabBar() pairs. 
+ tab_bar->BackupCursorPos = window->DC.CursorPos; + if (tab_bar->CurrFrameVisible == g.FrameCount) + { + window->DC.CursorPos = ImVec2(tab_bar->BarRect.Min.x, tab_bar->BarRect.Max.y + tab_bar->ItemSpacingY); + tab_bar->BeginCount++; + return true; + } + + // Ensure correct ordering when toggling ImGuiTabBarFlags_Reorderable flag, or when a new tab was added while being not reorderable + if ((flags & ImGuiTabBarFlags_Reorderable) != (tab_bar->Flags & ImGuiTabBarFlags_Reorderable) || (tab_bar->TabsAddedNew && !(flags & ImGuiTabBarFlags_Reorderable))) + if (tab_bar->Tabs.Size > 1) + ImQsort(tab_bar->Tabs.Data, tab_bar->Tabs.Size, sizeof(ImGuiTabItem), TabItemComparerByBeginOrder); + tab_bar->TabsAddedNew = false; + + // Flags + if ((flags & ImGuiTabBarFlags_FittingPolicyMask_) == 0) + flags |= ImGuiTabBarFlags_FittingPolicyDefault_; + + tab_bar->Flags = flags; + tab_bar->BarRect = tab_bar_bb; + tab_bar->WantLayout = true; // Layout will be done on the first call to ItemTab() + tab_bar->PrevFrameVisible = tab_bar->CurrFrameVisible; + tab_bar->CurrFrameVisible = g.FrameCount; + tab_bar->PrevTabsContentsHeight = tab_bar->CurrTabsContentsHeight; + tab_bar->CurrTabsContentsHeight = 0.0f; + tab_bar->ItemSpacingY = g.Style.ItemSpacing.y; + tab_bar->FramePadding = g.Style.FramePadding; + tab_bar->TabsActiveCount = 0; + tab_bar->BeginCount = 1; + + // Set cursor pos in a way which only be used in the off-chance the user erroneously submits item before BeginTabItem(): items will overlap + window->DC.CursorPos = ImVec2(tab_bar->BarRect.Min.x, tab_bar->BarRect.Max.y + tab_bar->ItemSpacingY); + + // Draw separator + const ImU32 col = GetColorU32((flags & ImGuiTabBarFlags_IsFocused) ? 
ImGuiCol_TabActive : ImGuiCol_TabUnfocusedActive); + const float y = tab_bar->BarRect.Max.y - 1.0f; + { + const float separator_min_x = tab_bar->BarRect.Min.x - IM_FLOOR(window->WindowPadding.x * 0.5f); + const float separator_max_x = tab_bar->BarRect.Max.x + IM_FLOOR(window->WindowPadding.x * 0.5f); + window->DrawList->AddLine(ImVec2(separator_min_x, y), ImVec2(separator_max_x, y), col, 1.0f); + } + return true; +} + +void ImGui::EndTabBar() +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (window->SkipItems) + return; + + ImGuiTabBar* tab_bar = g.CurrentTabBar; + if (tab_bar == NULL) + { + IM_ASSERT_USER_ERROR(tab_bar != NULL, "Mismatched BeginTabBar()/EndTabBar()!"); + return; + } + + // Fallback in case no TabItem have been submitted + if (tab_bar->WantLayout) + TabBarLayout(tab_bar); + + // Restore the last visible height if no tab is visible, this reduce vertical flicker/movement when a tabs gets removed without calling SetTabItemClosed(). + const bool tab_bar_appearing = (tab_bar->PrevFrameVisible + 1 < g.FrameCount); + if (tab_bar->VisibleTabWasSubmitted || tab_bar->VisibleTabId == 0 || tab_bar_appearing) + { + tab_bar->CurrTabsContentsHeight = ImMax(window->DC.CursorPos.y - tab_bar->BarRect.Max.y, tab_bar->CurrTabsContentsHeight); + window->DC.CursorPos.y = tab_bar->BarRect.Max.y + tab_bar->CurrTabsContentsHeight; + } + else + { + window->DC.CursorPos.y = tab_bar->BarRect.Max.y + tab_bar->PrevTabsContentsHeight; + } + if (tab_bar->BeginCount > 1) + window->DC.CursorPos = tab_bar->BackupCursorPos; + + if ((tab_bar->Flags & ImGuiTabBarFlags_DockNode) == 0) + PopID(); + + g.CurrentTabBarStack.pop_back(); + g.CurrentTabBar = g.CurrentTabBarStack.empty() ? NULL : GetTabBarFromTabBarRef(g.CurrentTabBarStack.back()); +} + +// This is called only once a frame before by the first call to ItemTab() +// The reason we're not calling it in BeginTabBar() is to leave a chance to the user to call the SetTabItemClosed() functions. 
+// Performs the per-frame layout of 'tab_bar' and clears its WantLayout flag. In order:
+//  1. drops tabs that were not submitted recently or asked to close, compacting the array and counting tabs per section;
+//  2. re-sorts tabs by section when their stored order no longer matches leading / central / trailing;
+//  3. applies pending selection and reorder requests, then the optional tab-list popup button;
+//  4. computes ideal widths, adds scrolling buttons when needed and shrinks tabs that do not fit;
+//  5. assigns each tab its offset, locks the visible tab and updates the scrolling animation;
+//  6. reserves the tab bar rectangle in the current window.
+static void ImGui::TabBarLayout(ImGuiTabBar* tab_bar)
+{
+    ImGuiContext& g = *GImGui;
+    tab_bar->WantLayout = false;
+
+    // Garbage collect by compacting list
+    // Detect if we need to sort out tab list (e.g. in rare case where a tab changed section)
+    int tab_dst_n = 0;
+    bool need_sort_by_section = false;
+    ImGuiTabBarSection sections[3]; // Layout sections: Leading, Central, Trailing
+    for (int tab_src_n = 0; tab_src_n < tab_bar->Tabs.Size; tab_src_n++)
+    {
+        ImGuiTabItem* tab = &tab_bar->Tabs[tab_src_n];
+        if (tab->LastFrameVisible < tab_bar->PrevFrameVisible || tab->WantClose)
+        {
+            // Remove tab
+            if (tab_bar->VisibleTabId == tab->ID) { tab_bar->VisibleTabId = 0; }
+            if (tab_bar->SelectedTabId == tab->ID) { tab_bar->SelectedTabId = 0; }
+            if (tab_bar->NextSelectedTabId == tab->ID) { tab_bar->NextSelectedTabId = 0; }
+            continue;
+        }
+        if (tab_dst_n != tab_src_n)
+            tab_bar->Tabs[tab_dst_n] = tab_bar->Tabs[tab_src_n];
+
+        tab = &tab_bar->Tabs[tab_dst_n];
+        tab->IndexDuringLayout = (ImS16)tab_dst_n;
+
+        // We will need sorting if tabs have changed section (e.g. moved from one of Leading/Central/Trailing to another)
+        int curr_tab_section_n = (tab->Flags & ImGuiTabItemFlags_Leading) ? 0 : (tab->Flags & ImGuiTabItemFlags_Trailing) ? 2 : 1;
+        if (tab_dst_n > 0)
+        {
+            ImGuiTabItem* prev_tab = &tab_bar->Tabs[tab_dst_n - 1];
+            int prev_tab_section_n = (prev_tab->Flags & ImGuiTabItemFlags_Leading) ? 0 : (prev_tab->Flags & ImGuiTabItemFlags_Trailing) ? 2 : 1;
+            if (curr_tab_section_n == 0 && prev_tab_section_n != 0)
+                need_sort_by_section = true;
+            if (prev_tab_section_n == 2 && curr_tab_section_n != 2)
+                need_sort_by_section = true;
+        }
+
+        sections[curr_tab_section_n].TabCount++;
+        tab_dst_n++;
+    }
+    if (tab_bar->Tabs.Size != tab_dst_n)
+        tab_bar->Tabs.resize(tab_dst_n);
+
+    if (need_sort_by_section)
+        ImQsort(tab_bar->Tabs.Data, tab_bar->Tabs.Size, sizeof(ImGuiTabItem), TabItemComparerBySection);
+
+    // Calculate spacing between sections
+    sections[0].Spacing = sections[0].TabCount > 0 && (sections[1].TabCount + sections[2].TabCount) > 0 ? g.Style.ItemInnerSpacing.x : 0.0f;
+    sections[1].Spacing = sections[1].TabCount > 0 && sections[2].TabCount > 0 ? g.Style.ItemInnerSpacing.x : 0.0f;
+
+    // Setup next selected tab
+    ImGuiID scroll_track_selected_tab_id = 0;
+    if (tab_bar->NextSelectedTabId)
+    {
+        tab_bar->SelectedTabId = tab_bar->NextSelectedTabId;
+        tab_bar->NextSelectedTabId = 0;
+        scroll_track_selected_tab_id = tab_bar->SelectedTabId;
+    }
+
+    // Process order change request (we could probably process it when requested but it's just saner to do it in a single spot).
+    if (tab_bar->ReorderRequestTabId != 0)
+    {
+        if (TabBarProcessReorder(tab_bar))
+            if (tab_bar->ReorderRequestTabId == tab_bar->SelectedTabId)
+                scroll_track_selected_tab_id = tab_bar->ReorderRequestTabId;
+        tab_bar->ReorderRequestTabId = 0;
+    }
+
+    // Tab List Popup (will alter tab_bar->BarRect and therefore the available width!)
+    const bool tab_list_popup_button = (tab_bar->Flags & ImGuiTabBarFlags_TabListPopupButton) != 0;
+    if (tab_list_popup_button)
+        if (ImGuiTabItem* tab_to_select = TabBarTabListPopupButton(tab_bar)) // NB: Will alter BarRect.Min.x!
+            scroll_track_selected_tab_id = tab_bar->SelectedTabId = tab_to_select->ID;
+
+    // Leading/Trailing tabs will be shrink only if central one aren't visible anymore, so layout the shrink data as: leading, trailing, central
+    // (whereas our tabs are stored as: leading, central, trailing)
+    int shrink_buffer_indexes[3] = { 0, sections[0].TabCount + sections[2].TabCount, sections[0].TabCount };
+    g.ShrinkWidthBuffer.resize(tab_bar->Tabs.Size);
+
+    // Compute ideal tabs widths + store them into shrink buffer
+    ImGuiTabItem* most_recently_selected_tab = NULL;
+    int curr_section_n = -1;
+    bool found_selected_tab_id = false;
+    for (int tab_n = 0; tab_n < tab_bar->Tabs.Size; tab_n++)
+    {
+        ImGuiTabItem* tab = &tab_bar->Tabs[tab_n];
+        IM_ASSERT(tab->LastFrameVisible >= tab_bar->PrevFrameVisible);
+
+        if ((most_recently_selected_tab == NULL || most_recently_selected_tab->LastFrameSelected < tab->LastFrameSelected) && !(tab->Flags & ImGuiTabItemFlags_Button))
+            most_recently_selected_tab = tab;
+        if (tab->ID == tab_bar->SelectedTabId)
+            found_selected_tab_id = true;
+        if (scroll_track_selected_tab_id == 0 && g.NavJustMovedToId == tab->ID)
+            scroll_track_selected_tab_id = tab->ID;
+
+        // Refresh tab width immediately, otherwise changes of style e.g. style.FramePadding.x would noticeably lag in the tab bar.
+        // Additionally, when using TabBarAddTab() to manipulate tab bar order we occasionally insert new tabs that don't have a width yet,
+        // and we cannot wait for the next BeginTabItem() call. We cannot compute this width within TabBarAddTab() because font size depends on the active window.
+        const char* tab_name = tab_bar->GetTabName(tab);
+        const bool has_close_button = (tab->Flags & ImGuiTabItemFlags_NoCloseButton) ? false : true;
+        tab->ContentWidth = TabItemCalcSize(tab_name, has_close_button).x;
+
+        // Accumulate the section width; inner spacing is only added between consecutive tabs of the same section.
+        int section_n = (tab->Flags & ImGuiTabItemFlags_Leading) ? 0 : (tab->Flags & ImGuiTabItemFlags_Trailing) ? 2 : 1;
+        ImGuiTabBarSection* section = &sections[section_n];
+        section->Width += tab->ContentWidth + (section_n == curr_section_n ? g.Style.ItemInnerSpacing.x : 0.0f);
+        curr_section_n = section_n;
+
+        // Store data so we can build an array sorted by width if we need to shrink tabs down
+        int shrink_buffer_index = shrink_buffer_indexes[section_n]++;
+        g.ShrinkWidthBuffer[shrink_buffer_index].Index = tab_n;
+        g.ShrinkWidthBuffer[shrink_buffer_index].Width = tab->ContentWidth;
+
+        IM_ASSERT(tab->ContentWidth > 0.0f);
+        tab->Width = tab->ContentWidth;
+    }
+
+    // Compute total ideal width (used for e.g. auto-resizing a window)
+    tab_bar->WidthAllTabsIdeal = 0.0f;
+    for (int section_n = 0; section_n < 3; section_n++)
+        tab_bar->WidthAllTabsIdeal += sections[section_n].Width + sections[section_n].Spacing;
+
+    // Horizontal scrolling buttons
+    // (note that TabBarScrollingButtons() will alter BarRect.Max.x)
+    if ((tab_bar->WidthAllTabsIdeal > tab_bar->BarRect.GetWidth() && tab_bar->Tabs.Size > 1) && !(tab_bar->Flags & ImGuiTabBarFlags_NoTabListScrollingButtons) && (tab_bar->Flags & ImGuiTabBarFlags_FittingPolicyScroll))
+        if (ImGuiTabItem* scroll_track_selected_tab = TabBarScrollingButtons(tab_bar))
+        {
+            scroll_track_selected_tab_id = scroll_track_selected_tab->ID;
+            if (!(scroll_track_selected_tab->Flags & ImGuiTabItemFlags_Button))
+                tab_bar->SelectedTabId = scroll_track_selected_tab_id;
+        }
+
+    // Shrink widths if full tabs don't fit in their allocated space
+    float section_0_w = sections[0].Width + sections[0].Spacing;
+    float section_1_w = sections[1].Width + sections[1].Spacing;
+    float section_2_w = sections[2].Width + sections[2].Spacing;
+    bool central_section_is_visible = (section_0_w + section_2_w) < tab_bar->BarRect.GetWidth();
+    float width_excess;
+    if (central_section_is_visible)
+        width_excess = ImMax(section_1_w - (tab_bar->BarRect.GetWidth() - section_0_w - section_2_w), 0.0f); // Excess used to shrink central section
+    else
+        width_excess = (section_0_w + section_2_w) - tab_bar->BarRect.GetWidth(); // Excess used to shrink leading/trailing section
+
+    // With ImGuiTabBarFlags_FittingPolicyScroll policy, we will only shrink leading/trailing if the central section is not visible anymore
+    if (width_excess > 0.0f && ((tab_bar->Flags & ImGuiTabBarFlags_FittingPolicyResizeDown) || !central_section_is_visible))
+    {
+        // The shrink buffer is laid out as leading, trailing, central: pick the slice matching what we shrink.
+        int shrink_data_count = (central_section_is_visible ? sections[1].TabCount : sections[0].TabCount + sections[2].TabCount);
+        int shrink_data_offset = (central_section_is_visible ? sections[0].TabCount + sections[2].TabCount : 0);
+        ShrinkWidths(g.ShrinkWidthBuffer.Data + shrink_data_offset, shrink_data_count, width_excess);
+
+        // Apply shrunk values into tabs and sections
+        for (int tab_n = shrink_data_offset; tab_n < shrink_data_offset + shrink_data_count; tab_n++)
+        {
+            ImGuiTabItem* tab = &tab_bar->Tabs[g.ShrinkWidthBuffer[tab_n].Index];
+            float shrinked_width = IM_FLOOR(g.ShrinkWidthBuffer[tab_n].Width);
+            if (shrinked_width < 0.0f)
+                continue;
+
+            int section_n = (tab->Flags & ImGuiTabItemFlags_Leading) ? 0 : (tab->Flags & ImGuiTabItemFlags_Trailing) ? 2 : 1;
+            sections[section_n].Width -= (tab->Width - shrinked_width);
+            tab->Width = shrinked_width;
+        }
+    }
+
+    // Layout all active tabs
+    int section_tab_index = 0;
+    float tab_offset = 0.0f;
+    tab_bar->WidthAllTabs = 0.0f;
+    for (int section_n = 0; section_n < 3; section_n++)
+    {
+        ImGuiTabBarSection* section = &sections[section_n];
+        // The trailing section never starts further right than (bar width - its own width).
+        if (section_n == 2)
+            tab_offset = ImMin(ImMax(0.0f, tab_bar->BarRect.GetWidth() - section->Width), tab_offset);
+
+        for (int tab_n = 0; tab_n < section->TabCount; tab_n++)
+        {
+            ImGuiTabItem* tab = &tab_bar->Tabs[section_tab_index + tab_n];
+            tab->Offset = tab_offset;
+            tab_offset += tab->Width + (tab_n < section->TabCount - 1 ? g.Style.ItemInnerSpacing.x : 0.0f);
+        }
+        tab_bar->WidthAllTabs += ImMax(section->Width + section->Spacing, 0.0f);
+        tab_offset += section->Spacing;
+        section_tab_index += section->TabCount;
+    }
+
+    // If we have lost the selected tab, select the next most recently active one
+    if (found_selected_tab_id == false)
+        tab_bar->SelectedTabId = 0;
+    if (tab_bar->SelectedTabId == 0 && tab_bar->NextSelectedTabId == 0 && most_recently_selected_tab != NULL)
+        scroll_track_selected_tab_id = tab_bar->SelectedTabId = most_recently_selected_tab->ID;
+
+    // Lock in visible tab
+    tab_bar->VisibleTabId = tab_bar->SelectedTabId;
+    tab_bar->VisibleTabWasSubmitted = false;
+
+    // Update scrolling
+    if (scroll_track_selected_tab_id)
+        if (ImGuiTabItem* scroll_track_selected_tab = TabBarFindTabByID(tab_bar, scroll_track_selected_tab_id))
+            TabBarScrollToTab(tab_bar, scroll_track_selected_tab, sections);
+    tab_bar->ScrollingAnim = TabBarScrollClamp(tab_bar, tab_bar->ScrollingAnim);
+    tab_bar->ScrollingTarget = TabBarScrollClamp(tab_bar, tab_bar->ScrollingTarget);
+    if (tab_bar->ScrollingAnim != tab_bar->ScrollingTarget)
+    {
+        // Scrolling speed adjust itself so we can always reach our target in 1/3 seconds.
+        // Teleport if we are aiming far off the visible line
+        tab_bar->ScrollingSpeed = ImMax(tab_bar->ScrollingSpeed, 70.0f * g.FontSize);
+        tab_bar->ScrollingSpeed = ImMax(tab_bar->ScrollingSpeed, ImFabs(tab_bar->ScrollingTarget - tab_bar->ScrollingAnim) / 0.3f);
+        const bool teleport = (tab_bar->PrevFrameVisible + 1 < g.FrameCount) || (tab_bar->ScrollingTargetDistToVisibility > 10.0f * g.FontSize);
+        tab_bar->ScrollingAnim = teleport ? tab_bar->ScrollingTarget : ImLinearSweep(tab_bar->ScrollingAnim, tab_bar->ScrollingTarget, g.IO.DeltaTime * tab_bar->ScrollingSpeed);
+    }
+    else
+    {
+        tab_bar->ScrollingSpeed = 0.0f;
+    }
+    tab_bar->ScrollingRectMinX = tab_bar->BarRect.Min.x + sections[0].Width + sections[0].Spacing;
+    tab_bar->ScrollingRectMaxX = tab_bar->BarRect.Max.x - sections[2].Width - sections[1].Spacing;
+
+    // Clear name buffers
+    if ((tab_bar->Flags & ImGuiTabBarFlags_DockNode) == 0)
+        tab_bar->TabsNames.Buf.resize(0);
+
+    // Actual layout in host window (we don't do it in BeginTabBar() so as not to waste an extra frame)
+    ImGuiWindow* window = g.CurrentWindow;
+    window->DC.CursorPos = tab_bar->BarRect.Min;
+    ItemSize(ImVec2(tab_bar->WidthAllTabs, tab_bar->BarRect.GetHeight()), tab_bar->FramePadding.y);
+    window->DC.IdealMaxPos.x = ImMax(window->DC.IdealMaxPos.x, tab_bar->BarRect.Min.x + tab_bar->WidthAllTabsIdeal);
+}
+
+// Dockables uses Name/ID in the global namespace. Non-dockable items use the ID stack.
+// Returns the ID of the tab labelled 'label': a plain hash of the label (kept alive) for dock-node tab bars,
+// or the current window's ID-stack based ID otherwise.
+static ImU32 ImGui::TabBarCalcTabID(ImGuiTabBar* tab_bar, const char* label)
+{
+    if (tab_bar->Flags & ImGuiTabBarFlags_DockNode)
+    {
+        ImGuiID id = ImHashStr(label);
+        KeepAliveID(id);
+        return id;
+    }
+    else
+    {
+        ImGuiWindow* window = GImGui->CurrentWindow;
+        return window->GetID(label);
+    }
+}
+
+// Returns the maximum tab width: 20 times the current font size.
+static float ImGui::TabBarCalcMaxTabWidth()
+{
+    ImGuiContext& g = *GImGui;
+    return g.FontSize * 20.0f;
+}
+
+// Linear search for the tab whose ID equals 'tab_id'. Returns NULL when 'tab_id' is 0 or no tab matches.
+// The returned pointer points into tab_bar->Tabs.
+ImGuiTabItem* ImGui::TabBarFindTabByID(ImGuiTabBar* tab_bar, ImGuiID tab_id)
+{
+    if (tab_id != 0)
+        for (int n = 0; n < tab_bar->Tabs.Size; n++)
+            if (tab_bar->Tabs[n].ID == tab_id)
+                return &tab_bar->Tabs[n];
+    return NULL;
+}
+
+// The *TabId fields be already set by the docking system _before_ the actual TabItem was created, so we clear them regardless.
+// Removes the tab identified by 'tab_id' from 'tab_bar' (if present) and clears any of the
+// Visible/Selected/NextSelected IDs that refer to it, whether or not a tab was found.
+void ImGui::TabBarRemoveTab(ImGuiTabBar* tab_bar, ImGuiID tab_id)
+{
+    if (ImGuiTabItem* tab = TabBarFindTabByID(tab_bar, tab_id))
+        tab_bar->Tabs.erase(tab);
+    if (tab_bar->VisibleTabId == tab_id) { tab_bar->VisibleTabId = 0; }
+    if (tab_bar->SelectedTabId == tab_id) { tab_bar->SelectedTabId = 0; }
+    if (tab_bar->NextSelectedTabId == tab_id) { tab_bar->NextSelectedTabId = 0; }
+}
+
+// Called on manual closure attempt
+// 'tab' must not be a Button tab (asserted). Without the UnsavedDocument flag the tab is flagged WantClose;
+// with it, the tab is only queued for selection so the user code can react first.
+void ImGui::TabBarCloseTab(ImGuiTabBar* tab_bar, ImGuiTabItem* tab)
+{
+    IM_ASSERT(!(tab->Flags & ImGuiTabItemFlags_Button));
+    if (!(tab->Flags & ImGuiTabItemFlags_UnsavedDocument))
+    {
+        // This will remove a frame of lag for selecting another tab on closure.
+        // However we don't run it in the case where the 'Unsaved' flag is set, so user gets a chance to fully undo the closure
+        tab->WantClose = true;
+        if (tab_bar->VisibleTabId == tab->ID)
+        {
+            tab->LastFrameVisible = -1;
+            tab_bar->SelectedTabId = tab_bar->NextSelectedTabId = 0;
+        }
+    }
+    else
+    {
+        // Actually select before expecting closure attempt (on an UnsavedDocument tab user is expect to e.g. show a popup)
+        if (tab_bar->VisibleTabId != tab->ID)
+            tab_bar->NextSelectedTabId = tab->ID;
+    }
+}
+
+// Clamps a scrolling offset to [0, WidthAllTabs - bar width]; the lower bound wins when that range is empty.
+static float ImGui::TabBarScrollClamp(ImGuiTabBar* tab_bar, float scrolling)
+{
+    scrolling = ImMin(scrolling, tab_bar->WidthAllTabs - tab_bar->BarRect.GetWidth());
+    return ImMax(scrolling, 0.0f);
+}
+
+// Updates tab_bar->ScrollingTarget (and ScrollingTargetDistToVisibility) so that 'tab' becomes visible.
+// Leading and trailing tabs are ignored. 'sections' is indexed 0..2 here (leading, central, trailing).
+static void ImGui::TabBarScrollToTab(ImGuiTabBar* tab_bar, ImGuiTabItem* tab, ImGuiTabBarSection* sections)
+{
+    if (tab->Flags & (ImGuiTabItemFlags_Leading | ImGuiTabItemFlags_Trailing))
+        return;
+
+    ImGuiContext& g = *GImGui;
+    float margin = g.FontSize * 1.0f; // When to scroll to make Tab N+1 visible always make a bit of N visible to suggest more scrolling area (since we don't have a scrollbar)
+    int order = tab_bar->GetTabOrder(tab);
+
+    // Scrolling happens only in the central section (leading/trailing sections are not scrolling)
+    // FIXME: This is all confusing.
+    float scrollable_width = tab_bar->BarRect.GetWidth() - sections[0].Width - sections[2].Width - sections[1].Spacing;
+
+    // We make all tabs positions all relative Sections[0].Width to make code simpler
+    float tab_x1 = tab->Offset - sections[0].Width + (order > sections[0].TabCount - 1 ? -margin : 0.0f);
+    float tab_x2 = tab->Offset - sections[0].Width + tab->Width + (order + 1 < tab_bar->Tabs.Size - sections[2].TabCount ? margin : 1.0f);
+    tab_bar->ScrollingTargetDistToVisibility = 0.0f;
+    if (tab_bar->ScrollingTarget > tab_x1 || (tab_x2 - tab_x1 >= scrollable_width))
+    {
+        // Scroll to the left
+        tab_bar->ScrollingTargetDistToVisibility = ImMax(tab_bar->ScrollingAnim - tab_x2, 0.0f);
+        tab_bar->ScrollingTarget = tab_x1;
+    }
+    else if (tab_bar->ScrollingTarget < tab_x2 - scrollable_width)
+    {
+        // Scroll to the right
+        tab_bar->ScrollingTargetDistToVisibility = ImMax((tab_x1 - scrollable_width) - tab_bar->ScrollingAnim, 0.0f);
+        tab_bar->ScrollingTarget = tab_x2 - scrollable_width;
+    }
+}
+
+// Records a request to move 'tab' by one position ('dir' must be -1 or +1). Only one request may be
+// pending at a time (asserted); TabBarLayout() processes and clears it.
+void ImGui::TabBarQueueReorder(ImGuiTabBar* tab_bar, const ImGuiTabItem* tab, int dir)
+{
+    IM_ASSERT(dir == -1 || dir == +1);
+    IM_ASSERT(tab_bar->ReorderRequestTabId == 0);
+    tab_bar->ReorderRequestTabId = tab->ID;
+    tab_bar->ReorderRequestDir = (ImS8)dir;
+}
+
+// Applies the pending reorder request by swapping the requested tab with its neighbour in the requested direction.
+// Returns false, leaving the order untouched, when the tab is missing, either tab has NoReorder set, the
+// neighbour index is out of range, or the two tabs do not share the same Leading/Trailing flags.
+// Returns true after a swap; the settings are marked dirty when the tab bar has ImGuiTabBarFlags_SaveSettings.
+bool ImGui::TabBarProcessReorder(ImGuiTabBar* tab_bar)
+{
+    ImGuiTabItem* tab1 = TabBarFindTabByID(tab_bar, tab_bar->ReorderRequestTabId);
+    if (tab1 == NULL || (tab1->Flags & ImGuiTabItemFlags_NoReorder))
+        return false;
+
+    //IM_ASSERT(tab_bar->Flags & ImGuiTabBarFlags_Reorderable); // <- this may happen when using debug tools
+    int tab2_order = tab_bar->GetTabOrder(tab1) + tab_bar->ReorderRequestDir;
+    if (tab2_order < 0 || tab2_order >= tab_bar->Tabs.Size)
+        return false;
+
+    // Reordered TabItem must share the same position flags than target
+    ImGuiTabItem* tab2 = &tab_bar->Tabs[tab2_order];
+    if (tab2->Flags & ImGuiTabItemFlags_NoReorder)
+        return false;
+    if ((tab1->Flags & (ImGuiTabItemFlags_Leading | ImGuiTabItemFlags_Trailing)) != (tab2->Flags & (ImGuiTabItemFlags_Leading | ImGuiTabItemFlags_Trailing)))
+        return false;
+
+    ImGuiTabItem item_tmp = *tab1;
+    *tab1 = *tab2;
+    *tab2 = item_tmp;
+
+    if (tab_bar->Flags & ImGuiTabBarFlags_SaveSettings)
+        MarkIniSettingsDirty();
+    return true;
+}
+
+static ImGuiTabItem* ImGui::TabBarScrollingButtons(ImGuiTabBar* tab_bar)
+{
+    ImGuiContext& g = *GImGui;
+    ImGuiWindow* window = g.CurrentWindow;
+
+    const ImVec2 arrow_button_size(g.FontSize - 2.0f, g.FontSize + g.Style.FramePadding.y * 2.0f);
+    const float scrolling_buttons_width = arrow_button_size.x * 2.0f;
+
+    const ImVec2 backup_cursor_pos = window->DC.CursorPos;
+    //window->DrawList->AddRect(ImVec2(tab_bar->BarRect.Max.x - scrolling_buttons_width, tab_bar->BarRect.Min.y), ImVec2(tab_bar->BarRect.Max.x, tab_bar->BarRect.Max.y), IM_COL32(255,0,0,255));
+
+    int select_dir = 0;
+    ImVec4 arrow_col = g.Style.Colors[ImGuiCol_Text];
+    arrow_col.w *= 0.5f;
+
+    PushStyleColor(ImGuiCol_Text, arrow_col);
+    PushStyleColor(ImGuiCol_Button, ImVec4(0, 0, 0, 0));
+    const float backup_repeat_delay = g.IO.KeyRepeatDelay;
+    const float backup_repeat_rate = g.IO.KeyRepeatRate;
+    g.IO.KeyRepeatDelay = 0.250f;
+    g.IO.KeyRepeatRate = 0.200f;
+    float x = ImMax(tab_bar->BarRect.Min.x, tab_bar->BarRect.Max.x - scrolling_buttons_width);
+    window->DC.CursorPos = ImVec2(x, tab_bar->BarRect.Min.y);
+    if (ArrowButtonEx("##<", ImGuiDir_Left, arrow_button_size, ImGuiButtonFlags_PressedOnClick | ImGuiButtonFlags_Repeat))
+        select_dir = -1;
+    window->DC.CursorPos = ImVec2(x + arrow_button_size.x, tab_bar->BarRect.Min.y);
+    if (ArrowButtonEx("##>", ImGuiDir_Right, arrow_button_size, ImGuiButtonFlags_PressedOnClick | ImGuiButtonFlags_Repeat))
+        select_dir = +1;
+    PopStyleColor(2);
+    g.IO.KeyRepeatRate = backup_repeat_rate;
+    g.IO.KeyRepeatDelay = backup_repeat_delay;
+
+    ImGuiTabItem* tab_to_scroll_to = NULL;
+    if (select_dir != 0)
+        if (ImGuiTabItem* tab_item = TabBarFindTabByID(tab_bar,
tab_bar->SelectedTabId)) + { + int selected_order = tab_bar->GetTabOrder(tab_item); + int target_order = selected_order + select_dir; + + // Skip tab item buttons until another tab item is found or end is reached + while (tab_to_scroll_to == NULL) + { + // If we are at the end of the list, still scroll to make our tab visible + tab_to_scroll_to = &tab_bar->Tabs[(target_order >= 0 && target_order < tab_bar->Tabs.Size) ? target_order : selected_order]; + + // Cross through buttons + // (even if first/last item is a button, return it so we can update the scroll) + if (tab_to_scroll_to->Flags & ImGuiTabItemFlags_Button) + { + target_order += select_dir; + selected_order += select_dir; + tab_to_scroll_to = (target_order < 0 || target_order >= tab_bar->Tabs.Size) ? tab_to_scroll_to : NULL; + } + } + } + window->DC.CursorPos = backup_cursor_pos; + tab_bar->BarRect.Max.x -= scrolling_buttons_width + 1.0f; + + return tab_to_scroll_to; +} + +static ImGuiTabItem* ImGui::TabBarTabListPopupButton(ImGuiTabBar* tab_bar) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + + // We use g.Style.FramePadding.y to match the square ArrowButton size + const float tab_list_popup_button_width = g.FontSize + g.Style.FramePadding.y; + const ImVec2 backup_cursor_pos = window->DC.CursorPos; + window->DC.CursorPos = ImVec2(tab_bar->BarRect.Min.x - g.Style.FramePadding.y, tab_bar->BarRect.Min.y); + tab_bar->BarRect.Min.x += tab_list_popup_button_width; + + ImVec4 arrow_col = g.Style.Colors[ImGuiCol_Text]; + arrow_col.w *= 0.5f; + PushStyleColor(ImGuiCol_Text, arrow_col); + PushStyleColor(ImGuiCol_Button, ImVec4(0, 0, 0, 0)); + bool open = BeginCombo("##v", NULL, ImGuiComboFlags_NoPreview | ImGuiComboFlags_HeightLargest); + PopStyleColor(2); + + ImGuiTabItem* tab_to_select = NULL; + if (open) + { + for (int tab_n = 0; tab_n < tab_bar->Tabs.Size; tab_n++) + { + ImGuiTabItem* tab = &tab_bar->Tabs[tab_n]; + if (tab->Flags & ImGuiTabItemFlags_Button) + continue; + + const char* 
tab_name = tab_bar->GetTabName(tab); + if (Selectable(tab_name, tab_bar->SelectedTabId == tab->ID)) + tab_to_select = tab; + } + EndCombo(); + } + + window->DC.CursorPos = backup_cursor_pos; + return tab_to_select; +} + +//------------------------------------------------------------------------- +// [SECTION] Widgets: BeginTabItem, EndTabItem, etc. +//------------------------------------------------------------------------- +// - BeginTabItem() +// - EndTabItem() +// - TabItemButton() +// - TabItemEx() [Internal] +// - SetTabItemClosed() +// - TabItemCalcSize() [Internal] +// - TabItemBackground() [Internal] +// - TabItemLabelAndCloseButton() [Internal] +//------------------------------------------------------------------------- + +bool ImGui::BeginTabItem(const char* label, bool* p_open, ImGuiTabItemFlags flags) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (window->SkipItems) + return false; + + ImGuiTabBar* tab_bar = g.CurrentTabBar; + if (tab_bar == NULL) + { + IM_ASSERT_USER_ERROR(tab_bar, "Needs to be called between BeginTabBar() and EndTabBar()!"); + return false; + } + IM_ASSERT(!(flags & ImGuiTabItemFlags_Button)); // BeginTabItem() Can't be used with button flags, use TabItemButton() instead! 
+ + bool ret = TabItemEx(tab_bar, label, p_open, flags); + if (ret && !(flags & ImGuiTabItemFlags_NoPushId)) + { + ImGuiTabItem* tab = &tab_bar->Tabs[tab_bar->LastTabItemIdx]; + PushOverrideID(tab->ID); // We already hashed 'label' so push into the ID stack directly instead of doing another hash through PushID(label) + } + return ret; +} + +void ImGui::EndTabItem() +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (window->SkipItems) + return; + + ImGuiTabBar* tab_bar = g.CurrentTabBar; + if (tab_bar == NULL) + { + IM_ASSERT_USER_ERROR(tab_bar != NULL, "Needs to be called between BeginTabBar() and EndTabBar()!"); + return; + } + IM_ASSERT(tab_bar->LastTabItemIdx >= 0); + ImGuiTabItem* tab = &tab_bar->Tabs[tab_bar->LastTabItemIdx]; + if (!(tab->Flags & ImGuiTabItemFlags_NoPushId)) + PopID(); +} + +bool ImGui::TabItemButton(const char* label, ImGuiTabItemFlags flags) +{ + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (window->SkipItems) + return false; + + ImGuiTabBar* tab_bar = g.CurrentTabBar; + if (tab_bar == NULL) + { + IM_ASSERT_USER_ERROR(tab_bar != NULL, "Needs to be called between BeginTabBar() and EndTabBar()!"); + return false; + } + return TabItemEx(tab_bar, label, NULL, flags | ImGuiTabItemFlags_Button | ImGuiTabItemFlags_NoReorder); +} + +bool ImGui::TabItemEx(ImGuiTabBar* tab_bar, const char* label, bool* p_open, ImGuiTabItemFlags flags) +{ + // Layout whole tab bar if not already done + if (tab_bar->WantLayout) + TabBarLayout(tab_bar); + + ImGuiContext& g = *GImGui; + ImGuiWindow* window = g.CurrentWindow; + if (window->SkipItems) + return false; + + const ImGuiStyle& style = g.Style; + const ImGuiID id = TabBarCalcTabID(tab_bar, label); + + // If the user called us with *p_open == false, we early out and don't render. + // We make a call to ItemAdd() so that attempts to use a contextual popup menu with an implicit ID won't use an older ID. 
+ IMGUI_TEST_ENGINE_ITEM_INFO(id, label, window->DC.LastItemStatusFlags); + if (p_open && !*p_open) + { + PushItemFlag(ImGuiItemFlags_NoNav | ImGuiItemFlags_NoNavDefaultFocus, true); + ItemAdd(ImRect(), id); + PopItemFlag(); + return false; + } + + IM_ASSERT(!p_open || !(flags & ImGuiTabItemFlags_Button)); + IM_ASSERT((flags & (ImGuiTabItemFlags_Leading | ImGuiTabItemFlags_Trailing)) != (ImGuiTabItemFlags_Leading | ImGuiTabItemFlags_Trailing)); // Can't use both Leading and Trailing + + // Store into ImGuiTabItemFlags_NoCloseButton, also honor ImGuiTabItemFlags_NoCloseButton passed by user (although not documented) + if (flags & ImGuiTabItemFlags_NoCloseButton) + p_open = NULL; + else if (p_open == NULL) + flags |= ImGuiTabItemFlags_NoCloseButton; + + // Calculate tab contents size + ImVec2 size = TabItemCalcSize(label, p_open != NULL); + + // Acquire tab data + ImGuiTabItem* tab = TabBarFindTabByID(tab_bar, id); + bool tab_is_new = false; + if (tab == NULL) + { + tab_bar->Tabs.push_back(ImGuiTabItem()); + tab = &tab_bar->Tabs.back(); + tab->ID = id; + tab->Width = size.x; + tab_bar->TabsAddedNew = true; + tab_is_new = true; + } + tab_bar->LastTabItemIdx = (ImS16)tab_bar->Tabs.index_from_ptr(tab); + tab->ContentWidth = size.x; + tab->BeginOrder = tab_bar->TabsActiveCount++; + + const bool tab_bar_appearing = (tab_bar->PrevFrameVisible + 1 < g.FrameCount); + const bool tab_bar_focused = (tab_bar->Flags & ImGuiTabBarFlags_IsFocused) != 0; + const bool tab_appearing = (tab->LastFrameVisible + 1 < g.FrameCount); + const bool is_tab_button = (flags & ImGuiTabItemFlags_Button) != 0; + tab->LastFrameVisible = g.FrameCount; + tab->Flags = flags; + + // Append name with zero-terminator + tab->NameOffset = (ImS16)tab_bar->TabsNames.size(); + tab_bar->TabsNames.append(label, label + strlen(label) + 1); + + // Update selected tab + if (tab_appearing && (tab_bar->Flags & ImGuiTabBarFlags_AutoSelectNewTabs) && tab_bar->NextSelectedTabId == 0) + if (!tab_bar_appearing || 
tab_bar->SelectedTabId == 0) + if (!is_tab_button) + tab_bar->NextSelectedTabId = id; // New tabs gets activated + if ((flags & ImGuiTabItemFlags_SetSelected) && (tab_bar->SelectedTabId != id)) // SetSelected can only be passed on explicit tab bar + if (!is_tab_button) + tab_bar->NextSelectedTabId = id; + + // Lock visibility + // (Note: tab_contents_visible != tab_selected... because CTRL+TAB operations may preview some tabs without selecting them!) + bool tab_contents_visible = (tab_bar->VisibleTabId == id); + if (tab_contents_visible) + tab_bar->VisibleTabWasSubmitted = true; + + // On the very first frame of a tab bar we let first tab contents be visible to minimize appearing glitches + if (!tab_contents_visible && tab_bar->SelectedTabId == 0 && tab_bar_appearing) + if (tab_bar->Tabs.Size == 1 && !(tab_bar->Flags & ImGuiTabBarFlags_AutoSelectNewTabs)) + tab_contents_visible = true; + + // Note that tab_is_new is not necessarily the same as tab_appearing! When a tab bar stops being submitted + // and then gets submitted again, the tabs will have 'tab_appearing=true' but 'tab_is_new=false'. 
+ if (tab_appearing && (!tab_bar_appearing || tab_is_new)) + { + PushItemFlag(ImGuiItemFlags_NoNav | ImGuiItemFlags_NoNavDefaultFocus, true); + ItemAdd(ImRect(), id); + PopItemFlag(); + if (is_tab_button) + return false; + return tab_contents_visible; + } + + if (tab_bar->SelectedTabId == id) + tab->LastFrameSelected = g.FrameCount; + + // Backup current layout position + const ImVec2 backup_main_cursor_pos = window->DC.CursorPos; + + // Layout + const bool is_central_section = (tab->Flags & (ImGuiTabItemFlags_Leading | ImGuiTabItemFlags_Trailing)) == 0; + size.x = tab->Width; + if (is_central_section) + window->DC.CursorPos = tab_bar->BarRect.Min + ImVec2(IM_FLOOR(tab->Offset - tab_bar->ScrollingAnim), 0.0f); + else + window->DC.CursorPos = tab_bar->BarRect.Min + ImVec2(tab->Offset, 0.0f); + ImVec2 pos = window->DC.CursorPos; + ImRect bb(pos, pos + size); + + // We don't have CPU clipping primitives to clip the CloseButton (until it becomes a texture), so need to add an extra draw call (temporary in the case of vertical animation) + const bool want_clip_rect = is_central_section && (bb.Min.x < tab_bar->ScrollingRectMinX || bb.Max.x > tab_bar->ScrollingRectMaxX); + if (want_clip_rect) + PushClipRect(ImVec2(ImMax(bb.Min.x, tab_bar->ScrollingRectMinX), bb.Min.y - 1), ImVec2(tab_bar->ScrollingRectMaxX, bb.Max.y), true); + + ImVec2 backup_cursor_max_pos = window->DC.CursorMaxPos; + ItemSize(bb.GetSize(), style.FramePadding.y); + window->DC.CursorMaxPos = backup_cursor_max_pos; + + if (!ItemAdd(bb, id)) + { + if (want_clip_rect) + PopClipRect(); + window->DC.CursorPos = backup_main_cursor_pos; + return tab_contents_visible; + } + + // Click to Select a tab + ImGuiButtonFlags button_flags = ((is_tab_button ? 
ImGuiButtonFlags_PressedOnClickRelease : ImGuiButtonFlags_PressedOnClick) | ImGuiButtonFlags_AllowItemOverlap); + if (g.DragDropActive) + button_flags |= ImGuiButtonFlags_PressedOnDragDropHold; + bool hovered, held; + bool pressed = ButtonBehavior(bb, id, &hovered, &held, button_flags); + if (pressed && !is_tab_button) + tab_bar->NextSelectedTabId = id; + hovered |= (g.HoveredId == id); + + // Allow the close button to overlap unless we are dragging (in which case we don't want any overlapping tabs to be hovered) + if (g.ActiveId != id) + SetItemAllowOverlap(); + + // Drag and drop: re-order tabs + if (held && !tab_appearing && IsMouseDragging(0)) + { + if (!g.DragDropActive && (tab_bar->Flags & ImGuiTabBarFlags_Reorderable)) + { + // While moving a tab it will jump on the other side of the mouse, so we also test for MouseDelta.x + if (g.IO.MouseDelta.x < 0.0f && g.IO.MousePos.x < bb.Min.x) + { + if (tab_bar->Flags & ImGuiTabBarFlags_Reorderable) + TabBarQueueReorder(tab_bar, tab, -1); + } + else if (g.IO.MouseDelta.x > 0.0f && g.IO.MousePos.x > bb.Max.x) + { + if (tab_bar->Flags & ImGuiTabBarFlags_Reorderable) + TabBarQueueReorder(tab_bar, tab, +1); + } + } + } + +#if 0 + if (hovered && g.HoveredIdNotActiveTimer > TOOLTIP_DELAY && bb.GetWidth() < tab->ContentWidth) + { + // Enlarge tab display when hovering + bb.Max.x = bb.Min.x + IM_FLOOR(ImLerp(bb.GetWidth(), tab->ContentWidth, ImSaturate((g.HoveredIdNotActiveTimer - 0.40f) * 6.0f))); + display_draw_list = GetForegroundDrawList(window); + TabItemBackground(display_draw_list, bb, flags, GetColorU32(ImGuiCol_TitleBgActive)); + } +#endif + + // Render tab shape + ImDrawList* display_draw_list = window->DrawList; + const ImU32 tab_col = GetColorU32((held || hovered) ? ImGuiCol_TabHovered : tab_contents_visible ? (tab_bar_focused ? ImGuiCol_TabActive : ImGuiCol_TabUnfocusedActive) : (tab_bar_focused ? 
ImGuiCol_Tab : ImGuiCol_TabUnfocused)); + TabItemBackground(display_draw_list, bb, flags, tab_col); + RenderNavHighlight(bb, id); + + // Select with right mouse button. This is so the common idiom for context menu automatically highlight the current widget. + const bool hovered_unblocked = IsItemHovered(ImGuiHoveredFlags_AllowWhenBlockedByPopup); + if (hovered_unblocked && (IsMouseClicked(1) || IsMouseReleased(1))) + if (!is_tab_button) + tab_bar->NextSelectedTabId = id; + + if (tab_bar->Flags & ImGuiTabBarFlags_NoCloseWithMiddleMouseButton) + flags |= ImGuiTabItemFlags_NoCloseWithMiddleMouseButton; + + // Render tab label, process close button + const ImGuiID close_button_id = p_open ? GetIDWithSeed("#CLOSE", NULL, id) : 0; + bool just_closed; + bool text_clipped; + TabItemLabelAndCloseButton(display_draw_list, bb, flags, tab_bar->FramePadding, label, id, close_button_id, tab_contents_visible, &just_closed, &text_clipped); + if (just_closed && p_open != NULL) + { + *p_open = false; + TabBarCloseTab(tab_bar, tab); + } + + // Restore main window position so user can draw there + if (want_clip_rect) + PopClipRect(); + window->DC.CursorPos = backup_main_cursor_pos; + + // Tooltip (FIXME: Won't work over the close button because ItemOverlap systems messes up with HoveredIdTimer) + // We test IsItemHovered() to discard e.g. 
when another item is active or drag and drop over the tab bar (which g.HoveredId ignores) + if (text_clipped && g.HoveredId == id && !held && g.HoveredIdNotActiveTimer > g.TooltipSlowDelay && IsItemHovered()) + if (!(tab_bar->Flags & ImGuiTabBarFlags_NoTooltip) && !(tab->Flags & ImGuiTabItemFlags_NoTooltip)) + SetTooltip("%.*s", (int)(FindRenderedTextEnd(label) - label), label); + + IM_ASSERT(!is_tab_button || !(tab_bar->SelectedTabId == tab->ID && is_tab_button)); // TabItemButton should not be selected + if (is_tab_button) + return pressed; + return tab_contents_visible; +} + +// [Public] This call is 100% optional but it allows removing some one-frame glitches when a tab has been unexpectedly removed. +// To use it, call SetTabItemClosed() between BeginTabBar() and EndTabBar(). +// It does nothing when there is no current tab bar or when the current tab bar belongs to a dock node. +// Tabs closed by the close button will automatically be flagged to avoid this issue. +void ImGui::SetTabItemClosed(const char* label) +{ + ImGuiContext& g = *GImGui; + bool is_within_manual_tab_bar = g.CurrentTabBar && !(g.CurrentTabBar->Flags & ImGuiTabBarFlags_DockNode); + if (is_within_manual_tab_bar) + { + ImGuiTabBar* tab_bar = g.CurrentTabBar; + ImGuiID tab_id = TabBarCalcTabID(tab_bar, label); + if (ImGuiTabItem* tab = TabBarFindTabByID(tab_bar, tab_id)) + tab->WantClose = true; // Will be processed by next call to TabBarLayout() + } +} + +// Compute the size of a tab from its label, reserving extra width for the close button when there is one. +ImVec2 ImGui::TabItemCalcSize(const char* label, bool has_close_button) +{ + ImGuiContext& g = *GImGui; + ImVec2 label_size = CalcTextSize(label, NULL, true); + ImVec2 size = ImVec2(label_size.x + g.Style.FramePadding.x, label_size.y + g.Style.FramePadding.y * 2.0f); + if (has_close_button) + size.x += g.Style.FramePadding.x + (g.Style.ItemInnerSpacing.x + g.FontSize); // We use Y intentionally to fit the close button circle. 
+ else + size.x += g.Style.FramePadding.x + 1.0f; + return ImVec2(ImMin(size.x, TabBarCalcMaxTabWidth()), size.y); +} + +void ImGui::TabItemBackground(ImDrawList* draw_list, const ImRect& bb, ImGuiTabItemFlags flags, ImU32 col) +{ + // While rendering tabs, we trim 1 pixel off the top of our bounding box so they can fit within a regular frame height while looking "detached" from it. + ImGuiContext& g = *GImGui; + const float width = bb.GetWidth(); + IM_UNUSED(flags); + IM_ASSERT(width > 0.0f); + const float rounding = ImMax(0.0f, ImMin((flags & ImGuiTabItemFlags_Button) ? g.Style.FrameRounding : g.Style.TabRounding, width * 0.5f - 1.0f)); + const float y1 = bb.Min.y + 1.0f; + const float y2 = bb.Max.y - 1.0f; + draw_list->PathLineTo(ImVec2(bb.Min.x, y2)); + draw_list->PathArcToFast(ImVec2(bb.Min.x + rounding, y1 + rounding), rounding, 6, 9); + draw_list->PathArcToFast(ImVec2(bb.Max.x - rounding, y1 + rounding), rounding, 9, 12); + draw_list->PathLineTo(ImVec2(bb.Max.x, y2)); + draw_list->PathFillConvex(col); + if (g.Style.TabBorderSize > 0.0f) + { + draw_list->PathLineTo(ImVec2(bb.Min.x + 0.5f, y2)); + draw_list->PathArcToFast(ImVec2(bb.Min.x + rounding + 0.5f, y1 + rounding + 0.5f), rounding, 6, 9); + draw_list->PathArcToFast(ImVec2(bb.Max.x - rounding - 0.5f, y1 + rounding + 0.5f), rounding, 9, 12); + draw_list->PathLineTo(ImVec2(bb.Max.x - 0.5f, y2)); + draw_list->PathStroke(GetColorU32(ImGuiCol_Border), false, g.Style.TabBorderSize); + } +} + +// Render text label (with custom clipping) + Unsaved Document marker + Close Button logic +// We tend to lock style.FramePadding for a given tab-bar, hence the 'frame_padding' parameter. 
+void ImGui::TabItemLabelAndCloseButton(ImDrawList* draw_list, const ImRect& bb, ImGuiTabItemFlags flags, ImVec2 frame_padding, const char* label, ImGuiID tab_id, ImGuiID close_button_id, bool is_contents_visible, bool* out_just_closed, bool* out_text_clipped) +{ + ImGuiContext& g = *GImGui; + ImVec2 label_size = CalcTextSize(label, NULL, true); + + if (out_just_closed) + *out_just_closed = false; + if (out_text_clipped) + *out_text_clipped = false; + + if (bb.GetWidth() <= 1.0f) + return; + + // In Style V2 we'll have full override of all colors per state (e.g. focused, selected) + // But right now if you want to alter text color of tabs this is what you need to do. +#if 0 + const float backup_alpha = g.Style.Alpha; + if (!is_contents_visible) + g.Style.Alpha *= 0.7f; +#endif + + // Render text label (with clipping + alpha gradient) + unsaved marker + const char* TAB_UNSAVED_MARKER = "*"; + ImRect text_pixel_clip_bb(bb.Min.x + frame_padding.x, bb.Min.y + frame_padding.y, bb.Max.x - frame_padding.x, bb.Max.y); + if (flags & ImGuiTabItemFlags_UnsavedDocument) + { + text_pixel_clip_bb.Max.x -= CalcTextSize(TAB_UNSAVED_MARKER, NULL, false).x; + ImVec2 unsaved_marker_pos(ImMin(bb.Min.x + frame_padding.x + label_size.x + 2, text_pixel_clip_bb.Max.x), bb.Min.y + frame_padding.y + IM_FLOOR(-g.FontSize * 0.25f)); + RenderTextClippedEx(draw_list, unsaved_marker_pos, bb.Max - frame_padding, TAB_UNSAVED_MARKER, NULL, NULL); + } + ImRect text_ellipsis_clip_bb = text_pixel_clip_bb; + + // Return clipped state ignoring the close button + if (out_text_clipped) + { + *out_text_clipped = (text_ellipsis_clip_bb.Min.x + label_size.x) > text_pixel_clip_bb.Max.x; + //draw_list->AddCircle(text_ellipsis_clip_bb.Min, 3.0f, *out_text_clipped ? 
IM_COL32(255, 0, 0, 255) : IM_COL32(0, 255, 0, 255)); + } + + // Close Button + // We are relying on a subtle and confusing distinction between 'hovered' and 'g.HoveredId' which happens because we are using ImGuiButtonFlags_AllowItemOverlap + SetItemAllowOverlap() + // 'hovered' will be true when hovering the Tab but NOT when hovering the close button + // 'g.HoveredId==id' will be true when hovering the Tab including when hovering the close button + // 'g.ActiveId==close_button_id' will be true when we are holding on the close button, in which case both hovered booleans are false + // The close button is shown only when the tab has one (close_button_id != 0), the tab is the visible one or is at least TabMinWidthForCloseButton wide, + // and the tab or its close button is currently hovered or active. + bool close_button_pressed = false; + bool close_button_visible = false; + if (close_button_id != 0) + if (is_contents_visible || bb.GetWidth() >= g.Style.TabMinWidthForCloseButton) + if (g.HoveredId == tab_id || g.HoveredId == close_button_id || g.ActiveId == tab_id || g.ActiveId == close_button_id) + close_button_visible = true; + if (close_button_visible) + { + ImGuiLastItemDataBackup last_item_backup; + const float close_button_sz = g.FontSize; + PushStyleVar(ImGuiStyleVar_FramePadding, frame_padding); + if (CloseButton(close_button_id, ImVec2(bb.Max.x - frame_padding.x * 2.0f - close_button_sz, bb.Min.y))) + close_button_pressed = true; + PopStyleVar(); + last_item_backup.Restore(); + + // Close with middle mouse button (only checked while the close button is visible) + if (!(flags & ImGuiTabItemFlags_NoCloseWithMiddleMouseButton) && IsMouseClicked(2)) + close_button_pressed = true; + + text_pixel_clip_bb.Max.x -= close_button_sz; + } + + // FIXME: if FramePadding is noticeably large, ellipsis_max_x will be wrong here (e.g. #3497), maybe for consistency that parameter of RenderTextEllipsis() shouldn't exist.. + float ellipsis_max_x = close_button_visible ? 
text_pixel_clip_bb.Max.x : bb.Max.x - 1.0f; + RenderTextEllipsis(draw_list, text_ellipsis_clip_bb.Min, text_ellipsis_clip_bb.Max, text_pixel_clip_bb.Max.x, ellipsis_max_x, label, NULL, &label_size); + +#if 0 + if (!is_contents_visible) + g.Style.Alpha = backup_alpha; +#endif + + if (out_just_closed) + *out_just_closed = close_button_pressed; +} + + +#endif // #ifndef IMGUI_DISABLE diff --git a/cpp-projects/3d-engine/imgui/imstb_rectpack.h b/cpp-projects/3d-engine/imgui/imstb_rectpack.h new file mode 100644 index 0000000..ff2a85d --- /dev/null +++ b/cpp-projects/3d-engine/imgui/imstb_rectpack.h @@ -0,0 +1,639 @@ +// [DEAR IMGUI] +// This is a slightly modified version of stb_rect_pack.h 1.00. +// Those changes would need to be pushed into nothings/stb: +// - Added STBRP__CDECL +// Grep for [DEAR IMGUI] to find the changes. + +// stb_rect_pack.h - v1.00 - public domain - rectangle packing +// Sean Barrett 2014 +// +// Useful for e.g. packing rectangular textures into an atlas. +// Does not do rotation. +// +// Not necessarily the awesomest packing method, but better than +// the totally naive one in stb_truetype (which is primarily what +// this is meant to replace). +// +// Has only had a few tests run, may have issues. +// +// More docs to come. +// +// No memory allocations; uses qsort() and assert() from stdlib. +// Can override those by defining STBRP_SORT and STBRP_ASSERT. +// +// This library currently uses the Skyline Bottom-Left algorithm. +// +// Please note: better rectangle packers are welcome! Please +// implement them to the same API, but with a different init +// function. 
+// +// Credits +// +// Library +// Sean Barrett +// Minor features +// Martins Mozeiko +// github:IntellectualKitty +// +// Bugfixes / warning fixes +// Jeremy Jaussaud +// Fabian Giesen +// +// Version history: +// +// 1.00 (2019-02-25) avoid small space waste; gracefully fail too-wide rectangles +// 0.99 (2019-02-07) warning fixes +// 0.11 (2017-03-03) return packing success/fail result +// 0.10 (2016-10-25) remove cast-away-const to avoid warnings +// 0.09 (2016-08-27) fix compiler warnings +// 0.08 (2015-09-13) really fix bug with empty rects (w=0 or h=0) +// 0.07 (2015-09-13) fix bug with empty rects (w=0 or h=0) +// 0.06 (2015-04-15) added STBRP_SORT to allow replacing qsort +// 0.05: added STBRP_ASSERT to allow replacing assert +// 0.04: fixed minor bug in STBRP_LARGE_RECTS support +// 0.01: initial release +// +// LICENSE +// +// See end of file for license information. + +////////////////////////////////////////////////////////////////////////////// +// +// INCLUDE SECTION +// + +#ifndef STB_INCLUDE_STB_RECT_PACK_H +#define STB_INCLUDE_STB_RECT_PACK_H + +#define STB_RECT_PACK_VERSION 1 + +#ifdef STBRP_STATIC +#define STBRP_DEF static +#else +#define STBRP_DEF extern +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct stbrp_context stbrp_context; +typedef struct stbrp_node stbrp_node; +typedef struct stbrp_rect stbrp_rect; + +#ifdef STBRP_LARGE_RECTS +typedef int stbrp_coord; +#else +typedef unsigned short stbrp_coord; +#endif + +STBRP_DEF int stbrp_pack_rects (stbrp_context *context, stbrp_rect *rects, int num_rects); +// Assign packed locations to rectangles. The rectangles are of type +// 'stbrp_rect' defined below, stored in the array 'rects', and there +// are 'num_rects' many of them. +// +// Rectangles which are successfully packed have the 'was_packed' flag +// set to a non-zero value and 'x' and 'y' store the minimum location +// on each axis (i.e. 
bottom-left in cartesian coordinates, top-left +// if you imagine y increasing downwards). Rectangles which do not fit +// have the 'was_packed' flag set to 0. +// +// You should not try to access the 'rects' array from another thread +// while this function is running, as the function temporarily reorders +// the array while it executes. +// +// To pack into another rectangle, you need to call stbrp_init_target +// again. To continue packing into the same rectangle, you can call +// this function again. Calling this multiple times with multiple rect +// arrays will probably produce worse packing results than calling it +// a single time with the full rectangle array, but the option is +// available. +// +// The function returns 1 if all of the rectangles were successfully +// packed and 0 otherwise. + +struct stbrp_rect +{ + // reserved for your use: + int id; + + // input: + stbrp_coord w, h; + + // output: + stbrp_coord x, y; + int was_packed; // non-zero if valid packing + +}; // 16 bytes, nominally + + +STBRP_DEF void stbrp_init_target (stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes); +// Initialize a rectangle packer to: +// pack a rectangle that is 'width' by 'height' in dimensions +// using temporary storage provided by the array 'nodes', which is 'num_nodes' long +// +// You must call this function every time you start packing into a new target. +// +// There is no "shutdown" function. The 'nodes' memory must stay valid for +// the following stbrp_pack_rects() call (or calls), but can be freed after +// the call (or calls) finish. +// +// Note: to guarantee best results, either: +// 1. make sure 'num_nodes' >= 'width' +// or 2. call stbrp_allow_out_of_mem() defined below with 'allow_out_of_mem = 1' +// +// If you don't do either of the above things, widths will be quantized to multiples +// of small integers to guarantee the algorithm doesn't run out of temporary storage. 
+// +// If you do #2, then the non-quantized algorithm will be used, but the algorithm +// may run out of temporary storage and be unable to pack some rectangles. + +STBRP_DEF void stbrp_setup_allow_out_of_mem (stbrp_context *context, int allow_out_of_mem); +// Optionally call this function after init but before doing any packing to +// change the handling of the out-of-temp-memory scenario, described above. +// If you call init again, this will be reset to the default (false). + + +STBRP_DEF void stbrp_setup_heuristic (stbrp_context *context, int heuristic); +// Optionally select which packing heuristic the library should use. Different +// heuristics will produce better/worse results for different data sets. +// If you call init again, this will be reset to the default. + +enum +{ + STBRP_HEURISTIC_Skyline_default=0, + STBRP_HEURISTIC_Skyline_BL_sortHeight = STBRP_HEURISTIC_Skyline_default, + STBRP_HEURISTIC_Skyline_BF_sortHeight +}; + + +////////////////////////////////////////////////////////////////////////////// +// +// the details of the following structures don't matter to you, but they must +// be visible so you can handle the memory allocations for them + +struct stbrp_node +{ + stbrp_coord x,y; + stbrp_node *next; +}; + +struct stbrp_context +{ + int width; + int height; + int align; + int init_mode; + int heuristic; + int num_nodes; + stbrp_node *active_head; + stbrp_node *free_head; + stbrp_node extra[2]; // we allocate two extra nodes so optimal user-node-count is 'width' not 'width+2' +}; + +#ifdef __cplusplus +} +#endif + +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// IMPLEMENTATION SECTION +// + +#ifdef STB_RECT_PACK_IMPLEMENTATION +#ifndef STBRP_SORT +#include <stdlib.h> +#define STBRP_SORT qsort +#endif + +#ifndef STBRP_ASSERT +#include <assert.h> +#define STBRP_ASSERT assert +#endif + +// [DEAR IMGUI] Added STBRP__CDECL +#ifdef _MSC_VER +#define STBRP__NOTUSED(v) (void)(v) +#define STBRP__CDECL __cdecl +#else 
+#define STBRP__NOTUSED(v) (void)sizeof(v) +#define STBRP__CDECL +#endif + +enum +{ + STBRP__INIT_skyline = 1 +}; + +STBRP_DEF void stbrp_setup_heuristic(stbrp_context *context, int heuristic) +{ + switch (context->init_mode) { + case STBRP__INIT_skyline: + STBRP_ASSERT(heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight || heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight); + context->heuristic = heuristic; + break; + default: + STBRP_ASSERT(0); + } +} + +STBRP_DEF void stbrp_setup_allow_out_of_mem(stbrp_context *context, int allow_out_of_mem) +{ + if (allow_out_of_mem) + // if it's ok to run out of memory, then don't bother aligning them; + // this gives better packing, but may fail due to OOM (even though + // the rectangles easily fit). @TODO a smarter approach would be to only + // quantize once we've hit OOM, then we could get rid of this parameter. + context->align = 1; + else { + // if it's not ok to run out of memory, then quantize the widths + // so that num_nodes is always enough nodes. + // + // I.e. 
num_nodes * align >= width + // align >= width / num_nodes + // align = ceil(width/num_nodes) + + context->align = (context->width + context->num_nodes-1) / context->num_nodes; + } +} + +STBRP_DEF void stbrp_init_target(stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes) +{ + int i; +#ifndef STBRP_LARGE_RECTS + STBRP_ASSERT(width <= 0xffff && height <= 0xffff); +#endif + + for (i=0; i < num_nodes-1; ++i) + nodes[i].next = &nodes[i+1]; + nodes[i].next = NULL; + context->init_mode = STBRP__INIT_skyline; + context->heuristic = STBRP_HEURISTIC_Skyline_default; + context->free_head = &nodes[0]; + context->active_head = &context->extra[0]; + context->width = width; + context->height = height; + context->num_nodes = num_nodes; + stbrp_setup_allow_out_of_mem(context, 0); + + // node 0 is the full width, node 1 is the sentinel (lets us not store width explicitly) + context->extra[0].x = 0; + context->extra[0].y = 0; + context->extra[0].next = &context->extra[1]; + context->extra[1].x = (stbrp_coord) width; +#ifdef STBRP_LARGE_RECTS + context->extra[1].y = (1<<30); +#else + context->extra[1].y = 65535; +#endif + context->extra[1].next = NULL; +} + +// find minimum y position if it starts at x1 +static int stbrp__skyline_find_min_y(stbrp_context *c, stbrp_node *first, int x0, int width, int *pwaste) +{ + stbrp_node *node = first; + int x1 = x0 + width; + int min_y, visited_width, waste_area; + + STBRP__NOTUSED(c); + + STBRP_ASSERT(first->x <= x0); + + #if 0 + // skip in case we're past the node + while (node->next->x <= x0) + ++node; + #else + STBRP_ASSERT(node->next->x > x0); // we ended up handling this in the caller for efficiency + #endif + + STBRP_ASSERT(node->x <= x0); + + min_y = 0; + waste_area = 0; + visited_width = 0; + while (node->x < x1) { + if (node->y > min_y) { + // raise min_y higher. 
+ // we've accounted for all waste up to min_y, + // but we'll now add more waste for everything we've visited + waste_area += visited_width * (node->y - min_y); + min_y = node->y; + // the first time through, visited_width might be reduced + if (node->x < x0) + visited_width += node->next->x - x0; + else + visited_width += node->next->x - node->x; + } else { + // add waste area + int under_width = node->next->x - node->x; + if (under_width + visited_width > width) + under_width = width - visited_width; + waste_area += under_width * (min_y - node->y); + visited_width += under_width; + } + node = node->next; + } + + *pwaste = waste_area; + return min_y; +} + +typedef struct +{ + int x,y; + stbrp_node **prev_link; +} stbrp__findresult; + +static stbrp__findresult stbrp__skyline_find_best_pos(stbrp_context *c, int width, int height) +{ + int best_waste = (1<<30), best_x, best_y = (1 << 30); + stbrp__findresult fr; + stbrp_node **prev, *node, *tail, **best = NULL; + + // align to multiple of c->align + width = (width + c->align - 1); + width -= width % c->align; + STBRP_ASSERT(width % c->align == 0); + + // if it can't possibly fit, bail immediately + if (width > c->width || height > c->height) { + fr.prev_link = NULL; + fr.x = fr.y = 0; + return fr; + } + + node = c->active_head; + prev = &c->active_head; + while (node->x + width <= c->width) { + int y,waste; + y = stbrp__skyline_find_min_y(c, node, node->x, width, &waste); + if (c->heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight) { // actually just want to test BL + // bottom left + if (y < best_y) { + best_y = y; + best = prev; + } + } else { + // best-fit + if (y + height <= c->height) { + // can only use it if it fits vertically + if (y < best_y || (y == best_y && waste < best_waste)) { + best_y = y; + best_waste = waste; + best = prev; + } + } + } + prev = &node->next; + node = node->next; + } + + best_x = (best == NULL) ? 
0 : (*best)->x; + + // if doing best-fit (BF), we also have to try aligning right edge to each node position + // + // e.g., if fitting + // + // ____________________ + // |____________________| + // + // into + // + // | | + // | ____________| + // |____________| + // + // then right-aligned reduces waste, but bottom-left (BL) always chooses left-aligned + // + // This makes BF take about 2x the time + + if (c->heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight) { + tail = c->active_head; + node = c->active_head; + prev = &c->active_head; + // find first node that's admissible + while (tail->x < width) + tail = tail->next; + while (tail) { + int xpos = tail->x - width; + int y,waste; + STBRP_ASSERT(xpos >= 0); + // find the left position that matches this + while (node->next->x <= xpos) { + prev = &node->next; + node = node->next; + } + STBRP_ASSERT(node->next->x > xpos && node->x <= xpos); + y = stbrp__skyline_find_min_y(c, node, xpos, width, &waste); + if (y + height <= c->height) { + if (y <= best_y) { + if (y < best_y || waste < best_waste || (waste==best_waste && xpos < best_x)) { + best_x = xpos; + STBRP_ASSERT(y <= best_y); + best_y = y; + best_waste = waste; + best = prev; + } + } + } + tail = tail->next; + } + } + + fr.prev_link = best; + fr.x = best_x; + fr.y = best_y; + return fr; +} + +static stbrp__findresult stbrp__skyline_pack_rectangle(stbrp_context *context, int width, int height) +{ + // find best position according to heuristic + stbrp__findresult res = stbrp__skyline_find_best_pos(context, width, height); + stbrp_node *node, *cur; + + // bail if: + // 1. it failed + // 2. the best node doesn't fit (we don't always check this) + // 3. 
we're out of memory + if (res.prev_link == NULL || res.y + height > context->height || context->free_head == NULL) { + res.prev_link = NULL; + return res; + } + + // on success, create new node + node = context->free_head; + node->x = (stbrp_coord) res.x; + node->y = (stbrp_coord) (res.y + height); + + context->free_head = node->next; + + // insert the new node into the right starting point, and + // let 'cur' point to the remaining nodes needing to be + // stitched back in + + cur = *res.prev_link; + if (cur->x < res.x) { + // preserve the existing one, so start testing with the next one + stbrp_node *next = cur->next; + cur->next = node; + cur = next; + } else { + *res.prev_link = node; + } + + // from here, traverse cur and free the nodes, until we get to one + // that shouldn't be freed + while (cur->next && cur->next->x <= res.x + width) { + stbrp_node *next = cur->next; + // move the current node to the free list + cur->next = context->free_head; + context->free_head = cur; + cur = next; + } + + // stitch the list back in + node->next = cur; + + if (cur->x < res.x + width) + cur->x = (stbrp_coord) (res.x + width); + +#ifdef _DEBUG + cur = context->active_head; + while (cur->x < context->width) { + STBRP_ASSERT(cur->x < cur->next->x); + cur = cur->next; + } + STBRP_ASSERT(cur->next == NULL); + + { + int count=0; + cur = context->active_head; + while (cur) { + cur = cur->next; + ++count; + } + cur = context->free_head; + while (cur) { + cur = cur->next; + ++count; + } + STBRP_ASSERT(count == context->num_nodes+2); + } +#endif + + return res; +} + +// [DEAR IMGUI] Added STBRP__CDECL +static int STBRP__CDECL rect_height_compare(const void *a, const void *b) +{ + const stbrp_rect *p = (const stbrp_rect *) a; + const stbrp_rect *q = (const stbrp_rect *) b; + if (p->h > q->h) + return -1; + if (p->h < q->h) + return 1; + return (p->w > q->w) ? 
-1 : (p->w < q->w); +} + +// [DEAR IMGUI] Added STBRP__CDECL +static int STBRP__CDECL rect_original_order(const void *a, const void *b) +{ + const stbrp_rect *p = (const stbrp_rect *) a; + const stbrp_rect *q = (const stbrp_rect *) b; + return (p->was_packed < q->was_packed) ? -1 : (p->was_packed > q->was_packed); +} + +#ifdef STBRP_LARGE_RECTS +#define STBRP__MAXVAL 0xffffffff +#else +#define STBRP__MAXVAL 0xffff +#endif + +STBRP_DEF int stbrp_pack_rects(stbrp_context *context, stbrp_rect *rects, int num_rects) +{ + int i, all_rects_packed = 1; + + // we use the 'was_packed' field internally to allow sorting/unsorting + for (i=0; i < num_rects; ++i) { + rects[i].was_packed = i; + } + + // sort according to heuristic + STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_height_compare); + + for (i=0; i < num_rects; ++i) { + if (rects[i].w == 0 || rects[i].h == 0) { + rects[i].x = rects[i].y = 0; // empty rect needs no space + } else { + stbrp__findresult fr = stbrp__skyline_pack_rectangle(context, rects[i].w, rects[i].h); + if (fr.prev_link) { + rects[i].x = (stbrp_coord) fr.x; + rects[i].y = (stbrp_coord) fr.y; + } else { + rects[i].x = rects[i].y = STBRP__MAXVAL; + } + } + } + + // unsort + STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_original_order); + + // set was_packed flags and all_rects_packed status + for (i=0; i < num_rects; ++i) { + rects[i].was_packed = !(rects[i].x == STBRP__MAXVAL && rects[i].y == STBRP__MAXVAL); + if (!rects[i].was_packed) + all_rects_packed = 0; + } + + // return the all_rects_packed status + return all_rects_packed; +} +#endif + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/cpp-projects/3d-engine/imgui/imstb_textedit.h b/cpp-projects/3d-engine/imgui/imstb_textedit.h new file mode 100644 index 0000000..7644670 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/imstb_textedit.h @@ -0,0 +1,1447 @@ +// [DEAR IMGUI] +// This is a slightly modified version of stb_textedit.h 1.13. +// Those changes would need to be pushed into nothings/stb: +// - Fix in stb_textedit_discard_redo (see https://github.com/nothings/stb/issues/321) +// Grep for [DEAR IMGUI] to find the changes. + +// stb_textedit.h - v1.13 - public domain - Sean Barrett +// Development of this library was sponsored by RAD Game Tools +// +// This C header file implements the guts of a multi-line text-editing +// widget; you implement display, word-wrapping, and low-level string +// insertion/deletion, and stb_textedit will map user inputs into +// insertions & deletions, plus updates to the cursor position, +// selection state, and undo state. +// +// It is intended for use in games and other systems that need to build +// their own custom widgets and which do not have heavy text-editing +// requirements (this library is not recommended for use for editing large +// texts, as its performance does not scale and it has limited undo). +// +// Non-trivial behaviors are modelled after Windows text controls. 
+// +// +// LICENSE +// +// See end of file for license information. +// +// +// DEPENDENCIES +// +// Uses the C runtime function 'memmove', which you can override +// by defining STB_TEXTEDIT_memmove before the implementation. +// Uses no other functions. Performs no runtime allocations. +// +// +// VERSION HISTORY +// +// 1.13 (2019-02-07) fix bug in undo size management +// 1.12 (2018-01-29) user can change STB_TEXTEDIT_KEYTYPE, fix redo to avoid crash +// 1.11 (2017-03-03) fix HOME on last line, dragging off single-line textfield +// 1.10 (2016-10-25) suppress warnings about casting away const with -Wcast-qual +// 1.9 (2016-08-27) customizable move-by-word +// 1.8 (2016-04-02) better keyboard handling when mouse button is down +// 1.7 (2015-09-13) change y range handling in case baseline is non-0 +// 1.6 (2015-04-15) allow STB_TEXTEDIT_memmove +// 1.5 (2014-09-10) add support for secondary keys for OS X +// 1.4 (2014-08-17) fix signed/unsigned warnings +// 1.3 (2014-06-19) fix mouse clicking to round to nearest char boundary +// 1.2 (2014-05-27) fix some RAD types that had crept into the new code +// 1.1 (2013-12-15) move-by-word (requires STB_TEXTEDIT_IS_SPACE ) +// 1.0 (2012-07-26) improve documentation, initial public release +// 0.3 (2012-02-24) bugfixes, single-line mode; insert mode +// 0.2 (2011-11-28) fixes to undo/redo +// 0.1 (2010-07-08) initial version +// +// ADDITIONAL CONTRIBUTORS +// +// Ulf Winklemann: move-by-word in 1.1 +// Fabian Giesen: secondary key inputs in 1.5 +// Martins Mozeiko: STB_TEXTEDIT_memmove in 1.6 +// +// Bugfixes: +// Scott Graham +// Daniel Keller +// Omar Cornut +// Dan Thompson +// +// USAGE +// +// This file behaves differently depending on what symbols you define +// before including it. +// +// +// Header-file mode: +// +// If you do not define STB_TEXTEDIT_IMPLEMENTATION before including this, +// it will operate in "header file" mode. 
In this mode, it declares a +// single public symbol, STB_TexteditState, which encapsulates the current +// state of a text widget (except for the string, which you will store +// separately). +// +// To compile in this mode, you must define STB_TEXTEDIT_CHARTYPE to a +// primitive type that defines a single character (e.g. char, wchar_t, etc). +// +// To save space or increase undo-ability, you can optionally define the +// following things that are used by the undo system: +// +// STB_TEXTEDIT_POSITIONTYPE small int type encoding a valid cursor position +// STB_TEXTEDIT_UNDOSTATECOUNT the number of undo states to allow +// STB_TEXTEDIT_UNDOCHARCOUNT the number of characters to store in the undo buffer +// +// If you don't define these, they are set to permissive types and +// moderate sizes. The undo system does no memory allocations, so +// it grows STB_TexteditState by the worst-case storage which is (in bytes): +// +// [4 + 3 * sizeof(STB_TEXTEDIT_POSITIONTYPE)] * STB_TEXTEDIT_UNDOSTATECOUNT +// + sizeof(STB_TEXTEDIT_CHARTYPE) * STB_TEXTEDIT_UNDOCHARCOUNT +// +// +// Implementation mode: +// +// If you define STB_TEXTEDIT_IMPLEMENTATION before including this, it +// will compile the implementation of the text edit widget, depending +// on a large number of symbols which must be defined before the include. +// +// The implementation is defined only as static functions. You will then +// need to provide your own APIs in the same file which will access the +// static functions. +// +// The basic concept is that you provide a "string" object which +// behaves like an array of characters. stb_textedit uses indices to +// refer to positions in the string, implicitly representing positions +// in the displayed textedit. This is true for both plain text and +// rich text; even with rich text stb_textedit interacts with your +// code as if there was an array of all the displayed characters. 
+// +// Symbols that must be the same in header-file and implementation mode: +// +// STB_TEXTEDIT_CHARTYPE the character type +// STB_TEXTEDIT_POSITIONTYPE small type that is a valid cursor position +// STB_TEXTEDIT_UNDOSTATECOUNT the number of undo states to allow +// STB_TEXTEDIT_UNDOCHARCOUNT the number of characters to store in the undo buffer +// +// Symbols you must define for implementation mode: +// +// STB_TEXTEDIT_STRING the type of object representing a string being edited, +// typically this is a wrapper object with other data you need +// +// STB_TEXTEDIT_STRINGLEN(obj) the length of the string (ideally O(1)) +// STB_TEXTEDIT_LAYOUTROW(&r,obj,n) returns the results of laying out a line of characters +// starting from character #n (see discussion below) +// STB_TEXTEDIT_GETWIDTH(obj,n,i) returns the pixel delta from the xpos of the i'th character +// to the xpos of the i+1'th char for a line of characters +// starting at character #n (i.e. accounts for kerning +// with previous char) +// STB_TEXTEDIT_KEYTOTEXT(k) maps a keyboard input to an insertable character +// (return type is int, -1 means not valid to insert) +// STB_TEXTEDIT_GETCHAR(obj,i) returns the i'th character of obj, 0-based +// STB_TEXTEDIT_NEWLINE the character returned by _GETCHAR() we recognize +// as manually wordwrapping for end-of-line positioning +// +// STB_TEXTEDIT_DELETECHARS(obj,i,n) delete n characters starting at i +// STB_TEXTEDIT_INSERTCHARS(obj,i,c*,n) insert n characters at i (pointed to by STB_TEXTEDIT_CHARTYPE*) +// +// STB_TEXTEDIT_K_SHIFT a power of two that is or'd in to a keyboard input to represent the shift key +// +// STB_TEXTEDIT_K_LEFT keyboard input to move cursor left +// STB_TEXTEDIT_K_RIGHT keyboard input to move cursor right +// STB_TEXTEDIT_K_UP keyboard input to move cursor up +// STB_TEXTEDIT_K_DOWN keyboard input to move cursor down +// STB_TEXTEDIT_K_PGUP keyboard input to move cursor up a page +// STB_TEXTEDIT_K_PGDOWN keyboard input to move cursor 
down a page +// STB_TEXTEDIT_K_LINESTART keyboard input to move cursor to start of line // e.g. HOME +// STB_TEXTEDIT_K_LINEEND keyboard input to move cursor to end of line // e.g. END +// STB_TEXTEDIT_K_TEXTSTART keyboard input to move cursor to start of text // e.g. ctrl-HOME +// STB_TEXTEDIT_K_TEXTEND keyboard input to move cursor to end of text // e.g. ctrl-END +// STB_TEXTEDIT_K_DELETE keyboard input to delete selection or character under cursor +// STB_TEXTEDIT_K_BACKSPACE keyboard input to delete selection or character left of cursor +// STB_TEXTEDIT_K_UNDO keyboard input to perform undo +// STB_TEXTEDIT_K_REDO keyboard input to perform redo +// +// Optional: +// STB_TEXTEDIT_K_INSERT keyboard input to toggle insert mode +// STB_TEXTEDIT_IS_SPACE(ch) true if character is whitespace (e.g. 'isspace'), +// required for default WORDLEFT/WORDRIGHT handlers +// STB_TEXTEDIT_MOVEWORDLEFT(obj,i) custom handler for WORDLEFT, returns index to move cursor to +// STB_TEXTEDIT_MOVEWORDRIGHT(obj,i) custom handler for WORDRIGHT, returns index to move cursor to +// STB_TEXTEDIT_K_WORDLEFT keyboard input to move cursor left one word // e.g. ctrl-LEFT +// STB_TEXTEDIT_K_WORDRIGHT keyboard input to move cursor right one word // e.g. ctrl-RIGHT +// STB_TEXTEDIT_K_LINESTART2 secondary keyboard input to move cursor to start of line +// STB_TEXTEDIT_K_LINEEND2 secondary keyboard input to move cursor to end of line +// STB_TEXTEDIT_K_TEXTSTART2 secondary keyboard input to move cursor to start of text +// STB_TEXTEDIT_K_TEXTEND2 secondary keyboard input to move cursor to end of text +// +// Keyboard input must be encoded as a single integer value; e.g. a character code +// and some bitflags that represent shift states. to simplify the interface, SHIFT must +// be a bitflag, so we can test the shifted state of cursor movements to allow selection, +// i.e. (STB_TEXTEDIT_K_RIGHT|STB_TEXTEDIT_K_SHIFT) should be shifted right-arrow. 
+// +// You can encode other things, such as CONTROL or ALT, in additional bits, and +// then test for their presence in e.g. STB_TEXTEDIT_K_WORDLEFT. For example, +// my Windows implementations add an additional CONTROL bit, and an additional KEYDOWN +// bit. Then all of the STB_TEXTEDIT_K_ values bitwise-or in the KEYDOWN bit, +// and I pass both WM_KEYDOWN and WM_CHAR events to the "key" function in the +// API below. The control keys will only match WM_KEYDOWN events because of the +// keydown bit I add, and STB_TEXTEDIT_KEYTOTEXT only tests for the KEYDOWN +// bit so it only decodes WM_CHAR events. +// +// STB_TEXTEDIT_LAYOUTROW returns information about the shape of one displayed +// row of characters assuming they start on the i'th character--the width and +// the height and the number of characters consumed. This allows this library +// to traverse the entire layout incrementally. You need to compute word-wrapping +// here. +// +// Each textfield keeps its own insert mode state, which is not how normal +// applications work. To keep an app-wide insert mode, update/copy the +// "insert_mode" field of STB_TexteditState before/after calling API functions. +// +// API +// +// void stb_textedit_initialize_state(STB_TexteditState *state, int is_single_line) +// +// void stb_textedit_click(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, float x, float y) +// void stb_textedit_drag(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, float x, float y) +// int stb_textedit_cut(STB_TEXTEDIT_STRING *str, STB_TexteditState *state) +// int stb_textedit_paste(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, STB_TEXTEDIT_CHARTYPE *text, int len) +// void stb_textedit_key(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, STB_TEXTEDIT_KEYTYPE key) +// +// Each of these functions potentially updates the string and updates the +// state. +// +// initialize_state: +// set the textedit state to a known good default state when initially +// constructing the textedit. 
+// +// click: +// call this with the mouse x,y on a mouse down; it will update the cursor +// and reset the selection start/end to the cursor point. the x,y must +// be relative to the text widget, with (0,0) being the top left. +// +// drag: +// call this with the mouse x,y on a mouse drag/up; it will update the +// cursor and the selection end point +// +// cut: +// call this to delete the current selection; returns true if there was +// one. you should FIRST copy the current selection to the system paste buffer. +// (To copy, just copy the current selection out of the string yourself.) +// +// paste: +// call this to paste text at the current cursor point or over the current +// selection if there is one. +// +// key: +// call this for keyboard inputs sent to the textfield. you can use it +// for "key down" events or for "translated" key events. if you need to +// do both (as in Win32), or distinguish Unicode characters from control +// inputs, set a high bit to distinguish the two; then you can define the +// various definitions like STB_TEXTEDIT_K_LEFT to have the is-key-event bit +// set, and make STB_TEXTEDIT_KEYTOTEXT check that the is-key-event bit is +// clear. STB_TEXTEDIT_KEYTYPE defaults to int, but you can #define it to +// any other type you want before including. +// +// +// When rendering, you can read the cursor position and selection state from +// the STB_TexteditState. +// +// +// Notes: +// +// This is designed to be usable in IMGUI, so it allows for the possibility of +// running in an IMGUI that has NOT cached the multi-line layout. For this +// reason, it provides an interface that is compatible with computing the +// layout incrementally--we try to make sure we make as few passes through +// as possible. 
(For example, to locate the mouse pointer in the text, we +// could define functions that return the X and Y positions of characters +// and binary search Y and then X, but if we're doing dynamic layout this +// will run the layout algorithm many times, so instead we manually search +// forward in one pass. Similar logic applies to e.g. up-arrow and +// down-arrow movement.) +// +// If it's run in a widget that *has* cached the layout, then this is less +// efficient, but it's not horrible on modern computers. But you wouldn't +// want to edit million-line files with it. + + +//////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////// +//// +//// Header-file mode +//// +//// + +#ifndef INCLUDE_STB_TEXTEDIT_H +#define INCLUDE_STB_TEXTEDIT_H + +//////////////////////////////////////////////////////////////////////// +// +// STB_TexteditState +// +// Definition of STB_TexteditState which you should store +// per-textfield; it includes cursor position, selection state, +// and undo state. 
+// + +#ifndef STB_TEXTEDIT_UNDOSTATECOUNT +#define STB_TEXTEDIT_UNDOSTATECOUNT 99 +#endif +#ifndef STB_TEXTEDIT_UNDOCHARCOUNT +#define STB_TEXTEDIT_UNDOCHARCOUNT 999 +#endif +#ifndef STB_TEXTEDIT_CHARTYPE +#define STB_TEXTEDIT_CHARTYPE int +#endif +#ifndef STB_TEXTEDIT_POSITIONTYPE +#define STB_TEXTEDIT_POSITIONTYPE int +#endif + +typedef struct +{ + // private data + STB_TEXTEDIT_POSITIONTYPE where; + STB_TEXTEDIT_POSITIONTYPE insert_length; + STB_TEXTEDIT_POSITIONTYPE delete_length; + int char_storage; +} StbUndoRecord; + +typedef struct +{ + // private data + StbUndoRecord undo_rec [STB_TEXTEDIT_UNDOSTATECOUNT]; + STB_TEXTEDIT_CHARTYPE undo_char[STB_TEXTEDIT_UNDOCHARCOUNT]; + short undo_point, redo_point; + int undo_char_point, redo_char_point; +} StbUndoState; + +typedef struct +{ + ///////////////////// + // + // public data + // + + int cursor; + // position of the text cursor within the string + + int select_start; // selection start point + int select_end; + // selection start and end point in characters; if equal, no selection. + // note that start may be less than or greater than end (e.g. when + // dragging the mouse, start is where the initial click was, and you + // can drag in either direction) + + unsigned char insert_mode; + // each textfield keeps its own insert mode state. to keep an app-wide + // insert mode, copy this value in/out of the app state + + int row_count_per_page; + // page size in number of row. + // this value MUST be set to >0 for pageup or pagedown in multilines documents. 
+ + ///////////////////// + // + // private data + // + unsigned char cursor_at_end_of_line; // not implemented yet + unsigned char initialized; + unsigned char has_preferred_x; + unsigned char single_line; + unsigned char padding1, padding2, padding3; + float preferred_x; // this determines where the cursor up/down tries to seek to along x + StbUndoState undostate; +} STB_TexteditState; + + +//////////////////////////////////////////////////////////////////////// +// +// StbTexteditRow +// +// Result of layout query, used by stb_textedit to determine where +// the text in each row is. + +// result of layout query +typedef struct +{ + float x0,x1; // starting x location, end x location (allows for align=right, etc) + float baseline_y_delta; // position of baseline relative to previous row's baseline + float ymin,ymax; // height of row above and below baseline + int num_chars; +} StbTexteditRow; +#endif //INCLUDE_STB_TEXTEDIT_H + + +//////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////// +//// +//// Implementation mode +//// +//// + + +// implementation isn't include-guarded, since it might have indirectly +// included just the "header" portion +#ifdef STB_TEXTEDIT_IMPLEMENTATION + +#ifndef STB_TEXTEDIT_memmove +#include <string.h> +#define STB_TEXTEDIT_memmove memmove +#endif + + +///////////////////////////////////////////////////////////////////////////// +// +// Mouse input handling +// + +// traverse the layout to locate the nearest character to a display position +static int stb_text_locate_coord(STB_TEXTEDIT_STRING *str, float x, float y) +{ + StbTexteditRow r; + int n = STB_TEXTEDIT_STRINGLEN(str); + float base_y = 0, prev_x; + int i=0, k; + + r.x0 = r.x1 = 0; + r.ymin = r.ymax = 0; + r.num_chars = 0; + + // search rows to find one that straddles 'y' + while (i < n) { + STB_TEXTEDIT_LAYOUTROW(&r, str, i); + if (r.num_chars <= 0) + return n; + + if (i==0 && y < base_y + 
r.ymin) + return 0; + + if (y < base_y + r.ymax) + break; + + i += r.num_chars; + base_y += r.baseline_y_delta; + } + + // below all text, return 'after' last character + if (i >= n) + return n; + + // check if it's before the beginning of the line + if (x < r.x0) + return i; + + // check if it's before the end of the line + if (x < r.x1) { + // search characters in row for one that straddles 'x' + prev_x = r.x0; + for (k=0; k < r.num_chars; ++k) { + float w = STB_TEXTEDIT_GETWIDTH(str, i, k); + if (x < prev_x+w) { + if (x < prev_x+w/2) + return k+i; + else + return k+i+1; + } + prev_x += w; + } + // shouldn't happen, but if it does, fall through to end-of-line case + } + + // if the last character is a newline, return that. otherwise return 'after' the last character + if (STB_TEXTEDIT_GETCHAR(str, i+r.num_chars-1) == STB_TEXTEDIT_NEWLINE) + return i+r.num_chars-1; + else + return i+r.num_chars; +} + +// API click: on mouse down, move the cursor to the clicked location, and reset the selection +static void stb_textedit_click(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, float x, float y) +{ + // In single-line mode, just always make y = 0. This lets the drag keep working if the mouse + // goes off the top or bottom of the text + if( state->single_line ) + { + StbTexteditRow r; + STB_TEXTEDIT_LAYOUTROW(&r, str, 0); + y = r.ymin; + } + + state->cursor = stb_text_locate_coord(str, x, y); + state->select_start = state->cursor; + state->select_end = state->cursor; + state->has_preferred_x = 0; +} + +// API drag: on mouse drag, move the cursor and selection endpoint to the clicked location +static void stb_textedit_drag(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, float x, float y) +{ + int p = 0; + + // In single-line mode, just always make y = 0. 
This lets the drag keep working if the mouse + // goes off the top or bottom of the text + if( state->single_line ) + { + StbTexteditRow r; + STB_TEXTEDIT_LAYOUTROW(&r, str, 0); + y = r.ymin; + } + + if (state->select_start == state->select_end) + state->select_start = state->cursor; + + p = stb_text_locate_coord(str, x, y); + state->cursor = state->select_end = p; +} + +///////////////////////////////////////////////////////////////////////////// +// +// Keyboard input handling +// + +// forward declarations +static void stb_text_undo(STB_TEXTEDIT_STRING *str, STB_TexteditState *state); +static void stb_text_redo(STB_TEXTEDIT_STRING *str, STB_TexteditState *state); +static void stb_text_makeundo_delete(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, int where, int length); +static void stb_text_makeundo_insert(STB_TexteditState *state, int where, int length); +static void stb_text_makeundo_replace(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, int where, int old_length, int new_length); + +typedef struct +{ + float x,y; // position of n'th character + float height; // height of line + int first_char, length; // first char of row, and length + int prev_first; // first char of previous row +} StbFindState; + +// find the x/y location of a character, and remember info about the previous row in +// case we get a move-up event (for page up, we'll have to rescan) +static void stb_textedit_find_charpos(StbFindState *find, STB_TEXTEDIT_STRING *str, int n, int single_line) +{ + StbTexteditRow r; + int prev_start = 0; + int z = STB_TEXTEDIT_STRINGLEN(str); + int i=0, first; + + if (n == z) { + // if it's at the end, then find the last line -- simpler than trying to + // explicitly handle this case in the regular code + if (single_line) { + STB_TEXTEDIT_LAYOUTROW(&r, str, 0); + find->y = 0; + find->first_char = 0; + find->length = z; + find->height = r.ymax - r.ymin; + find->x = r.x1; + } else { + find->y = 0; + find->x = 0; + find->height = 1; + while (i < z) { + 
STB_TEXTEDIT_LAYOUTROW(&r, str, i); + prev_start = i; + i += r.num_chars; + } + find->first_char = i; + find->length = 0; + find->prev_first = prev_start; + } + return; + } + + // search rows to find the one that straddles character n + find->y = 0; + + for(;;) { + STB_TEXTEDIT_LAYOUTROW(&r, str, i); + if (n < i + r.num_chars) + break; + prev_start = i; + i += r.num_chars; + find->y += r.baseline_y_delta; + } + + find->first_char = first = i; + find->length = r.num_chars; + find->height = r.ymax - r.ymin; + find->prev_first = prev_start; + + // now scan to find xpos + find->x = r.x0; + for (i=0; first+i < n; ++i) + find->x += STB_TEXTEDIT_GETWIDTH(str, first, i); +} + +#define STB_TEXT_HAS_SELECTION(s) ((s)->select_start != (s)->select_end) + +// make the selection/cursor state valid if client altered the string +static void stb_textedit_clamp(STB_TEXTEDIT_STRING *str, STB_TexteditState *state) +{ + int n = STB_TEXTEDIT_STRINGLEN(str); + if (STB_TEXT_HAS_SELECTION(state)) { + if (state->select_start > n) state->select_start = n; + if (state->select_end > n) state->select_end = n; + // if clamping forced them to be equal, move the cursor to match + if (state->select_start == state->select_end) + state->cursor = state->select_start; + } + if (state->cursor > n) state->cursor = n; +} + +// delete characters while updating undo +static void stb_textedit_delete(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, int where, int len) +{ + stb_text_makeundo_delete(str, state, where, len); + STB_TEXTEDIT_DELETECHARS(str, where, len); + state->has_preferred_x = 0; +} + +// delete the section +static void stb_textedit_delete_selection(STB_TEXTEDIT_STRING *str, STB_TexteditState *state) +{ + stb_textedit_clamp(str, state); + if (STB_TEXT_HAS_SELECTION(state)) { + if (state->select_start < state->select_end) { + stb_textedit_delete(str, state, state->select_start, state->select_end - state->select_start); + state->select_end = state->cursor = state->select_start; + } else { + 
stb_textedit_delete(str, state, state->select_end, state->select_start - state->select_end); + state->select_start = state->cursor = state->select_end; + } + state->has_preferred_x = 0; + } +} + +// canoncialize the selection so start <= end +static void stb_textedit_sortselection(STB_TexteditState *state) +{ + if (state->select_end < state->select_start) { + int temp = state->select_end; + state->select_end = state->select_start; + state->select_start = temp; + } +} + +// move cursor to first character of selection +static void stb_textedit_move_to_first(STB_TexteditState *state) +{ + if (STB_TEXT_HAS_SELECTION(state)) { + stb_textedit_sortselection(state); + state->cursor = state->select_start; + state->select_end = state->select_start; + state->has_preferred_x = 0; + } +} + +// move cursor to last character of selection +static void stb_textedit_move_to_last(STB_TEXTEDIT_STRING *str, STB_TexteditState *state) +{ + if (STB_TEXT_HAS_SELECTION(state)) { + stb_textedit_sortselection(state); + stb_textedit_clamp(str, state); + state->cursor = state->select_end; + state->select_start = state->select_end; + state->has_preferred_x = 0; + } +} + +#ifdef STB_TEXTEDIT_IS_SPACE +static int is_word_boundary( STB_TEXTEDIT_STRING *str, int idx ) +{ + return idx > 0 ? 
(STB_TEXTEDIT_IS_SPACE( STB_TEXTEDIT_GETCHAR(str,idx-1) ) && !STB_TEXTEDIT_IS_SPACE( STB_TEXTEDIT_GETCHAR(str, idx) ) ) : 1; +} + +#ifndef STB_TEXTEDIT_MOVEWORDLEFT +static int stb_textedit_move_to_word_previous( STB_TEXTEDIT_STRING *str, int c ) +{ + --c; // always move at least one character + while( c >= 0 && !is_word_boundary( str, c ) ) + --c; + + if( c < 0 ) + c = 0; + + return c; +} +#define STB_TEXTEDIT_MOVEWORDLEFT stb_textedit_move_to_word_previous +#endif + +#ifndef STB_TEXTEDIT_MOVEWORDRIGHT +static int stb_textedit_move_to_word_next( STB_TEXTEDIT_STRING *str, int c ) +{ + const int len = STB_TEXTEDIT_STRINGLEN(str); + ++c; // always move at least one character + while( c < len && !is_word_boundary( str, c ) ) + ++c; + + if( c > len ) + c = len; + + return c; +} +#define STB_TEXTEDIT_MOVEWORDRIGHT stb_textedit_move_to_word_next +#endif + +#endif + +// update selection and cursor to match each other +static void stb_textedit_prep_selection_at_cursor(STB_TexteditState *state) +{ + if (!STB_TEXT_HAS_SELECTION(state)) + state->select_start = state->select_end = state->cursor; + else + state->cursor = state->select_end; +} + +// API cut: delete selection +static int stb_textedit_cut(STB_TEXTEDIT_STRING *str, STB_TexteditState *state) +{ + if (STB_TEXT_HAS_SELECTION(state)) { + stb_textedit_delete_selection(str,state); // implicitly clamps + state->has_preferred_x = 0; + return 1; + } + return 0; +} + +// API paste: replace existing selection with passed-in text +static int stb_textedit_paste_internal(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, STB_TEXTEDIT_CHARTYPE *text, int len) +{ + // if there's a selection, the paste should delete it + stb_textedit_clamp(str, state); + stb_textedit_delete_selection(str,state); + // try to insert the characters + if (STB_TEXTEDIT_INSERTCHARS(str, state->cursor, text, len)) { + stb_text_makeundo_insert(state, state->cursor, len); + state->cursor += len; + state->has_preferred_x = 0; + return 1; + } + // remove the 
undo since we didn't actually insert the characters + if (state->undostate.undo_point) + --state->undostate.undo_point; + return 0; +} + +#ifndef STB_TEXTEDIT_KEYTYPE +#define STB_TEXTEDIT_KEYTYPE int +#endif + +// API key: process a keyboard input +static void stb_textedit_key(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, STB_TEXTEDIT_KEYTYPE key) +{ +retry: + switch (key) { + default: { + int c = STB_TEXTEDIT_KEYTOTEXT(key); + if (c > 0) { + STB_TEXTEDIT_CHARTYPE ch = (STB_TEXTEDIT_CHARTYPE) c; + + // can't add newline in single-line mode + if (c == '\n' && state->single_line) + break; + + if (state->insert_mode && !STB_TEXT_HAS_SELECTION(state) && state->cursor < STB_TEXTEDIT_STRINGLEN(str)) { + stb_text_makeundo_replace(str, state, state->cursor, 1, 1); + STB_TEXTEDIT_DELETECHARS(str, state->cursor, 1); + if (STB_TEXTEDIT_INSERTCHARS(str, state->cursor, &ch, 1)) { + ++state->cursor; + state->has_preferred_x = 0; + } + } else { + stb_textedit_delete_selection(str,state); // implicitly clamps + if (STB_TEXTEDIT_INSERTCHARS(str, state->cursor, &ch, 1)) { + stb_text_makeundo_insert(state, state->cursor, 1); + ++state->cursor; + state->has_preferred_x = 0; + } + } + } + break; + } + +#ifdef STB_TEXTEDIT_K_INSERT + case STB_TEXTEDIT_K_INSERT: + state->insert_mode = !state->insert_mode; + break; +#endif + + case STB_TEXTEDIT_K_UNDO: + stb_text_undo(str, state); + state->has_preferred_x = 0; + break; + + case STB_TEXTEDIT_K_REDO: + stb_text_redo(str, state); + state->has_preferred_x = 0; + break; + + case STB_TEXTEDIT_K_LEFT: + // if currently there's a selection, move cursor to start of selection + if (STB_TEXT_HAS_SELECTION(state)) + stb_textedit_move_to_first(state); + else + if (state->cursor > 0) + --state->cursor; + state->has_preferred_x = 0; + break; + + case STB_TEXTEDIT_K_RIGHT: + // if currently there's a selection, move cursor to end of selection + if (STB_TEXT_HAS_SELECTION(state)) + stb_textedit_move_to_last(str, state); + else + ++state->cursor; + 
stb_textedit_clamp(str, state); + state->has_preferred_x = 0; + break; + + case STB_TEXTEDIT_K_LEFT | STB_TEXTEDIT_K_SHIFT: + stb_textedit_clamp(str, state); + stb_textedit_prep_selection_at_cursor(state); + // move selection left + if (state->select_end > 0) + --state->select_end; + state->cursor = state->select_end; + state->has_preferred_x = 0; + break; + +#ifdef STB_TEXTEDIT_MOVEWORDLEFT + case STB_TEXTEDIT_K_WORDLEFT: + if (STB_TEXT_HAS_SELECTION(state)) + stb_textedit_move_to_first(state); + else { + state->cursor = STB_TEXTEDIT_MOVEWORDLEFT(str, state->cursor); + stb_textedit_clamp( str, state ); + } + break; + + case STB_TEXTEDIT_K_WORDLEFT | STB_TEXTEDIT_K_SHIFT: + if( !STB_TEXT_HAS_SELECTION( state ) ) + stb_textedit_prep_selection_at_cursor(state); + + state->cursor = STB_TEXTEDIT_MOVEWORDLEFT(str, state->cursor); + state->select_end = state->cursor; + + stb_textedit_clamp( str, state ); + break; +#endif + +#ifdef STB_TEXTEDIT_MOVEWORDRIGHT + case STB_TEXTEDIT_K_WORDRIGHT: + if (STB_TEXT_HAS_SELECTION(state)) + stb_textedit_move_to_last(str, state); + else { + state->cursor = STB_TEXTEDIT_MOVEWORDRIGHT(str, state->cursor); + stb_textedit_clamp( str, state ); + } + break; + + case STB_TEXTEDIT_K_WORDRIGHT | STB_TEXTEDIT_K_SHIFT: + if( !STB_TEXT_HAS_SELECTION( state ) ) + stb_textedit_prep_selection_at_cursor(state); + + state->cursor = STB_TEXTEDIT_MOVEWORDRIGHT(str, state->cursor); + state->select_end = state->cursor; + + stb_textedit_clamp( str, state ); + break; +#endif + + case STB_TEXTEDIT_K_RIGHT | STB_TEXTEDIT_K_SHIFT: + stb_textedit_prep_selection_at_cursor(state); + // move selection right + ++state->select_end; + stb_textedit_clamp(str, state); + state->cursor = state->select_end; + state->has_preferred_x = 0; + break; + + case STB_TEXTEDIT_K_DOWN: + case STB_TEXTEDIT_K_DOWN | STB_TEXTEDIT_K_SHIFT: + case STB_TEXTEDIT_K_PGDOWN: + case STB_TEXTEDIT_K_PGDOWN | STB_TEXTEDIT_K_SHIFT: { + StbFindState find; + StbTexteditRow row; + int i, j, sel = 
(key & STB_TEXTEDIT_K_SHIFT) != 0; + int is_page = (key & ~STB_TEXTEDIT_K_SHIFT) == STB_TEXTEDIT_K_PGDOWN; + int row_count = is_page ? state->row_count_per_page : 1; + + if (!is_page && state->single_line) { + // on windows, up&down in single-line behave like left&right + key = STB_TEXTEDIT_K_RIGHT | (key & STB_TEXTEDIT_K_SHIFT); + goto retry; + } + + if (sel) + stb_textedit_prep_selection_at_cursor(state); + else if (STB_TEXT_HAS_SELECTION(state)) + stb_textedit_move_to_last(str, state); + + // compute current position of cursor point + stb_textedit_clamp(str, state); + stb_textedit_find_charpos(&find, str, state->cursor, state->single_line); + + for (j = 0; j < row_count; ++j) { + float x, goal_x = state->has_preferred_x ? state->preferred_x : find.x; + int start = find.first_char + find.length; + + if (find.length == 0) + break; + + // [DEAR IMGUI] + // going down while being on the last line shouldn't bring us to that line end + if (STB_TEXTEDIT_GETCHAR(str, find.first_char + find.length - 1) != STB_TEXTEDIT_NEWLINE) + break; + + // now find character position down a row + state->cursor = start; + STB_TEXTEDIT_LAYOUTROW(&row, str, state->cursor); + x = row.x0; + for (i=0; i < row.num_chars; ++i) { + float dx = STB_TEXTEDIT_GETWIDTH(str, start, i); + #ifdef STB_TEXTEDIT_GETWIDTH_NEWLINE + if (dx == STB_TEXTEDIT_GETWIDTH_NEWLINE) + break; + #endif + x += dx; + if (x > goal_x) + break; + ++state->cursor; + } + stb_textedit_clamp(str, state); + + state->has_preferred_x = 1; + state->preferred_x = goal_x; + + if (sel) + state->select_end = state->cursor; + + // go to next line + find.first_char = find.first_char + find.length; + find.length = row.num_chars; + } + break; + } + + case STB_TEXTEDIT_K_UP: + case STB_TEXTEDIT_K_UP | STB_TEXTEDIT_K_SHIFT: + case STB_TEXTEDIT_K_PGUP: + case STB_TEXTEDIT_K_PGUP | STB_TEXTEDIT_K_SHIFT: { + StbFindState find; + StbTexteditRow row; + int i, j, prev_scan, sel = (key & STB_TEXTEDIT_K_SHIFT) != 0; + int is_page = (key & 
~STB_TEXTEDIT_K_SHIFT) == STB_TEXTEDIT_K_PGUP; + int row_count = is_page ? state->row_count_per_page : 1; + + if (!is_page && state->single_line) { + // on windows, up&down become left&right + key = STB_TEXTEDIT_K_LEFT | (key & STB_TEXTEDIT_K_SHIFT); + goto retry; + } + + if (sel) + stb_textedit_prep_selection_at_cursor(state); + else if (STB_TEXT_HAS_SELECTION(state)) + stb_textedit_move_to_first(state); + + // compute current position of cursor point + stb_textedit_clamp(str, state); + stb_textedit_find_charpos(&find, str, state->cursor, state->single_line); + + for (j = 0; j < row_count; ++j) { + float x, goal_x = state->has_preferred_x ? state->preferred_x : find.x; + + // can only go up if there's a previous row + if (find.prev_first == find.first_char) + break; + + // now find character position up a row + state->cursor = find.prev_first; + STB_TEXTEDIT_LAYOUTROW(&row, str, state->cursor); + x = row.x0; + for (i=0; i < row.num_chars; ++i) { + float dx = STB_TEXTEDIT_GETWIDTH(str, find.prev_first, i); + #ifdef STB_TEXTEDIT_GETWIDTH_NEWLINE + if (dx == STB_TEXTEDIT_GETWIDTH_NEWLINE) + break; + #endif + x += dx; + if (x > goal_x) + break; + ++state->cursor; + } + stb_textedit_clamp(str, state); + + state->has_preferred_x = 1; + state->preferred_x = goal_x; + + if (sel) + state->select_end = state->cursor; + + // go to previous line + // (we need to scan previous line the hard way. maybe we could expose this as a new API function?) + prev_scan = find.prev_first > 0 ? 
find.prev_first - 1 : 0; + while (prev_scan > 0 && STB_TEXTEDIT_GETCHAR(str, prev_scan - 1) != STB_TEXTEDIT_NEWLINE) + --prev_scan; + find.first_char = find.prev_first; + find.prev_first = prev_scan; + } + break; + } + + case STB_TEXTEDIT_K_DELETE: + case STB_TEXTEDIT_K_DELETE | STB_TEXTEDIT_K_SHIFT: + if (STB_TEXT_HAS_SELECTION(state)) + stb_textedit_delete_selection(str, state); + else { + int n = STB_TEXTEDIT_STRINGLEN(str); + if (state->cursor < n) + stb_textedit_delete(str, state, state->cursor, 1); + } + state->has_preferred_x = 0; + break; + + case STB_TEXTEDIT_K_BACKSPACE: + case STB_TEXTEDIT_K_BACKSPACE | STB_TEXTEDIT_K_SHIFT: + if (STB_TEXT_HAS_SELECTION(state)) + stb_textedit_delete_selection(str, state); + else { + stb_textedit_clamp(str, state); + if (state->cursor > 0) { + stb_textedit_delete(str, state, state->cursor-1, 1); + --state->cursor; + } + } + state->has_preferred_x = 0; + break; + +#ifdef STB_TEXTEDIT_K_TEXTSTART2 + case STB_TEXTEDIT_K_TEXTSTART2: +#endif + case STB_TEXTEDIT_K_TEXTSTART: + state->cursor = state->select_start = state->select_end = 0; + state->has_preferred_x = 0; + break; + +#ifdef STB_TEXTEDIT_K_TEXTEND2 + case STB_TEXTEDIT_K_TEXTEND2: +#endif + case STB_TEXTEDIT_K_TEXTEND: + state->cursor = STB_TEXTEDIT_STRINGLEN(str); + state->select_start = state->select_end = 0; + state->has_preferred_x = 0; + break; + +#ifdef STB_TEXTEDIT_K_TEXTSTART2 + case STB_TEXTEDIT_K_TEXTSTART2 | STB_TEXTEDIT_K_SHIFT: +#endif + case STB_TEXTEDIT_K_TEXTSTART | STB_TEXTEDIT_K_SHIFT: + stb_textedit_prep_selection_at_cursor(state); + state->cursor = state->select_end = 0; + state->has_preferred_x = 0; + break; + +#ifdef STB_TEXTEDIT_K_TEXTEND2 + case STB_TEXTEDIT_K_TEXTEND2 | STB_TEXTEDIT_K_SHIFT: +#endif + case STB_TEXTEDIT_K_TEXTEND | STB_TEXTEDIT_K_SHIFT: + stb_textedit_prep_selection_at_cursor(state); + state->cursor = state->select_end = STB_TEXTEDIT_STRINGLEN(str); + state->has_preferred_x = 0; + break; + + +#ifdef STB_TEXTEDIT_K_LINESTART2 + 
case STB_TEXTEDIT_K_LINESTART2: +#endif + case STB_TEXTEDIT_K_LINESTART: + stb_textedit_clamp(str, state); + stb_textedit_move_to_first(state); + if (state->single_line) + state->cursor = 0; + else while (state->cursor > 0 && STB_TEXTEDIT_GETCHAR(str, state->cursor-1) != STB_TEXTEDIT_NEWLINE) + --state->cursor; + state->has_preferred_x = 0; + break; + +#ifdef STB_TEXTEDIT_K_LINEEND2 + case STB_TEXTEDIT_K_LINEEND2: +#endif + case STB_TEXTEDIT_K_LINEEND: { + int n = STB_TEXTEDIT_STRINGLEN(str); + stb_textedit_clamp(str, state); + stb_textedit_move_to_first(state); + if (state->single_line) + state->cursor = n; + else while (state->cursor < n && STB_TEXTEDIT_GETCHAR(str, state->cursor) != STB_TEXTEDIT_NEWLINE) + ++state->cursor; + state->has_preferred_x = 0; + break; + } + +#ifdef STB_TEXTEDIT_K_LINESTART2 + case STB_TEXTEDIT_K_LINESTART2 | STB_TEXTEDIT_K_SHIFT: +#endif + case STB_TEXTEDIT_K_LINESTART | STB_TEXTEDIT_K_SHIFT: + stb_textedit_clamp(str, state); + stb_textedit_prep_selection_at_cursor(state); + if (state->single_line) + state->cursor = 0; + else while (state->cursor > 0 && STB_TEXTEDIT_GETCHAR(str, state->cursor-1) != STB_TEXTEDIT_NEWLINE) + --state->cursor; + state->select_end = state->cursor; + state->has_preferred_x = 0; + break; + +#ifdef STB_TEXTEDIT_K_LINEEND2 + case STB_TEXTEDIT_K_LINEEND2 | STB_TEXTEDIT_K_SHIFT: +#endif + case STB_TEXTEDIT_K_LINEEND | STB_TEXTEDIT_K_SHIFT: { + int n = STB_TEXTEDIT_STRINGLEN(str); + stb_textedit_clamp(str, state); + stb_textedit_prep_selection_at_cursor(state); + if (state->single_line) + state->cursor = n; + else while (state->cursor < n && STB_TEXTEDIT_GETCHAR(str, state->cursor) != STB_TEXTEDIT_NEWLINE) + ++state->cursor; + state->select_end = state->cursor; + state->has_preferred_x = 0; + break; + } + } +} + +///////////////////////////////////////////////////////////////////////////// +// +// Undo processing +// +// @OPTIMIZE: the undo/redo buffer should be circular + +static void 
stb_textedit_flush_redo(StbUndoState *state)
{
   // drop every redo record: both redo cursors go back to the end of their buffers,
   // which is the value stb_text_redo tests for to decide there is nothing to redo
   state->redo_point = STB_TEXTEDIT_UNDOSTATECOUNT;
   state->redo_char_point = STB_TEXTEDIT_UNDOCHARCOUNT;
}

// discard the oldest entry in the undo list
// (records live in undo_rec[0 .. undo_point-1]; their saved characters are packed at the
// start of undo_char, so removing record 0 means sliding both arrays down)
static void stb_textedit_discard_undo(StbUndoState *state)
{
   if (state->undo_point > 0) {
      // if the 0th undo state has characters, clean those up
      if (state->undo_rec[0].char_storage >= 0) {
         int n = state->undo_rec[0].insert_length, i;
         // delete n characters from all other records
         state->undo_char_point -= n;
         STB_TEXTEDIT_memmove(state->undo_char, state->undo_char + n, (size_t) (state->undo_char_point*sizeof(STB_TEXTEDIT_CHARTYPE)));
         for (i=0; i < state->undo_point; ++i)
            if (state->undo_rec[i].char_storage >= 0)
               state->undo_rec[i].char_storage -= n; // @OPTIMIZE: get rid of char_storage and infer it
      }
      --state->undo_point;
      STB_TEXTEDIT_memmove(state->undo_rec, state->undo_rec+1, (size_t) (state->undo_point*sizeof(state->undo_rec[0])));
   }
}

// discard the oldest entry in the redo list--it's bad if this
// ever happens, but because undo & redo have to store the actual
// characters in different cases, the redo character buffer can
// fill up even though the undo buffer didn't
// (redo records live in undo_rec[redo_point .. STB_TEXTEDIT_UNDOSTATECOUNT-1]; the oldest one
// is the last slot, k)
static void stb_textedit_discard_redo(StbUndoState *state)
{
   int k = STB_TEXTEDIT_UNDOSTATECOUNT-1;

   if (state->redo_point <= k) {
      // if the k'th undo state has characters, clean those up
      if (state->undo_rec[k].char_storage >= 0) {
         int n = state->undo_rec[k].insert_length, i;
         // move the remaining redo character data to the end of the buffer
         state->redo_char_point += n;
         STB_TEXTEDIT_memmove(state->undo_char + state->redo_char_point, state->undo_char + state->redo_char_point-n, (size_t) ((STB_TEXTEDIT_UNDOCHARCOUNT - state->redo_char_point)*sizeof(STB_TEXTEDIT_CHARTYPE)));
         // adjust the position of all the other records to account for above memmove
         for (i=state->redo_point; i < k; ++i)
            if (state->undo_rec[i].char_storage >= 0)
               state->undo_rec[i].char_storage += n;
      }
      // now move all the redo records towards the end of the buffer; the first one is at 'redo_point'
      // [DEAR IMGUI]
      // move_size covers the records in [redo_point, k), i.e. everything except the discarded last
      // slot; the two asserts check that source and destination stay inside undo_rec
      size_t move_size = (size_t)((STB_TEXTEDIT_UNDOSTATECOUNT - state->redo_point - 1) * sizeof(state->undo_rec[0]));
      const char* buf_begin = (char*)state->undo_rec; (void)buf_begin;
      const char* buf_end   = (char*)state->undo_rec + sizeof(state->undo_rec); (void)buf_end;
      IM_ASSERT(((char*)(state->undo_rec + state->redo_point)) >= buf_begin);
      IM_ASSERT(((char*)(state->undo_rec + state->redo_point + 1) + move_size) <= buf_end);
      STB_TEXTEDIT_memmove(state->undo_rec + state->redo_point+1, state->undo_rec + state->redo_point, move_size);

      // now move redo_point to point to the new one
      ++state->redo_point;
   }
}

// reserve the next undo record, making room for 'numchars' saved characters
// returns NULL (after clearing the whole undo list) when numchars can never fit in undo_char
static StbUndoRecord *stb_text_create_undo_record(StbUndoState *state, int numchars)
{
   // any time we create a new undo record, we discard redo
   stb_textedit_flush_redo(state);

   // if we have no free records, we have to make room, by sliding the
   // existing records down
   if (state->undo_point == STB_TEXTEDIT_UNDOSTATECOUNT)
      stb_textedit_discard_undo(state);

   // if the characters to store won't possibly fit in the buffer, we can't undo
   if (numchars > STB_TEXTEDIT_UNDOCHARCOUNT) {
      state->undo_point = 0;
      state->undo_char_point = 0;
      return NULL;
   }

   // if we don't have enough free characters in the buffer, we have to make room
   // NOTE(review): stb_textedit_discard_undo is a no-op when undo_point == 0, so this loop relies
   // on undo_char_point being 0 whenever the record list is empty -- confirm callers keep that invariant
   while (state->undo_char_point + numchars > STB_TEXTEDIT_UNDOCHARCOUNT)
      stb_textedit_discard_undo(state);

   return &state->undo_rec[state->undo_point++];
}

// create an undo record describing how to revert an edit at 'pos':
//   insert_len  number of characters the undo will have to re-insert (their storage is reserved here)
//   delete_len  number of characters the undo will have to delete
// returns the storage the caller must fill with the insert_len characters, or NULL when there
// is nothing to store (insert_len == 0) or no record could be created
static STB_TEXTEDIT_CHARTYPE *stb_text_createundo(StbUndoState *state, int pos, int insert_len, int delete_len)
{
   StbUndoRecord *r = stb_text_create_undo_record(state, insert_len);
   if (r == NULL)
      return NULL;

   r->where = pos;
   r->insert_length = (STB_TEXTEDIT_POSITIONTYPE) insert_len;
   r->delete_length = (STB_TEXTEDIT_POSITIONTYPE) delete_len;

   if (insert_len == 0) {
      // char_storage == -1 marks a record that owns no characters
      r->char_storage = -1;
      return NULL;
   } else {
      r->char_storage = state->undo_char_point;
      state->undo_char_point += insert_len;
      return &state->undo_char[r->char_storage];
   }
}

// apply the newest undo record to the string and turn it into a redo record
// (no-op when the undo list is empty); the cursor ends up just after the re-inserted text
static void stb_text_undo(STB_TEXTEDIT_STRING *str, STB_TexteditState *state)
{
   StbUndoState *s = &state->undostate;
   StbUndoRecord u, *r;
   if (s->undo_point == 0)
      return;

   // we need to do two things: apply the undo record, and create a redo record
   u = s->undo_rec[s->undo_point-1];
   r = &s->undo_rec[s->redo_point-1];
   r->char_storage = -1;

   // the redo record is the inverse of the undo record
   r->insert_length = u.delete_length;
   r->delete_length = u.insert_length;
   r->where = u.where;

   if (u.delete_length) {
      // if the undo record says to delete characters, then the redo record will
      // need to re-insert the characters that get deleted, so we need to store
      // them.

      // there are three cases:
      //    there's enough room to store the characters
      //    characters stored for *redoing* don't leave room for redo
      //    characters stored for *undoing* don't leave room for redo
      // if the last is true, we have to bail

      if (s->undo_char_point + u.delete_length >= STB_TEXTEDIT_UNDOCHARCOUNT) {
         // the undo records take up too much character space; there's no space to store the redo characters
         r->insert_length = 0;
      } else {
         int i;

         // there's definitely room to store the characters eventually
         while (s->undo_char_point + u.delete_length > s->redo_char_point) {
            // should never happen:
            if (s->redo_point == STB_TEXTEDIT_UNDOSTATECOUNT)
               return;
            // there's currently not enough room, so discard a redo record
            stb_textedit_discard_redo(s);
         }
         // discarding redo records moves redo_point, so re-fetch the slot for the new record
         r = &s->undo_rec[s->redo_point-1];

         // redo characters are stored from the end of undo_char, growing downwards
         r->char_storage = s->redo_char_point - u.delete_length;
         s->redo_char_point = s->redo_char_point - u.delete_length;

         // now save the characters
         for (i=0; i < u.delete_length; ++i)
            s->undo_char[r->char_storage + i] = STB_TEXTEDIT_GETCHAR(str, u.where + i);
      }

      // now we can carry out the deletion
      STB_TEXTEDIT_DELETECHARS(str, u.where, u.delete_length);
   }

   // check type of recorded action:
   if (u.insert_length) {
      // easy case: was a deletion, so we need to insert n characters
      STB_TEXTEDIT_INSERTCHARS(str, u.where, &s->undo_char[u.char_storage], u.insert_length);
      s->undo_char_point -= u.insert_length;
   }

   state->cursor = u.where + u.insert_length;

   s->undo_point--;
   s->redo_point--;
}

// apply the newest redo record to the string and turn it back into an undo record
// (no-op when the redo list is empty); the cursor ends up just after the re-inserted text
static void stb_text_redo(STB_TEXTEDIT_STRING *str, STB_TexteditState *state)
{
   StbUndoState *s = &state->undostate;
   StbUndoRecord *u, r;
   if (s->redo_point == STB_TEXTEDIT_UNDOSTATECOUNT)
      return;

   // we need to do two things: apply the redo record, and create an undo record
   u = &s->undo_rec[s->undo_point];
   r = s->undo_rec[s->redo_point];

   // we KNOW there must be room for the undo record, because the redo record
   // was derived from an undo record

   u->delete_length = r.insert_length;
   u->insert_length = r.delete_length;
   u->where = r.where;
   u->char_storage = -1;

   if (r.delete_length) {
      // the redo record requires us to delete characters, so the undo record
      // needs to store the characters

      if (s->undo_char_point + u->insert_length > s->redo_char_point) {
         // no room to save the characters: the new undo record is emptied (both lengths 0)
         u->insert_length = 0;
         u->delete_length = 0;
      } else {
         int i;
         u->char_storage = s->undo_char_point;
         s->undo_char_point = s->undo_char_point + u->insert_length;

         // now save the characters
         for (i=0; i < u->insert_length; ++i)
            s->undo_char[u->char_storage + i] = STB_TEXTEDIT_GETCHAR(str, u->where + i);
      }

      STB_TEXTEDIT_DELETECHARS(str, r.where, r.delete_length);
   }

   if (r.insert_length) {
      // easy case: need to insert n characters
      STB_TEXTEDIT_INSERTCHARS(str, r.where, &s->undo_char[r.char_storage], r.insert_length);
      s->redo_char_point += r.insert_length;
   }

   state->cursor = r.where + r.insert_length;

   s->undo_point++;
   s->redo_point++;
}

// record an insertion of 'length' characters at 'where'
// (undoing it only has to delete them, so no characters are saved)
static void stb_text_makeundo_insert(STB_TexteditState *state, int where, int length)
{
   stb_text_createundo(&state->undostate, where, 0, length);
}

// record a deletion of 'length' characters at 'where'; must be called BEFORE the characters
// are removed from str, because they are copied into the undo buffer here
static void stb_text_makeundo_delete(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, int where, int length)
{
   int i;
   STB_TEXTEDIT_CHARTYPE *p = stb_text_createundo(&state->undostate, where, length, 0);
   if (p) {
      for (i=0; i < length; ++i)
         p[i] = STB_TEXTEDIT_GETCHAR(str, where+i);
   }
}

// record the replacement of old_length characters at 'where' by new_length new ones; must be
// called BEFORE str is modified, because the old characters are copied into the undo buffer here
static void stb_text_makeundo_replace(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, int where, int old_length, int new_length)
{
   int i;
   STB_TEXTEDIT_CHARTYPE *p = stb_text_createundo(&state->undostate, where, old_length, new_length);
   if (p) {
      for (i=0; i < old_length; ++i)
         p[i] = STB_TEXTEDIT_GETCHAR(str, where+i);
   }
}

// reset the state to default
// (empty undo and redo lists, cursor and selection at 0, insert mode off, row_count_per_page 0)
static void stb_textedit_clear_state(STB_TexteditState *state, int is_single_line)
{
   state->undostate.undo_point = 0;
   state->undostate.undo_char_point = 0;
   state->undostate.redo_point = STB_TEXTEDIT_UNDOSTATECOUNT;
   state->undostate.redo_char_point = STB_TEXTEDIT_UNDOCHARCOUNT;
   state->select_end = state->select_start = 0;
   state->cursor = 0;
   state->has_preferred_x = 0;
   state->preferred_x = 0;
   state->cursor_at_end_of_line = 0;
   state->initialized = 1;
   state->single_line = (unsigned char) is_single_line;
   state->insert_mode = 0;
   state->row_count_per_page = 0;
}

// API initialize
// (currently identical to stb_textedit_clear_state)
static void stb_textedit_initialize_state(STB_TexteditState *state, int is_single_line)
{
   stb_textedit_clear_state(state, is_single_line);
}

// the cast below deliberately drops const, so silence -Wcast-qual around it
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-qual"
#endif

// API paste: const-correct wrapper that forwards to stb_textedit_paste_internal
// returns 1 if the text was inserted, 0 otherwise
static int stb_textedit_paste(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, STB_TEXTEDIT_CHARTYPE const *ctext, int len)
{
   return stb_textedit_paste_internal(str, state, (STB_TEXTEDIT_CHARTYPE *) ctext, len);
}

#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif

#endif//STB_TEXTEDIT_IMPLEMENTATION

/*
+------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. 
We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/cpp-projects/3d-engine/imgui/imstb_truetype.h b/cpp-projects/3d-engine/imgui/imstb_truetype.h new file mode 100644 index 0000000..b4bdbd8 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/imstb_truetype.h @@ -0,0 +1,4903 @@ +// [DEAR IMGUI] +// This is a slightly modified version of stb_truetype.h 1.20. +// Mostly fixing for compiler and static analyzer warnings. +// Grep for [DEAR IMGUI] to find the changes. + +// stb_truetype.h - v1.20 - public domain +// authored from 2009-2016 by Sean Barrett / RAD Game Tools +// +// This library processes TrueType files: +// parse files +// extract glyph metrics +// extract glyph shapes +// render glyphs to one-channel bitmaps with antialiasing (box filter) +// render glyphs to one-channel SDF bitmaps (signed-distance field/function) +// +// Todo: +// non-MS cmaps +// crashproof on bad data +// hinting? (no longer patented) +// cleartype-style AA? +// optimize: use simple memory allocator for intermediates +// optimize: build edge-list directly from curves +// optimize: rasterize directly from curves? 
+// +// ADDITIONAL CONTRIBUTORS +// +// Mikko Mononen: compound shape support, more cmap formats +// Tor Andersson: kerning, subpixel rendering +// Dougall Johnson: OpenType / Type 2 font handling +// Daniel Ribeiro Maciel: basic GPOS-based kerning +// +// Misc other: +// Ryan Gordon +// Simon Glass +// github:IntellectualKitty +// Imanol Celaya +// Daniel Ribeiro Maciel +// +// Bug/warning reports/fixes: +// "Zer" on mollyrocket Fabian "ryg" Giesen +// Cass Everitt Martins Mozeiko +// stoiko (Haemimont Games) Cap Petschulat +// Brian Hook Omar Cornut +// Walter van Niftrik github:aloucks +// David Gow Peter LaValle +// David Given Sergey Popov +// Ivan-Assen Ivanov Giumo X. Clanjor +// Anthony Pesch Higor Euripedes +// Johan Duparc Thomas Fields +// Hou Qiming Derek Vinyard +// Rob Loach Cort Stratton +// Kenney Phillis Jr. github:oyvindjam +// Brian Costabile github:vassvik +// +// VERSION HISTORY +// +// 1.20 (2019-02-07) PackFontRange skips missing codepoints; GetScaleFontVMetrics() +// 1.19 (2018-02-11) GPOS kerning, STBTT_fmod +// 1.18 (2018-01-29) add missing function +// 1.17 (2017-07-23) make more arguments const; doc fix +// 1.16 (2017-07-12) SDF support +// 1.15 (2017-03-03) make more arguments const +// 1.14 (2017-01-16) num-fonts-in-TTC function +// 1.13 (2017-01-02) support OpenType fonts, certain Apple fonts +// 1.12 (2016-10-25) suppress warnings about casting away const with -Wcast-qual +// 1.11 (2016-04-02) fix unused-variable warning +// 1.10 (2016-04-02) user-defined fabs(); rare memory leak; remove duplicate typedef +// 1.09 (2016-01-16) warning fix; avoid crash on outofmem; use allocation userdata properly +// 1.08 (2015-09-13) document stbtt_Rasterize(); fixes for vertical & horizontal edges +// 1.07 (2015-08-01) allow PackFontRanges to accept arrays of sparse codepoints; +// variant PackFontRanges to pack and render in separate phases; +// fix stbtt_GetFontOffsetForIndex (never worked for non-0 input?); +// fixed an assert() bug in the new 
rasterizer +// replace assert() with STBTT_assert() in new rasterizer +// +// Full history can be found at the end of this file. +// +// LICENSE +// +// See end of file for license information. +// +// USAGE +// +// Include this file in whatever places need to refer to it. In ONE C/C++ +// file, write: +// #define STB_TRUETYPE_IMPLEMENTATION +// before the #include of this file. This expands out the actual +// implementation into that C/C++ file. +// +// To make the implementation private to the file that generates the implementation, +// #define STBTT_STATIC +// +// Simple 3D API (don't ship this, but it's fine for tools and quick start) +// stbtt_BakeFontBitmap() -- bake a font to a bitmap for use as texture +// stbtt_GetBakedQuad() -- compute quad to draw for a given char +// +// Improved 3D API (more shippable): +// #include "stb_rect_pack.h" -- optional, but you really want it +// stbtt_PackBegin() +// stbtt_PackSetOversampling() -- for improved quality on small fonts +// stbtt_PackFontRanges() -- pack and renders +// stbtt_PackEnd() +// stbtt_GetPackedQuad() +// +// "Load" a font file from a memory buffer (you have to keep the buffer loaded) +// stbtt_InitFont() +// stbtt_GetFontOffsetForIndex() -- indexing for TTC font collections +// stbtt_GetNumberOfFonts() -- number of fonts for TTC font collections +// +// Render a unicode codepoint to a bitmap +// stbtt_GetCodepointBitmap() -- allocates and returns a bitmap +// stbtt_MakeCodepointBitmap() -- renders into bitmap you provide +// stbtt_GetCodepointBitmapBox() -- how big the bitmap must be +// +// Character advance/positioning +// stbtt_GetCodepointHMetrics() +// stbtt_GetFontVMetrics() +// stbtt_GetFontVMetricsOS2() +// stbtt_GetCodepointKernAdvance() +// +// Starting with version 1.06, the rasterizer was replaced with a new, +// faster and generally-more-precise rasterizer. 
The new rasterizer more +// accurately measures pixel coverage for anti-aliasing, except in the case +// where multiple shapes overlap, in which case it overestimates the AA pixel +// coverage. Thus, anti-aliasing of intersecting shapes may look wrong. If +// this turns out to be a problem, you can re-enable the old rasterizer with +// #define STBTT_RASTERIZER_VERSION 1 +// which will incur about a 15% speed hit. +// +// ADDITIONAL DOCUMENTATION +// +// Immediately after this block comment are a series of sample programs. +// +// After the sample programs is the "header file" section. This section +// includes documentation for each API function. +// +// Some important concepts to understand to use this library: +// +// Codepoint +// Characters are defined by unicode codepoints, e.g. 65 is +// uppercase A, 231 is lowercase c with a cedilla, 0x307e is +// the hiragana for "ma". +// +// Glyph +// A visual character shape (every codepoint is rendered as +// some glyph) +// +// Glyph index +// A font-specific integer ID representing a glyph +// +// Baseline +// Glyph shapes are defined relative to a baseline, which is the +// bottom of uppercase characters. Characters extend both above +// and below the baseline. +// +// Current Point +// As you draw text to the screen, you keep track of a "current point" +// which is the origin of each character. The current point's vertical +// position is the baseline. Even "baked fonts" use this model. +// +// Vertical Font Metrics +// The vertical qualities of the font, used to vertically position +// and space the characters. See docs for stbtt_GetFontVMetrics. +// +// Font Size in Pixels or Points +// The preferred interface for specifying font sizes in stb_truetype +// is to specify how tall the font's vertical extent should be in pixels. +// If that sounds good enough, skip the next paragraph.
+// +// Most font APIs instead use "points", which are a common typographic +// measurement for describing font size, defined as 72 points per inch. +// stb_truetype provides a point API for compatibility. However, true +// "per inch" conventions don't make much sense on computer displays +// since different monitors have different number of pixels per +// inch. For example, Windows traditionally uses a convention that +// there are 96 pixels per inch, thus making 'inch' measurements have +// nothing to do with inches, and thus effectively defining a point to +// be 1.333 pixels. Additionally, the TrueType font data provides +// an explicit scale factor to scale a given font's glyphs to points, +// but the author has observed that this scale factor is often wrong +// for non-commercial fonts, thus making fonts scaled in points +// according to the TrueType spec incoherently sized in practice. +// +// DETAILED USAGE: +// +// Scale: +// Select how high you want the font to be, in points or pixels. +// Call ScaleForPixelHeight or ScaleForMappingEmToPixels to compute +// a scale factor SF that will be used by all other functions. +// +// Baseline: +// You need to select a y-coordinate that is the baseline of where +// your text will appear. Call GetFontBoundingBox to get the baseline-relative +// bounding box for all characters. SF*-y0 will be the distance in pixels +// that the worst-case character could extend above the baseline, so if +// you want the top edge of characters to appear at the top of the +// screen where y=0, then you would set the baseline to SF*-y0. +// +// Current point: +// Set the current point where the first character will appear. The +// first character could extend left of the current point; this is font +// dependent. 
You can either choose a current point that is the leftmost +// point and hope, or add some padding, or check the bounding box or +// left-side-bearing of the first character to be displayed and set +// the current point based on that. +// +// Displaying a character: +// Compute the bounding box of the character. It will contain signed values +// relative to . I.e. if it returns x0,y0,x1,y1, +// then the character should be displayed in the rectangle from +// to = 32 && *text < 128) { + stbtt_aligned_quad q; + stbtt_GetBakedQuad(cdata, 512,512, *text-32, &x,&y,&q,1);//1=opengl & d3d10+,0=d3d9 + glTexCoord2f(q.s0,q.t1); glVertex2f(q.x0,q.y0); + glTexCoord2f(q.s1,q.t1); glVertex2f(q.x1,q.y0); + glTexCoord2f(q.s1,q.t0); glVertex2f(q.x1,q.y1); + glTexCoord2f(q.s0,q.t0); glVertex2f(q.x0,q.y1); + } + ++text; + } + glEnd(); +} +#endif +// +// +////////////////////////////////////////////////////////////////////////////// +// +// Complete program (this compiles): get a single bitmap, print as ASCII art +// +#if 0 +#include +#define STB_TRUETYPE_IMPLEMENTATION // force following include to generate implementation +#include "stb_truetype.h" + +char ttf_buffer[1<<25]; + +int main(int argc, char **argv) +{ + stbtt_fontinfo font; + unsigned char *bitmap; + int w,h,i,j,c = (argc > 1 ? atoi(argv[1]) : 'a'), s = (argc > 2 ? atoi(argv[2]) : 20); + + fread(ttf_buffer, 1, 1<<25, fopen(argc > 3 ? argv[3] : "c:/windows/fonts/arialbd.ttf", "rb")); + + stbtt_InitFont(&font, ttf_buffer, stbtt_GetFontOffsetForIndex(ttf_buffer,0)); + bitmap = stbtt_GetCodepointBitmap(&font, 0,stbtt_ScaleForPixelHeight(&font, s), c, &w, &h, 0,0); + + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) + putchar(" .:ioVM@"[bitmap[j*w+i]>>5]); + putchar('\n'); + } + return 0; +} +#endif +// +// Output: +// +// .ii. +// @@@@@@. +// V@Mio@@o +// :i. V@V +// :oM@@M +// :@@@MM@M +// @@o o@M +// :@@. M@M +// @@@o@@@@ +// :M@@V:@@. 
+// +////////////////////////////////////////////////////////////////////////////// +// +// Complete program: print "Hello World!" banner, with bugs +// +#if 0 +char buffer[24<<20]; +unsigned char screen[20][79]; + +int main(int arg, char **argv) +{ + stbtt_fontinfo font; + int i,j,ascent,baseline,ch=0; + float scale, xpos=2; // leave a little padding in case the character extends left + char *text = "Heljo World!"; // intentionally misspelled to show 'lj' brokenness + + fread(buffer, 1, 1000000, fopen("c:/windows/fonts/arialbd.ttf", "rb")); + stbtt_InitFont(&font, buffer, 0); + + scale = stbtt_ScaleForPixelHeight(&font, 15); + stbtt_GetFontVMetrics(&font, &ascent,0,0); + baseline = (int) (ascent*scale); + + while (text[ch]) { + int advance,lsb,x0,y0,x1,y1; + float x_shift = xpos - (float) floor(xpos); + stbtt_GetCodepointHMetrics(&font, text[ch], &advance, &lsb); + stbtt_GetCodepointBitmapBoxSubpixel(&font, text[ch], scale,scale,x_shift,0, &x0,&y0,&x1,&y1); + stbtt_MakeCodepointBitmapSubpixel(&font, &screen[baseline + y0][(int) xpos + x0], x1-x0,y1-y0, 79, scale,scale,x_shift,0, text[ch]); + // note that this stomps the old data, so where character boxes overlap (e.g. 'lj') it's wrong + // because this API is really for baking character bitmaps into textures. 
if you want to render + // a sequence of characters, you really need to render each bitmap to a temp buffer, then + // "alpha blend" that into the working buffer + xpos += (advance * scale); + if (text[ch+1]) + xpos += scale*stbtt_GetCodepointKernAdvance(&font, text[ch],text[ch+1]); + ++ch; + } + + for (j=0; j < 20; ++j) { + for (i=0; i < 78; ++i) + putchar(" .:ioVM@"[screen[j][i]>>5]); + putchar('\n'); + } + + return 0; +} +#endif + + +////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// +//// +//// INTEGRATION WITH YOUR CODEBASE +//// +//// The following sections allow you to supply alternate definitions +//// of C library functions used by stb_truetype, e.g. if you don't +//// link with the C runtime library. + +#ifdef STB_TRUETYPE_IMPLEMENTATION + // #define your own (u)stbtt_int8/16/32 before including to override this + #ifndef stbtt_uint8 + typedef unsigned char stbtt_uint8; + typedef signed char stbtt_int8; + typedef unsigned short stbtt_uint16; + typedef signed short stbtt_int16; + typedef unsigned int stbtt_uint32; + typedef signed int stbtt_int32; + #endif + + typedef char stbtt__check_size32[sizeof(stbtt_int32)==4 ? 1 : -1]; + typedef char stbtt__check_size16[sizeof(stbtt_int16)==2 ? 1 : -1]; + + // e.g. 
#define your own STBTT_ifloor/STBTT_iceil() to avoid math.h + #ifndef STBTT_ifloor + #include <math.h> + #define STBTT_ifloor(x) ((int) floor(x)) + #define STBTT_iceil(x) ((int) ceil(x)) + #endif + + #ifndef STBTT_sqrt + #include <math.h> + #define STBTT_sqrt(x) sqrt(x) + #define STBTT_pow(x,y) pow(x,y) + #endif + + #ifndef STBTT_fmod + #include <math.h> + #define STBTT_fmod(x,y) fmod(x,y) + #endif + + #ifndef STBTT_cos + #include <math.h> + #define STBTT_cos(x) cos(x) + #define STBTT_acos(x) acos(x) + #endif + + #ifndef STBTT_fabs + #include <math.h> + #define STBTT_fabs(x) fabs(x) + #endif + + // #define your own functions "STBTT_malloc" / "STBTT_free" to avoid malloc.h + #ifndef STBTT_malloc + #include <stdlib.h> + #define STBTT_malloc(x,u) ((void)(u),malloc(x)) + #define STBTT_free(x,u) ((void)(u),free(x)) + #endif + + #ifndef STBTT_assert + #include <assert.h> + #define STBTT_assert(x) assert(x) + #endif + + #ifndef STBTT_strlen + #include <string.h> + #define STBTT_strlen(x) strlen(x) + #endif + + #ifndef STBTT_memcpy + #include <string.h> + #define STBTT_memcpy memcpy + #define STBTT_memset memset + #endif +#endif + +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +//// +//// INTERFACE +//// +//// + +#ifndef __STB_INCLUDE_STB_TRUETYPE_H__ +#define __STB_INCLUDE_STB_TRUETYPE_H__ + +#ifdef STBTT_STATIC +#define STBTT_DEF static +#else +#define STBTT_DEF extern +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// private structure +typedef struct +{ + unsigned char *data; + int cursor; + int size; +} stbtt__buf; + +////////////////////////////////////////////////////////////////////////////// +// +// TEXTURE BAKING API +// +// If you use this API, you only have to call two functions ever.
+// + +typedef struct +{ + unsigned short x0,y0,x1,y1; // coordinates of bbox in bitmap + float xoff,yoff,xadvance; +} stbtt_bakedchar; + +STBTT_DEF int stbtt_BakeFontBitmap(const unsigned char *data, int offset, // font location (use offset=0 for plain .ttf) + float pixel_height, // height of font in pixels + unsigned char *pixels, int pw, int ph, // bitmap to be filled in + int first_char, int num_chars, // characters to bake + stbtt_bakedchar *chardata); // you allocate this, it's num_chars long +// if return is positive, the first unused row of the bitmap +// if return is negative, returns the negative of the number of characters that fit +// if return is 0, no characters fit and no rows were used +// This uses a very crappy packing. + +typedef struct +{ + float x0,y0,s0,t0; // top-left + float x1,y1,s1,t1; // bottom-right +} stbtt_aligned_quad; + +STBTT_DEF void stbtt_GetBakedQuad(const stbtt_bakedchar *chardata, int pw, int ph, // same data as above + int char_index, // character to display + float *xpos, float *ypos, // pointers to current position in screen pixel space + stbtt_aligned_quad *q, // output: quad to draw + int opengl_fillrule); // true if opengl fill rule; false if DX9 or earlier +// Call GetBakedQuad with char_index = 'character - first_char', and it +// creates the quad you need to draw and advances the current position. +// +// The coordinate system used assumes y increases downwards. +// +// Characters will extend both above and below the current position; +// see discussion of "BASELINE" above. +// +// It's inefficient; you might want to c&p it and optimize it. + +STBTT_DEF void stbtt_GetScaledFontVMetrics(const unsigned char *fontdata, int index, float size, float *ascent, float *descent, float *lineGap); +// Query the font vertical metrics without having to create a font first. 
+ + +////////////////////////////////////////////////////////////////////////////// +// +// NEW TEXTURE BAKING API +// +// This provides options for packing multiple fonts into one atlas, not +// perfectly but better than nothing. + +typedef struct +{ + unsigned short x0,y0,x1,y1; // coordinates of bbox in bitmap + float xoff,yoff,xadvance; + float xoff2,yoff2; +} stbtt_packedchar; + +typedef struct stbtt_pack_context stbtt_pack_context; +typedef struct stbtt_fontinfo stbtt_fontinfo; +#ifndef STB_RECT_PACK_VERSION +typedef struct stbrp_rect stbrp_rect; +#endif + +STBTT_DEF int stbtt_PackBegin(stbtt_pack_context *spc, unsigned char *pixels, int width, int height, int stride_in_bytes, int padding, void *alloc_context); +// Initializes a packing context stored in the passed-in stbtt_pack_context. +// Future calls using this context will pack characters into the bitmap passed +// in here: a 1-channel bitmap that is width * height. stride_in_bytes is +// the distance from one row to the next (or 0 to mean they are packed tightly +// together). "padding" is the amount of padding to leave between each +// character (normally you want '1' for bitmaps you'll use as textures with +// bilinear filtering). +// +// Returns 0 on failure, 1 on success. + +STBTT_DEF void stbtt_PackEnd (stbtt_pack_context *spc); +// Cleans up the packing context and frees all memory. + +#define STBTT_POINT_SIZE(x) (-(x)) + +STBTT_DEF int stbtt_PackFontRange(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, float font_size, + int first_unicode_char_in_range, int num_chars_in_range, stbtt_packedchar *chardata_for_range); +// Creates character bitmaps from the font_index'th font found in fontdata (use +// font_index=0 if you don't know what that is). It creates num_chars_in_range +// bitmaps for characters with unicode values starting at first_unicode_char_in_range +// and increasing. 
Data for how to render them is stored in chardata_for_range; +// pass these to stbtt_GetPackedQuad to get back renderable quads. +// +// font_size is the full height of the character from ascender to descender, +// as computed by stbtt_ScaleForPixelHeight. To use a point size as computed +// by stbtt_ScaleForMappingEmToPixels, wrap the point size in STBTT_POINT_SIZE() +// and pass that result as 'font_size': +// ..., 20 , ... // font max minus min y is 20 pixels tall +// ..., STBTT_POINT_SIZE(20), ... // 'M' is 20 pixels tall + +typedef struct +{ + float font_size; + int first_unicode_codepoint_in_range; // if non-zero, then the chars are continuous, and this is the first codepoint + int *array_of_unicode_codepoints; // if non-zero, then this is an array of unicode codepoints + int num_chars; + stbtt_packedchar *chardata_for_range; // output + unsigned char h_oversample, v_oversample; // don't set these, they're used internally +} stbtt_pack_range; + +STBTT_DEF int stbtt_PackFontRanges(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, stbtt_pack_range *ranges, int num_ranges); +// Creates character bitmaps from multiple ranges of characters stored in +// ranges. This will usually create a better-packed bitmap than multiple +// calls to stbtt_PackFontRange. Note that you can call this multiple +// times within a single PackBegin/PackEnd. + +STBTT_DEF void stbtt_PackSetOversampling(stbtt_pack_context *spc, unsigned int h_oversample, unsigned int v_oversample); +// Oversampling a font increases the quality by allowing higher-quality subpixel +// positioning, and is especially valuable at smaller text sizes. +// +// This function sets the amount of oversampling for all following calls to +// stbtt_PackFontRange(s) or stbtt_PackFontRangesGatherRects for a given +// pack context. The default (no oversampling) is achieved by h_oversample=1 +// and v_oversample=1. 
The total number of pixels required is +// h_oversample*v_oversample larger than the default; for example, 2x2 +// oversampling requires 4x the storage of 1x1. For best results, render +// oversampled textures with bilinear filtering. Look at the readme in +// stb/tests/oversample for information about oversampled fonts +// +// To use with PackFontRangesGather etc., you must set it before the +// call to PackFontRangesGatherRects. + +STBTT_DEF void stbtt_PackSetSkipMissingCodepoints(stbtt_pack_context *spc, int skip); +// If skip != 0, this tells stb_truetype to skip any codepoints for which +// there is no corresponding glyph. If skip=0, which is the default, then +// codepoints without a glyph receive the font's "missing character" glyph, +// typically an empty box by convention. + +STBTT_DEF void stbtt_GetPackedQuad(const stbtt_packedchar *chardata, int pw, int ph, // same data as above + int char_index, // character to display + float *xpos, float *ypos, // pointers to current position in screen pixel space + stbtt_aligned_quad *q, // output: quad to draw + int align_to_integer); + +STBTT_DEF int stbtt_PackFontRangesGatherRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects); +STBTT_DEF void stbtt_PackFontRangesPackRects(stbtt_pack_context *spc, stbrp_rect *rects, int num_rects); +STBTT_DEF int stbtt_PackFontRangesRenderIntoRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects); +// Calling these functions in sequence is roughly equivalent to calling +// stbtt_PackFontRanges(). If you want more control over the packing of multiple +// fonts, or if you want to pack custom data into a font texture, take a look +// at the source of stbtt_PackFontRanges() and create a custom version +// using these functions, e.g.
call GatherRects multiple times, +// building up a single array of rects, then call PackRects once, +// then call RenderIntoRects repeatedly. This may result in a +// better packing than calling PackFontRanges multiple times +// (or it may not). + +// this is an opaque structure that you shouldn't mess with which holds +// all the context needed from PackBegin to PackEnd. +struct stbtt_pack_context { + void *user_allocator_context; + void *pack_info; + int width; + int height; + int stride_in_bytes; + int padding; + int skip_missing; + unsigned int h_oversample, v_oversample; + unsigned char *pixels; + void *nodes; +}; + +////////////////////////////////////////////////////////////////////////////// +// +// FONT LOADING +// +// + +STBTT_DEF int stbtt_GetNumberOfFonts(const unsigned char *data); +// This function will determine the number of fonts in a font file. TrueType +// collection (.ttc) files may contain multiple fonts, while TrueType font +// (.ttf) files only contain one font. The number of fonts can be used for +// indexing with stbtt_GetFontOffsetForIndex, where the index is between zero and one +// less than the total fonts. If an error occurs, -1 is returned. + +STBTT_DEF int stbtt_GetFontOffsetForIndex(const unsigned char *data, int index); +// Each .ttf/.ttc file may have more than one font. Each font has a sequential +// index number starting from 0. Call this function to get the font offset for +// a given index; it returns -1 if the index is out of range. A regular .ttf +// file will only define one font and it will always be at offset 0, so it will +// return '0' for index 0, and -1 for all other indices. + +// The following structure is defined publicly so you can declare one on +// the stack or as a global or etc, but you should treat it as opaque.
+struct stbtt_fontinfo +{ + void * userdata; + unsigned char * data; // pointer to .ttf file + int fontstart; // offset of start of font + + int numGlyphs; // number of glyphs, needed for range checking + + int loca,head,glyf,hhea,hmtx,kern,gpos; // table locations as offset from start of .ttf + int index_map; // a cmap mapping for our chosen character encoding + int indexToLocFormat; // format needed to map from glyph index to glyph + + stbtt__buf cff; // cff font data + stbtt__buf charstrings; // the charstring index + stbtt__buf gsubrs; // global charstring subroutines index + stbtt__buf subrs; // private charstring subroutines index + stbtt__buf fontdicts; // array of font dicts + stbtt__buf fdselect; // map from glyph to fontdict +}; + +STBTT_DEF int stbtt_InitFont(stbtt_fontinfo *info, const unsigned char *data, int offset); +// Given an offset into the file that defines a font, this function builds +// the necessary cached info for the rest of the system. You must allocate +// the stbtt_fontinfo yourself, and stbtt_InitFont will fill it out. You don't +// need to do anything special to free it, because the contents are pure +// value data with no additional data structures. Returns 0 on failure. + + +////////////////////////////////////////////////////////////////////////////// +// +// CHARACTER TO GLYPH-INDEX CONVERSION + +STBTT_DEF int stbtt_FindGlyphIndex(const stbtt_fontinfo *info, int unicode_codepoint); +// If you're going to perform multiple operations on the same character +// and you want a speed-up, call this function with the character you're +// going to process, then use glyph-based functions instead of the +// codepoint-based functions. +// Returns 0 if the character codepoint is not defined in the font.
+ + +////////////////////////////////////////////////////////////////////////////// +// +// CHARACTER PROPERTIES +// + +STBTT_DEF float stbtt_ScaleForPixelHeight(const stbtt_fontinfo *info, float pixels); +// computes a scale factor to produce a font whose "height" is 'pixels' tall. +// Height is measured as the distance from the highest ascender to the lowest +// descender; in other words, it's equivalent to calling stbtt_GetFontVMetrics +// and computing: +// scale = pixels / (ascent - descent) +// so if you prefer to measure height by the ascent only, use a similar calculation. + +STBTT_DEF float stbtt_ScaleForMappingEmToPixels(const stbtt_fontinfo *info, float pixels); +// computes a scale factor to produce a font whose EM size is mapped to +// 'pixels' tall. This is probably what traditional APIs compute, but +// I'm not positive. + +STBTT_DEF void stbtt_GetFontVMetrics(const stbtt_fontinfo *info, int *ascent, int *descent, int *lineGap); +// ascent is the coordinate above the baseline the font extends; descent +// is the coordinate below the baseline the font extends (i.e. it is typically negative) +// lineGap is the spacing between one row's descent and the next row's ascent... +// so you should advance the vertical position by "*ascent - *descent + *lineGap" +// these are expressed in unscaled coordinates, so you must multiply by +// the scale factor for a given size + +STBTT_DEF int stbtt_GetFontVMetricsOS2(const stbtt_fontinfo *info, int *typoAscent, int *typoDescent, int *typoLineGap); +// analogous to GetFontVMetrics, but returns the "typographic" values from the OS/2 +// table (specific to MS/Windows TTF files). +// +// Returns 1 on success (table present), 0 on failure. 
+ +STBTT_DEF void stbtt_GetFontBoundingBox(const stbtt_fontinfo *info, int *x0, int *y0, int *x1, int *y1); +// the bounding box around all possible characters + +STBTT_DEF void stbtt_GetCodepointHMetrics(const stbtt_fontinfo *info, int codepoint, int *advanceWidth, int *leftSideBearing); +// leftSideBearing is the offset from the current horizontal position to the left edge of the character +// advanceWidth is the offset from the current horizontal position to the next horizontal position +// these are expressed in unscaled coordinates + +STBTT_DEF int stbtt_GetCodepointKernAdvance(const stbtt_fontinfo *info, int ch1, int ch2); +// an additional amount to add to the 'advance' value between ch1 and ch2 + +STBTT_DEF int stbtt_GetCodepointBox(const stbtt_fontinfo *info, int codepoint, int *x0, int *y0, int *x1, int *y1); +// Gets the bounding box of the visible part of the glyph, in unscaled coordinates + +STBTT_DEF void stbtt_GetGlyphHMetrics(const stbtt_fontinfo *info, int glyph_index, int *advanceWidth, int *leftSideBearing); +STBTT_DEF int stbtt_GetGlyphKernAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2); +STBTT_DEF int stbtt_GetGlyphBox(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1); +// as above, but takes one or more glyph indices for greater efficiency + + +////////////////////////////////////////////////////////////////////////////// +// +// GLYPH SHAPES (you probably don't need these, but they have to go before +// the bitmaps for C declaration-order reasons) +// + +#ifndef STBTT_vmove // you can predefine these to use different values (but why?) 
+ enum { + STBTT_vmove=1, + STBTT_vline, + STBTT_vcurve, + STBTT_vcubic + }; +#endif + +#ifndef stbtt_vertex // you can predefine this to use different values + // (we share this with other code at RAD) + #define stbtt_vertex_type short // can't use stbtt_int16 because that's not visible in the header file + typedef struct + { + stbtt_vertex_type x,y,cx,cy,cx1,cy1; + unsigned char type,padding; + } stbtt_vertex; +#endif + +STBTT_DEF int stbtt_IsGlyphEmpty(const stbtt_fontinfo *info, int glyph_index); +// returns non-zero if nothing is drawn for this glyph + +STBTT_DEF int stbtt_GetCodepointShape(const stbtt_fontinfo *info, int unicode_codepoint, stbtt_vertex **vertices); +STBTT_DEF int stbtt_GetGlyphShape(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **vertices); +// returns # of vertices and fills *vertices with the pointer to them +// these are expressed in "unscaled" coordinates +// +// The shape is a series of contours. Each one starts with +// a STBTT_vmove, then consists of a series of mixed +// STBTT_vline and STBTT_vcurve segments. A vline +// draws a line from previous endpoint to its x,y; a vcurve +// draws a quadratic bezier from previous endpoint to +// its x,y, using cx,cy as the bezier control point. + +STBTT_DEF void stbtt_FreeShape(const stbtt_fontinfo *info, stbtt_vertex *vertices); +// frees the data allocated above + +////////////////////////////////////////////////////////////////////////////// +// +// BITMAP RENDERING +// + +STBTT_DEF void stbtt_FreeBitmap(unsigned char *bitmap, void *userdata); +// frees the bitmap allocated below + +STBTT_DEF unsigned char *stbtt_GetCodepointBitmap(const stbtt_fontinfo *info, float scale_x, float scale_y, int codepoint, int *width, int *height, int *xoff, int *yoff); +// allocates a large-enough single-channel 8bpp bitmap and renders the +// specified character/glyph at the specified scale into it, with +// antialiasing. 0 is no coverage (transparent), 255 is fully covered (opaque).
+// *width & *height are filled out with the width & height of the bitmap, +// which is stored left-to-right, top-to-bottom. +// +// xoff/yoff are the offset in pixel space from the glyph origin to the top-left of the bitmap + +STBTT_DEF unsigned char *stbtt_GetCodepointBitmapSubpixel(const stbtt_fontinfo *info, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint, int *width, int *height, int *xoff, int *yoff); +// the same as stbtt_GetCodepointBitmap, but you can specify a subpixel +// shift for the character + +STBTT_DEF void stbtt_MakeCodepointBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int codepoint); +// the same as stbtt_GetCodepointBitmap, but you pass in storage for the bitmap +// in the form of 'output', with row spacing of 'out_stride' bytes. the bitmap +// is clipped to out_w/out_h bytes. Call stbtt_GetCodepointBitmapBox to get the +// width and height and positioning info for it first.
+ +STBTT_DEF void stbtt_MakeCodepointBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint); +// same as stbtt_MakeCodepointBitmap, but you can specify a subpixel +// shift for the character + +STBTT_DEF void stbtt_MakeCodepointBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int oversample_x, int oversample_y, float *sub_x, float *sub_y, int codepoint); +// same as stbtt_MakeCodepointBitmapSubpixel, but prefiltering +// is performed (see stbtt_PackSetOversampling) + +STBTT_DEF void stbtt_GetCodepointBitmapBox(const stbtt_fontinfo *font, int codepoint, float scale_x, float scale_y, int *ix0, int *iy0, int *ix1, int *iy1); +// get the bbox of the bitmap centered around the glyph origin; so the +// bitmap width is ix1-ix0, height is iy1-iy0, and location to place +// the bitmap top left is (leftSideBearing*scale,iy0). +// (Note that the bitmap uses y-increases-down, but the shape uses +// y-increases-up, so CodepointBitmapBox and CodepointBox are inverted.) 
+ +STBTT_DEF void stbtt_GetCodepointBitmapBoxSubpixel(const stbtt_fontinfo *font, int codepoint, float scale_x, float scale_y, float shift_x, float shift_y, int *ix0, int *iy0, int *ix1, int *iy1); +// same as stbtt_GetCodepointBitmapBox, but you can specify a subpixel +// shift for the character + +// the following functions are equivalent to the above functions, but operate +// on glyph indices instead of Unicode codepoints (for efficiency) +STBTT_DEF unsigned char *stbtt_GetGlyphBitmap(const stbtt_fontinfo *info, float scale_x, float scale_y, int glyph, int *width, int *height, int *xoff, int *yoff); +STBTT_DEF unsigned char *stbtt_GetGlyphBitmapSubpixel(const stbtt_fontinfo *info, float scale_x, float scale_y, float shift_x, float shift_y, int glyph, int *width, int *height, int *xoff, int *yoff); +STBTT_DEF void stbtt_MakeGlyphBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int glyph); +STBTT_DEF void stbtt_MakeGlyphBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int glyph); +STBTT_DEF void stbtt_MakeGlyphBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int oversample_x, int oversample_y, float *sub_x, float *sub_y, int glyph); +STBTT_DEF void stbtt_GetGlyphBitmapBox(const stbtt_fontinfo *font, int glyph, float scale_x, float scale_y, int *ix0, int *iy0, int *ix1, int *iy1); +STBTT_DEF void stbtt_GetGlyphBitmapBoxSubpixel(const stbtt_fontinfo *font, int glyph, float scale_x, float scale_y,float shift_x, float shift_y, int *ix0, int *iy0, int *ix1, int *iy1); + + +// @TODO: don't expose this structure +typedef struct +{ + int w,h,stride; + unsigned char *pixels; +} stbtt__bitmap; + +// rasterize a shape with quadratic beziers into a bitmap 
STBTT_DEF void stbtt_Rasterize(stbtt__bitmap *result,        // 1-channel bitmap to draw into
                               float flatness_in_pixels,     // allowable error of curve in pixels
                               stbtt_vertex *vertices,       // array of vertices defining shape
                               int num_verts,                // number of vertices in above array
                               float scale_x, float scale_y, // scale applied to input vertices
                               float shift_x, float shift_y, // translation applied to input vertices
                               int x_off, int y_off,         // another translation applied to input
                               int invert,                   // if non-zero, vertically flip shape
                               void *userdata);              // context for STBTT_malloc

//////////////////////////////////////////////////////////////////////////////
//
// Signed Distance Function (or Field) rendering

STBTT_DEF void stbtt_FreeSDF(unsigned char *bitmap, void *userdata);
// frees the SDF bitmap allocated below

STBTT_DEF unsigned char * stbtt_GetGlyphSDF(const stbtt_fontinfo *info, float scale, int glyph, int padding, unsigned char onedge_value, float pixel_dist_scale, int *width, int *height, int *xoff, int *yoff);
STBTT_DEF unsigned char * stbtt_GetCodepointSDF(const stbtt_fontinfo *info, float scale, int codepoint, int padding, unsigned char onedge_value, float pixel_dist_scale, int *width, int *height, int *xoff, int *yoff);
// These functions compute a discretized SDF field for a single character, suitable for storing
// in a single-channel texture, sampling with bilinear filtering, and testing whether the
// sample is larger than some threshold to produce scalable fonts.
//        info              --  the font
//        scale             --  controls the size of the resulting SDF bitmap, same as it would be creating a regular bitmap
//        glyph/codepoint   --  the character to generate the SDF for
//        padding           --  extra "pixels" around the character which are filled with the distance to the character (not 0),
//                                 which allows effects like bit outlines
//        onedge_value      --  value 0-255 to test the SDF against to reconstruct the character (i.e. the isocontour of the character)
//        pixel_dist_scale  --  what value the SDF should increase by when moving one SDF "pixel" away from the edge (on the 0..255 scale)
//                                 if positive, > onedge_value is inside; if negative, < onedge_value is inside
//        width,height      --  output height & width of the SDF bitmap (including padding)
//        xoff,yoff         --  output origin of the character
//        return value      --  a 2D array of bytes 0..255, width*height in size
//
// pixel_dist_scale & onedge_value are a scale & bias that allows you to make
// optimal use of the limited 0..255 for your application, trading off precision
// and special effects. SDF values outside the range 0..255 are clamped to 0..255.
//
// Example:
//      scale = stbtt_ScaleForPixelHeight(22)
//      padding = 5
//      onedge_value = 180
//      pixel_dist_scale = 180/5.0 = 36.0
//
//      This will create an SDF bitmap in which the character is about 22 pixels
//      high but the whole bitmap is about 22+5+5=32 pixels high. To produce a filled
//      shape, sample the SDF at each pixel and fill the pixel if the SDF value
//      is greater than or equal to 180/255. (You'll actually want to antialias,
//      which is beyond the scope of this example.) Additionally, you can compute
//      offset outlines (e.g. to stroke the character border inside & outside,
//      or only outside). For example, to fill outside the character up to 3 SDF
//      pixels, you would compare against (180-36.0*3)/255 = 72/255. The above
//      choice of variables maps a range from 5 pixels outside the shape to
//      2 pixels inside the shape to 0..255; this is intended primarily for applying
//      outside effects only (the interior range is needed to allow proper
//      antialiasing of the font at *smaller* sizes)
//
// The function computes the SDF analytically at each SDF pixel, not by e.g.
// building a higher-res bitmap and approximating it. In theory the quality
// should be as high as possible for an SDF of this size & representation, but
// unclear if this is true in practice (perhaps building a higher-res bitmap
// and computing from that can allow drop-out prevention).
//
// The algorithm has not been optimized at all, so expect it to be slow
// if computing lots of characters or very large sizes.



//////////////////////////////////////////////////////////////////////////////
//
// Finding the right font...
//
// You should really just solve this offline, keep your own tables
// of what font is what, and don't try to get it out of the .ttf file.
// That's because getting it out of the .ttf file is really hard, because
// the names in the file can appear in many possible encodings, in many
// possible languages, and e.g. if you need a case-insensitive comparison,
// the details of that depend on the encoding & language in a complex way
// (actually underspecified in truetype, but also gigantic).
//
// But you can use the provided functions in two possible ways:
//     stbtt_FindMatchingFont() will use *case-sensitive* comparisons on
//             unicode-encoded names to try to find the font you want;
//             you can run this before calling stbtt_InitFont()
//
//     stbtt_GetFontNameString() lets you get any of the various strings
//             from the file yourself and do your own comparisons on them.
//             You have to have called stbtt_InitFont() first.


STBTT_DEF int stbtt_FindMatchingFont(const unsigned char *fontdata, const char *name, int flags);
// returns the offset (not index) of the font that matches, or -1 if none
//   if you use STBTT_MACSTYLE_DONTCARE, use a font name like "Arial Bold".
//   if you use any other flag, use a font name like "Arial"; this checks
//     the 'macStyle' header field; i don't know if fonts set this consistently
// Values for the `flags` argument of stbtt_FindMatchingFont.
#define STBTT_MACSTYLE_DONTCARE     0
#define STBTT_MACSTYLE_BOLD         1
#define STBTT_MACSTYLE_ITALIC       2
#define STBTT_MACSTYLE_UNDERSCORE   4
#define STBTT_MACSTYLE_NONE         8   // <= not same as 0, this makes us check the bitfield is 0

STBTT_DEF int stbtt_CompareUTF8toUTF16_bigendian(const char *s1, int len1, const char *s2, int len2);
// returns 1/0 whether the first string interpreted as utf8 is identical to
// the second string interpreted as big-endian utf16... useful for strings from next func

STBTT_DEF const char *stbtt_GetFontNameString(const stbtt_fontinfo *font, int *length, int platformID, int encodingID, int languageID, int nameID);
// returns the string (which may be big-endian double byte, e.g. for unicode)
// and puts the length in bytes in *length.
//
// some of the values for the IDs are below; for more see the truetype spec:
//     http://developer.apple.com/textfonts/TTRefMan/RM06/Chap6name.html
//     http://www.microsoft.com/typography/otspec/name.htm

enum { // platformID
   STBTT_PLATFORM_ID_UNICODE   =0,
   STBTT_PLATFORM_ID_MAC       =1,
   STBTT_PLATFORM_ID_ISO       =2,
   STBTT_PLATFORM_ID_MICROSOFT =3
};

enum { // encodingID for STBTT_PLATFORM_ID_UNICODE
   STBTT_UNICODE_EID_UNICODE_1_0    =0,
   STBTT_UNICODE_EID_UNICODE_1_1    =1,
   STBTT_UNICODE_EID_ISO_10646      =2,
   STBTT_UNICODE_EID_UNICODE_2_0_BMP=3,
   STBTT_UNICODE_EID_UNICODE_2_0_FULL=4
};

enum { // encodingID for STBTT_PLATFORM_ID_MICROSOFT
   STBTT_MS_EID_SYMBOL        =0,
   STBTT_MS_EID_UNICODE_BMP   =1,
   STBTT_MS_EID_SHIFTJIS      =2,
   STBTT_MS_EID_UNICODE_FULL  =10
};

enum { // encodingID for STBTT_PLATFORM_ID_MAC; same as Script Manager codes
   STBTT_MAC_EID_ROMAN        =0,   STBTT_MAC_EID_ARABIC       =4,
   STBTT_MAC_EID_JAPANESE     =1,   STBTT_MAC_EID_HEBREW       =5,
   STBTT_MAC_EID_CHINESE_TRAD =2,   STBTT_MAC_EID_GREEK        =6,
   STBTT_MAC_EID_KOREAN       =3,   STBTT_MAC_EID_RUSSIAN      =7
};

+enum { // languageID for STBTT_PLATFORM_ID_MICROSOFT; same as LCID... + // problematic because there are e.g. 16 english LCIDs and 16 arabic LCIDs + STBTT_MS_LANG_ENGLISH =0x0409, STBTT_MS_LANG_ITALIAN =0x0410, + STBTT_MS_LANG_CHINESE =0x0804, STBTT_MS_LANG_JAPANESE =0x0411, + STBTT_MS_LANG_DUTCH =0x0413, STBTT_MS_LANG_KOREAN =0x0412, + STBTT_MS_LANG_FRENCH =0x040c, STBTT_MS_LANG_RUSSIAN =0x0419, + STBTT_MS_LANG_GERMAN =0x0407, STBTT_MS_LANG_SPANISH =0x0409, + STBTT_MS_LANG_HEBREW =0x040d, STBTT_MS_LANG_SWEDISH =0x041D +}; + +enum { // languageID for STBTT_PLATFORM_ID_MAC + STBTT_MAC_LANG_ENGLISH =0 , STBTT_MAC_LANG_JAPANESE =11, + STBTT_MAC_LANG_ARABIC =12, STBTT_MAC_LANG_KOREAN =23, + STBTT_MAC_LANG_DUTCH =4 , STBTT_MAC_LANG_RUSSIAN =32, + STBTT_MAC_LANG_FRENCH =1 , STBTT_MAC_LANG_SPANISH =6 , + STBTT_MAC_LANG_GERMAN =2 , STBTT_MAC_LANG_SWEDISH =5 , + STBTT_MAC_LANG_HEBREW =10, STBTT_MAC_LANG_CHINESE_SIMPLIFIED =33, + STBTT_MAC_LANG_ITALIAN =3 , STBTT_MAC_LANG_CHINESE_TRAD =19 +}; + +#ifdef __cplusplus +} +#endif + +#endif // __STB_INCLUDE_STB_TRUETYPE_H__ + +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +//// +//// IMPLEMENTATION +//// +//// + +#ifdef STB_TRUETYPE_IMPLEMENTATION + +#ifndef STBTT_MAX_OVERSAMPLE +#define STBTT_MAX_OVERSAMPLE 8 +#endif + +#if STBTT_MAX_OVERSAMPLE > 255 +#error "STBTT_MAX_OVERSAMPLE cannot be > 255" +#endif + +typedef int stbtt__test_oversample_pow2[(STBTT_MAX_OVERSAMPLE & (STBTT_MAX_OVERSAMPLE-1)) == 0 ? 
1 : -1]; + +#ifndef STBTT_RASTERIZER_VERSION +#define STBTT_RASTERIZER_VERSION 2 +#endif + +#ifdef _MSC_VER +#define STBTT__NOTUSED(v) (void)(v) +#else +#define STBTT__NOTUSED(v) (void)sizeof(v) +#endif + +////////////////////////////////////////////////////////////////////////// +// +// stbtt__buf helpers to parse data from file +// + +static stbtt_uint8 stbtt__buf_get8(stbtt__buf *b) +{ + if (b->cursor >= b->size) + return 0; + return b->data[b->cursor++]; +} + +static stbtt_uint8 stbtt__buf_peek8(stbtt__buf *b) +{ + if (b->cursor >= b->size) + return 0; + return b->data[b->cursor]; +} + +static void stbtt__buf_seek(stbtt__buf *b, int o) +{ + STBTT_assert(!(o > b->size || o < 0)); + b->cursor = (o > b->size || o < 0) ? b->size : o; +} + +static void stbtt__buf_skip(stbtt__buf *b, int o) +{ + stbtt__buf_seek(b, b->cursor + o); +} + +static stbtt_uint32 stbtt__buf_get(stbtt__buf *b, int n) +{ + stbtt_uint32 v = 0; + int i; + STBTT_assert(n >= 1 && n <= 4); + for (i = 0; i < n; i++) + v = (v << 8) | stbtt__buf_get8(b); + return v; +} + +static stbtt__buf stbtt__new_buf(const void *p, size_t size) +{ + stbtt__buf r; + STBTT_assert(size < 0x40000000); + r.data = (stbtt_uint8*) p; + r.size = (int) size; + r.cursor = 0; + return r; +} + +#define stbtt__buf_get16(b) stbtt__buf_get((b), 2) +#define stbtt__buf_get32(b) stbtt__buf_get((b), 4) + +static stbtt__buf stbtt__buf_range(const stbtt__buf *b, int o, int s) +{ + stbtt__buf r = stbtt__new_buf(NULL, 0); + if (o < 0 || s < 0 || o > b->size || s > b->size - o) return r; + r.data = b->data + o; + r.size = s; + return r; +} + +static stbtt__buf stbtt__cff_get_index(stbtt__buf *b) +{ + int count, start, offsize; + start = b->cursor; + count = stbtt__buf_get16(b); + if (count) { + offsize = stbtt__buf_get8(b); + STBTT_assert(offsize >= 1 && offsize <= 4); + stbtt__buf_skip(b, offsize * count); + stbtt__buf_skip(b, stbtt__buf_get(b, offsize) - 1); + } + return stbtt__buf_range(b, start, b->cursor - start); +} + +static 
stbtt_uint32 stbtt__cff_int(stbtt__buf *b) +{ + int b0 = stbtt__buf_get8(b); + if (b0 >= 32 && b0 <= 246) return b0 - 139; + else if (b0 >= 247 && b0 <= 250) return (b0 - 247)*256 + stbtt__buf_get8(b) + 108; + else if (b0 >= 251 && b0 <= 254) return -(b0 - 251)*256 - stbtt__buf_get8(b) - 108; + else if (b0 == 28) return stbtt__buf_get16(b); + else if (b0 == 29) return stbtt__buf_get32(b); + STBTT_assert(0); + return 0; +} + +static void stbtt__cff_skip_operand(stbtt__buf *b) { + int v, b0 = stbtt__buf_peek8(b); + STBTT_assert(b0 >= 28); + if (b0 == 30) { + stbtt__buf_skip(b, 1); + while (b->cursor < b->size) { + v = stbtt__buf_get8(b); + if ((v & 0xF) == 0xF || (v >> 4) == 0xF) + break; + } + } else { + stbtt__cff_int(b); + } +} + +static stbtt__buf stbtt__dict_get(stbtt__buf *b, int key) +{ + stbtt__buf_seek(b, 0); + while (b->cursor < b->size) { + int start = b->cursor, end, op; + while (stbtt__buf_peek8(b) >= 28) + stbtt__cff_skip_operand(b); + end = b->cursor; + op = stbtt__buf_get8(b); + if (op == 12) op = stbtt__buf_get8(b) | 0x100; + if (op == key) return stbtt__buf_range(b, start, end-start); + } + return stbtt__buf_range(b, 0, 0); +} + +static void stbtt__dict_get_ints(stbtt__buf *b, int key, int outcount, stbtt_uint32 *out) +{ + int i; + stbtt__buf operands = stbtt__dict_get(b, key); + for (i = 0; i < outcount && operands.cursor < operands.size; i++) + out[i] = stbtt__cff_int(&operands); +} + +static int stbtt__cff_index_count(stbtt__buf *b) +{ + stbtt__buf_seek(b, 0); + return stbtt__buf_get16(b); +} + +static stbtt__buf stbtt__cff_index_get(stbtt__buf b, int i) +{ + int count, offsize, start, end; + stbtt__buf_seek(&b, 0); + count = stbtt__buf_get16(&b); + offsize = stbtt__buf_get8(&b); + STBTT_assert(i >= 0 && i < count); + STBTT_assert(offsize >= 1 && offsize <= 4); + stbtt__buf_skip(&b, i*offsize); + start = stbtt__buf_get(&b, offsize); + end = stbtt__buf_get(&b, offsize); + return stbtt__buf_range(&b, 2+(count+1)*offsize+start, end - start); +} + 
+////////////////////////////////////////////////////////////////////////// +// +// accessors to parse data from file +// + +// on platforms that don't allow misaligned reads, if we want to allow +// truetype fonts that aren't padded to alignment, define ALLOW_UNALIGNED_TRUETYPE + +#define ttBYTE(p) (* (stbtt_uint8 *) (p)) +#define ttCHAR(p) (* (stbtt_int8 *) (p)) +#define ttFixed(p) ttLONG(p) + +static stbtt_uint16 ttUSHORT(stbtt_uint8 *p) { return p[0]*256 + p[1]; } +static stbtt_int16 ttSHORT(stbtt_uint8 *p) { return p[0]*256 + p[1]; } +static stbtt_uint32 ttULONG(stbtt_uint8 *p) { return (p[0]<<24) + (p[1]<<16) + (p[2]<<8) + p[3]; } +static stbtt_int32 ttLONG(stbtt_uint8 *p) { return (p[0]<<24) + (p[1]<<16) + (p[2]<<8) + p[3]; } + +#define stbtt_tag4(p,c0,c1,c2,c3) ((p)[0] == (c0) && (p)[1] == (c1) && (p)[2] == (c2) && (p)[3] == (c3)) +#define stbtt_tag(p,str) stbtt_tag4(p,str[0],str[1],str[2],str[3]) + +static int stbtt__isfont(stbtt_uint8 *font) +{ + // check the version number + if (stbtt_tag4(font, '1',0,0,0)) return 1; // TrueType 1 + if (stbtt_tag(font, "typ1")) return 1; // TrueType with type 1 font -- we don't support this! + if (stbtt_tag(font, "OTTO")) return 1; // OpenType with CFF + if (stbtt_tag4(font, 0,1,0,0)) return 1; // OpenType 1.0 + if (stbtt_tag(font, "true")) return 1; // Apple specification for TrueType fonts + return 0; +} + +// @OPTIMIZE: binary search +static stbtt_uint32 stbtt__find_table(stbtt_uint8 *data, stbtt_uint32 fontstart, const char *tag) +{ + stbtt_int32 num_tables = ttUSHORT(data+fontstart+4); + stbtt_uint32 tabledir = fontstart + 12; + stbtt_int32 i; + for (i=0; i < num_tables; ++i) { + stbtt_uint32 loc = tabledir + 16*i; + if (stbtt_tag(data+loc+0, tag)) + return ttULONG(data+loc+8); + } + return 0; +} + +static int stbtt_GetFontOffsetForIndex_internal(unsigned char *font_collection, int index) +{ + // if it's just a font, there's only one valid index + if (stbtt__isfont(font_collection)) + return index == 0 ? 
0 : -1; + + // check if it's a TTC + if (stbtt_tag(font_collection, "ttcf")) { + // version 1? + if (ttULONG(font_collection+4) == 0x00010000 || ttULONG(font_collection+4) == 0x00020000) { + stbtt_int32 n = ttLONG(font_collection+8); + if (index >= n) + return -1; + return ttULONG(font_collection+12+index*4); + } + } + return -1; +} + +static int stbtt_GetNumberOfFonts_internal(unsigned char *font_collection) +{ + // if it's just a font, there's only one valid font + if (stbtt__isfont(font_collection)) + return 1; + + // check if it's a TTC + if (stbtt_tag(font_collection, "ttcf")) { + // version 1? + if (ttULONG(font_collection+4) == 0x00010000 || ttULONG(font_collection+4) == 0x00020000) { + return ttLONG(font_collection+8); + } + } + return 0; +} + +static stbtt__buf stbtt__get_subrs(stbtt__buf cff, stbtt__buf fontdict) +{ + stbtt_uint32 subrsoff = 0, private_loc[2] = { 0, 0 }; + stbtt__buf pdict; + stbtt__dict_get_ints(&fontdict, 18, 2, private_loc); + if (!private_loc[1] || !private_loc[0]) return stbtt__new_buf(NULL, 0); + pdict = stbtt__buf_range(&cff, private_loc[1], private_loc[0]); + stbtt__dict_get_ints(&pdict, 19, 1, &subrsoff); + if (!subrsoff) return stbtt__new_buf(NULL, 0); + stbtt__buf_seek(&cff, private_loc[1]+subrsoff); + return stbtt__cff_get_index(&cff); +} + +static int stbtt_InitFont_internal(stbtt_fontinfo *info, unsigned char *data, int fontstart) +{ + stbtt_uint32 cmap, t; + stbtt_int32 i,numTables; + + info->data = data; + info->fontstart = fontstart; + info->cff = stbtt__new_buf(NULL, 0); + + cmap = stbtt__find_table(data, fontstart, "cmap"); // required + info->loca = stbtt__find_table(data, fontstart, "loca"); // required + info->head = stbtt__find_table(data, fontstart, "head"); // required + info->glyf = stbtt__find_table(data, fontstart, "glyf"); // required + info->hhea = stbtt__find_table(data, fontstart, "hhea"); // required + info->hmtx = stbtt__find_table(data, fontstart, "hmtx"); // required + info->kern = 
stbtt__find_table(data, fontstart, "kern"); // not required + info->gpos = stbtt__find_table(data, fontstart, "GPOS"); // not required + + if (!cmap || !info->head || !info->hhea || !info->hmtx) + return 0; + if (info->glyf) { + // required for truetype + if (!info->loca) return 0; + } else { + // initialization for CFF / Type2 fonts (OTF) + stbtt__buf b, topdict, topdictidx; + stbtt_uint32 cstype = 2, charstrings = 0, fdarrayoff = 0, fdselectoff = 0; + stbtt_uint32 cff; + + cff = stbtt__find_table(data, fontstart, "CFF "); + if (!cff) return 0; + + info->fontdicts = stbtt__new_buf(NULL, 0); + info->fdselect = stbtt__new_buf(NULL, 0); + + // @TODO this should use size from table (not 512MB) + info->cff = stbtt__new_buf(data+cff, 512*1024*1024); + b = info->cff; + + // read the header + stbtt__buf_skip(&b, 2); + stbtt__buf_seek(&b, stbtt__buf_get8(&b)); // hdrsize + + // @TODO the name INDEX could list multiple fonts, + // but we just use the first one. + stbtt__cff_get_index(&b); // name INDEX + topdictidx = stbtt__cff_get_index(&b); + topdict = stbtt__cff_index_get(topdictidx, 0); + stbtt__cff_get_index(&b); // string INDEX + info->gsubrs = stbtt__cff_get_index(&b); + + stbtt__dict_get_ints(&topdict, 17, 1, &charstrings); + stbtt__dict_get_ints(&topdict, 0x100 | 6, 1, &cstype); + stbtt__dict_get_ints(&topdict, 0x100 | 36, 1, &fdarrayoff); + stbtt__dict_get_ints(&topdict, 0x100 | 37, 1, &fdselectoff); + info->subrs = stbtt__get_subrs(b, topdict); + + // we only support Type 2 charstrings + if (cstype != 2) return 0; + if (charstrings == 0) return 0; + + if (fdarrayoff) { + // looks like a CID font + if (!fdselectoff) return 0; + stbtt__buf_seek(&b, fdarrayoff); + info->fontdicts = stbtt__cff_get_index(&b); + info->fdselect = stbtt__buf_range(&b, fdselectoff, b.size-fdselectoff); + } + + stbtt__buf_seek(&b, charstrings); + info->charstrings = stbtt__cff_get_index(&b); + } + + t = stbtt__find_table(data, fontstart, "maxp"); + if (t) + info->numGlyphs = 
ttUSHORT(data+t+4); + else + info->numGlyphs = 0xffff; + + // find a cmap encoding table we understand *now* to avoid searching + // later. (todo: could make this installable) + // the same regardless of glyph. + numTables = ttUSHORT(data + cmap + 2); + info->index_map = 0; + for (i=0; i < numTables; ++i) { + stbtt_uint32 encoding_record = cmap + 4 + 8 * i; + // find an encoding we understand: + switch(ttUSHORT(data+encoding_record)) { + case STBTT_PLATFORM_ID_MICROSOFT: + switch (ttUSHORT(data+encoding_record+2)) { + case STBTT_MS_EID_UNICODE_BMP: + case STBTT_MS_EID_UNICODE_FULL: + // MS/Unicode + info->index_map = cmap + ttULONG(data+encoding_record+4); + break; + } + break; + case STBTT_PLATFORM_ID_UNICODE: + // Mac/iOS has these + // all the encodingIDs are unicode, so we don't bother to check it + info->index_map = cmap + ttULONG(data+encoding_record+4); + break; + } + } + if (info->index_map == 0) + return 0; + + info->indexToLocFormat = ttUSHORT(data+info->head + 50); + return 1; +} + +STBTT_DEF int stbtt_FindGlyphIndex(const stbtt_fontinfo *info, int unicode_codepoint) +{ + stbtt_uint8 *data = info->data; + stbtt_uint32 index_map = info->index_map; + + stbtt_uint16 format = ttUSHORT(data + index_map + 0); + if (format == 0) { // apple byte encoding + stbtt_int32 bytes = ttUSHORT(data + index_map + 2); + if (unicode_codepoint < bytes-6) + return ttBYTE(data + index_map + 6 + unicode_codepoint); + return 0; + } else if (format == 6) { + stbtt_uint32 first = ttUSHORT(data + index_map + 6); + stbtt_uint32 count = ttUSHORT(data + index_map + 8); + if ((stbtt_uint32) unicode_codepoint >= first && (stbtt_uint32) unicode_codepoint < first+count) + return ttUSHORT(data + index_map + 10 + (unicode_codepoint - first)*2); + return 0; + } else if (format == 2) { + STBTT_assert(0); // @TODO: high-byte mapping for japanese/chinese/korean + return 0; + } else if (format == 4) { // standard mapping for windows fonts: binary search collection of ranges + stbtt_uint16 
segcount = ttUSHORT(data+index_map+6) >> 1; + stbtt_uint16 searchRange = ttUSHORT(data+index_map+8) >> 1; + stbtt_uint16 entrySelector = ttUSHORT(data+index_map+10); + stbtt_uint16 rangeShift = ttUSHORT(data+index_map+12) >> 1; + + // do a binary search of the segments + stbtt_uint32 endCount = index_map + 14; + stbtt_uint32 search = endCount; + + if (unicode_codepoint > 0xffff) + return 0; + + // they lie from endCount .. endCount + segCount + // but searchRange is the nearest power of two, so... + if (unicode_codepoint >= ttUSHORT(data + search + rangeShift*2)) + search += rangeShift*2; + + // now decrement to bias correctly to find smallest + search -= 2; + while (entrySelector) { + stbtt_uint16 end; + searchRange >>= 1; + end = ttUSHORT(data + search + searchRange*2); + if (unicode_codepoint > end) + search += searchRange*2; + --entrySelector; + } + search += 2; + + { + stbtt_uint16 offset, start; + stbtt_uint16 item = (stbtt_uint16) ((search - endCount) >> 1); + + STBTT_assert(unicode_codepoint <= ttUSHORT(data + endCount + 2*item)); + start = ttUSHORT(data + index_map + 14 + segcount*2 + 2 + 2*item); + if (unicode_codepoint < start) + return 0; + + offset = ttUSHORT(data + index_map + 14 + segcount*6 + 2 + 2*item); + if (offset == 0) + return (stbtt_uint16) (unicode_codepoint + ttSHORT(data + index_map + 14 + segcount*4 + 2 + 2*item)); + + return ttUSHORT(data + offset + (unicode_codepoint-start)*2 + index_map + 14 + segcount*6 + 2 + 2*item); + } + } else if (format == 12 || format == 13) { + stbtt_uint32 ngroups = ttULONG(data+index_map+12); + stbtt_int32 low,high; + low = 0; high = (stbtt_int32)ngroups; + // Binary search the right group. 
+ while (low < high) { + stbtt_int32 mid = low + ((high-low) >> 1); // rounds down, so low <= mid < high + stbtt_uint32 start_char = ttULONG(data+index_map+16+mid*12); + stbtt_uint32 end_char = ttULONG(data+index_map+16+mid*12+4); + if ((stbtt_uint32) unicode_codepoint < start_char) + high = mid; + else if ((stbtt_uint32) unicode_codepoint > end_char) + low = mid+1; + else { + stbtt_uint32 start_glyph = ttULONG(data+index_map+16+mid*12+8); + if (format == 12) + return start_glyph + unicode_codepoint-start_char; + else // format == 13 + return start_glyph; + } + } + return 0; // not found + } + // @TODO + STBTT_assert(0); + return 0; +} + +STBTT_DEF int stbtt_GetCodepointShape(const stbtt_fontinfo *info, int unicode_codepoint, stbtt_vertex **vertices) +{ + return stbtt_GetGlyphShape(info, stbtt_FindGlyphIndex(info, unicode_codepoint), vertices); +} + +static void stbtt_setvertex(stbtt_vertex *v, stbtt_uint8 type, stbtt_int32 x, stbtt_int32 y, stbtt_int32 cx, stbtt_int32 cy) +{ + v->type = type; + v->x = (stbtt_int16) x; + v->y = (stbtt_int16) y; + v->cx = (stbtt_int16) cx; + v->cy = (stbtt_int16) cy; +} + +static int stbtt__GetGlyfOffset(const stbtt_fontinfo *info, int glyph_index) +{ + int g1,g2; + + STBTT_assert(!info->cff.size); + + if (glyph_index >= info->numGlyphs) return -1; // glyph index out of range + if (info->indexToLocFormat >= 2) return -1; // unknown index->glyph map format + + if (info->indexToLocFormat == 0) { + g1 = info->glyf + ttUSHORT(info->data + info->loca + glyph_index * 2) * 2; + g2 = info->glyf + ttUSHORT(info->data + info->loca + glyph_index * 2 + 2) * 2; + } else { + g1 = info->glyf + ttULONG (info->data + info->loca + glyph_index * 4); + g2 = info->glyf + ttULONG (info->data + info->loca + glyph_index * 4 + 4); + } + + return g1==g2 ? 
-1 : g1; // if length is 0, return -1 +} + +static int stbtt__GetGlyphInfoT2(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1); + +STBTT_DEF int stbtt_GetGlyphBox(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1) +{ + if (info->cff.size) { + stbtt__GetGlyphInfoT2(info, glyph_index, x0, y0, x1, y1); + } else { + int g = stbtt__GetGlyfOffset(info, glyph_index); + if (g < 0) return 0; + + if (x0) *x0 = ttSHORT(info->data + g + 2); + if (y0) *y0 = ttSHORT(info->data + g + 4); + if (x1) *x1 = ttSHORT(info->data + g + 6); + if (y1) *y1 = ttSHORT(info->data + g + 8); + } + return 1; +} + +STBTT_DEF int stbtt_GetCodepointBox(const stbtt_fontinfo *info, int codepoint, int *x0, int *y0, int *x1, int *y1) +{ + return stbtt_GetGlyphBox(info, stbtt_FindGlyphIndex(info,codepoint), x0,y0,x1,y1); +} + +STBTT_DEF int stbtt_IsGlyphEmpty(const stbtt_fontinfo *info, int glyph_index) +{ + stbtt_int16 numberOfContours; + int g; + if (info->cff.size) + return stbtt__GetGlyphInfoT2(info, glyph_index, NULL, NULL, NULL, NULL) == 0; + g = stbtt__GetGlyfOffset(info, glyph_index); + if (g < 0) return 1; + numberOfContours = ttSHORT(info->data + g); + return numberOfContours == 0; +} + +static int stbtt__close_shape(stbtt_vertex *vertices, int num_vertices, int was_off, int start_off, + stbtt_int32 sx, stbtt_int32 sy, stbtt_int32 scx, stbtt_int32 scy, stbtt_int32 cx, stbtt_int32 cy) +{ + if (start_off) { + if (was_off) + stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, (cx+scx)>>1, (cy+scy)>>1, cx,cy); + stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, sx,sy,scx,scy); + } else { + if (was_off) + stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve,sx,sy,cx,cy); + else + stbtt_setvertex(&vertices[num_vertices++], STBTT_vline,sx,sy,0,0); + } + return num_vertices; +} + +static int stbtt__GetGlyphShapeTT(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices) +{ + stbtt_int16 numberOfContours; + 
stbtt_uint8 *endPtsOfContours; + stbtt_uint8 *data = info->data; + stbtt_vertex *vertices=0; + int num_vertices=0; + int g = stbtt__GetGlyfOffset(info, glyph_index); + + *pvertices = NULL; + + if (g < 0) return 0; + + numberOfContours = ttSHORT(data + g); + + if (numberOfContours > 0) { + stbtt_uint8 flags=0,flagcount; + stbtt_int32 ins, i,j=0,m,n, next_move, was_off=0, off, start_off=0; + stbtt_int32 x,y,cx,cy,sx,sy, scx,scy; + stbtt_uint8 *points; + endPtsOfContours = (data + g + 10); + ins = ttUSHORT(data + g + 10 + numberOfContours * 2); + points = data + g + 10 + numberOfContours * 2 + 2 + ins; + + n = 1+ttUSHORT(endPtsOfContours + numberOfContours*2-2); + + m = n + 2*numberOfContours; // a loose bound on how many vertices we might need + vertices = (stbtt_vertex *) STBTT_malloc(m * sizeof(vertices[0]), info->userdata); + if (vertices == 0) + return 0; + + next_move = 0; + flagcount=0; + + // in first pass, we load uninterpreted data into the allocated array + // above, shifted to the end of the array so we won't overwrite it when + // we create our final data starting from the front + + off = m - n; // starting offset for uninterpreted data, regardless of how m ends up being calculated + + // first load flags + + for (i=0; i < n; ++i) { + if (flagcount == 0) { + flags = *points++; + if (flags & 8) + flagcount = *points++; + } else + --flagcount; + vertices[off+i].type = flags; + } + + // now load x coordinates + x=0; + for (i=0; i < n; ++i) { + flags = vertices[off+i].type; + if (flags & 2) { + stbtt_int16 dx = *points++; + x += (flags & 16) ? dx : -dx; // ??? + } else { + if (!(flags & 16)) { + x = x + (stbtt_int16) (points[0]*256 + points[1]); + points += 2; + } + } + vertices[off+i].x = (stbtt_int16) x; + } + + // now load y coordinates + y=0; + for (i=0; i < n; ++i) { + flags = vertices[off+i].type; + if (flags & 4) { + stbtt_int16 dy = *points++; + y += (flags & 32) ? dy : -dy; // ??? 
+ } else { + if (!(flags & 32)) { + y = y + (stbtt_int16) (points[0]*256 + points[1]); + points += 2; + } + } + vertices[off+i].y = (stbtt_int16) y; + } + + // now convert them to our format + num_vertices=0; + sx = sy = cx = cy = scx = scy = 0; + for (i=0; i < n; ++i) { + flags = vertices[off+i].type; + x = (stbtt_int16) vertices[off+i].x; + y = (stbtt_int16) vertices[off+i].y; + + if (next_move == i) { + if (i != 0) + num_vertices = stbtt__close_shape(vertices, num_vertices, was_off, start_off, sx,sy,scx,scy,cx,cy); + + // now start the new one + start_off = !(flags & 1); + if (start_off) { + // if we start off with an off-curve point, then when we need to find a point on the curve + // where we can start, and we need to save some state for when we wraparound. + scx = x; + scy = y; + if (!(vertices[off+i+1].type & 1)) { + // next point is also a curve point, so interpolate an on-point curve + sx = (x + (stbtt_int32) vertices[off+i+1].x) >> 1; + sy = (y + (stbtt_int32) vertices[off+i+1].y) >> 1; + } else { + // otherwise just use the next point as our start point + sx = (stbtt_int32) vertices[off+i+1].x; + sy = (stbtt_int32) vertices[off+i+1].y; + ++i; // we're using point i+1 as the starting point, so skip it + } + } else { + sx = x; + sy = y; + } + stbtt_setvertex(&vertices[num_vertices++], STBTT_vmove,sx,sy,0,0); + was_off = 0; + next_move = 1 + ttUSHORT(endPtsOfContours+j*2); + ++j; + } else { + if (!(flags & 1)) { // if it's a curve + if (was_off) // two off-curve control points in a row means interpolate an on-curve midpoint + stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, (cx+x)>>1, (cy+y)>>1, cx, cy); + cx = x; + cy = y; + was_off = 1; + } else { + if (was_off) + stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, x,y, cx, cy); + else + stbtt_setvertex(&vertices[num_vertices++], STBTT_vline, x,y,0,0); + was_off = 0; + } + } + } + num_vertices = stbtt__close_shape(vertices, num_vertices, was_off, start_off, sx,sy,scx,scy,cx,cy); + } else if 
(numberOfContours == -1) { + // Compound shapes. + int more = 1; + stbtt_uint8 *comp = data + g + 10; + num_vertices = 0; + vertices = 0; + while (more) { + stbtt_uint16 flags, gidx; + int comp_num_verts = 0, i; + stbtt_vertex *comp_verts = 0, *tmp = 0; + float mtx[6] = {1,0,0,1,0,0}, m, n; + + flags = ttSHORT(comp); comp+=2; + gidx = ttSHORT(comp); comp+=2; + + if (flags & 2) { // XY values + if (flags & 1) { // shorts + mtx[4] = ttSHORT(comp); comp+=2; + mtx[5] = ttSHORT(comp); comp+=2; + } else { + mtx[4] = ttCHAR(comp); comp+=1; + mtx[5] = ttCHAR(comp); comp+=1; + } + } + else { + // @TODO handle matching point + STBTT_assert(0); + } + if (flags & (1<<3)) { // WE_HAVE_A_SCALE + mtx[0] = mtx[3] = ttSHORT(comp)/16384.0f; comp+=2; + mtx[1] = mtx[2] = 0; + } else if (flags & (1<<6)) { // WE_HAVE_AN_X_AND_YSCALE + mtx[0] = ttSHORT(comp)/16384.0f; comp+=2; + mtx[1] = mtx[2] = 0; + mtx[3] = ttSHORT(comp)/16384.0f; comp+=2; + } else if (flags & (1<<7)) { // WE_HAVE_A_TWO_BY_TWO + mtx[0] = ttSHORT(comp)/16384.0f; comp+=2; + mtx[1] = ttSHORT(comp)/16384.0f; comp+=2; + mtx[2] = ttSHORT(comp)/16384.0f; comp+=2; + mtx[3] = ttSHORT(comp)/16384.0f; comp+=2; + } + + // Find transformation scales. + m = (float) STBTT_sqrt(mtx[0]*mtx[0] + mtx[1]*mtx[1]); + n = (float) STBTT_sqrt(mtx[2]*mtx[2] + mtx[3]*mtx[3]); + + // Get indexed glyph. + comp_num_verts = stbtt_GetGlyphShape(info, gidx, &comp_verts); + if (comp_num_verts > 0) { + // Transform vertices. + for (i = 0; i < comp_num_verts; ++i) { + stbtt_vertex* v = &comp_verts[i]; + stbtt_vertex_type x,y; + x=v->x; y=v->y; + v->x = (stbtt_vertex_type)(m * (mtx[0]*x + mtx[2]*y + mtx[4])); + v->y = (stbtt_vertex_type)(n * (mtx[1]*x + mtx[3]*y + mtx[5])); + x=v->cx; y=v->cy; + v->cx = (stbtt_vertex_type)(m * (mtx[0]*x + mtx[2]*y + mtx[4])); + v->cy = (stbtt_vertex_type)(n * (mtx[1]*x + mtx[3]*y + mtx[5])); + } + // Append vertices. 
+ tmp = (stbtt_vertex*)STBTT_malloc((num_vertices+comp_num_verts)*sizeof(stbtt_vertex), info->userdata); + if (!tmp) { + if (vertices) STBTT_free(vertices, info->userdata); + if (comp_verts) STBTT_free(comp_verts, info->userdata); + return 0; + } + if (num_vertices > 0) STBTT_memcpy(tmp, vertices, num_vertices*sizeof(stbtt_vertex)); //-V595 + STBTT_memcpy(tmp+num_vertices, comp_verts, comp_num_verts*sizeof(stbtt_vertex)); + if (vertices) STBTT_free(vertices, info->userdata); + vertices = tmp; + STBTT_free(comp_verts, info->userdata); + num_vertices += comp_num_verts; + } + // More components ? + more = flags & (1<<5); + } + } else if (numberOfContours < 0) { + // @TODO other compound variations? + STBTT_assert(0); + } else { + // numberOfCounters == 0, do nothing + } + + *pvertices = vertices; + return num_vertices; +} + +typedef struct +{ + int bounds; + int started; + float first_x, first_y; + float x, y; + stbtt_int32 min_x, max_x, min_y, max_y; + + stbtt_vertex *pvertices; + int num_vertices; +} stbtt__csctx; + +#define STBTT__CSCTX_INIT(bounds) {bounds,0, 0,0, 0,0, 0,0,0,0, NULL, 0} + +static void stbtt__track_vertex(stbtt__csctx *c, stbtt_int32 x, stbtt_int32 y) +{ + if (x > c->max_x || !c->started) c->max_x = x; + if (y > c->max_y || !c->started) c->max_y = y; + if (x < c->min_x || !c->started) c->min_x = x; + if (y < c->min_y || !c->started) c->min_y = y; + c->started = 1; +} + +static void stbtt__csctx_v(stbtt__csctx *c, stbtt_uint8 type, stbtt_int32 x, stbtt_int32 y, stbtt_int32 cx, stbtt_int32 cy, stbtt_int32 cx1, stbtt_int32 cy1) +{ + if (c->bounds) { + stbtt__track_vertex(c, x, y); + if (type == STBTT_vcubic) { + stbtt__track_vertex(c, cx, cy); + stbtt__track_vertex(c, cx1, cy1); + } + } else { + stbtt_setvertex(&c->pvertices[c->num_vertices], type, x, y, cx, cy); + c->pvertices[c->num_vertices].cx1 = (stbtt_int16) cx1; + c->pvertices[c->num_vertices].cy1 = (stbtt_int16) cy1; + } + c->num_vertices++; +} + +static void 
stbtt__csctx_close_shape(stbtt__csctx *ctx) +{ + if (ctx->first_x != ctx->x || ctx->first_y != ctx->y) + stbtt__csctx_v(ctx, STBTT_vline, (int)ctx->first_x, (int)ctx->first_y, 0, 0, 0, 0); +} + +static void stbtt__csctx_rmove_to(stbtt__csctx *ctx, float dx, float dy) +{ + stbtt__csctx_close_shape(ctx); + ctx->first_x = ctx->x = ctx->x + dx; + ctx->first_y = ctx->y = ctx->y + dy; + stbtt__csctx_v(ctx, STBTT_vmove, (int)ctx->x, (int)ctx->y, 0, 0, 0, 0); +} + +static void stbtt__csctx_rline_to(stbtt__csctx *ctx, float dx, float dy) +{ + ctx->x += dx; + ctx->y += dy; + stbtt__csctx_v(ctx, STBTT_vline, (int)ctx->x, (int)ctx->y, 0, 0, 0, 0); +} + +static void stbtt__csctx_rccurve_to(stbtt__csctx *ctx, float dx1, float dy1, float dx2, float dy2, float dx3, float dy3) +{ + float cx1 = ctx->x + dx1; + float cy1 = ctx->y + dy1; + float cx2 = cx1 + dx2; + float cy2 = cy1 + dy2; + ctx->x = cx2 + dx3; + ctx->y = cy2 + dy3; + stbtt__csctx_v(ctx, STBTT_vcubic, (int)ctx->x, (int)ctx->y, (int)cx1, (int)cy1, (int)cx2, (int)cy2); +} + +static stbtt__buf stbtt__get_subr(stbtt__buf idx, int n) +{ + int count = stbtt__cff_index_count(&idx); + int bias = 107; + if (count >= 33900) + bias = 32768; + else if (count >= 1240) + bias = 1131; + n += bias; + if (n < 0 || n >= count) + return stbtt__new_buf(NULL, 0); + return stbtt__cff_index_get(idx, n); +} + +static stbtt__buf stbtt__cid_get_glyph_subrs(const stbtt_fontinfo *info, int glyph_index) +{ + stbtt__buf fdselect = info->fdselect; + int nranges, start, end, v, fmt, fdselector = -1, i; + + stbtt__buf_seek(&fdselect, 0); + fmt = stbtt__buf_get8(&fdselect); + if (fmt == 0) { + // untested + stbtt__buf_skip(&fdselect, glyph_index); + fdselector = stbtt__buf_get8(&fdselect); + } else if (fmt == 3) { + nranges = stbtt__buf_get16(&fdselect); + start = stbtt__buf_get16(&fdselect); + for (i = 0; i < nranges; i++) { + v = stbtt__buf_get8(&fdselect); + end = stbtt__buf_get16(&fdselect); + if (glyph_index >= start && glyph_index < end) { + 
fdselector = v; + break; + } + start = end; + } + } + if (fdselector == -1) stbtt__new_buf(NULL, 0); + return stbtt__get_subrs(info->cff, stbtt__cff_index_get(info->fontdicts, fdselector)); +} + +static int stbtt__run_charstring(const stbtt_fontinfo *info, int glyph_index, stbtt__csctx *c) +{ + int in_header = 1, maskbits = 0, subr_stack_height = 0, sp = 0, v, i, b0; + int has_subrs = 0, clear_stack; + float s[48]; + stbtt__buf subr_stack[10], subrs = info->subrs, b; + float f; + +#define STBTT__CSERR(s) (0) + + // this currently ignores the initial width value, which isn't needed if we have hmtx + b = stbtt__cff_index_get(info->charstrings, glyph_index); + while (b.cursor < b.size) { + i = 0; + clear_stack = 1; + b0 = stbtt__buf_get8(&b); + switch (b0) { + // @TODO implement hinting + case 0x13: // hintmask + case 0x14: // cntrmask + if (in_header) + maskbits += (sp / 2); // implicit "vstem" + in_header = 0; + stbtt__buf_skip(&b, (maskbits + 7) / 8); + break; + + case 0x01: // hstem + case 0x03: // vstem + case 0x12: // hstemhm + case 0x17: // vstemhm + maskbits += (sp / 2); + break; + + case 0x15: // rmoveto + in_header = 0; + if (sp < 2) return STBTT__CSERR("rmoveto stack"); + stbtt__csctx_rmove_to(c, s[sp-2], s[sp-1]); + break; + case 0x04: // vmoveto + in_header = 0; + if (sp < 1) return STBTT__CSERR("vmoveto stack"); + stbtt__csctx_rmove_to(c, 0, s[sp-1]); + break; + case 0x16: // hmoveto + in_header = 0; + if (sp < 1) return STBTT__CSERR("hmoveto stack"); + stbtt__csctx_rmove_to(c, s[sp-1], 0); + break; + + case 0x05: // rlineto + if (sp < 2) return STBTT__CSERR("rlineto stack"); + for (; i + 1 < sp; i += 2) + stbtt__csctx_rline_to(c, s[i], s[i+1]); + break; + + // hlineto/vlineto and vhcurveto/hvcurveto alternate horizontal and vertical + // starting from a different place. 
+ + case 0x07: // vlineto + if (sp < 1) return STBTT__CSERR("vlineto stack"); + goto vlineto; + case 0x06: // hlineto + if (sp < 1) return STBTT__CSERR("hlineto stack"); + for (;;) { + if (i >= sp) break; + stbtt__csctx_rline_to(c, s[i], 0); + i++; + vlineto: + if (i >= sp) break; + stbtt__csctx_rline_to(c, 0, s[i]); + i++; + } + break; + + case 0x1F: // hvcurveto + if (sp < 4) return STBTT__CSERR("hvcurveto stack"); + goto hvcurveto; + case 0x1E: // vhcurveto + if (sp < 4) return STBTT__CSERR("vhcurveto stack"); + for (;;) { + if (i + 3 >= sp) break; + stbtt__csctx_rccurve_to(c, 0, s[i], s[i+1], s[i+2], s[i+3], (sp - i == 5) ? s[i + 4] : 0.0f); + i += 4; + hvcurveto: + if (i + 3 >= sp) break; + stbtt__csctx_rccurve_to(c, s[i], 0, s[i+1], s[i+2], (sp - i == 5) ? s[i+4] : 0.0f, s[i+3]); + i += 4; + } + break; + + case 0x08: // rrcurveto + if (sp < 6) return STBTT__CSERR("rcurveline stack"); + for (; i + 5 < sp; i += 6) + stbtt__csctx_rccurve_to(c, s[i], s[i+1], s[i+2], s[i+3], s[i+4], s[i+5]); + break; + + case 0x18: // rcurveline + if (sp < 8) return STBTT__CSERR("rcurveline stack"); + for (; i + 5 < sp - 2; i += 6) + stbtt__csctx_rccurve_to(c, s[i], s[i+1], s[i+2], s[i+3], s[i+4], s[i+5]); + if (i + 1 >= sp) return STBTT__CSERR("rcurveline stack"); + stbtt__csctx_rline_to(c, s[i], s[i+1]); + break; + + case 0x19: // rlinecurve + if (sp < 8) return STBTT__CSERR("rlinecurve stack"); + for (; i + 1 < sp - 6; i += 2) + stbtt__csctx_rline_to(c, s[i], s[i+1]); + if (i + 5 >= sp) return STBTT__CSERR("rlinecurve stack"); + stbtt__csctx_rccurve_to(c, s[i], s[i+1], s[i+2], s[i+3], s[i+4], s[i+5]); + break; + + case 0x1A: // vvcurveto + case 0x1B: // hhcurveto + if (sp < 4) return STBTT__CSERR("(vv|hh)curveto stack"); + f = 0.0; + if (sp & 1) { f = s[i]; i++; } + for (; i + 3 < sp; i += 4) { + if (b0 == 0x1B) + stbtt__csctx_rccurve_to(c, s[i], f, s[i+1], s[i+2], s[i+3], 0.0); + else + stbtt__csctx_rccurve_to(c, f, s[i], s[i+1], s[i+2], 0.0, s[i+3]); + f = 0.0; + } + break; + 
+ case 0x0A: // callsubr + if (!has_subrs) { + if (info->fdselect.size) + subrs = stbtt__cid_get_glyph_subrs(info, glyph_index); + has_subrs = 1; + } + // fallthrough + case 0x1D: // callgsubr + if (sp < 1) return STBTT__CSERR("call(g|)subr stack"); + v = (int) s[--sp]; + if (subr_stack_height >= 10) return STBTT__CSERR("recursion limit"); + subr_stack[subr_stack_height++] = b; + b = stbtt__get_subr(b0 == 0x0A ? subrs : info->gsubrs, v); + if (b.size == 0) return STBTT__CSERR("subr not found"); + b.cursor = 0; + clear_stack = 0; + break; + + case 0x0B: // return + if (subr_stack_height <= 0) return STBTT__CSERR("return outside subr"); + b = subr_stack[--subr_stack_height]; + clear_stack = 0; + break; + + case 0x0E: // endchar + stbtt__csctx_close_shape(c); + return 1; + + case 0x0C: { // two-byte escape + float dx1, dx2, dx3, dx4, dx5, dx6, dy1, dy2, dy3, dy4, dy5, dy6; + float dx, dy; + int b1 = stbtt__buf_get8(&b); + switch (b1) { + // @TODO These "flex" implementations ignore the flex-depth and resolution, + // and always draw beziers. 
+ case 0x22: // hflex + if (sp < 7) return STBTT__CSERR("hflex stack"); + dx1 = s[0]; + dx2 = s[1]; + dy2 = s[2]; + dx3 = s[3]; + dx4 = s[4]; + dx5 = s[5]; + dx6 = s[6]; + stbtt__csctx_rccurve_to(c, dx1, 0, dx2, dy2, dx3, 0); + stbtt__csctx_rccurve_to(c, dx4, 0, dx5, -dy2, dx6, 0); + break; + + case 0x23: // flex + if (sp < 13) return STBTT__CSERR("flex stack"); + dx1 = s[0]; + dy1 = s[1]; + dx2 = s[2]; + dy2 = s[3]; + dx3 = s[4]; + dy3 = s[5]; + dx4 = s[6]; + dy4 = s[7]; + dx5 = s[8]; + dy5 = s[9]; + dx6 = s[10]; + dy6 = s[11]; + //fd is s[12] + stbtt__csctx_rccurve_to(c, dx1, dy1, dx2, dy2, dx3, dy3); + stbtt__csctx_rccurve_to(c, dx4, dy4, dx5, dy5, dx6, dy6); + break; + + case 0x24: // hflex1 + if (sp < 9) return STBTT__CSERR("hflex1 stack"); + dx1 = s[0]; + dy1 = s[1]; + dx2 = s[2]; + dy2 = s[3]; + dx3 = s[4]; + dx4 = s[5]; + dx5 = s[6]; + dy5 = s[7]; + dx6 = s[8]; + stbtt__csctx_rccurve_to(c, dx1, dy1, dx2, dy2, dx3, 0); + stbtt__csctx_rccurve_to(c, dx4, 0, dx5, dy5, dx6, -(dy1+dy2+dy5)); + break; + + case 0x25: // flex1 + if (sp < 11) return STBTT__CSERR("flex1 stack"); + dx1 = s[0]; + dy1 = s[1]; + dx2 = s[2]; + dy2 = s[3]; + dx3 = s[4]; + dy3 = s[5]; + dx4 = s[6]; + dy4 = s[7]; + dx5 = s[8]; + dy5 = s[9]; + dx6 = dy6 = s[10]; + dx = dx1+dx2+dx3+dx4+dx5; + dy = dy1+dy2+dy3+dy4+dy5; + if (STBTT_fabs(dx) > STBTT_fabs(dy)) + dy6 = -dy; + else + dx6 = -dx; + stbtt__csctx_rccurve_to(c, dx1, dy1, dx2, dy2, dx3, dy3); + stbtt__csctx_rccurve_to(c, dx4, dy4, dx5, dy5, dx6, dy6); + break; + + default: + return STBTT__CSERR("unimplemented"); + } + } break; + + default: + if (b0 != 255 && b0 != 28 && (b0 < 32 || b0 > 254)) //-V560 + return STBTT__CSERR("reserved operator"); + + // push immediate + if (b0 == 255) { + f = (float)(stbtt_int32)stbtt__buf_get32(&b) / 0x10000; + } else { + stbtt__buf_skip(&b, -1); + f = (float)(stbtt_int16)stbtt__cff_int(&b); + } + if (sp >= 48) return STBTT__CSERR("push stack overflow"); + s[sp++] = f; + clear_stack = 0; + break; + } + if 
(clear_stack) sp = 0; + } + return STBTT__CSERR("no endchar"); + +#undef STBTT__CSERR +} + +static int stbtt__GetGlyphShapeT2(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices) +{ + // runs the charstring twice, once to count and once to output (to avoid realloc) + stbtt__csctx count_ctx = STBTT__CSCTX_INIT(1); + stbtt__csctx output_ctx = STBTT__CSCTX_INIT(0); + if (stbtt__run_charstring(info, glyph_index, &count_ctx)) { + *pvertices = (stbtt_vertex*)STBTT_malloc(count_ctx.num_vertices*sizeof(stbtt_vertex), info->userdata); + output_ctx.pvertices = *pvertices; + if (stbtt__run_charstring(info, glyph_index, &output_ctx)) { + STBTT_assert(output_ctx.num_vertices == count_ctx.num_vertices); + return output_ctx.num_vertices; + } + } + *pvertices = NULL; + return 0; +} + +static int stbtt__GetGlyphInfoT2(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1) +{ + stbtt__csctx c = STBTT__CSCTX_INIT(1); + int r = stbtt__run_charstring(info, glyph_index, &c); + if (x0) *x0 = r ? c.min_x : 0; + if (y0) *y0 = r ? c.min_y : 0; + if (x1) *x1 = r ? c.max_x : 0; + if (y1) *y1 = r ? c.max_y : 0; + return r ? 
c.num_vertices : 0; +} + +STBTT_DEF int stbtt_GetGlyphShape(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices) +{ + if (!info->cff.size) + return stbtt__GetGlyphShapeTT(info, glyph_index, pvertices); + else + return stbtt__GetGlyphShapeT2(info, glyph_index, pvertices); +} + +STBTT_DEF void stbtt_GetGlyphHMetrics(const stbtt_fontinfo *info, int glyph_index, int *advanceWidth, int *leftSideBearing) +{ + stbtt_uint16 numOfLongHorMetrics = ttUSHORT(info->data+info->hhea + 34); + if (glyph_index < numOfLongHorMetrics) { + if (advanceWidth) *advanceWidth = ttSHORT(info->data + info->hmtx + 4*glyph_index); + if (leftSideBearing) *leftSideBearing = ttSHORT(info->data + info->hmtx + 4*glyph_index + 2); + } else { + if (advanceWidth) *advanceWidth = ttSHORT(info->data + info->hmtx + 4*(numOfLongHorMetrics-1)); + if (leftSideBearing) *leftSideBearing = ttSHORT(info->data + info->hmtx + 4*numOfLongHorMetrics + 2*(glyph_index - numOfLongHorMetrics)); + } +} + +static int stbtt__GetGlyphKernInfoAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2) +{ + stbtt_uint8 *data = info->data + info->kern; + stbtt_uint32 needle, straw; + int l, r, m; + + // we only look at the first table. it must be 'horizontal' and format 0. 
+ if (!info->kern) + return 0; + if (ttUSHORT(data+2) < 1) // number of tables, need at least 1 + return 0; + if (ttUSHORT(data+8) != 1) // horizontal flag must be set in format + return 0; + + l = 0; + r = ttUSHORT(data+10) - 1; + needle = glyph1 << 16 | glyph2; + while (l <= r) { + m = (l + r) >> 1; + straw = ttULONG(data+18+(m*6)); // note: unaligned read + if (needle < straw) + r = m - 1; + else if (needle > straw) + l = m + 1; + else + return ttSHORT(data+22+(m*6)); + } + return 0; +} + +static stbtt_int32 stbtt__GetCoverageIndex(stbtt_uint8 *coverageTable, int glyph) +{ + stbtt_uint16 coverageFormat = ttUSHORT(coverageTable); + switch(coverageFormat) { + case 1: { + stbtt_uint16 glyphCount = ttUSHORT(coverageTable + 2); + + // Binary search. + stbtt_int32 l=0, r=glyphCount-1, m; + int straw, needle=glyph; + while (l <= r) { + stbtt_uint8 *glyphArray = coverageTable + 4; + stbtt_uint16 glyphID; + m = (l + r) >> 1; + glyphID = ttUSHORT(glyphArray + 2 * m); + straw = glyphID; + if (needle < straw) + r = m - 1; + else if (needle > straw) + l = m + 1; + else { + return m; + } + } + } break; + + case 2: { + stbtt_uint16 rangeCount = ttUSHORT(coverageTable + 2); + stbtt_uint8 *rangeArray = coverageTable + 4; + + // Binary search. + stbtt_int32 l=0, r=rangeCount-1, m; + int strawStart, strawEnd, needle=glyph; + while (l <= r) { + stbtt_uint8 *rangeRecord; + m = (l + r) >> 1; + rangeRecord = rangeArray + 6 * m; + strawStart = ttUSHORT(rangeRecord); + strawEnd = ttUSHORT(rangeRecord + 2); + if (needle < strawStart) + r = m - 1; + else if (needle > strawEnd) + l = m + 1; + else { + stbtt_uint16 startCoverageIndex = ttUSHORT(rangeRecord + 4); + return startCoverageIndex + glyph - strawStart; + } + } + } break; + + default: { + // There are no other cases. 
+ STBTT_assert(0); + } break; + } + + return -1; +} + +static stbtt_int32 stbtt__GetGlyphClass(stbtt_uint8 *classDefTable, int glyph) +{ + stbtt_uint16 classDefFormat = ttUSHORT(classDefTable); + switch(classDefFormat) + { + case 1: { + stbtt_uint16 startGlyphID = ttUSHORT(classDefTable + 2); + stbtt_uint16 glyphCount = ttUSHORT(classDefTable + 4); + stbtt_uint8 *classDef1ValueArray = classDefTable + 6; + + if (glyph >= startGlyphID && glyph < startGlyphID + glyphCount) + return (stbtt_int32)ttUSHORT(classDef1ValueArray + 2 * (glyph - startGlyphID)); + + // [DEAR IMGUI] Commented to fix static analyzer warning + //classDefTable = classDef1ValueArray + 2 * glyphCount; + } break; + + case 2: { + stbtt_uint16 classRangeCount = ttUSHORT(classDefTable + 2); + stbtt_uint8 *classRangeRecords = classDefTable + 4; + + // Binary search. + stbtt_int32 l=0, r=classRangeCount-1, m; + int strawStart, strawEnd, needle=glyph; + while (l <= r) { + stbtt_uint8 *classRangeRecord; + m = (l + r) >> 1; + classRangeRecord = classRangeRecords + 6 * m; + strawStart = ttUSHORT(classRangeRecord); + strawEnd = ttUSHORT(classRangeRecord + 2); + if (needle < strawStart) + r = m - 1; + else if (needle > strawEnd) + l = m + 1; + else + return (stbtt_int32)ttUSHORT(classRangeRecord + 4); + } + + // [DEAR IMGUI] Commented to fix static analyzer warning + //classDefTable = classRangeRecords + 6 * classRangeCount; + } break; + + default: { + // There are no other cases. + STBTT_assert(0); + } break; + } + + return -1; +} + +// Define to STBTT_assert(x) if you want to break on unimplemented formats. 
+#define STBTT_GPOS_TODO_assert(x) + +static stbtt_int32 stbtt__GetGlyphGPOSInfoAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2) +{ + stbtt_uint16 lookupListOffset; + stbtt_uint8 *lookupList; + stbtt_uint16 lookupCount; + stbtt_uint8 *data; + stbtt_int32 i; + + if (!info->gpos) return 0; + + data = info->data + info->gpos; + + if (ttUSHORT(data+0) != 1) return 0; // Major version 1 + if (ttUSHORT(data+2) != 0) return 0; // Minor version 0 + + lookupListOffset = ttUSHORT(data+8); + lookupList = data + lookupListOffset; + lookupCount = ttUSHORT(lookupList); + + for (i=0; i> 1; + pairValue = pairValueArray + (2 + valueRecordPairSizeInBytes) * m; + secondGlyph = ttUSHORT(pairValue); + straw = secondGlyph; + if (needle < straw) + r = m - 1; + else if (needle > straw) + l = m + 1; + else { + stbtt_int16 xAdvance = ttSHORT(pairValue + 2); + return xAdvance; + } + } + } break; + + case 2: { + stbtt_uint16 valueFormat1 = ttUSHORT(table + 4); + stbtt_uint16 valueFormat2 = ttUSHORT(table + 6); + + stbtt_uint16 classDef1Offset = ttUSHORT(table + 8); + stbtt_uint16 classDef2Offset = ttUSHORT(table + 10); + int glyph1class = stbtt__GetGlyphClass(table + classDef1Offset, glyph1); + int glyph2class = stbtt__GetGlyphClass(table + classDef2Offset, glyph2); + + stbtt_uint16 class1Count = ttUSHORT(table + 12); + stbtt_uint16 class2Count = ttUSHORT(table + 14); + STBTT_assert(glyph1class < class1Count); + STBTT_assert(glyph2class < class2Count); + + // TODO: Support more formats. 
+ STBTT_GPOS_TODO_assert(valueFormat1 == 4); + if (valueFormat1 != 4) return 0; + STBTT_GPOS_TODO_assert(valueFormat2 == 0); + if (valueFormat2 != 0) return 0; + + if (glyph1class >= 0 && glyph1class < class1Count && glyph2class >= 0 && glyph2class < class2Count) { + stbtt_uint8 *class1Records = table + 16; + stbtt_uint8 *class2Records = class1Records + 2 * (glyph1class * class2Count); + stbtt_int16 xAdvance = ttSHORT(class2Records + 2 * glyph2class); + return xAdvance; + } + } break; + + default: { + // There are no other cases. + STBTT_assert(0); + break; + } // [DEAR IMGUI] removed ; + } + } + break; + } // [DEAR IMGUI] removed ; + + default: + // TODO: Implement other stuff. + break; + } + } + + return 0; +} + +STBTT_DEF int stbtt_GetGlyphKernAdvance(const stbtt_fontinfo *info, int g1, int g2) +{ + int xAdvance = 0; + + if (info->gpos) + xAdvance += stbtt__GetGlyphGPOSInfoAdvance(info, g1, g2); + + if (info->kern) + xAdvance += stbtt__GetGlyphKernInfoAdvance(info, g1, g2); + + return xAdvance; +} + +STBTT_DEF int stbtt_GetCodepointKernAdvance(const stbtt_fontinfo *info, int ch1, int ch2) +{ + if (!info->kern && !info->gpos) // if no kerning table, don't waste time looking up both codepoint->glyphs + return 0; + return stbtt_GetGlyphKernAdvance(info, stbtt_FindGlyphIndex(info,ch1), stbtt_FindGlyphIndex(info,ch2)); +} + +STBTT_DEF void stbtt_GetCodepointHMetrics(const stbtt_fontinfo *info, int codepoint, int *advanceWidth, int *leftSideBearing) +{ + stbtt_GetGlyphHMetrics(info, stbtt_FindGlyphIndex(info,codepoint), advanceWidth, leftSideBearing); +} + +STBTT_DEF void stbtt_GetFontVMetrics(const stbtt_fontinfo *info, int *ascent, int *descent, int *lineGap) +{ + if (ascent ) *ascent = ttSHORT(info->data+info->hhea + 4); + if (descent) *descent = ttSHORT(info->data+info->hhea + 6); + if (lineGap) *lineGap = ttSHORT(info->data+info->hhea + 8); +} + +STBTT_DEF int stbtt_GetFontVMetricsOS2(const stbtt_fontinfo *info, int *typoAscent, int *typoDescent, int 
*typoLineGap) +{ + int tab = stbtt__find_table(info->data, info->fontstart, "OS/2"); + if (!tab) + return 0; + if (typoAscent ) *typoAscent = ttSHORT(info->data+tab + 68); + if (typoDescent) *typoDescent = ttSHORT(info->data+tab + 70); + if (typoLineGap) *typoLineGap = ttSHORT(info->data+tab + 72); + return 1; +} + +STBTT_DEF void stbtt_GetFontBoundingBox(const stbtt_fontinfo *info, int *x0, int *y0, int *x1, int *y1) +{ + *x0 = ttSHORT(info->data + info->head + 36); + *y0 = ttSHORT(info->data + info->head + 38); + *x1 = ttSHORT(info->data + info->head + 40); + *y1 = ttSHORT(info->data + info->head + 42); +} + +STBTT_DEF float stbtt_ScaleForPixelHeight(const stbtt_fontinfo *info, float height) +{ + int fheight = ttSHORT(info->data + info->hhea + 4) - ttSHORT(info->data + info->hhea + 6); + return (float) height / fheight; +} + +STBTT_DEF float stbtt_ScaleForMappingEmToPixels(const stbtt_fontinfo *info, float pixels) +{ + int unitsPerEm = ttUSHORT(info->data + info->head + 18); + return pixels / unitsPerEm; +} + +STBTT_DEF void stbtt_FreeShape(const stbtt_fontinfo *info, stbtt_vertex *v) +{ + STBTT_free(v, info->userdata); +} + +////////////////////////////////////////////////////////////////////////////// +// +// antialiasing software rasterizer +// + +STBTT_DEF void stbtt_GetGlyphBitmapBoxSubpixel(const stbtt_fontinfo *font, int glyph, float scale_x, float scale_y,float shift_x, float shift_y, int *ix0, int *iy0, int *ix1, int *iy1) +{ + int x0=0,y0=0,x1,y1; // =0 suppresses compiler warning + if (!stbtt_GetGlyphBox(font, glyph, &x0,&y0,&x1,&y1)) { + // e.g. space character + if (ix0) *ix0 = 0; + if (iy0) *iy0 = 0; + if (ix1) *ix1 = 0; + if (iy1) *iy1 = 0; + } else { + // move to integral bboxes (treating pixels as little squares, what pixels get touched)? 
+ if (ix0) *ix0 = STBTT_ifloor( x0 * scale_x + shift_x); + if (iy0) *iy0 = STBTT_ifloor(-y1 * scale_y + shift_y); + if (ix1) *ix1 = STBTT_iceil ( x1 * scale_x + shift_x); + if (iy1) *iy1 = STBTT_iceil (-y0 * scale_y + shift_y); + } +} + +STBTT_DEF void stbtt_GetGlyphBitmapBox(const stbtt_fontinfo *font, int glyph, float scale_x, float scale_y, int *ix0, int *iy0, int *ix1, int *iy1) +{ + stbtt_GetGlyphBitmapBoxSubpixel(font, glyph, scale_x, scale_y,0.0f,0.0f, ix0, iy0, ix1, iy1); +} + +STBTT_DEF void stbtt_GetCodepointBitmapBoxSubpixel(const stbtt_fontinfo *font, int codepoint, float scale_x, float scale_y, float shift_x, float shift_y, int *ix0, int *iy0, int *ix1, int *iy1) +{ + stbtt_GetGlyphBitmapBoxSubpixel(font, stbtt_FindGlyphIndex(font,codepoint), scale_x, scale_y,shift_x,shift_y, ix0,iy0,ix1,iy1); +} + +STBTT_DEF void stbtt_GetCodepointBitmapBox(const stbtt_fontinfo *font, int codepoint, float scale_x, float scale_y, int *ix0, int *iy0, int *ix1, int *iy1) +{ + stbtt_GetCodepointBitmapBoxSubpixel(font, codepoint, scale_x, scale_y,0.0f,0.0f, ix0,iy0,ix1,iy1); +} + +////////////////////////////////////////////////////////////////////////////// +// +// Rasterizer + +typedef struct stbtt__hheap_chunk +{ + struct stbtt__hheap_chunk *next; +} stbtt__hheap_chunk; + +typedef struct stbtt__hheap +{ + struct stbtt__hheap_chunk *head; + void *first_free; + int num_remaining_in_head_chunk; +} stbtt__hheap; + +static void *stbtt__hheap_alloc(stbtt__hheap *hh, size_t size, void *userdata) +{ + if (hh->first_free) { + void *p = hh->first_free; + hh->first_free = * (void **) p; + return p; + } else { + if (hh->num_remaining_in_head_chunk == 0) { + int count = (size < 32 ? 2000 : size < 128 ? 
800 : 100); + stbtt__hheap_chunk *c = (stbtt__hheap_chunk *) STBTT_malloc(sizeof(stbtt__hheap_chunk) + size * count, userdata); + if (c == NULL) + return NULL; + c->next = hh->head; + hh->head = c; + hh->num_remaining_in_head_chunk = count; + } + --hh->num_remaining_in_head_chunk; + return (char *) (hh->head) + sizeof(stbtt__hheap_chunk) + size * hh->num_remaining_in_head_chunk; + } +} + +static void stbtt__hheap_free(stbtt__hheap *hh, void *p) +{ + *(void **) p = hh->first_free; + hh->first_free = p; +} + +static void stbtt__hheap_cleanup(stbtt__hheap *hh, void *userdata) +{ + stbtt__hheap_chunk *c = hh->head; + while (c) { + stbtt__hheap_chunk *n = c->next; + STBTT_free(c, userdata); + c = n; + } +} + +typedef struct stbtt__edge { + float x0,y0, x1,y1; + int invert; +} stbtt__edge; + + +typedef struct stbtt__active_edge +{ + struct stbtt__active_edge *next; + #if STBTT_RASTERIZER_VERSION==1 + int x,dx; + float ey; + int direction; + #elif STBTT_RASTERIZER_VERSION==2 + float fx,fdx,fdy; + float direction; + float sy; + float ey; + #else + #error "Unrecognized value of STBTT_RASTERIZER_VERSION" + #endif +} stbtt__active_edge; + +#if STBTT_RASTERIZER_VERSION == 1 +#define STBTT_FIXSHIFT 10 +#define STBTT_FIX (1 << STBTT_FIXSHIFT) +#define STBTT_FIXMASK (STBTT_FIX-1) + +static stbtt__active_edge *stbtt__new_active(stbtt__hheap *hh, stbtt__edge *e, int off_x, float start_point, void *userdata) +{ + stbtt__active_edge *z = (stbtt__active_edge *) stbtt__hheap_alloc(hh, sizeof(*z), userdata); + float dxdy = (e->x1 - e->x0) / (e->y1 - e->y0); + STBTT_assert(z != NULL); + if (!z) return z; + + // round dx down to avoid overshooting + if (dxdy < 0) + z->dx = -STBTT_ifloor(STBTT_FIX * -dxdy); + else + z->dx = STBTT_ifloor(STBTT_FIX * dxdy); + + z->x = STBTT_ifloor(STBTT_FIX * e->x0 + z->dx * (start_point - e->y0)); // use z->dx so when we offset later it's by the same amount + z->x -= off_x * STBTT_FIX; + + z->ey = e->y1; + z->next = 0; + z->direction = e->invert ? 
1 : -1; + return z; +} +#elif STBTT_RASTERIZER_VERSION == 2 +static stbtt__active_edge *stbtt__new_active(stbtt__hheap *hh, stbtt__edge *e, int off_x, float start_point, void *userdata) +{ + stbtt__active_edge *z = (stbtt__active_edge *) stbtt__hheap_alloc(hh, sizeof(*z), userdata); + float dxdy = (e->x1 - e->x0) / (e->y1 - e->y0); + STBTT_assert(z != NULL); + //STBTT_assert(e->y0 <= start_point); + if (!z) return z; + z->fdx = dxdy; + z->fdy = dxdy != 0.0f ? (1.0f/dxdy) : 0.0f; + z->fx = e->x0 + dxdy * (start_point - e->y0); + z->fx -= off_x; + z->direction = e->invert ? 1.0f : -1.0f; + z->sy = e->y0; + z->ey = e->y1; + z->next = 0; + return z; +} +#else +#error "Unrecognized value of STBTT_RASTERIZER_VERSION" +#endif + +#if STBTT_RASTERIZER_VERSION == 1 +// note: this routine clips fills that extend off the edges... ideally this +// wouldn't happen, but it could happen if the truetype glyph bounding boxes +// are wrong, or if the user supplies a too-small bitmap +static void stbtt__fill_active_edges(unsigned char *scanline, int len, stbtt__active_edge *e, int max_weight) +{ + // non-zero winding fill + int x0=0, w=0; + + while (e) { + if (w == 0) { + // if we're currently at zero, we need to record the edge start point + x0 = e->x; w += e->direction; + } else { + int x1 = e->x; w += e->direction; + // if we went to zero, we need to draw + if (w == 0) { + int i = x0 >> STBTT_FIXSHIFT; + int j = x1 >> STBTT_FIXSHIFT; + + if (i < len && j >= 0) { + if (i == j) { + // x0,x1 are the same pixel, so compute combined coverage + scanline[i] = scanline[i] + (stbtt_uint8) ((x1 - x0) * max_weight >> STBTT_FIXSHIFT); + } else { + if (i >= 0) // add antialiasing for x0 + scanline[i] = scanline[i] + (stbtt_uint8) (((STBTT_FIX - (x0 & STBTT_FIXMASK)) * max_weight) >> STBTT_FIXSHIFT); + else + i = -1; // clip + + if (j < len) // add antialiasing for x1 + scanline[j] = scanline[j] + (stbtt_uint8) (((x1 & STBTT_FIXMASK) * max_weight) >> STBTT_FIXSHIFT); + else + j = len; // clip + 
+ for (++i; i < j; ++i) // fill pixels between x0 and x1 + scanline[i] = scanline[i] + (stbtt_uint8) max_weight; + } + } + } + } + + e = e->next; + } +} + +static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e, int n, int vsubsample, int off_x, int off_y, void *userdata) +{ + stbtt__hheap hh = { 0, 0, 0 }; + stbtt__active_edge *active = NULL; + int y,j=0; + int max_weight = (255 / vsubsample); // weight per vertical scanline + int s; // vertical subsample index + unsigned char scanline_data[512], *scanline; + + if (result->w > 512) + scanline = (unsigned char *) STBTT_malloc(result->w, userdata); + else + scanline = scanline_data; + + y = off_y * vsubsample; + e[n].y0 = (off_y + result->h) * (float) vsubsample + 1; + + while (j < result->h) { + STBTT_memset(scanline, 0, result->w); + for (s=0; s < vsubsample; ++s) { + // find center of pixel for this scanline + float scan_y = y + 0.5f; + stbtt__active_edge **step = &active; + + // update all active edges; + // remove all active edges that terminate before the center of this scanline + while (*step) { + stbtt__active_edge * z = *step; + if (z->ey <= scan_y) { + *step = z->next; // delete from list + STBTT_assert(z->direction); + z->direction = 0; + stbtt__hheap_free(&hh, z); + } else { + z->x += z->dx; // advance to position for current scanline + step = &((*step)->next); // advance through list + } + } + + // resort the list if needed + for(;;) { + int changed=0; + step = &active; + while (*step && (*step)->next) { + if ((*step)->x > (*step)->next->x) { + stbtt__active_edge *t = *step; + stbtt__active_edge *q = t->next; + + t->next = q->next; + q->next = t; + *step = q; + changed = 1; + } + step = &(*step)->next; + } + if (!changed) break; + } + + // insert all edges that start before the center of this scanline -- omit ones that also end on this scanline + while (e->y0 <= scan_y) { + if (e->y1 > scan_y) { + stbtt__active_edge *z = stbtt__new_active(&hh, e, off_x, scan_y, userdata); + if 
(z != NULL) { + // find insertion point + if (active == NULL) + active = z; + else if (z->x < active->x) { + // insert at front + z->next = active; + active = z; + } else { + // find thing to insert AFTER + stbtt__active_edge *p = active; + while (p->next && p->next->x < z->x) + p = p->next; + // at this point, p->next->x is NOT < z->x + z->next = p->next; + p->next = z; + } + } + } + ++e; + } + + // now process all active edges in XOR fashion + if (active) + stbtt__fill_active_edges(scanline, result->w, active, max_weight); + + ++y; + } + STBTT_memcpy(result->pixels + j * result->stride, scanline, result->w); + ++j; + } + + stbtt__hheap_cleanup(&hh, userdata); + + if (scanline != scanline_data) + STBTT_free(scanline, userdata); +} + +#elif STBTT_RASTERIZER_VERSION == 2 + +// the edge passed in here does not cross the vertical line at x or the vertical line at x+1 +// (i.e. it has already been clipped to those) +static void stbtt__handle_clipped_edge(float *scanline, int x, stbtt__active_edge *e, float x0, float y0, float x1, float y1) +{ + if (y0 == y1) return; + STBTT_assert(y0 < y1); + STBTT_assert(e->sy <= e->ey); + if (y0 > e->ey) return; + if (y1 < e->sy) return; + if (y0 < e->sy) { + x0 += (x1-x0) * (e->sy - y0) / (y1-y0); + y0 = e->sy; + } + if (y1 > e->ey) { + x1 += (x1-x0) * (e->ey - y1) / (y1-y0); + y1 = e->ey; + } + + if (x0 == x) + STBTT_assert(x1 <= x+1); + else if (x0 == x+1) + STBTT_assert(x1 >= x); + else if (x0 <= x) + STBTT_assert(x1 <= x); + else if (x0 >= x+1) + STBTT_assert(x1 >= x+1); + else + STBTT_assert(x1 >= x && x1 <= x+1); + + if (x0 <= x && x1 <= x) + scanline[x] += e->direction * (y1-y0); + else if (x0 >= x+1 && x1 >= x+1) + ; + else { + STBTT_assert(x0 >= x && x0 <= x+1 && x1 >= x && x1 <= x+1); + scanline[x] += e->direction * (y1-y0) * (1-((x0-x)+(x1-x))/2); // coverage = 1 - average x position + } +} + +static void stbtt__fill_active_edges_new(float *scanline, float *scanline_fill, int len, stbtt__active_edge *e, float y_top) +{ + 
float y_bottom = y_top+1; + + while (e) { + // brute force every pixel + + // compute intersection points with top & bottom + STBTT_assert(e->ey >= y_top); + + if (e->fdx == 0) { + float x0 = e->fx; + if (x0 < len) { + if (x0 >= 0) { + stbtt__handle_clipped_edge(scanline,(int) x0,e, x0,y_top, x0,y_bottom); + stbtt__handle_clipped_edge(scanline_fill-1,(int) x0+1,e, x0,y_top, x0,y_bottom); + } else { + stbtt__handle_clipped_edge(scanline_fill-1,0,e, x0,y_top, x0,y_bottom); + } + } + } else { + float x0 = e->fx; + float dx = e->fdx; + float xb = x0 + dx; + float x_top, x_bottom; + float sy0,sy1; + float dy = e->fdy; + STBTT_assert(e->sy <= y_bottom && e->ey >= y_top); + + // compute endpoints of line segment clipped to this scanline (if the + // line segment starts on this scanline. x0 is the intersection of the + // line with y_top, but that may be off the line segment. + if (e->sy > y_top) { + x_top = x0 + dx * (e->sy - y_top); + sy0 = e->sy; + } else { + x_top = x0; + sy0 = y_top; + } + if (e->ey < y_bottom) { + x_bottom = x0 + dx * (e->ey - y_top); + sy1 = e->ey; + } else { + x_bottom = xb; + sy1 = y_bottom; + } + + if (x_top >= 0 && x_bottom >= 0 && x_top < len && x_bottom < len) { + // from here on, we don't have to range check x values + + if ((int) x_top == (int) x_bottom) { + float height; + // simple case, only spans one pixel + int x = (int) x_top; + height = sy1 - sy0; + STBTT_assert(x >= 0 && x < len); + scanline[x] += e->direction * (1-((x_top - x) + (x_bottom-x))/2) * height; + scanline_fill[x] += e->direction * height; // everything right of this pixel is filled + } else { + int x,x1,x2; + float y_crossing, step, sign, area; + // covers 2+ pixels + if (x_top > x_bottom) { + // flip scanline vertically; signed area is the same + float t; + sy0 = y_bottom - (sy0 - y_top); + sy1 = y_bottom - (sy1 - y_top); + t = sy0, sy0 = sy1, sy1 = t; + t = x_bottom, x_bottom = x_top, x_top = t; + dx = -dx; + dy = -dy; + t = x0, x0 = xb, xb = t; + // [DEAR IMGUI] Fix 
static analyzer warning + (void)dx; // [ImGui: fix static analyzer warning] + } + + x1 = (int) x_top; + x2 = (int) x_bottom; + // compute intersection with y axis at x1+1 + y_crossing = (x1+1 - x0) * dy + y_top; + + sign = e->direction; + // area of the rectangle covered from y0..y_crossing + area = sign * (y_crossing-sy0); + // area of the triangle (x_top,y0), (x+1,y0), (x+1,y_crossing) + scanline[x1] += area * (1-((x_top - x1)+(x1+1-x1))/2); + + step = sign * dy; + for (x = x1+1; x < x2; ++x) { + scanline[x] += area + step/2; + area += step; + } + y_crossing += dy * (x2 - (x1+1)); + + STBTT_assert(STBTT_fabs(area) <= 1.01f); + + scanline[x2] += area + sign * (1-((x2-x2)+(x_bottom-x2))/2) * (sy1-y_crossing); + + scanline_fill[x2] += sign * (sy1-sy0); + } + } else { + // if edge goes outside of box we're drawing, we require + // clipping logic. since this does not match the intended use + // of this library, we use a different, very slow brute + // force implementation + int x; + for (x=0; x < len; ++x) { + // cases: + // + // there can be up to two intersections with the pixel. any intersection + // with left or right edges can be handled by splitting into two (or three) + // regions. intersections with top & bottom do not necessitate case-wise logic. + // + // the old way of doing this found the intersections with the left & right edges, + // then used some simple logic to produce up to three segments in sorted order + // from top-to-bottom. however, this had a problem: if an x edge was epsilon + // across the x border, then the corresponding y position might not be distinct + // from the other y segment, and it might ignored as an empty segment. to avoid + // that, we need to explicitly produce segments based on x positions. 
+ + // rename variables to clearly-defined pairs + float y0 = y_top; + float x1 = (float) (x); + float x2 = (float) (x+1); + float x3 = xb; + float y3 = y_bottom; + + // x = e->x + e->dx * (y-y_top) + // (y-y_top) = (x - e->x) / e->dx + // y = (x - e->x) / e->dx + y_top + float y1 = (x - x0) / dx + y_top; + float y2 = (x+1 - x0) / dx + y_top; + + if (x0 < x1 && x3 > x2) { // three segments descending down-right + stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x1,y1); + stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x2,y2); + stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x3,y3); + } else if (x3 < x1 && x0 > x2) { // three segments descending down-left + stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x2,y2); + stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x1,y1); + stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x3,y3); + } else if (x0 < x1 && x3 > x1) { // two segments across x, down-right + stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x1,y1); + stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x3,y3); + } else if (x3 < x1 && x0 > x1) { // two segments across x, down-left + stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x1,y1); + stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x3,y3); + } else if (x0 < x2 && x3 > x2) { // two segments across x+1, down-right + stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x2,y2); + stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x3,y3); + } else if (x3 < x2 && x0 > x2) { // two segments across x+1, down-left + stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x2,y2); + stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x3,y3); + } else { // one segment + stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x3,y3); + } + } + } + } + e = e->next; + } +} + +// directly AA rasterize edges w/o supersampling +static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e, int n, int vsubsample, int off_x, int off_y, void *userdata) +{ + stbtt__hheap hh = { 0, 0, 0 }; + stbtt__active_edge *active = NULL; + int y,j=0, i; + 
float scanline_data[129], *scanline, *scanline2; + + STBTT__NOTUSED(vsubsample); + + if (result->w > 64) + scanline = (float *) STBTT_malloc((result->w*2+1) * sizeof(float), userdata); + else + scanline = scanline_data; + + scanline2 = scanline + result->w; + + y = off_y; + e[n].y0 = (float) (off_y + result->h) + 1; + + while (j < result->h) { + // find center of pixel for this scanline + float scan_y_top = y + 0.0f; + float scan_y_bottom = y + 1.0f; + stbtt__active_edge **step = &active; + + STBTT_memset(scanline , 0, result->w*sizeof(scanline[0])); + STBTT_memset(scanline2, 0, (result->w+1)*sizeof(scanline[0])); + + // update all active edges; + // remove all active edges that terminate before the top of this scanline + while (*step) { + stbtt__active_edge * z = *step; + if (z->ey <= scan_y_top) { + *step = z->next; // delete from list + STBTT_assert(z->direction); + z->direction = 0; + stbtt__hheap_free(&hh, z); + } else { + step = &((*step)->next); // advance through list + } + } + + // insert all edges that start before the bottom of this scanline + while (e->y0 <= scan_y_bottom) { + if (e->y0 != e->y1) { + stbtt__active_edge *z = stbtt__new_active(&hh, e, off_x, scan_y_top, userdata); + if (z != NULL) { + if (j == 0 && off_y != 0) { + if (z->ey < scan_y_top) { + // this can happen due to subpixel positioning and some kind of fp rounding error i think + z->ey = scan_y_top; + } + } + STBTT_assert(z->ey >= scan_y_top); // if we get really unlucky a tiny bit of an edge can be out of bounds + // insert at front + z->next = active; + active = z; + } + } + ++e; + } + + // now process all active edges + if (active) + stbtt__fill_active_edges_new(scanline, scanline2+1, result->w, active, scan_y_top); + + { + float sum = 0; + for (i=0; i < result->w; ++i) { + float k; + int m; + sum += scanline2[i]; + k = scanline[i] + sum; + k = (float) STBTT_fabs(k)*255 + 0.5f; + m = (int) k; + if (m > 255) m = 255; + result->pixels[j*result->stride + i] = (unsigned char) m; + } + } 
+ // advance all the edges + step = &active; + while (*step) { + stbtt__active_edge *z = *step; + z->fx += z->fdx; // advance to position for current scanline + step = &((*step)->next); // advance through list + } + + ++y; + ++j; + } + + stbtt__hheap_cleanup(&hh, userdata); + + if (scanline != scanline_data) + STBTT_free(scanline, userdata); +} +#else +#error "Unrecognized value of STBTT_RASTERIZER_VERSION" +#endif + +#define STBTT__COMPARE(a,b) ((a)->y0 < (b)->y0) + +static void stbtt__sort_edges_ins_sort(stbtt__edge *p, int n) +{ + int i,j; + for (i=1; i < n; ++i) { + stbtt__edge t = p[i], *a = &t; + j = i; + while (j > 0) { + stbtt__edge *b = &p[j-1]; + int c = STBTT__COMPARE(a,b); + if (!c) break; + p[j] = p[j-1]; + --j; + } + if (i != j) + p[j] = t; + } +} + +static void stbtt__sort_edges_quicksort(stbtt__edge *p, int n) +{ + /* threshold for transitioning to insertion sort */ + while (n > 12) { + stbtt__edge t; + int c01,c12,c,m,i,j; + + /* compute median of three */ + m = n >> 1; + c01 = STBTT__COMPARE(&p[0],&p[m]); + c12 = STBTT__COMPARE(&p[m],&p[n-1]); + /* if 0 >= mid >= end, or 0 < mid < end, then use mid */ + if (c01 != c12) { + /* otherwise, we'll need to swap something else to middle */ + int z; + c = STBTT__COMPARE(&p[0],&p[n-1]); + /* 0>mid && midn => n; 0 0 */ + /* 0n: 0>n => 0; 0 n */ + z = (c == c12) ? 
0 : n-1; + t = p[z]; + p[z] = p[m]; + p[m] = t; + } + /* now p[m] is the median-of-three */ + /* swap it to the beginning so it won't move around */ + t = p[0]; + p[0] = p[m]; + p[m] = t; + + /* partition loop */ + i=1; + j=n-1; + for(;;) { + /* handling of equality is crucial here */ + /* for sentinels & efficiency with duplicates */ + for (;;++i) { + if (!STBTT__COMPARE(&p[i], &p[0])) break; + } + for (;;--j) { + if (!STBTT__COMPARE(&p[0], &p[j])) break; + } + /* make sure we haven't crossed */ + if (i >= j) break; + t = p[i]; + p[i] = p[j]; + p[j] = t; + + ++i; + --j; + } + /* recurse on smaller side, iterate on larger */ + if (j < (n-i)) { + stbtt__sort_edges_quicksort(p,j); + p = p+i; + n = n-i; + } else { + stbtt__sort_edges_quicksort(p+i, n-i); + n = j; + } + } +} + +static void stbtt__sort_edges(stbtt__edge *p, int n) +{ + stbtt__sort_edges_quicksort(p, n); + stbtt__sort_edges_ins_sort(p, n); +} + +typedef struct +{ + float x,y; +} stbtt__point; + +static void stbtt__rasterize(stbtt__bitmap *result, stbtt__point *pts, int *wcount, int windings, float scale_x, float scale_y, float shift_x, float shift_y, int off_x, int off_y, int invert, void *userdata) +{ + float y_scale_inv = invert ? -scale_y : scale_y; + stbtt__edge *e; + int n,i,j,k,m; +#if STBTT_RASTERIZER_VERSION == 1 + int vsubsample = result->h < 8 ? 
15 : 5; +#elif STBTT_RASTERIZER_VERSION == 2 + int vsubsample = 1; +#else + #error "Unrecognized value of STBTT_RASTERIZER_VERSION" +#endif + // vsubsample should divide 255 evenly; otherwise we won't reach full opacity + + // now we have to blow out the windings into explicit edge lists + n = 0; + for (i=0; i < windings; ++i) + n += wcount[i]; + + e = (stbtt__edge *) STBTT_malloc(sizeof(*e) * (n+1), userdata); // add an extra one as a sentinel + if (e == 0) return; + n = 0; + + m=0; + for (i=0; i < windings; ++i) { + stbtt__point *p = pts + m; + m += wcount[i]; + j = wcount[i]-1; + for (k=0; k < wcount[i]; j=k++) { + int a=k,b=j; + // skip the edge if horizontal + if (p[j].y == p[k].y) + continue; + // add edge from j to k to the list + e[n].invert = 0; + if (invert ? p[j].y > p[k].y : p[j].y < p[k].y) { + e[n].invert = 1; + a=j,b=k; + } + e[n].x0 = p[a].x * scale_x + shift_x; + e[n].y0 = (p[a].y * y_scale_inv + shift_y) * vsubsample; + e[n].x1 = p[b].x * scale_x + shift_x; + e[n].y1 = (p[b].y * y_scale_inv + shift_y) * vsubsample; + ++n; + } + } + + // now sort the edges by their highest point (should snap to integer, and then by x) + //STBTT_sort(e, n, sizeof(e[0]), stbtt__edge_compare); + stbtt__sort_edges(e, n); + + // now, traverse the scanlines and find the intersections on each scanline, use xor winding rule + stbtt__rasterize_sorted_edges(result, e, n, vsubsample, off_x, off_y, userdata); + + STBTT_free(e, userdata); +} + +static void stbtt__add_point(stbtt__point *points, int n, float x, float y) +{ + if (!points) return; // during first pass, it's unallocated + points[n].x = x; + points[n].y = y; +} + +// tessellate until threshold p is happy... 
@TODO warped to compensate for non-linear stretching +static int stbtt__tesselate_curve(stbtt__point *points, int *num_points, float x0, float y0, float x1, float y1, float x2, float y2, float objspace_flatness_squared, int n) +{ + // midpoint + float mx = (x0 + 2*x1 + x2)/4; + float my = (y0 + 2*y1 + y2)/4; + // versus directly drawn line + float dx = (x0+x2)/2 - mx; + float dy = (y0+y2)/2 - my; + if (n > 16) // 65536 segments on one curve better be enough! + return 1; + if (dx*dx+dy*dy > objspace_flatness_squared) { // half-pixel error allowed... need to be smaller if AA + stbtt__tesselate_curve(points, num_points, x0,y0, (x0+x1)/2.0f,(y0+y1)/2.0f, mx,my, objspace_flatness_squared,n+1); + stbtt__tesselate_curve(points, num_points, mx,my, (x1+x2)/2.0f,(y1+y2)/2.0f, x2,y2, objspace_flatness_squared,n+1); + } else { + stbtt__add_point(points, *num_points,x2,y2); + *num_points = *num_points+1; + } + return 1; +} + +static void stbtt__tesselate_cubic(stbtt__point *points, int *num_points, float x0, float y0, float x1, float y1, float x2, float y2, float x3, float y3, float objspace_flatness_squared, int n) +{ + // @TODO this "flatness" calculation is just made-up nonsense that seems to work well enough + float dx0 = x1-x0; + float dy0 = y1-y0; + float dx1 = x2-x1; + float dy1 = y2-y1; + float dx2 = x3-x2; + float dy2 = y3-y2; + float dx = x3-x0; + float dy = y3-y0; + float longlen = (float) (STBTT_sqrt(dx0*dx0+dy0*dy0)+STBTT_sqrt(dx1*dx1+dy1*dy1)+STBTT_sqrt(dx2*dx2+dy2*dy2)); + float shortlen = (float) STBTT_sqrt(dx*dx+dy*dy); + float flatness_squared = longlen*longlen-shortlen*shortlen; + + if (n > 16) // 65536 segments on one curve better be enough! 
+ return; + + if (flatness_squared > objspace_flatness_squared) { + float x01 = (x0+x1)/2; + float y01 = (y0+y1)/2; + float x12 = (x1+x2)/2; + float y12 = (y1+y2)/2; + float x23 = (x2+x3)/2; + float y23 = (y2+y3)/2; + + float xa = (x01+x12)/2; + float ya = (y01+y12)/2; + float xb = (x12+x23)/2; + float yb = (y12+y23)/2; + + float mx = (xa+xb)/2; + float my = (ya+yb)/2; + + stbtt__tesselate_cubic(points, num_points, x0,y0, x01,y01, xa,ya, mx,my, objspace_flatness_squared,n+1); + stbtt__tesselate_cubic(points, num_points, mx,my, xb,yb, x23,y23, x3,y3, objspace_flatness_squared,n+1); + } else { + stbtt__add_point(points, *num_points,x3,y3); + *num_points = *num_points+1; + } +} + +// returns number of contours +static stbtt__point *stbtt_FlattenCurves(stbtt_vertex *vertices, int num_verts, float objspace_flatness, int **contour_lengths, int *num_contours, void *userdata) +{ + stbtt__point *points=0; + int num_points=0; + + float objspace_flatness_squared = objspace_flatness * objspace_flatness; + int i,n=0,start=0, pass; + + // count how many "moves" there are to get the contour count + for (i=0; i < num_verts; ++i) + if (vertices[i].type == STBTT_vmove) + ++n; + + *num_contours = n; + if (n == 0) return 0; + + *contour_lengths = (int *) STBTT_malloc(sizeof(**contour_lengths) * n, userdata); + + if (*contour_lengths == 0) { + *num_contours = 0; + return 0; + } + + // make two passes through the points so we don't need to realloc + for (pass=0; pass < 2; ++pass) { + float x=0,y=0; + if (pass == 1) { + points = (stbtt__point *) STBTT_malloc(num_points * sizeof(points[0]), userdata); + if (points == NULL) goto error; + } + num_points = 0; + n= -1; + for (i=0; i < num_verts; ++i) { + switch (vertices[i].type) { + case STBTT_vmove: + // start the next contour + if (n >= 0) + (*contour_lengths)[n] = num_points - start; + ++n; + start = num_points; + + x = vertices[i].x, y = vertices[i].y; + stbtt__add_point(points, num_points++, x,y); + break; + case STBTT_vline: + x = 
vertices[i].x, y = vertices[i].y; + stbtt__add_point(points, num_points++, x, y); + break; + case STBTT_vcurve: + stbtt__tesselate_curve(points, &num_points, x,y, + vertices[i].cx, vertices[i].cy, + vertices[i].x, vertices[i].y, + objspace_flatness_squared, 0); + x = vertices[i].x, y = vertices[i].y; + break; + case STBTT_vcubic: + stbtt__tesselate_cubic(points, &num_points, x,y, + vertices[i].cx, vertices[i].cy, + vertices[i].cx1, vertices[i].cy1, + vertices[i].x, vertices[i].y, + objspace_flatness_squared, 0); + x = vertices[i].x, y = vertices[i].y; + break; + } + } + (*contour_lengths)[n] = num_points - start; + } + + return points; +error: + STBTT_free(points, userdata); + STBTT_free(*contour_lengths, userdata); + *contour_lengths = 0; + *num_contours = 0; + return NULL; +} + +STBTT_DEF void stbtt_Rasterize(stbtt__bitmap *result, float flatness_in_pixels, stbtt_vertex *vertices, int num_verts, float scale_x, float scale_y, float shift_x, float shift_y, int x_off, int y_off, int invert, void *userdata) +{ + float scale = scale_x > scale_y ? 
scale_y : scale_x; + int winding_count = 0; + int *winding_lengths = NULL; + stbtt__point *windings = stbtt_FlattenCurves(vertices, num_verts, flatness_in_pixels / scale, &winding_lengths, &winding_count, userdata); + if (windings) { + stbtt__rasterize(result, windings, winding_lengths, winding_count, scale_x, scale_y, shift_x, shift_y, x_off, y_off, invert, userdata); + STBTT_free(winding_lengths, userdata); + STBTT_free(windings, userdata); + } +} + +STBTT_DEF void stbtt_FreeBitmap(unsigned char *bitmap, void *userdata) +{ + STBTT_free(bitmap, userdata); +} + +STBTT_DEF unsigned char *stbtt_GetGlyphBitmapSubpixel(const stbtt_fontinfo *info, float scale_x, float scale_y, float shift_x, float shift_y, int glyph, int *width, int *height, int *xoff, int *yoff) +{ + int ix0,iy0,ix1,iy1; + stbtt__bitmap gbm; + stbtt_vertex *vertices; + int num_verts = stbtt_GetGlyphShape(info, glyph, &vertices); + + if (scale_x == 0) scale_x = scale_y; + if (scale_y == 0) { + if (scale_x == 0) { + STBTT_free(vertices, info->userdata); + return NULL; + } + scale_y = scale_x; + } + + stbtt_GetGlyphBitmapBoxSubpixel(info, glyph, scale_x, scale_y, shift_x, shift_y, &ix0,&iy0,&ix1,&iy1); + + // now we get the size + gbm.w = (ix1 - ix0); + gbm.h = (iy1 - iy0); + gbm.pixels = NULL; // in case we error + + if (width ) *width = gbm.w; + if (height) *height = gbm.h; + if (xoff ) *xoff = ix0; + if (yoff ) *yoff = iy0; + + if (gbm.w && gbm.h) { + gbm.pixels = (unsigned char *) STBTT_malloc(gbm.w * gbm.h, info->userdata); + if (gbm.pixels) { + gbm.stride = gbm.w; + + stbtt_Rasterize(&gbm, 0.35f, vertices, num_verts, scale_x, scale_y, shift_x, shift_y, ix0, iy0, 1, info->userdata); + } + } + STBTT_free(vertices, info->userdata); + return gbm.pixels; +} + +STBTT_DEF unsigned char *stbtt_GetGlyphBitmap(const stbtt_fontinfo *info, float scale_x, float scale_y, int glyph, int *width, int *height, int *xoff, int *yoff) +{ + return stbtt_GetGlyphBitmapSubpixel(info, scale_x, scale_y, 0.0f, 0.0f, glyph, 
width, height, xoff, yoff); +} + +STBTT_DEF void stbtt_MakeGlyphBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int glyph) +{ + int ix0,iy0; + stbtt_vertex *vertices; + int num_verts = stbtt_GetGlyphShape(info, glyph, &vertices); + stbtt__bitmap gbm; + + stbtt_GetGlyphBitmapBoxSubpixel(info, glyph, scale_x, scale_y, shift_x, shift_y, &ix0,&iy0,0,0); + gbm.pixels = output; + gbm.w = out_w; + gbm.h = out_h; + gbm.stride = out_stride; + + if (gbm.w && gbm.h) + stbtt_Rasterize(&gbm, 0.35f, vertices, num_verts, scale_x, scale_y, shift_x, shift_y, ix0,iy0, 1, info->userdata); + + STBTT_free(vertices, info->userdata); +} + +STBTT_DEF void stbtt_MakeGlyphBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int glyph) +{ + stbtt_MakeGlyphBitmapSubpixel(info, output, out_w, out_h, out_stride, scale_x, scale_y, 0.0f,0.0f, glyph); +} + +STBTT_DEF unsigned char *stbtt_GetCodepointBitmapSubpixel(const stbtt_fontinfo *info, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint, int *width, int *height, int *xoff, int *yoff) +{ + return stbtt_GetGlyphBitmapSubpixel(info, scale_x, scale_y,shift_x,shift_y, stbtt_FindGlyphIndex(info,codepoint), width,height,xoff,yoff); +} + +STBTT_DEF void stbtt_MakeCodepointBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int oversample_x, int oversample_y, float *sub_x, float *sub_y, int codepoint) +{ + stbtt_MakeGlyphBitmapSubpixelPrefilter(info, output, out_w, out_h, out_stride, scale_x, scale_y, shift_x, shift_y, oversample_x, oversample_y, sub_x, sub_y, stbtt_FindGlyphIndex(info,codepoint)); +} + +STBTT_DEF void stbtt_MakeCodepointBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, 
int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint) +{ + stbtt_MakeGlyphBitmapSubpixel(info, output, out_w, out_h, out_stride, scale_x, scale_y, shift_x, shift_y, stbtt_FindGlyphIndex(info,codepoint)); +} + +STBTT_DEF unsigned char *stbtt_GetCodepointBitmap(const stbtt_fontinfo *info, float scale_x, float scale_y, int codepoint, int *width, int *height, int *xoff, int *yoff) +{ + return stbtt_GetCodepointBitmapSubpixel(info, scale_x, scale_y, 0.0f,0.0f, codepoint, width,height,xoff,yoff); +} + +STBTT_DEF void stbtt_MakeCodepointBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int codepoint) +{ + stbtt_MakeCodepointBitmapSubpixel(info, output, out_w, out_h, out_stride, scale_x, scale_y, 0.0f,0.0f, codepoint); +} + +////////////////////////////////////////////////////////////////////////////// +// +// bitmap baking +// +// This is SUPER-CRAPPY packing to keep source code small + +static int stbtt_BakeFontBitmap_internal(unsigned char *data, int offset, // font location (use offset=0 for plain .ttf) + float pixel_height, // height of font in pixels + unsigned char *pixels, int pw, int ph, // bitmap to be filled in + int first_char, int num_chars, // characters to bake + stbtt_bakedchar *chardata) +{ + float scale; + int x,y,bottom_y, i; + stbtt_fontinfo f; + f.userdata = NULL; + if (!stbtt_InitFont(&f, data, offset)) + return -1; + STBTT_memset(pixels, 0, pw*ph); // background of 0 around pixels + x=y=1; + bottom_y = 1; + + scale = stbtt_ScaleForPixelHeight(&f, pixel_height); + + for (i=0; i < num_chars; ++i) { + int advance, lsb, x0,y0,x1,y1,gw,gh; + int g = stbtt_FindGlyphIndex(&f, first_char + i); + stbtt_GetGlyphHMetrics(&f, g, &advance, &lsb); + stbtt_GetGlyphBitmapBox(&f, g, scale,scale, &x0,&y0,&x1,&y1); + gw = x1-x0; + gh = y1-y0; + if (x + gw + 1 >= pw) + y = bottom_y, x = 1; // advance to next row + if (y + gh + 1 >= ph) // check if it fits 
vertically AFTER potentially moving to next row + return -i; + STBTT_assert(x+gw < pw); + STBTT_assert(y+gh < ph); + stbtt_MakeGlyphBitmap(&f, pixels+x+y*pw, gw,gh,pw, scale,scale, g); + chardata[i].x0 = (stbtt_int16) x; + chardata[i].y0 = (stbtt_int16) y; + chardata[i].x1 = (stbtt_int16) (x + gw); + chardata[i].y1 = (stbtt_int16) (y + gh); + chardata[i].xadvance = scale * advance; + chardata[i].xoff = (float) x0; + chardata[i].yoff = (float) y0; + x = x + gw + 1; + if (y+gh+1 > bottom_y) + bottom_y = y+gh+1; + } + return bottom_y; +} + +STBTT_DEF void stbtt_GetBakedQuad(const stbtt_bakedchar *chardata, int pw, int ph, int char_index, float *xpos, float *ypos, stbtt_aligned_quad *q, int opengl_fillrule) +{ + float d3d_bias = opengl_fillrule ? 0 : -0.5f; + float ipw = 1.0f / pw, iph = 1.0f / ph; + const stbtt_bakedchar *b = chardata + char_index; + int round_x = STBTT_ifloor((*xpos + b->xoff) + 0.5f); + int round_y = STBTT_ifloor((*ypos + b->yoff) + 0.5f); + + q->x0 = round_x + d3d_bias; + q->y0 = round_y + d3d_bias; + q->x1 = round_x + b->x1 - b->x0 + d3d_bias; + q->y1 = round_y + b->y1 - b->y0 + d3d_bias; + + q->s0 = b->x0 * ipw; + q->t0 = b->y0 * iph; + q->s1 = b->x1 * ipw; + q->t1 = b->y1 * iph; + + *xpos += b->xadvance; +} + +////////////////////////////////////////////////////////////////////////////// +// +// rectangle packing replacement routines if you don't have stb_rect_pack.h +// + +#ifndef STB_RECT_PACK_VERSION + +typedef int stbrp_coord; + +//////////////////////////////////////////////////////////////////////////////////// +// // +// // +// COMPILER WARNING ?!?!? 
// +// // +// // +// if you get a compile warning due to these symbols being defined more than // +// once, move #include "stb_rect_pack.h" before #include "stb_truetype.h" // +// // +//////////////////////////////////////////////////////////////////////////////////// + +typedef struct +{ + int width,height; + int x,y,bottom_y; +} stbrp_context; + +typedef struct +{ + unsigned char x; +} stbrp_node; + +struct stbrp_rect +{ + stbrp_coord x,y; + int id,w,h,was_packed; +}; + +static void stbrp_init_target(stbrp_context *con, int pw, int ph, stbrp_node *nodes, int num_nodes) +{ + con->width = pw; + con->height = ph; + con->x = 0; + con->y = 0; + con->bottom_y = 0; + STBTT__NOTUSED(nodes); + STBTT__NOTUSED(num_nodes); +} + +static void stbrp_pack_rects(stbrp_context *con, stbrp_rect *rects, int num_rects) +{ + int i; + for (i=0; i < num_rects; ++i) { + if (con->x + rects[i].w > con->width) { + con->x = 0; + con->y = con->bottom_y; + } + if (con->y + rects[i].h > con->height) + break; + rects[i].x = con->x; + rects[i].y = con->y; + rects[i].was_packed = 1; + con->x += rects[i].w; + if (con->y + rects[i].h > con->bottom_y) + con->bottom_y = con->y + rects[i].h; + } + for ( ; i < num_rects; ++i) + rects[i].was_packed = 0; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// bitmap baking +// +// This is SUPER-AWESOME (tm Ryan Gordon) packing using stb_rect_pack.h. If +// stb_rect_pack.h isn't available, it uses the BakeFontBitmap strategy. 
+ +STBTT_DEF int stbtt_PackBegin(stbtt_pack_context *spc, unsigned char *pixels, int pw, int ph, int stride_in_bytes, int padding, void *alloc_context) +{ + stbrp_context *context = (stbrp_context *) STBTT_malloc(sizeof(*context) ,alloc_context); + int num_nodes = pw - padding; + stbrp_node *nodes = (stbrp_node *) STBTT_malloc(sizeof(*nodes ) * num_nodes,alloc_context); + + if (context == NULL || nodes == NULL) { + if (context != NULL) STBTT_free(context, alloc_context); + if (nodes != NULL) STBTT_free(nodes , alloc_context); + return 0; + } + + spc->user_allocator_context = alloc_context; + spc->width = pw; + spc->height = ph; + spc->pixels = pixels; + spc->pack_info = context; + spc->nodes = nodes; + spc->padding = padding; + spc->stride_in_bytes = stride_in_bytes != 0 ? stride_in_bytes : pw; + spc->h_oversample = 1; + spc->v_oversample = 1; + spc->skip_missing = 0; + + stbrp_init_target(context, pw-padding, ph-padding, nodes, num_nodes); + + if (pixels) + STBTT_memset(pixels, 0, pw*ph); // background of 0 around pixels + + return 1; +} + +STBTT_DEF void stbtt_PackEnd (stbtt_pack_context *spc) +{ + STBTT_free(spc->nodes , spc->user_allocator_context); + STBTT_free(spc->pack_info, spc->user_allocator_context); +} + +STBTT_DEF void stbtt_PackSetOversampling(stbtt_pack_context *spc, unsigned int h_oversample, unsigned int v_oversample) +{ + STBTT_assert(h_oversample <= STBTT_MAX_OVERSAMPLE); + STBTT_assert(v_oversample <= STBTT_MAX_OVERSAMPLE); + if (h_oversample <= STBTT_MAX_OVERSAMPLE) + spc->h_oversample = h_oversample; + if (v_oversample <= STBTT_MAX_OVERSAMPLE) + spc->v_oversample = v_oversample; +} + +STBTT_DEF void stbtt_PackSetSkipMissingCodepoints(stbtt_pack_context *spc, int skip) +{ + spc->skip_missing = skip; +} + +#define STBTT__OVER_MASK (STBTT_MAX_OVERSAMPLE-1) + +static void stbtt__h_prefilter(unsigned char *pixels, int w, int h, int stride_in_bytes, unsigned int kernel_width) +{ + unsigned char buffer[STBTT_MAX_OVERSAMPLE]; + int safe_w = w - 
kernel_width; + int j; + STBTT_memset(buffer, 0, STBTT_MAX_OVERSAMPLE); // suppress bogus warning from VS2013 -analyze + for (j=0; j < h; ++j) { + int i; + unsigned int total; + STBTT_memset(buffer, 0, kernel_width); + + total = 0; + + // make kernel_width a constant in common cases so compiler can optimize out the divide + switch (kernel_width) { + case 2: + for (i=0; i <= safe_w; ++i) { + total += pixels[i] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i]; + pixels[i] = (unsigned char) (total / 2); + } + break; + case 3: + for (i=0; i <= safe_w; ++i) { + total += pixels[i] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i]; + pixels[i] = (unsigned char) (total / 3); + } + break; + case 4: + for (i=0; i <= safe_w; ++i) { + total += pixels[i] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i]; + pixels[i] = (unsigned char) (total / 4); + } + break; + case 5: + for (i=0; i <= safe_w; ++i) { + total += pixels[i] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i]; + pixels[i] = (unsigned char) (total / 5); + } + break; + default: + for (i=0; i <= safe_w; ++i) { + total += pixels[i] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i]; + pixels[i] = (unsigned char) (total / kernel_width); + } + break; + } + + for (; i < w; ++i) { + STBTT_assert(pixels[i] == 0); + total -= buffer[i & STBTT__OVER_MASK]; + pixels[i] = (unsigned char) (total / kernel_width); + } + + pixels += stride_in_bytes; + } +} + +static void stbtt__v_prefilter(unsigned char *pixels, int w, int h, int stride_in_bytes, unsigned int kernel_width) +{ + unsigned char buffer[STBTT_MAX_OVERSAMPLE]; + int safe_h = h - kernel_width; + int j; + STBTT_memset(buffer, 0, STBTT_MAX_OVERSAMPLE); // suppress bogus warning from VS2013 -analyze + for (j=0; j < w; ++j) { + int i; + unsigned int total; + STBTT_memset(buffer, 0, 
kernel_width); + + total = 0; + + // make kernel_width a constant in common cases so compiler can optimize out the divide + switch (kernel_width) { + case 2: + for (i=0; i <= safe_h; ++i) { + total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes]; + pixels[i*stride_in_bytes] = (unsigned char) (total / 2); + } + break; + case 3: + for (i=0; i <= safe_h; ++i) { + total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes]; + pixels[i*stride_in_bytes] = (unsigned char) (total / 3); + } + break; + case 4: + for (i=0; i <= safe_h; ++i) { + total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes]; + pixels[i*stride_in_bytes] = (unsigned char) (total / 4); + } + break; + case 5: + for (i=0; i <= safe_h; ++i) { + total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes]; + pixels[i*stride_in_bytes] = (unsigned char) (total / 5); + } + break; + default: + for (i=0; i <= safe_h; ++i) { + total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes]; + pixels[i*stride_in_bytes] = (unsigned char) (total / kernel_width); + } + break; + } + + for (; i < h; ++i) { + STBTT_assert(pixels[i*stride_in_bytes] == 0); + total -= buffer[i & STBTT__OVER_MASK]; + pixels[i*stride_in_bytes] = (unsigned char) (total / kernel_width); + } + + pixels += 1; + } +} + +static float stbtt__oversample_shift(int oversample) +{ + if (!oversample) + return 0.0f; + + // The prefilter is a box filter of width "oversample", + // which shifts phase by (oversample - 1)/2 pixels in + // oversampled space. We want to shift in the opposite + // direction to counter this. 
+ return (float)-(oversample - 1) / (2.0f * (float)oversample); +} + +// rects array must be big enough to accommodate all characters in the given ranges +STBTT_DEF int stbtt_PackFontRangesGatherRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects) +{ + int i,j,k; + + k=0; + for (i=0; i < num_ranges; ++i) { + float fh = ranges[i].font_size; + float scale = fh > 0 ? stbtt_ScaleForPixelHeight(info, fh) : stbtt_ScaleForMappingEmToPixels(info, -fh); + ranges[i].h_oversample = (unsigned char) spc->h_oversample; + ranges[i].v_oversample = (unsigned char) spc->v_oversample; + for (j=0; j < ranges[i].num_chars; ++j) { + int x0,y0,x1,y1; + int codepoint = ranges[i].array_of_unicode_codepoints == NULL ? ranges[i].first_unicode_codepoint_in_range + j : ranges[i].array_of_unicode_codepoints[j]; + int glyph = stbtt_FindGlyphIndex(info, codepoint); + if (glyph == 0 && spc->skip_missing) { + rects[k].w = rects[k].h = 0; + } else { + stbtt_GetGlyphBitmapBoxSubpixel(info,glyph, + scale * spc->h_oversample, + scale * spc->v_oversample, + 0,0, + &x0,&y0,&x1,&y1); + rects[k].w = (stbrp_coord) (x1-x0 + spc->padding + spc->h_oversample-1); + rects[k].h = (stbrp_coord) (y1-y0 + spc->padding + spc->v_oversample-1); + } + ++k; + } + } + + return k; +} + +STBTT_DEF void stbtt_MakeGlyphBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int prefilter_x, int prefilter_y, float *sub_x, float *sub_y, int glyph) +{ + stbtt_MakeGlyphBitmapSubpixel(info, + output, + out_w - (prefilter_x - 1), + out_h - (prefilter_y - 1), + out_stride, + scale_x, + scale_y, + shift_x, + shift_y, + glyph); + + if (prefilter_x > 1) + stbtt__h_prefilter(output, out_w, out_h, out_stride, prefilter_x); + + if (prefilter_y > 1) + stbtt__v_prefilter(output, out_w, out_h, out_stride, prefilter_y); + + *sub_x = 
stbtt__oversample_shift(prefilter_x); + *sub_y = stbtt__oversample_shift(prefilter_y); +} + +// rects array must be big enough to accommodate all characters in the given ranges +STBTT_DEF int stbtt_PackFontRangesRenderIntoRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects) +{ + int i,j,k, return_value = 1; + + // save current values + int old_h_over = spc->h_oversample; + int old_v_over = spc->v_oversample; + + k = 0; + for (i=0; i < num_ranges; ++i) { + float fh = ranges[i].font_size; + float scale = fh > 0 ? stbtt_ScaleForPixelHeight(info, fh) : stbtt_ScaleForMappingEmToPixels(info, -fh); + float recip_h,recip_v,sub_x,sub_y; + spc->h_oversample = ranges[i].h_oversample; + spc->v_oversample = ranges[i].v_oversample; + recip_h = 1.0f / spc->h_oversample; + recip_v = 1.0f / spc->v_oversample; + sub_x = stbtt__oversample_shift(spc->h_oversample); + sub_y = stbtt__oversample_shift(spc->v_oversample); + for (j=0; j < ranges[i].num_chars; ++j) { + stbrp_rect *r = &rects[k]; + if (r->was_packed && r->w != 0 && r->h != 0) { + stbtt_packedchar *bc = &ranges[i].chardata_for_range[j]; + int advance, lsb, x0,y0,x1,y1; + int codepoint = ranges[i].array_of_unicode_codepoints == NULL ? 
ranges[i].first_unicode_codepoint_in_range + j : ranges[i].array_of_unicode_codepoints[j]; + int glyph = stbtt_FindGlyphIndex(info, codepoint); + stbrp_coord pad = (stbrp_coord) spc->padding; + + // pad on left and top + r->x += pad; + r->y += pad; + r->w -= pad; + r->h -= pad; + stbtt_GetGlyphHMetrics(info, glyph, &advance, &lsb); + stbtt_GetGlyphBitmapBox(info, glyph, + scale * spc->h_oversample, + scale * spc->v_oversample, + &x0,&y0,&x1,&y1); + stbtt_MakeGlyphBitmapSubpixel(info, + spc->pixels + r->x + r->y*spc->stride_in_bytes, + r->w - spc->h_oversample+1, + r->h - spc->v_oversample+1, + spc->stride_in_bytes, + scale * spc->h_oversample, + scale * spc->v_oversample, + 0,0, + glyph); + + if (spc->h_oversample > 1) + stbtt__h_prefilter(spc->pixels + r->x + r->y*spc->stride_in_bytes, + r->w, r->h, spc->stride_in_bytes, + spc->h_oversample); + + if (spc->v_oversample > 1) + stbtt__v_prefilter(spc->pixels + r->x + r->y*spc->stride_in_bytes, + r->w, r->h, spc->stride_in_bytes, + spc->v_oversample); + + bc->x0 = (stbtt_int16) r->x; + bc->y0 = (stbtt_int16) r->y; + bc->x1 = (stbtt_int16) (r->x + r->w); + bc->y1 = (stbtt_int16) (r->y + r->h); + bc->xadvance = scale * advance; + bc->xoff = (float) x0 * recip_h + sub_x; + bc->yoff = (float) y0 * recip_v + sub_y; + bc->xoff2 = (x0 + r->w) * recip_h + sub_x; + bc->yoff2 = (y0 + r->h) * recip_v + sub_y; + } else { + return_value = 0; // if any fail, report failure + } + + ++k; + } + } + + // restore original values + spc->h_oversample = old_h_over; + spc->v_oversample = old_v_over; + + return return_value; +} + +STBTT_DEF void stbtt_PackFontRangesPackRects(stbtt_pack_context *spc, stbrp_rect *rects, int num_rects) +{ + stbrp_pack_rects((stbrp_context *) spc->pack_info, rects, num_rects); +} + +STBTT_DEF int stbtt_PackFontRanges(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, stbtt_pack_range *ranges, int num_ranges) +{ + stbtt_fontinfo info; + int i,j,n, return_value; // [DEAR IMGUI] removed = 1 + 
//stbrp_context *context = (stbrp_context *) spc->pack_info; + stbrp_rect *rects; + + // flag all characters as NOT packed + for (i=0; i < num_ranges; ++i) + for (j=0; j < ranges[i].num_chars; ++j) + ranges[i].chardata_for_range[j].x0 = + ranges[i].chardata_for_range[j].y0 = + ranges[i].chardata_for_range[j].x1 = + ranges[i].chardata_for_range[j].y1 = 0; + + n = 0; + for (i=0; i < num_ranges; ++i) + n += ranges[i].num_chars; + + rects = (stbrp_rect *) STBTT_malloc(sizeof(*rects) * n, spc->user_allocator_context); + if (rects == NULL) + return 0; + + info.userdata = spc->user_allocator_context; + stbtt_InitFont(&info, fontdata, stbtt_GetFontOffsetForIndex(fontdata,font_index)); + + n = stbtt_PackFontRangesGatherRects(spc, &info, ranges, num_ranges, rects); + + stbtt_PackFontRangesPackRects(spc, rects, n); + + return_value = stbtt_PackFontRangesRenderIntoRects(spc, &info, ranges, num_ranges, rects); + + STBTT_free(rects, spc->user_allocator_context); + return return_value; +} + +STBTT_DEF int stbtt_PackFontRange(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, float font_size, + int first_unicode_codepoint_in_range, int num_chars_in_range, stbtt_packedchar *chardata_for_range) +{ + stbtt_pack_range range; + range.first_unicode_codepoint_in_range = first_unicode_codepoint_in_range; + range.array_of_unicode_codepoints = NULL; + range.num_chars = num_chars_in_range; + range.chardata_for_range = chardata_for_range; + range.font_size = font_size; + return stbtt_PackFontRanges(spc, fontdata, font_index, &range, 1); +} + +STBTT_DEF void stbtt_GetScaledFontVMetrics(const unsigned char *fontdata, int index, float size, float *ascent, float *descent, float *lineGap) +{ + int i_ascent, i_descent, i_lineGap; + float scale; + stbtt_fontinfo info; + stbtt_InitFont(&info, fontdata, stbtt_GetFontOffsetForIndex(fontdata, index)); + scale = size > 0 ? 
stbtt_ScaleForPixelHeight(&info, size) : stbtt_ScaleForMappingEmToPixels(&info, -size); + stbtt_GetFontVMetrics(&info, &i_ascent, &i_descent, &i_lineGap); + *ascent = (float) i_ascent * scale; + *descent = (float) i_descent * scale; + *lineGap = (float) i_lineGap * scale; +} + +STBTT_DEF void stbtt_GetPackedQuad(const stbtt_packedchar *chardata, int pw, int ph, int char_index, float *xpos, float *ypos, stbtt_aligned_quad *q, int align_to_integer) +{ + float ipw = 1.0f / pw, iph = 1.0f / ph; + const stbtt_packedchar *b = chardata + char_index; + + if (align_to_integer) { + float x = (float) STBTT_ifloor((*xpos + b->xoff) + 0.5f); + float y = (float) STBTT_ifloor((*ypos + b->yoff) + 0.5f); + q->x0 = x; + q->y0 = y; + q->x1 = x + b->xoff2 - b->xoff; + q->y1 = y + b->yoff2 - b->yoff; + } else { + q->x0 = *xpos + b->xoff; + q->y0 = *ypos + b->yoff; + q->x1 = *xpos + b->xoff2; + q->y1 = *ypos + b->yoff2; + } + + q->s0 = b->x0 * ipw; + q->t0 = b->y0 * iph; + q->s1 = b->x1 * ipw; + q->t1 = b->y1 * iph; + + *xpos += b->xadvance; +} + +////////////////////////////////////////////////////////////////////////////// +// +// sdf computation +// + +#define STBTT_min(a,b) ((a) < (b) ? (a) : (b)) +#define STBTT_max(a,b) ((a) < (b) ? 
(b) : (a)) + +static int stbtt__ray_intersect_bezier(float orig[2], float ray[2], float q0[2], float q1[2], float q2[2], float hits[2][2]) +{ + float q0perp = q0[1]*ray[0] - q0[0]*ray[1]; + float q1perp = q1[1]*ray[0] - q1[0]*ray[1]; + float q2perp = q2[1]*ray[0] - q2[0]*ray[1]; + float roperp = orig[1]*ray[0] - orig[0]*ray[1]; + + float a = q0perp - 2*q1perp + q2perp; + float b = q1perp - q0perp; + float c = q0perp - roperp; + + float s0 = 0., s1 = 0.; + int num_s = 0; + + if (a != 0.0) { + float discr = b*b - a*c; + if (discr > 0.0) { + float rcpna = -1 / a; + float d = (float) STBTT_sqrt(discr); + s0 = (b+d) * rcpna; + s1 = (b-d) * rcpna; + if (s0 >= 0.0 && s0 <= 1.0) + num_s = 1; + if (d > 0.0 && s1 >= 0.0 && s1 <= 1.0) { + if (num_s == 0) s0 = s1; + ++num_s; + } + } + } else { + // 2*b*s + c = 0 + // s = -c / (2*b) + s0 = c / (-2 * b); + if (s0 >= 0.0 && s0 <= 1.0) + num_s = 1; + } + + if (num_s == 0) + return 0; + else { + float rcp_len2 = 1 / (ray[0]*ray[0] + ray[1]*ray[1]); + float rayn_x = ray[0] * rcp_len2, rayn_y = ray[1] * rcp_len2; + + float q0d = q0[0]*rayn_x + q0[1]*rayn_y; + float q1d = q1[0]*rayn_x + q1[1]*rayn_y; + float q2d = q2[0]*rayn_x + q2[1]*rayn_y; + float rod = orig[0]*rayn_x + orig[1]*rayn_y; + + float q10d = q1d - q0d; + float q20d = q2d - q0d; + float q0rd = q0d - rod; + + hits[0][0] = q0rd + s0*(2.0f - 2.0f*s0)*q10d + s0*s0*q20d; + hits[0][1] = a*s0+b; + + if (num_s > 1) { + hits[1][0] = q0rd + s1*(2.0f - 2.0f*s1)*q10d + s1*s1*q20d; + hits[1][1] = a*s1+b; + return 2; + } else { + return 1; + } + } +} + +static int equal(float *a, float *b) +{ + return (a[0] == b[0] && a[1] == b[1]); +} + +static int stbtt__compute_crossings_x(float x, float y, int nverts, stbtt_vertex *verts) +{ + int i; + float orig[2], ray[2] = { 1, 0 }; + float y_frac; + int winding = 0; + + orig[0] = x; + //orig[1] = y; // [DEAR IMGUI] commmented double assignment + + // make sure y never passes through a vertex of the shape + y_frac = (float) STBTT_fmod(y, 1.0f); 
+ if (y_frac < 0.01f) + y += 0.01f; + else if (y_frac > 0.99f) + y -= 0.01f; + orig[1] = y; + + // test a ray from (-infinity,y) to (x,y) + for (i=0; i < nverts; ++i) { + if (verts[i].type == STBTT_vline) { + int x0 = (int) verts[i-1].x, y0 = (int) verts[i-1].y; + int x1 = (int) verts[i ].x, y1 = (int) verts[i ].y; + if (y > STBTT_min(y0,y1) && y < STBTT_max(y0,y1) && x > STBTT_min(x0,x1)) { + float x_inter = (y - y0) / (y1 - y0) * (x1-x0) + x0; + if (x_inter < x) + winding += (y0 < y1) ? 1 : -1; + } + } + if (verts[i].type == STBTT_vcurve) { + int x0 = (int) verts[i-1].x , y0 = (int) verts[i-1].y ; + int x1 = (int) verts[i ].cx, y1 = (int) verts[i ].cy; + int x2 = (int) verts[i ].x , y2 = (int) verts[i ].y ; + int ax = STBTT_min(x0,STBTT_min(x1,x2)), ay = STBTT_min(y0,STBTT_min(y1,y2)); + int by = STBTT_max(y0,STBTT_max(y1,y2)); + if (y > ay && y < by && x > ax) { + float q0[2],q1[2],q2[2]; + float hits[2][2]; + q0[0] = (float)x0; + q0[1] = (float)y0; + q1[0] = (float)x1; + q1[1] = (float)y1; + q2[0] = (float)x2; + q2[1] = (float)y2; + if (equal(q0,q1) || equal(q1,q2)) { + x0 = (int)verts[i-1].x; + y0 = (int)verts[i-1].y; + x1 = (int)verts[i ].x; + y1 = (int)verts[i ].y; + if (y > STBTT_min(y0,y1) && y < STBTT_max(y0,y1) && x > STBTT_min(x0,x1)) { + float x_inter = (y - y0) / (y1 - y0) * (x1-x0) + x0; + if (x_inter < x) + winding += (y0 < y1) ? 1 : -1; + } + } else { + int num_hits = stbtt__ray_intersect_bezier(orig, ray, q0, q1, q2, hits); + if (num_hits >= 1) + if (hits[0][0] < 0) + winding += (hits[0][1] < 0 ? -1 : 1); + if (num_hits >= 2) + if (hits[1][0] < 0) + winding += (hits[1][1] < 0 ? 
-1 : 1); + } + } + } + } + return winding; +} + +static float stbtt__cuberoot( float x ) +{ + if (x<0) + return -(float) STBTT_pow(-x,1.0f/3.0f); + else + return (float) STBTT_pow( x,1.0f/3.0f); +} + +// x^3 + c*x^2 + b*x + a = 0 +static int stbtt__solve_cubic(float a, float b, float c, float* r) +{ + float s = -a / 3; + float p = b - a*a / 3; + float q = a * (2*a*a - 9*b) / 27 + c; + float p3 = p*p*p; + float d = q*q + 4*p3 / 27; + if (d >= 0) { + float z = (float) STBTT_sqrt(d); + float u = (-q + z) / 2; + float v = (-q - z) / 2; + u = stbtt__cuberoot(u); + v = stbtt__cuberoot(v); + r[0] = s + u + v; + return 1; + } else { + float u = (float) STBTT_sqrt(-p/3); + float v = (float) STBTT_acos(-STBTT_sqrt(-27/p3) * q / 2) / 3; // p3 must be negative, since d is negative + float m = (float) STBTT_cos(v); + float n = (float) STBTT_cos(v-3.141592/2)*1.732050808f; + r[0] = s + u * 2 * m; + r[1] = s - u * (m + n); + r[2] = s - u * (m - n); + + //STBTT_assert( STBTT_fabs(((r[0]+a)*r[0]+b)*r[0]+c) < 0.05f); // these asserts may not be safe at all scales, though they're in bezier t parameter units so maybe? 
+ //STBTT_assert( STBTT_fabs(((r[1]+a)*r[1]+b)*r[1]+c) < 0.05f); + //STBTT_assert( STBTT_fabs(((r[2]+a)*r[2]+b)*r[2]+c) < 0.05f); + return 3; + } +} + +STBTT_DEF unsigned char * stbtt_GetGlyphSDF(const stbtt_fontinfo *info, float scale, int glyph, int padding, unsigned char onedge_value, float pixel_dist_scale, int *width, int *height, int *xoff, int *yoff) +{ + float scale_x = scale, scale_y = scale; + int ix0,iy0,ix1,iy1; + int w,h; + unsigned char *data; + + // if one scale is 0, use same scale for both + if (scale_x == 0) scale_x = scale_y; + if (scale_y == 0) { + if (scale_x == 0) return NULL; // if both scales are 0, return NULL + scale_y = scale_x; + } + + stbtt_GetGlyphBitmapBoxSubpixel(info, glyph, scale, scale, 0.0f,0.0f, &ix0,&iy0,&ix1,&iy1); + + // if empty, return NULL + if (ix0 == ix1 || iy0 == iy1) + return NULL; + + ix0 -= padding; + iy0 -= padding; + ix1 += padding; + iy1 += padding; + + w = (ix1 - ix0); + h = (iy1 - iy0); + + if (width ) *width = w; + if (height) *height = h; + if (xoff ) *xoff = ix0; + if (yoff ) *yoff = iy0; + + // invert for y-downwards bitmaps + scale_y = -scale_y; + + { + int x,y,i,j; + float *precompute; + stbtt_vertex *verts; + int num_verts = stbtt_GetGlyphShape(info, glyph, &verts); + data = (unsigned char *) STBTT_malloc(w * h, info->userdata); + precompute = (float *) STBTT_malloc(num_verts * sizeof(float), info->userdata); + + for (i=0,j=num_verts-1; i < num_verts; j=i++) { + if (verts[i].type == STBTT_vline) { + float x0 = verts[i].x*scale_x, y0 = verts[i].y*scale_y; + float x1 = verts[j].x*scale_x, y1 = verts[j].y*scale_y; + float dist = (float) STBTT_sqrt((x1-x0)*(x1-x0) + (y1-y0)*(y1-y0)); + precompute[i] = (dist == 0) ? 
0.0f : 1.0f / dist; + } else if (verts[i].type == STBTT_vcurve) { + float x2 = verts[j].x *scale_x, y2 = verts[j].y *scale_y; + float x1 = verts[i].cx*scale_x, y1 = verts[i].cy*scale_y; + float x0 = verts[i].x *scale_x, y0 = verts[i].y *scale_y; + float bx = x0 - 2*x1 + x2, by = y0 - 2*y1 + y2; + float len2 = bx*bx + by*by; + if (len2 != 0.0f) + precompute[i] = 1.0f / (bx*bx + by*by); + else + precompute[i] = 0.0f; + } else + precompute[i] = 0.0f; + } + + for (y=iy0; y < iy1; ++y) { + for (x=ix0; x < ix1; ++x) { + float val; + float min_dist = 999999.0f; + float sx = (float) x + 0.5f; + float sy = (float) y + 0.5f; + float x_gspace = (sx / scale_x); + float y_gspace = (sy / scale_y); + + int winding = stbtt__compute_crossings_x(x_gspace, y_gspace, num_verts, verts); // @OPTIMIZE: this could just be a rasterization, but needs to be line vs. non-tesselated curves so a new path + + for (i=0; i < num_verts; ++i) { + float x0 = verts[i].x*scale_x, y0 = verts[i].y*scale_y; + + // check against every point here rather than inside line/curve primitives -- @TODO: wrong if multiple 'moves' in a row produce a garbage point, and given culling, probably more efficient to do within line/curve + float dist2 = (x0-sx)*(x0-sx) + (y0-sy)*(y0-sy); + if (dist2 < min_dist*min_dist) + min_dist = (float) STBTT_sqrt(dist2); + + if (verts[i].type == STBTT_vline) { + float x1 = verts[i-1].x*scale_x, y1 = verts[i-1].y*scale_y; + + // coarse culling against bbox + //if (sx > STBTT_min(x0,x1)-min_dist && sx < STBTT_max(x0,x1)+min_dist && + // sy > STBTT_min(y0,y1)-min_dist && sy < STBTT_max(y0,y1)+min_dist) + float dist = (float) STBTT_fabs((x1-x0)*(y0-sy) - (y1-y0)*(x0-sx)) * precompute[i]; + STBTT_assert(i != 0); + if (dist < min_dist) { + // check position along line + // x' = x0 + t*(x1-x0), y' = y0 + t*(y1-y0) + // minimize (x'-sx)*(x'-sx)+(y'-sy)*(y'-sy) + float dx = x1-x0, dy = y1-y0; + float px = x0-sx, py = y0-sy; + // minimize (px+t*dx)^2 + (py+t*dy)^2 = px*px + 2*px*dx*t + t^2*dx*dx 
+ py*py + 2*py*dy*t + t^2*dy*dy + // derivative: 2*px*dx + 2*py*dy + (2*dx*dx+2*dy*dy)*t, set to 0 and solve + float t = -(px*dx + py*dy) / (dx*dx + dy*dy); + if (t >= 0.0f && t <= 1.0f) + min_dist = dist; + } + } else if (verts[i].type == STBTT_vcurve) { + float x2 = verts[i-1].x *scale_x, y2 = verts[i-1].y *scale_y; + float x1 = verts[i ].cx*scale_x, y1 = verts[i ].cy*scale_y; + float box_x0 = STBTT_min(STBTT_min(x0,x1),x2); + float box_y0 = STBTT_min(STBTT_min(y0,y1),y2); + float box_x1 = STBTT_max(STBTT_max(x0,x1),x2); + float box_y1 = STBTT_max(STBTT_max(y0,y1),y2); + // coarse culling against bbox to avoid computing cubic unnecessarily + if (sx > box_x0-min_dist && sx < box_x1+min_dist && sy > box_y0-min_dist && sy < box_y1+min_dist) { + int num=0; + float ax = x1-x0, ay = y1-y0; + float bx = x0 - 2*x1 + x2, by = y0 - 2*y1 + y2; + float mx = x0 - sx, my = y0 - sy; + float res[3],px,py,t,it; + float a_inv = precompute[i]; + if (a_inv == 0.0) { // if a_inv is 0, it's 2nd degree so use quadratic formula + float a = 3*(ax*bx + ay*by); + float b = 2*(ax*ax + ay*ay) + (mx*bx+my*by); + float c = mx*ax+my*ay; + if (a == 0.0) { // if a is 0, it's linear + if (b != 0.0) { + res[num++] = -c/b; + } + } else { + float discriminant = b*b - 4*a*c; + if (discriminant < 0) + num = 0; + else { + float root = (float) STBTT_sqrt(discriminant); + res[0] = (-b - root)/(2*a); + res[1] = (-b + root)/(2*a); + num = 2; // don't bother distinguishing 1-solution case, as code below will still work + } + } + } else { + float b = 3*(ax*bx + ay*by) * a_inv; // could precompute this as it doesn't depend on sample point + float c = (2*(ax*ax + ay*ay) + (mx*bx+my*by)) * a_inv; + float d = (mx*ax+my*ay) * a_inv; + num = stbtt__solve_cubic(b, c, d, res); + } + if (num >= 1 && res[0] >= 0.0f && res[0] <= 1.0f) { + t = res[0], it = 1.0f - t; + px = it*it*x0 + 2*t*it*x1 + t*t*x2; + py = it*it*y0 + 2*t*it*y1 + t*t*y2; + dist2 = (px-sx)*(px-sx) + (py-sy)*(py-sy); + if (dist2 < min_dist * min_dist) + 
min_dist = (float) STBTT_sqrt(dist2); + } + if (num >= 2 && res[1] >= 0.0f && res[1] <= 1.0f) { + t = res[1], it = 1.0f - t; + px = it*it*x0 + 2*t*it*x1 + t*t*x2; + py = it*it*y0 + 2*t*it*y1 + t*t*y2; + dist2 = (px-sx)*(px-sx) + (py-sy)*(py-sy); + if (dist2 < min_dist * min_dist) + min_dist = (float) STBTT_sqrt(dist2); + } + if (num >= 3 && res[2] >= 0.0f && res[2] <= 1.0f) { + t = res[2], it = 1.0f - t; + px = it*it*x0 + 2*t*it*x1 + t*t*x2; + py = it*it*y0 + 2*t*it*y1 + t*t*y2; + dist2 = (px-sx)*(px-sx) + (py-sy)*(py-sy); + if (dist2 < min_dist * min_dist) + min_dist = (float) STBTT_sqrt(dist2); + } + } + } + } + if (winding == 0) + min_dist = -min_dist; // if outside the shape, value is negative + val = onedge_value + pixel_dist_scale * min_dist; + if (val < 0) + val = 0; + else if (val > 255) + val = 255; + data[(y-iy0)*w+(x-ix0)] = (unsigned char) val; + } + } + STBTT_free(precompute, info->userdata); + STBTT_free(verts, info->userdata); + } + return data; +} + +STBTT_DEF unsigned char * stbtt_GetCodepointSDF(const stbtt_fontinfo *info, float scale, int codepoint, int padding, unsigned char onedge_value, float pixel_dist_scale, int *width, int *height, int *xoff, int *yoff) +{ + return stbtt_GetGlyphSDF(info, scale, stbtt_FindGlyphIndex(info, codepoint), padding, onedge_value, pixel_dist_scale, width, height, xoff, yoff); +} + +STBTT_DEF void stbtt_FreeSDF(unsigned char *bitmap, void *userdata) +{ + STBTT_free(bitmap, userdata); +} + +////////////////////////////////////////////////////////////////////////////// +// +// font name matching -- recommended not to use this +// + +// check if a utf8 string contains a prefix which is the utf16 string; if so return length of matching utf8 string +static stbtt_int32 stbtt__CompareUTF8toUTF16_bigendian_prefix(stbtt_uint8 *s1, stbtt_int32 len1, stbtt_uint8 *s2, stbtt_int32 len2) +{ + stbtt_int32 i=0; + + // convert utf16 to utf8 and compare the results while converting + while (len2) { + stbtt_uint16 ch = s2[0]*256 + 
s2[1]; + if (ch < 0x80) { + if (i >= len1) return -1; + if (s1[i++] != ch) return -1; + } else if (ch < 0x800) { + if (i+1 >= len1) return -1; + if (s1[i++] != 0xc0 + (ch >> 6)) return -1; + if (s1[i++] != 0x80 + (ch & 0x3f)) return -1; + } else if (ch >= 0xd800 && ch < 0xdc00) { + stbtt_uint32 c; + stbtt_uint16 ch2 = s2[2]*256 + s2[3]; + if (i+3 >= len1) return -1; + c = ((ch - 0xd800) << 10) + (ch2 - 0xdc00) + 0x10000; + if (s1[i++] != 0xf0 + (c >> 18)) return -1; + if (s1[i++] != 0x80 + ((c >> 12) & 0x3f)) return -1; + if (s1[i++] != 0x80 + ((c >> 6) & 0x3f)) return -1; + if (s1[i++] != 0x80 + ((c ) & 0x3f)) return -1; + s2 += 2; // plus another 2 below + len2 -= 2; + } else if (ch >= 0xdc00 && ch < 0xe000) { + return -1; + } else { + if (i+2 >= len1) return -1; + if (s1[i++] != 0xe0 + (ch >> 12)) return -1; + if (s1[i++] != 0x80 + ((ch >> 6) & 0x3f)) return -1; + if (s1[i++] != 0x80 + ((ch ) & 0x3f)) return -1; + } + s2 += 2; + len2 -= 2; + } + return i; +} + +static int stbtt_CompareUTF8toUTF16_bigendian_internal(char *s1, int len1, char *s2, int len2) +{ + return len1 == stbtt__CompareUTF8toUTF16_bigendian_prefix((stbtt_uint8*) s1, len1, (stbtt_uint8*) s2, len2); +} + +// returns results in whatever encoding you request... but note that 2-byte encodings +// will be BIG-ENDIAN... 
use stbtt_CompareUTF8toUTF16_bigendian() to compare +STBTT_DEF const char *stbtt_GetFontNameString(const stbtt_fontinfo *font, int *length, int platformID, int encodingID, int languageID, int nameID) +{ + stbtt_int32 i,count,stringOffset; + stbtt_uint8 *fc = font->data; + stbtt_uint32 offset = font->fontstart; + stbtt_uint32 nm = stbtt__find_table(fc, offset, "name"); + if (!nm) return NULL; + + count = ttUSHORT(fc+nm+2); + stringOffset = nm + ttUSHORT(fc+nm+4); + for (i=0; i < count; ++i) { + stbtt_uint32 loc = nm + 6 + 12 * i; + if (platformID == ttUSHORT(fc+loc+0) && encodingID == ttUSHORT(fc+loc+2) + && languageID == ttUSHORT(fc+loc+4) && nameID == ttUSHORT(fc+loc+6)) { + *length = ttUSHORT(fc+loc+8); + return (const char *) (fc+stringOffset+ttUSHORT(fc+loc+10)); + } + } + return NULL; +} + +static int stbtt__matchpair(stbtt_uint8 *fc, stbtt_uint32 nm, stbtt_uint8 *name, stbtt_int32 nlen, stbtt_int32 target_id, stbtt_int32 next_id) +{ + stbtt_int32 i; + stbtt_int32 count = ttUSHORT(fc+nm+2); + stbtt_int32 stringOffset = nm + ttUSHORT(fc+nm+4); + + for (i=0; i < count; ++i) { + stbtt_uint32 loc = nm + 6 + 12 * i; + stbtt_int32 id = ttUSHORT(fc+loc+6); + if (id == target_id) { + // find the encoding + stbtt_int32 platform = ttUSHORT(fc+loc+0), encoding = ttUSHORT(fc+loc+2), language = ttUSHORT(fc+loc+4); + + // is this a Unicode encoding? 
+ if (platform == 0 || (platform == 3 && encoding == 1) || (platform == 3 && encoding == 10)) { + stbtt_int32 slen = ttUSHORT(fc+loc+8); + stbtt_int32 off = ttUSHORT(fc+loc+10); + + // check if there's a prefix match + stbtt_int32 matchlen = stbtt__CompareUTF8toUTF16_bigendian_prefix(name, nlen, fc+stringOffset+off,slen); + if (matchlen >= 0) { + // check for target_id+1 immediately following, with same encoding & language + if (i+1 < count && ttUSHORT(fc+loc+12+6) == next_id && ttUSHORT(fc+loc+12) == platform && ttUSHORT(fc+loc+12+2) == encoding && ttUSHORT(fc+loc+12+4) == language) { + slen = ttUSHORT(fc+loc+12+8); + off = ttUSHORT(fc+loc+12+10); + if (slen == 0) { + if (matchlen == nlen) + return 1; + } else if (matchlen < nlen && name[matchlen] == ' ') { + ++matchlen; + if (stbtt_CompareUTF8toUTF16_bigendian_internal((char*) (name+matchlen), nlen-matchlen, (char*)(fc+stringOffset+off),slen)) + return 1; + } + } else { + // if nothing immediately following + if (matchlen == nlen) + return 1; + } + } + } + + // @TODO handle other encodings + } + } + return 0; +} + +static int stbtt__matches(stbtt_uint8 *fc, stbtt_uint32 offset, stbtt_uint8 *name, stbtt_int32 flags) +{ + stbtt_int32 nlen = (stbtt_int32) STBTT_strlen((char *) name); + stbtt_uint32 nm,hd; + if (!stbtt__isfont(fc+offset)) return 0; + + // check italics/bold/underline flags in macStyle... 
+ if (flags) { + hd = stbtt__find_table(fc, offset, "head"); + if ((ttUSHORT(fc+hd+44) & 7) != (flags & 7)) return 0; + } + + nm = stbtt__find_table(fc, offset, "name"); + if (!nm) return 0; + + if (flags) { + // if we checked the macStyle flags, then just check the family and ignore the subfamily + if (stbtt__matchpair(fc, nm, name, nlen, 16, -1)) return 1; + if (stbtt__matchpair(fc, nm, name, nlen, 1, -1)) return 1; + if (stbtt__matchpair(fc, nm, name, nlen, 3, -1)) return 1; + } else { + if (stbtt__matchpair(fc, nm, name, nlen, 16, 17)) return 1; + if (stbtt__matchpair(fc, nm, name, nlen, 1, 2)) return 1; + if (stbtt__matchpair(fc, nm, name, nlen, 3, -1)) return 1; + } + + return 0; +} + +static int stbtt_FindMatchingFont_internal(unsigned char *font_collection, char *name_utf8, stbtt_int32 flags) +{ + stbtt_int32 i; + for (i=0;;++i) { + stbtt_int32 off = stbtt_GetFontOffsetForIndex(font_collection, i); + if (off < 0) return off; + if (stbtt__matches((stbtt_uint8 *) font_collection, off, (stbtt_uint8*) name_utf8, flags)) + return off; + } +} + +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-qual" +#endif + +STBTT_DEF int stbtt_BakeFontBitmap(const unsigned char *data, int offset, + float pixel_height, unsigned char *pixels, int pw, int ph, + int first_char, int num_chars, stbtt_bakedchar *chardata) +{ + return stbtt_BakeFontBitmap_internal((unsigned char *) data, offset, pixel_height, pixels, pw, ph, first_char, num_chars, chardata); +} + +STBTT_DEF int stbtt_GetFontOffsetForIndex(const unsigned char *data, int index) +{ + return stbtt_GetFontOffsetForIndex_internal((unsigned char *) data, index); +} + +STBTT_DEF int stbtt_GetNumberOfFonts(const unsigned char *data) +{ + return stbtt_GetNumberOfFonts_internal((unsigned char *) data); +} + +STBTT_DEF int stbtt_InitFont(stbtt_fontinfo *info, const unsigned char *data, int offset) +{ + return stbtt_InitFont_internal(info, (unsigned char *) data, 
offset); +} + +STBTT_DEF int stbtt_FindMatchingFont(const unsigned char *fontdata, const char *name, int flags) +{ + return stbtt_FindMatchingFont_internal((unsigned char *) fontdata, (char *) name, flags); +} + +STBTT_DEF int stbtt_CompareUTF8toUTF16_bigendian(const char *s1, int len1, const char *s2, int len2) +{ + return stbtt_CompareUTF8toUTF16_bigendian_internal((char *) s1, len1, (char *) s2, len2); +} + +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif + +#endif // STB_TRUETYPE_IMPLEMENTATION + + +// FULL VERSION HISTORY +// +// 1.19 (2018-02-11) OpenType GPOS kerning (horizontal only), STBTT_fmod +// 1.18 (2018-01-29) add missing function +// 1.17 (2017-07-23) make more arguments const; doc fix +// 1.16 (2017-07-12) SDF support +// 1.15 (2017-03-03) make more arguments const +// 1.14 (2017-01-16) num-fonts-in-TTC function +// 1.13 (2017-01-02) support OpenType fonts, certain Apple fonts +// 1.12 (2016-10-25) suppress warnings about casting away const with -Wcast-qual +// 1.11 (2016-04-02) fix unused-variable warning +// 1.10 (2016-04-02) allow user-defined fabs() replacement +// fix memory leak if fontsize=0.0 +// fix warning from duplicate typedef +// 1.09 (2016-01-16) warning fix; avoid crash on outofmem; use alloc userdata for PackFontRanges +// 1.08 (2015-09-13) document stbtt_Rasterize(); fixes for vertical & horizontal edges +// 1.07 (2015-08-01) allow PackFontRanges to accept arrays of sparse codepoints; +// allow PackFontRanges to pack and render in separate phases; +// fix stbtt_GetFontOFfsetForIndex (never worked for non-0 input?); +// fixed an assert() bug in the new rasterizer +// replace assert() with STBTT_assert() in new rasterizer +// 1.06 (2015-07-14) performance improvements (~35% faster on x86 and x64 on test machine) +// also more precise AA rasterizer, except if shapes overlap +// remove need for STBTT_sort +// 1.05 (2015-04-15) fix misplaced definitions for STBTT_STATIC +// 1.04 (2015-04-15) typo in 
example +// 1.03 (2015-04-12) STBTT_STATIC, fix memory leak in new packing, various fixes +// 1.02 (2014-12-10) fix various warnings & compile issues w/ stb_rect_pack, C++ +// 1.01 (2014-12-08) fix subpixel position when oversampling to exactly match +// non-oversampled; STBTT_POINT_SIZE for packed case only +// 1.00 (2014-12-06) add new PackBegin etc. API, w/ support for oversampling +// 0.99 (2014-09-18) fix multiple bugs with subpixel rendering (ryg) +// 0.9 (2014-08-07) support certain mac/iOS fonts without an MS platformID +// 0.8b (2014-07-07) fix a warning +// 0.8 (2014-05-25) fix a few more warnings +// 0.7 (2013-09-25) bugfix: subpixel glyph bug fixed in 0.5 had come back +// 0.6c (2012-07-24) improve documentation +// 0.6b (2012-07-20) fix a few more warnings +// 0.6 (2012-07-17) fix warnings; added stbtt_ScaleForMappingEmToPixels, +// stbtt_GetFontBoundingBox, stbtt_IsGlyphEmpty +// 0.5 (2011-12-09) bugfixes: +// subpixel glyph renderer computed wrong bounding box +// first vertex of shape can be off-curve (FreeSans) +// 0.4b (2011-12-03) fixed an error in the font baking example +// 0.4 (2011-12-01) kerning, subpixel rendering (tor) +// bugfixes for: +// codepoint-to-glyph conversion using table fmt=12 +// codepoint-to-glyph conversion using table fmt=4 +// stbtt_GetBakedQuad with non-square texture (Zer) +// updated Hello World! sample to use kerning and subpixel +// fixed some warnings +// 0.3 (2009-06-24) cmap fmt=12, compound shapes (MM) +// userdata, malloc-from-userdata, non-zero fill (stb) +// 0.2 (2009-03-11) Fix unsigned/signed char warnings +// 0.1 (2009-03-09) First public release +// + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/cpp-projects/3d-engine/imgui/misc/README.txt b/cpp-projects/3d-engine/imgui/misc/README.txt new file mode 100644 index 0000000..8690008 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/misc/README.txt @@ -0,0 +1,24 @@ + +misc/cpp/ + InputText() wrappers for C++ standard library (STL) type: std::string. + This is also an example of how you may wrap your own similar types. + +misc/fonts/ + Fonts loading/merging instructions (e.g. How to handle glyph ranges, how to merge icon fonts). + Command line tool "binary_to_compressed_c" to create compressed arrays to embed data in source code. + Suggested fonts and links. + +misc/freetype/ + Font atlas builder/rasterizer using FreeType instead of stb_truetype. + Benefit from better FreeType rasterization, in particular for small fonts. + +misc/natvis/ + Natvis file to describe dear imgui types in the Visual Studio debugger. + With this, types like ImVector<> will be displayed nicely in the debugger. + You can include this file in a Visual Studio project file, or install it in the Visual Studio folder. + +misc/single_file/ + Single-file header stub. + We use this to validate compiling all *.cpp files in the same compilation unit. 
+ Users of that technique (also called "Unity builds") can generally provide this themselves, + so we don't really recommend you use this in your projects. diff --git a/cpp-projects/3d-engine/imgui/misc/cpp/README.txt b/cpp-projects/3d-engine/imgui/misc/cpp/README.txt new file mode 100644 index 0000000..8d5982e --- /dev/null +++ b/cpp-projects/3d-engine/imgui/misc/cpp/README.txt @@ -0,0 +1,10 @@ + +imgui_stdlib.h + imgui_stdlib.cpp + InputText() wrappers for C++ standard library (STL) type: std::string. + This is also an example of how you may wrap your own similar types. + +imgui_scoped.h + [Experimental, not currently in main repository] + Additional header file with some RAII-style wrappers for common Dear ImGui functions. + Try by merging: https://github.com/ocornut/imgui/pull/2197 + Discuss at: https://github.com/ocornut/imgui/issues/2096 diff --git a/cpp-projects/3d-engine/imgui/misc/cpp/imgui_stdlib.cpp b/cpp-projects/3d-engine/imgui/misc/cpp/imgui_stdlib.cpp new file mode 100644 index 0000000..acd526e --- /dev/null +++ b/cpp-projects/3d-engine/imgui/misc/cpp/imgui_stdlib.cpp @@ -0,0 +1,75 @@ +// dear imgui: wrappers for C++ standard library (STL) types (std::string, etc.) +// This is also an example of how you may wrap your own similar types. + +// Compatibility: +// - std::string support is only guaranteed to work from C++11. +// If you try to use it pre-C++11, please share your findings (w/ info about compiler/architecture) + +// Changelog: +// - v0.10: Initial version. 
Added InputText() / InputTextMultiline() calls with std::string + +#include "imgui_stdlib.h" + +struct InputTextCallback_UserData // Payload handed to InputTextCallback through UserData: the std::string being edited plus the caller's own callback and user data +{ + std::string* Str; + ImGuiInputTextCallback ChainCallback; + void* ChainCallbackUserData; +}; + +static int InputTextCallback(ImGuiInputTextCallbackData* data) // Handles CallbackResize events by resizing the std::string; forwards every other event to the chained callback when one was supplied, otherwise returns 0 +{ + InputTextCallback_UserData* user_data = (InputTextCallback_UserData*)data->UserData; + if (data->EventFlag == ImGuiInputTextFlags_CallbackResize) + { + // Resize string callback + // If for some reason we refuse the new length (BufTextLen) and/or capacity (BufSize) we need to set them back to what we want. + std::string* str = user_data->Str; + IM_ASSERT(data->Buf == str->c_str()); + str->resize(data->BufTextLen); + data->Buf = (char*)str->c_str(); // resize() may have reallocated, so hand the current buffer pointer back + } + else if (user_data->ChainCallback) + { + // Forward to user callback, if any + data->UserData = user_data->ChainCallbackUserData; // The chained callback sees its own user data, not this wrapper's payload + return user_data->ChainCallback(data); + } + return 0; +} + +bool ImGui::InputText(const char* label, std::string* str, ImGuiInputTextFlags flags, ImGuiInputTextCallback callback, void* user_data) // std::string overload: edits *str in place and returns whatever the char* InputText() overload returns +{ + IM_ASSERT((flags & ImGuiInputTextFlags_CallbackResize) == 0); // The wrapper sets CallbackResize itself, so callers must not pass it + flags |= ImGuiInputTextFlags_CallbackResize; + + InputTextCallback_UserData cb_user_data; + cb_user_data.Str = str; + cb_user_data.ChainCallback = callback; + cb_user_data.ChainCallbackUserData = user_data; + return InputText(label, (char*)str->c_str(), str->capacity() + 1, flags, InputTextCallback, &cb_user_data); // NOTE(review): the + 1 presumably accounts for the terminating zero that capacity() does not count - confirm +} + +bool ImGui::InputTextMultiline(const char* label, std::string* str, const ImVec2& size, ImGuiInputTextFlags flags, ImGuiInputTextCallback callback, void* user_data) // std::string overload of the multi-line widget; same setup as InputText() above +{ + IM_ASSERT((flags & ImGuiInputTextFlags_CallbackResize) == 0); + flags |= ImGuiInputTextFlags_CallbackResize; + + InputTextCallback_UserData cb_user_data; + cb_user_data.Str = str; + cb_user_data.ChainCallback = callback; + cb_user_data.ChainCallbackUserData = user_data; + return InputTextMultiline(label, (char*)str->c_str(), str->capacity() + 1,
size, flags, InputTextCallback, &cb_user_data); +} + +bool ImGui::InputTextWithHint(const char* label, const char* hint, std::string* str, ImGuiInputTextFlags flags, ImGuiInputTextCallback callback, void* user_data) // std::string overload of the hinted widget; same setup as InputText() above +{ + IM_ASSERT((flags & ImGuiInputTextFlags_CallbackResize) == 0); + flags |= ImGuiInputTextFlags_CallbackResize; + + InputTextCallback_UserData cb_user_data; + cb_user_data.Str = str; + cb_user_data.ChainCallback = callback; + cb_user_data.ChainCallbackUserData = user_data; + return InputTextWithHint(label, hint, (char*)str->c_str(), str->capacity() + 1, flags, InputTextCallback, &cb_user_data); +} diff --git a/cpp-projects/3d-engine/imgui/misc/cpp/imgui_stdlib.h b/cpp-projects/3d-engine/imgui/misc/cpp/imgui_stdlib.h new file mode 100644 index 0000000..26d5ae3 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/misc/cpp/imgui_stdlib.h @@ -0,0 +1,23 @@ +// dear imgui: wrappers for C++ standard library (STL) types (std::string, etc.) +// This is also an example of how you may wrap your own similar types. + +// Compatibility: +// - std::string support is only guaranteed to work from C++11. +// If you try to use it pre-C++11, please share your findings (w/ info about compiler/architecture) + +// Changelog: +// - v0.10: Initial version.
Added InputText() / InputTextMultiline() calls with std::string + +#pragma once + +#include <string> +#include "imgui/imgui.h" + +namespace ImGui +{ + // ImGui::InputText() with std::string + // Because text input needs dynamic resizing, we need to setup a callback to grow the capacity + // Each overload edits *str in place; 'callback' and 'user_data' are optional and forwarded for events other than the resize event. + bool IMGUI_API InputText(const char* label, std::string* str, ImGuiInputTextFlags flags = 0, ImGuiInputTextCallback callback = NULL, void* user_data = NULL); + bool IMGUI_API InputTextMultiline(const char* label, std::string* str, const ImVec2& size = ImVec2(0, 0), ImGuiInputTextFlags flags = 0, ImGuiInputTextCallback callback = NULL, void* user_data = NULL); + bool IMGUI_API InputTextWithHint(const char* label, const char* hint, std::string* str, ImGuiInputTextFlags flags = 0, ImGuiInputTextCallback callback = NULL, void* user_data = NULL); +} diff --git a/cpp-projects/3d-engine/imgui/misc/fonts/Cousine-Regular.ttf b/cpp-projects/3d-engine/imgui/misc/fonts/Cousine-Regular.ttf new file mode 100644 index 0000000..70a0bf9 Binary files /dev/null and b/cpp-projects/3d-engine/imgui/misc/fonts/Cousine-Regular.ttf differ diff --git a/cpp-projects/3d-engine/imgui/misc/fonts/DroidSans.ttf b/cpp-projects/3d-engine/imgui/misc/fonts/DroidSans.ttf new file mode 100644 index 0000000..767c63a Binary files /dev/null and b/cpp-projects/3d-engine/imgui/misc/fonts/DroidSans.ttf differ diff --git a/cpp-projects/3d-engine/imgui/misc/fonts/Karla-Regular.ttf b/cpp-projects/3d-engine/imgui/misc/fonts/Karla-Regular.ttf new file mode 100644 index 0000000..81b3de6 Binary files /dev/null and b/cpp-projects/3d-engine/imgui/misc/fonts/Karla-Regular.ttf differ diff --git a/cpp-projects/3d-engine/imgui/misc/fonts/ProggyClean.ttf b/cpp-projects/3d-engine/imgui/misc/fonts/ProggyClean.ttf new file mode 100644 index 0000000..0270cdf Binary files /dev/null and b/cpp-projects/3d-engine/imgui/misc/fonts/ProggyClean.ttf differ diff --git a/cpp-projects/3d-engine/imgui/misc/fonts/ProggyTiny.ttf
b/cpp-projects/3d-engine/imgui/misc/fonts/ProggyTiny.ttf new file mode 100644 index 0000000..1c4312c Binary files /dev/null and b/cpp-projects/3d-engine/imgui/misc/fonts/ProggyTiny.ttf differ diff --git a/cpp-projects/3d-engine/imgui/misc/fonts/Roboto-Medium.ttf b/cpp-projects/3d-engine/imgui/misc/fonts/Roboto-Medium.ttf new file mode 100644 index 0000000..39c63d7 Binary files /dev/null and b/cpp-projects/3d-engine/imgui/misc/fonts/Roboto-Medium.ttf differ diff --git a/cpp-projects/3d-engine/imgui/misc/fonts/binary_to_compressed_c.cpp b/cpp-projects/3d-engine/imgui/misc/fonts/binary_to_compressed_c.cpp new file mode 100644 index 0000000..441c8f6 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/misc/fonts/binary_to_compressed_c.cpp @@ -0,0 +1,385 @@ +// dear imgui +// (binary_to_compressed_c.cpp) +// Helper tool to turn a file into a C array, if you want to embed font data in your source code. + +// The data is first compressed with stb_compress() to reduce source code size, +// then encoded in Base85 to fit in a string so we can fit roughly 4 bytes of compressed data into 5 bytes of source code (suggested by @mmalex) +// (If we used 32-bit constants it would take 11 bytes of source code to encode 4 bytes, and be endianness dependent) +// Note that even with compression, the output array is likely to be bigger than the binary file.
+// Load compressed TTF fonts with ImGui::GetIO().Fonts->AddFontFromMemoryCompressedTTF() + +// Build with, e.g: +// # cl.exe binary_to_compressed_c.cpp +// # g++ binary_to_compressed_c.cpp +// # clang++ binary_to_compressed_c.cpp +// You can also find a precompiled Windows binary in the binary/demo package available from https://github.com/ocornut/imgui + +// Usage: +// binary_to_compressed_c.exe [-base85] [-nocompress] +// Usage example: +// # binary_to_compressed_c.exe myfont.ttf MyFont > myfont.cpp +// # binary_to_compressed_c.exe -base85 myfont.ttf MyFont > myfont.cpp + +#define _CRT_SECURE_NO_WARNINGS +#include +#include +#include +#include + +// stb_compress* from stb.h - declaration +typedef unsigned int stb_uint; +typedef unsigned char stb_uchar; +stb_uint stb_compress(stb_uchar* out, stb_uchar* in, stb_uint len); + +static bool binary_to_compressed_c(const char* filename, const char* symbol, bool use_base85_encoding, bool use_compression); + +int main(int argc, char** argv) +{ + if (argc < 3) + { + printf("Syntax: %s [-base85] [-nocompress] \n", argv[0]); + return 0; + } + + int argn = 1; + bool use_base85_encoding = false; + bool use_compression = true; + if (argv[argn][0] == '-') + { + if (strcmp(argv[argn], "-base85") == 0) { use_base85_encoding = true; argn++; } + else if (strcmp(argv[argn], "-nocompress") == 0) { use_compression = false; argn++; } + else + { + fprintf(stderr, "Unknown argument: '%s'\n", argv[argn]); + return 1; + } + } + + bool ret = binary_to_compressed_c(argv[argn], argv[argn + 1], use_base85_encoding, use_compression); + if (!ret) + fprintf(stderr, "Error opening or reading file: '%s'\n", argv[argn]); + return ret ? 0 : 1; +} + +char Encode85Byte(unsigned int x) +{ + x = (x % 85) + 35; + return (x >= '\\') ? 
x + 1 : x; +} + +bool binary_to_compressed_c(const char* filename, const char* symbol, bool use_base85_encoding, bool use_compression) +{ + // Read file + FILE* f = fopen(filename, "rb"); + if (!f) return false; + int data_sz; + if (fseek(f, 0, SEEK_END) || (data_sz = (int)ftell(f)) == -1 || fseek(f, 0, SEEK_SET)) { fclose(f); return false; } + char* data = new char[data_sz + 4]; + if (fread(data, 1, data_sz, f) != (size_t)data_sz) { fclose(f); delete[] data; return false; } + memset((void*)(((char*)data) + data_sz), 0, 4); + fclose(f); + + // Compress + int maxlen = data_sz + 512 + (data_sz >> 2) + sizeof(int); // total guess + char* compressed = use_compression ? new char[maxlen] : data; + int compressed_sz = use_compression ? stb_compress((stb_uchar*)compressed, (stb_uchar*)data, data_sz) : data_sz; + if (use_compression) + memset(compressed + compressed_sz, 0, maxlen - compressed_sz); + + // Output as Base85 encoded + FILE* out = stdout; + fprintf(out, "// File: '%s' (%d bytes)\n", filename, (int)data_sz); + fprintf(out, "// Exported using binary_to_compressed_c.cpp\n"); + const char* compressed_str = use_compression ? "compressed_" : ""; + if (use_base85_encoding) + { + fprintf(out, "static const char %s_%sdata_base85[%d+1] =\n \"", symbol, compressed_str, (int)((compressed_sz + 3) / 4)*5); + char prev_c = 0; + for (int src_i = 0; src_i < compressed_sz; src_i += 4) + { + // This is made a little more complicated by the fact that ??X sequences are interpreted as trigraphs by old C/C++ compilers. So we need to escape pairs of ??. + unsigned int d = *(unsigned int*)(compressed + src_i); + for (unsigned int n5 = 0; n5 < 5; n5++, d /= 85) + { + char c = Encode85Byte(d); + fprintf(out, (c == '?' && prev_c == '?') ? 
"\\%c" : "%c", c); + prev_c = c; + } + if ((src_i % 112) == 112 - 4) + fprintf(out, "\"\n \""); + } + fprintf(out, "\";\n\n"); + } + else + { + fprintf(out, "static const unsigned int %s_%ssize = %d;\n", symbol, compressed_str, (int)compressed_sz); + fprintf(out, "static const unsigned int %s_%sdata[%d/4] =\n{", symbol, compressed_str, (int)((compressed_sz + 3) / 4)*4); + int column = 0; + for (int i = 0; i < compressed_sz; i += 4) + { + unsigned int d = *(unsigned int*)(compressed + i); + if ((column++ % 12) == 0) + fprintf(out, "\n 0x%08x, ", d); + else + fprintf(out, "0x%08x, ", d); + } + fprintf(out, "\n};\n\n"); + } + + // Cleanup + delete[] data; + if (use_compression) + delete[] compressed; + return true; +} + +// stb_compress* from stb.h - definition + +//////////////////// compressor /////////////////////// + +static stb_uint stb_adler32(stb_uint adler32, stb_uchar *buffer, stb_uint buflen) +{ + const unsigned long ADLER_MOD = 65521; + unsigned long s1 = adler32 & 0xffff, s2 = adler32 >> 16; + unsigned long blocklen, i; + + blocklen = buflen % 5552; + while (buflen) { + for (i=0; i + 7 < blocklen; i += 8) { + s1 += buffer[0], s2 += s1; + s1 += buffer[1], s2 += s1; + s1 += buffer[2], s2 += s1; + s1 += buffer[3], s2 += s1; + s1 += buffer[4], s2 += s1; + s1 += buffer[5], s2 += s1; + s1 += buffer[6], s2 += s1; + s1 += buffer[7], s2 += s1; + + buffer += 8; + } + + for (; i < blocklen; ++i) + s1 += *buffer++, s2 += s1; + + s1 %= ADLER_MOD, s2 %= ADLER_MOD; + buflen -= blocklen; + blocklen = 5552; + } + return (s2 << 16) + s1; +} + +static unsigned int stb_matchlen(stb_uchar *m1, stb_uchar *m2, stb_uint maxlen) +{ + stb_uint i; + for (i=0; i < maxlen; ++i) + if (m1[i] != m2[i]) return i; + return i; +} + +// simple implementation that just takes the source data in a big block + +static stb_uchar *stb__out; +static FILE *stb__outfile; +static stb_uint stb__outbytes; + +static void stb__write(unsigned char v) +{ + fputc(v, stb__outfile); + ++stb__outbytes; +} + 
+//#define stb_out(v) (stb__out ? *stb__out++ = (stb_uchar) (v) : stb__write((stb_uchar) (v))) +#define stb_out(v) do { if (stb__out) *stb__out++ = (stb_uchar) (v); else stb__write((stb_uchar) (v)); } while (0) + +static void stb_out2(stb_uint v) { stb_out(v >> 8); stb_out(v); } // Emits the low 16 bits of v, most significant byte first +static void stb_out3(stb_uint v) { stb_out(v >> 16); stb_out(v >> 8); stb_out(v); } // Emits the low 24 bits of v, most significant byte first +static void stb_out4(stb_uint v) { stb_out(v >> 24); stb_out(v >> 16); stb_out(v >> 8 ); stb_out(v); } // Emits all 32 bits of v, most significant byte first + +static void outliterals(stb_uchar *in, int numlit) // Emits numlit raw bytes from 'in' preceded by a length opcode; runs longer than 65536 are emitted in 65536-byte pieces +{ + while (numlit > 65536) { + outliterals(in,65536); + in += 65536; + numlit -= 65536; + } + + if (numlit == 0) ; + else if (numlit <= 32) stb_out (0x000020 + numlit-1); + else if (numlit <= 2048) stb_out2(0x000800 + numlit-1); + else /* numlit <= 65536) */ stb_out3(0x070000 + numlit-1); + + if (stb__out) { + memcpy(stb__out,in,numlit); + stb__out += numlit; + } else + fwrite(in, 1, numlit, stb__outfile); +} + +static int stb__window = 0x40000; // 256K + +static int stb_not_crap(int best, int dist) // Acceptance heuristic: the further away a match is, the longer it must be to be worth encoding +{ + return ((best > 2 && dist <= 0x00100) + || (best > 5 && dist <= 0x04000) + || (best > 7 && dist <= 0x80000)); +} + +static stb_uint stb__hashsize = 32768; + +// note that you can play with the hashing functions all you +// want without needing to change the decompressor +#define stb__hc(q,h,c) (((h) << 7) + ((h) >> 25) + q[c]) +#define stb__hc2(q,h,c,d) (((h) << 14) + ((h) >> 18) + (q[c] << 7) + q[d]) +#define stb__hc3(q,c,d,e) ((q[c] << 14) + (q[d] << 7) + q[e]) + +static unsigned int stb__running_adler; + +static int stb_compress_chunk(stb_uchar *history, + stb_uchar *start, + stb_uchar *end, + int length, + int *pending_literals, + stb_uchar **chash, + stb_uint mask) +{ + (void)history; // 'history' is not used by this implementation; the cast only marks it as intentionally unused + int window = stb__window; + stb_uint match_max; + stb_uchar *lit_start = start - *pending_literals; + stb_uchar *q = start; + +#define STB__SCRAMBLE(h) (((h) + ((h) >> 16)) & mask) + + // stop short of the end so we don't scan off the end doing + // the hashing;
this means we won't compress the last few bytes + // unless they were part of something longer + while (q < start+length && q+12 < end) { + int m; + stb_uint h1,h2,h3,h4, h; + stb_uchar *t; + int best = 2, dist=0; + + if (q+65536 > end) + match_max = end-q; + else + match_max = 65536; + +#define stb__nc(b,d) ((d) <= window && ((b) > 9 || stb_not_crap(b,d))) + +#define STB__TRY(t,p) /* avoid retrying a match we already tried */ \ + if (p ? dist != q-t : 1) \ + if ((m = stb_matchlen(t, q, match_max)) > best) \ + if (stb__nc(m,q-(t))) \ + best = m, dist = q - (t) + + // rather than search for all matches, only try 4 candidate locations, + // chosen based on 4 different hash functions of different lengths. + // this strategy is inspired by LZO; hashing is unrolled here using the + // 'hc' macro + h = stb__hc3(q,0, 1, 2); h1 = STB__SCRAMBLE(h); + t = chash[h1]; if (t) STB__TRY(t,0); + h = stb__hc2(q,h, 3, 4); h2 = STB__SCRAMBLE(h); + h = stb__hc2(q,h, 5, 6); t = chash[h2]; if (t) STB__TRY(t,1); + h = stb__hc2(q,h, 7, 8); h3 = STB__SCRAMBLE(h); + h = stb__hc2(q,h, 9,10); t = chash[h3]; if (t) STB__TRY(t,1); + h = stb__hc2(q,h,11,12); h4 = STB__SCRAMBLE(h); + t = chash[h4]; if (t) STB__TRY(t,1); + + // because we use a shared hash table, can only update it + // _after_ we've probed all of them + chash[h1] = chash[h2] = chash[h3] = chash[h4] = q; + + if (best > 2) + assert(dist > 0); + + // see if our best match qualifies + if (best < 3) { // fast path literals + ++q; + } else if (best > 2 && best <= 0x80 && dist <= 0x100) { + outliterals(lit_start, q-lit_start); lit_start = (q += best); + stb_out(0x80 + best-1); + stb_out(dist-1); + } else if (best > 5 && best <= 0x100 && dist <= 0x4000) { + outliterals(lit_start, q-lit_start); lit_start = (q += best); + stb_out2(0x4000 + dist-1); + stb_out(best-1); + } else if (best > 7 && best <= 0x100 && dist <= 0x80000) { + outliterals(lit_start, q-lit_start); lit_start = (q += best); + stb_out3(0x180000 + dist-1); + stb_out(best-1);
+ } else if (best > 8 && best <= 0x10000 && dist <= 0x80000) { + outliterals(lit_start, q-lit_start); lit_start = (q += best); + stb_out3(0x100000 + dist-1); + stb_out2(best-1); + } else if (best > 9 && dist <= 0x1000000) { + if (best > 65536) best = 65536; + outliterals(lit_start, q-lit_start); lit_start = (q += best); + if (best <= 0x100) { + stb_out(0x06); + stb_out3(dist-1); + stb_out(best-1); + } else { + stb_out(0x04); + stb_out3(dist-1); + stb_out2(best-1); + } + } else { // fallback literals if no match was a balanced tradeoff + ++q; + } + } + + // if we didn't get all the way, add the rest to literals + if (q-start < length) + q = start+length; + + // the literals are everything from lit_start to q + *pending_literals = (q - lit_start); + + stb__running_adler = stb_adler32(stb__running_adler, start, q - start); + return q - start; +} + +static int stb_compress_inner(stb_uchar *input, stb_uint length) // Writes the stream header, compresses the whole input as a single chunk, flushes the pending literals, then writes the end opcode and the running checksum; returns 0 if the hash table cannot be allocated, 1 otherwise +{ + int literals = 0; + stb_uint len,i; + + stb_uchar **chash; + chash = (stb_uchar**) malloc(stb__hashsize * sizeof(stb_uchar*)); + if (chash == NULL) return 0; // failure + for (i=0; i < stb__hashsize; ++i) + chash[i] = NULL; + + // stream signature + stb_out(0x57); stb_out(0xbc); + stb_out2(0); + + stb_out4(0); // 64-bit length requires 32-bit leading 0 + stb_out4(length); + stb_out4(stb__window); + + stb__running_adler = 1; + + len = stb_compress_chunk(input, input, input+length, length, &literals, chash, stb__hashsize-1); + assert(len == length); + + outliterals(input+length - literals, literals); + + free(chash); + + stb_out2(0x05fa); // end opcode + + stb_out4(stb__running_adler); + + return 1; // success +} + +stb_uint stb_compress(stb_uchar *out, stb_uchar *input, stb_uint length) // Compresses 'length' bytes of 'input' into the caller-provided buffer 'out' and returns the number of bytes written; the result of stb_compress_inner() is not checked here +{ + stb__out = out; + stb__outfile = NULL; + + stb_compress_inner(input, length); + + return stb__out - out; +} diff --git a/cpp-projects/3d-engine/imgui/misc/freetype/README.md b/cpp-projects/3d-engine/imgui/misc/freetype/README.md new file mode 100644 index
0000000..440fe85 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/misc/freetype/README.md @@ -0,0 +1,25 @@ +# imgui_freetype + +Build font atlases using FreeType instead of stb_truetype (which is the default font rasterizer). +
by @vuhdo, @mikesart, @ocornut. + +### Usage + +1. Get latest FreeType binaries or build yourself (under Windows you may use vcpkg with `vcpkg install freetype`, `vcpkg integrate install`). +2. Add imgui_freetype.h/cpp alongside your project files. +3. Add `#define IMGUI_ENABLE_FREETYPE` in your [imconfig.h](https://github.com/ocornut/imgui/blob/master/imconfig.h) file + +### About Gamma Correct Blending + +FreeType assumes blending in linear space rather than gamma space. +See FreeType note for [FT_Render_Glyph](https://www.freetype.org/freetype2/docs/reference/ft2-base_interface.html#FT_Render_Glyph). +For correct results you need to be using sRGB and convert to linear space in the pixel shader output. +The default Dear ImGui styles will be impacted by this change (alpha values will need tweaking). + +### Testbed for toying with settings (for developers) + +See https://gist.github.com/ocornut/b3a9ecf13502fd818799a452969649ad + +### Known issues +- `cfg.OversampleH`, `OversampleV` are ignored (but perhaps not so necessary with this rasterizer). + diff --git a/cpp-projects/3d-engine/imgui/misc/freetype/imgui_freetype.cpp b/cpp-projects/3d-engine/imgui/misc/freetype/imgui_freetype.cpp new file mode 100644 index 0000000..e96559b --- /dev/null +++ b/cpp-projects/3d-engine/imgui/misc/freetype/imgui_freetype.cpp @@ -0,0 +1,700 @@ +// dear imgui: FreeType font builder (used as a replacement for the stb_truetype builder) +// (code) + +// Get latest version at https://github.com/ocornut/imgui/tree/master/misc/freetype +// Original code by @vuhdo (Aleksei Skriabin). Improvements by @mikesart. Maintained since 2019 by @ocornut. + +// CHANGELOG +// (minor and older changes stripped away, please see git history for details) +// 2021/01/26: simplified integration by using '#define IMGUI_ENABLE_FREETYPE'. +// renamed ImGuiFreeType::XXX flags to ImGuiFreeTypeBuilderFlags_XXX for consistency with other API. removed ImGuiFreeType::BuildFontAtlas(). 
+// 2020/06/04: fix for rare case where FT_Get_Char_Index() succeeds but FT_Load_Glyph() fails. +// 2019/02/09: added RasterizerFlags::Monochrome flag to disable font anti-aliasing (combine with ::MonoHinting for best results!) +// 2019/01/15: added support for imgui allocators + added FreeType only override function SetAllocatorFunctions(). +// 2019/01/10: re-factored to match big update in STB builder. fixed texture height waste. fixed redundant glyphs when merging. support for glyph padding. +// 2018/06/08: added support for ImFontConfig::GlyphMinAdvanceX, GlyphMaxAdvanceX. +// 2018/02/04: moved to main imgui repository (away from http://www.github.com/ocornut/imgui_club) +// 2018/01/22: fix for addition of ImFontAtlas::TexUvScale member. +// 2017/10/22: minor inconsequential change to match change in master (removed an unnecessary statement). +// 2017/09/26: fixes for imgui internal changes. +// 2017/08/26: cleanup, optimizations, support for ImFontConfig::RasterizerFlags, ImFontConfig::RasterizerMultiply. +// 2017/08/16: imported from https://github.com/Vuhdo/imgui_freetype into http://www.github.com/ocornut/imgui_club, updated for latest changes in ImFontAtlas, minor tweaks. + +// About Gamma Correct Blending: +// - FreeType assumes blending in linear space rather than gamma space. +// - See https://www.freetype.org/freetype2/docs/reference/ft2-base_interface.html#FT_Render_Glyph +// - For correct results you need to be using sRGB and convert to linear space in the pixel shader output. +// - The default dear imgui styles will be impacted by this change (alpha values will need tweaking). + +// FIXME: cfg.OversampleH, OversampleV are not supported (but perhaps not so necessary with this rasterizer).
+ +#include "imgui_freetype.h" +#include "imgui_internal.h" // ImMin,ImMax,ImFontAtlasBuild*, +#include +#include +#include FT_FREETYPE_H // +#include FT_MODULE_H // +#include FT_GLYPH_H // +#include FT_SYNTHESIS_H // + +#ifdef _MSC_VER +#pragma warning (disable: 4505) // unreferenced local function has been removed (stb stuff) +#endif + +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wpragmas" // warning: unknown option after '#pragma GCC diagnostic' kind +#pragma GCC diagnostic ignored "-Wunused-function" // warning: 'xxxx' defined but not used +#endif + +//------------------------------------------------------------------------- +// Data +//------------------------------------------------------------------------- + +// Default memory allocators +static void* ImGuiFreeTypeDefaultAllocFunc(size_t size, void* user_data) { IM_UNUSED(user_data); return IM_ALLOC(size); } +static void ImGuiFreeTypeDefaultFreeFunc(void* ptr, void* user_data) { IM_UNUSED(user_data); IM_FREE(ptr); } + +// Current memory allocators +static void* (*GImGuiFreeTypeAllocFunc)(size_t size, void* user_data) = ImGuiFreeTypeDefaultAllocFunc; +static void (*GImGuiFreeTypeFreeFunc)(void* ptr, void* user_data) = ImGuiFreeTypeDefaultFreeFunc; +static void* GImGuiFreeTypeAllocatorUserData = NULL; + +//------------------------------------------------------------------------- +// Code +//------------------------------------------------------------------------- + +namespace +{ + // Glyph metrics: + // -------------- + // + // xmin xmax + // | | + // |<-------- width -------->| + // | | + // | +-------------------------+----------------- ymax + // | | ggggggggg ggggg | ^ ^ + // | | g:::::::::ggg::::g | | | + // | | g:::::::::::::::::g | | | + // | | g::::::ggggg::::::gg | | | + // | | g:::::g g:::::g | | | + // offsetX -|-------->| g:::::g g:::::g | offsetY | + // | | g:::::g g:::::g | | | + // | | g::::::g g:::::g | | | + // | | g:::::::ggggg:::::g | | | + // | | g::::::::::::::::g | | height + 
// | | gg::::::::::::::g | | | + // baseline ---*---------|---- gggggggg::::::g-----*-------- | + // / | | g:::::g | | + // origin | | gggggg g:::::g | | + // | | g:::::gg gg:::::g | | + // | | g::::::ggg:::::::g | | + // | | gg:::::::::::::g | | + // | | ggg::::::ggg | | + // | | gggggg | v + // | +-------------------------+----------------- ymin + // | | + // |------------- advanceX ----------->| + + /// A structure that describe a glyph. + struct GlyphInfo + { + int Width; // Glyph's width in pixels. + int Height; // Glyph's height in pixels. + FT_Int OffsetX; // The distance from the origin ("pen position") to the left of the glyph. + FT_Int OffsetY; // The distance from the origin to the top of the glyph. This is usually a value < 0. + float AdvanceX; // The distance from the origin to the origin of the next glyph. This is usually a value > 0. + }; + + // Font parameters and metrics. + struct FontInfo + { + uint32_t PixelHeight; // Size this font was generated with. + float Ascender; // The pixel extents above the baseline in pixels (typically positive). + float Descender; // The extents below the baseline in pixels (typically negative). + float LineSpacing; // The baseline-to-baseline distance. Note that it usually is larger than the sum of the ascender and descender taken as absolute values. There is also no guarantee that no glyphs extend above or below subsequent baselines when using this distance. Think of it as a value the designer of the font finds appropriate. + float LineGap; // The spacing in pixels between one row's descent and the next row's ascent. + float MaxAdvanceWidth; // This field gives the maximum horizontal cursor advance for all glyphs in the font. + }; + + // FreeType glyph rasterizer. + // NB: No ctor/dtor, explicitly call Init()/Shutdown() + struct FreeTypeFont + { + bool InitFont(FT_Library ft_library, const ImFontConfig& cfg, unsigned int extra_user_flags); // Initialize from an external data buffer. 
Doesn't copy data, and you must ensure it stays valid up to this object lifetime. + void CloseFont(); + void SetPixelHeight(int pixel_height); // Change font pixel size. All following calls to RasterizeGlyph() will use this size + const FT_Glyph_Metrics* LoadGlyph(uint32_t in_codepoint); + const FT_Bitmap* RenderGlyphAndGetInfo(GlyphInfo* out_glyph_info); + void BlitGlyph(const FT_Bitmap* ft_bitmap, uint8_t* dst, uint32_t dst_pitch, unsigned char* multiply_table = NULL); + ~FreeTypeFont() { CloseFont(); } + + // [Internals] + FontInfo Info; // Font descriptor of the current font. + FT_Face Face; + unsigned int UserFlags; // = ImFontConfig::RasterizerFlags + FT_Int32 LoadFlags; + FT_Render_Mode RenderMode; + }; + + // From SDL_ttf: Handy routines for converting from fixed point + #define FT_CEIL(X) (((X + 63) & -64) / 64) + + bool FreeTypeFont::InitFont(FT_Library ft_library, const ImFontConfig& cfg, unsigned int extra_font_builder_flags) + { + FT_Error error = FT_New_Memory_Face(ft_library, (uint8_t*)cfg.FontData, (uint32_t)cfg.FontDataSize, (uint32_t)cfg.FontNo, &Face); + if (error != 0) + return false; + error = FT_Select_Charmap(Face, FT_ENCODING_UNICODE); + if (error != 0) + return false; + + memset(&Info, 0, sizeof(Info)); + SetPixelHeight((uint32_t)cfg.SizePixels); + + // Convert to FreeType flags (NB: Bold and Oblique are processed separately) + UserFlags = cfg.FontBuilderFlags | extra_font_builder_flags; + LoadFlags = FT_LOAD_NO_BITMAP; + if (UserFlags & ImGuiFreeTypeBuilderFlags_NoHinting) + LoadFlags |= FT_LOAD_NO_HINTING; + if (UserFlags & ImGuiFreeTypeBuilderFlags_NoAutoHint) + LoadFlags |= FT_LOAD_NO_AUTOHINT; + if (UserFlags & ImGuiFreeTypeBuilderFlags_ForceAutoHint) + LoadFlags |= FT_LOAD_FORCE_AUTOHINT; + if (UserFlags & ImGuiFreeTypeBuilderFlags_LightHinting) + LoadFlags |= FT_LOAD_TARGET_LIGHT; + else if (UserFlags & ImGuiFreeTypeBuilderFlags_MonoHinting) + LoadFlags |= FT_LOAD_TARGET_MONO; + else + LoadFlags |= FT_LOAD_TARGET_NORMAL; + + if 
(UserFlags & ImGuiFreeTypeBuilderFlags_Monochrome) + RenderMode = FT_RENDER_MODE_MONO; + else + RenderMode = FT_RENDER_MODE_NORMAL; + + return true; + } + + void FreeTypeFont::CloseFont() + { + if (Face) + { + FT_Done_Face(Face); + Face = NULL; + } + } + + void FreeTypeFont::SetPixelHeight(int pixel_height) + { + // Vuhdo: I'm not sure how to deal with font sizes properly. As far as I understand, currently ImGui assumes that the 'pixel_height' + // is a maximum height of an any given glyph, i.e. it's the sum of font's ascender and descender. Seems strange to me. + // NB: FT_Set_Pixel_Sizes() doesn't seem to get us the same result. + FT_Size_RequestRec req; + req.type = FT_SIZE_REQUEST_TYPE_REAL_DIM; + req.width = 0; + req.height = (uint32_t)pixel_height * 64; + req.horiResolution = 0; + req.vertResolution = 0; + FT_Request_Size(Face, &req); + + // Update font info + FT_Size_Metrics metrics = Face->size->metrics; + Info.PixelHeight = (uint32_t)pixel_height; + Info.Ascender = (float)FT_CEIL(metrics.ascender); + Info.Descender = (float)FT_CEIL(metrics.descender); + Info.LineSpacing = (float)FT_CEIL(metrics.height); + Info.LineGap = (float)FT_CEIL(metrics.height - metrics.ascender + metrics.descender); + Info.MaxAdvanceWidth = (float)FT_CEIL(metrics.max_advance); + } + + const FT_Glyph_Metrics* FreeTypeFont::LoadGlyph(uint32_t codepoint) + { + uint32_t glyph_index = FT_Get_Char_Index(Face, codepoint); + if (glyph_index == 0) + return NULL; + FT_Error error = FT_Load_Glyph(Face, glyph_index, LoadFlags); + if (error) + return NULL; + + // Need an outline for this to work + FT_GlyphSlot slot = Face->glyph; + IM_ASSERT(slot->format == FT_GLYPH_FORMAT_OUTLINE); + + // Apply convenience transform (this is not picking from real "Bold"/"Italic" fonts! Merely applying FreeType helper transform. 
Oblique == Slanting) + if (UserFlags & ImGuiFreeTypeBuilderFlags_Bold) + FT_GlyphSlot_Embolden(slot); + if (UserFlags & ImGuiFreeTypeBuilderFlags_Oblique) + { + FT_GlyphSlot_Oblique(slot); + //FT_BBox bbox; + //FT_Outline_Get_BBox(&slot->outline, &bbox); + //slot->metrics.width = bbox.xMax - bbox.xMin; + //slot->metrics.height = bbox.yMax - bbox.yMin; + } + + return &slot->metrics; + } + + const FT_Bitmap* FreeTypeFont::RenderGlyphAndGetInfo(GlyphInfo* out_glyph_info) + { + FT_GlyphSlot slot = Face->glyph; + FT_Error error = FT_Render_Glyph(slot, RenderMode); + if (error != 0) + return NULL; + + FT_Bitmap* ft_bitmap = &Face->glyph->bitmap; + out_glyph_info->Width = (int)ft_bitmap->width; + out_glyph_info->Height = (int)ft_bitmap->rows; + out_glyph_info->OffsetX = Face->glyph->bitmap_left; + out_glyph_info->OffsetY = -Face->glyph->bitmap_top; + out_glyph_info->AdvanceX = (float)FT_CEIL(slot->advance.x); + + return ft_bitmap; + } + + void FreeTypeFont::BlitGlyph(const FT_Bitmap* ft_bitmap, uint8_t* dst, uint32_t dst_pitch, unsigned char* multiply_table) + { + IM_ASSERT(ft_bitmap != NULL); + const uint32_t w = ft_bitmap->width; + const uint32_t h = ft_bitmap->rows; + const uint8_t* src = ft_bitmap->buffer; + const uint32_t src_pitch = ft_bitmap->pitch; + + switch (ft_bitmap->pixel_mode) + { + case FT_PIXEL_MODE_GRAY: // Grayscale image, 1 byte per pixel. + { + if (multiply_table == NULL) + { + for (uint32_t y = 0; y < h; y++, src += src_pitch, dst += dst_pitch) + memcpy(dst, src, w); + } + else + { + for (uint32_t y = 0; y < h; y++, src += src_pitch, dst += dst_pitch) + for (uint32_t x = 0; x < w; x++) + dst[x] = multiply_table[src[x]]; + } + break; + } + case FT_PIXEL_MODE_MONO: // Monochrome image, 1 bit per pixel. The bits in each byte are ordered from MSB to LSB. + { + uint8_t color0 = multiply_table ? multiply_table[0] : 0; + uint8_t color1 = multiply_table ? 
multiply_table[255] : 255; + for (uint32_t y = 0; y < h; y++, src += src_pitch, dst += dst_pitch) + { + uint8_t bits = 0; + const uint8_t* bits_ptr = src; + for (uint32_t x = 0; x < w; x++, bits <<= 1) + { + if ((x & 7) == 0) + bits = *bits_ptr++; + dst[x] = (bits & 0x80) ? color1 : color0; + } + } + break; + } + default: + IM_ASSERT(0 && "FreeTypeFont::BlitGlyph(): Unknown bitmap pixel mode!"); + } + } +} + +#ifndef STB_RECT_PACK_IMPLEMENTATION // in case the user already have an implementation in the _same_ compilation unit (e.g. unity builds) +#ifndef IMGUI_DISABLE_STB_RECT_PACK_IMPLEMENTATION +#define STBRP_ASSERT(x) do { IM_ASSERT(x); } while (0) +#define STBRP_STATIC +#define STB_RECT_PACK_IMPLEMENTATION +#endif +#ifdef IMGUI_STB_RECT_PACK_FILENAME +#include IMGUI_STB_RECT_PACK_FILENAME +#else +#include "imstb_rectpack.h" +#endif +#endif + +struct ImFontBuildSrcGlyphFT +{ + GlyphInfo Info; + uint32_t Codepoint; + unsigned char* BitmapData; // Point within one of the dst_tmp_bitmap_buffers[] array +}; + +struct ImFontBuildSrcDataFT +{ + FreeTypeFont Font; + stbrp_rect* Rects; // Rectangle to pack. We first fill in their size and the packer will give us their position. + const ImWchar* SrcRanges; // Ranges as requested by user (user is allowed to request too much, e.g. 0x0020..0xFFFF) + int DstIndex; // Index into atlas->Fonts[] and dst_tmp_array[] + int GlyphsHighest; // Highest requested codepoint + int GlyphsCount; // Glyph count (excluding missing glyphs and glyphs already set by an earlier source font) + ImBitVector GlyphsSet; // Glyph bit map (random access, 1-bit per codepoint. This will be a maximum of 8KB) + ImVector GlyphsList; +}; + +// Temporary data for one destination ImFont* (multiple source fonts can be merged into one destination ImFont) +struct ImFontBuildDstDataFT +{ + int SrcCount; // Number of source fonts targeting this destination font. 
+ int GlyphsHighest; + int GlyphsCount; + ImBitVector GlyphsSet; // This is used to resolve collision when multiple sources are merged into a same destination font. +}; + +bool ImFontAtlasBuildWithFreeTypeEx(FT_Library ft_library, ImFontAtlas* atlas, unsigned int extra_flags) +{ + IM_ASSERT(atlas->ConfigData.Size > 0); + + ImFontAtlasBuildInit(atlas); + + // Clear atlas + atlas->TexID = (ImTextureID)NULL; + atlas->TexWidth = atlas->TexHeight = 0; + atlas->TexUvScale = ImVec2(0.0f, 0.0f); + atlas->TexUvWhitePixel = ImVec2(0.0f, 0.0f); + atlas->ClearTexData(); + + // Temporary storage for building + ImVector src_tmp_array; + ImVector dst_tmp_array; + src_tmp_array.resize(atlas->ConfigData.Size); + dst_tmp_array.resize(atlas->Fonts.Size); + memset((void*)src_tmp_array.Data, 0, (size_t)src_tmp_array.size_in_bytes()); + memset((void*)dst_tmp_array.Data, 0, (size_t)dst_tmp_array.size_in_bytes()); + + // 1. Initialize font loading structure, check font data validity + for (int src_i = 0; src_i < atlas->ConfigData.Size; src_i++) + { + ImFontBuildSrcDataFT& src_tmp = src_tmp_array[src_i]; + ImFontConfig& cfg = atlas->ConfigData[src_i]; + FreeTypeFont& font_face = src_tmp.Font; + IM_ASSERT(cfg.DstFont && (!cfg.DstFont->IsLoaded() || cfg.DstFont->ContainerAtlas == atlas)); + + // Find index from cfg.DstFont (we allow the user to set cfg.DstFont. Also it makes casual debugging nicer than when storing indices) + src_tmp.DstIndex = -1; + for (int output_i = 0; output_i < atlas->Fonts.Size && src_tmp.DstIndex == -1; output_i++) + if (cfg.DstFont == atlas->Fonts[output_i]) + src_tmp.DstIndex = output_i; + IM_ASSERT(src_tmp.DstIndex != -1); // cfg.DstFont not pointing within atlas->Fonts[] array? + if (src_tmp.DstIndex == -1) + return false; + + // Load font + if (!font_face.InitFont(ft_library, cfg, extra_flags)) + return false; + + // Measure highest codepoints + ImFontBuildDstDataFT& dst_tmp = dst_tmp_array[src_tmp.DstIndex]; + src_tmp.SrcRanges = cfg.GlyphRanges ? 
cfg.GlyphRanges : atlas->GetGlyphRangesDefault(); + for (const ImWchar* src_range = src_tmp.SrcRanges; src_range[0] && src_range[1]; src_range += 2) + src_tmp.GlyphsHighest = ImMax(src_tmp.GlyphsHighest, (int)src_range[1]); + dst_tmp.SrcCount++; + dst_tmp.GlyphsHighest = ImMax(dst_tmp.GlyphsHighest, src_tmp.GlyphsHighest); + } + + // 2. For every requested codepoint, check for their presence in the font data, and handle redundancy or overlaps between source fonts to avoid unused glyphs. + int total_glyphs_count = 0; + for (int src_i = 0; src_i < src_tmp_array.Size; src_i++) + { + ImFontBuildSrcDataFT& src_tmp = src_tmp_array[src_i]; + ImFontBuildDstDataFT& dst_tmp = dst_tmp_array[src_tmp.DstIndex]; + src_tmp.GlyphsSet.Create(src_tmp.GlyphsHighest + 1); + if (dst_tmp.GlyphsSet.Storage.empty()) + dst_tmp.GlyphsSet.Create(dst_tmp.GlyphsHighest + 1); + + for (const ImWchar* src_range = src_tmp.SrcRanges; src_range[0] && src_range[1]; src_range += 2) + for (int codepoint = src_range[0]; codepoint <= (int)src_range[1]; codepoint++) + { + if (dst_tmp.GlyphsSet.TestBit(codepoint)) // Don't overwrite existing glyphs. We could make this an option (e.g. MergeOverwrite) + continue; + uint32_t glyph_index = FT_Get_Char_Index(src_tmp.Font.Face, codepoint); // It is actually in the font? (FIXME-OPT: We are not storing the glyph_index..) + if (glyph_index == 0) + continue; + + // Add to avail set/counters + src_tmp.GlyphsCount++; + dst_tmp.GlyphsCount++; + src_tmp.GlyphsSet.SetBit(codepoint); + dst_tmp.GlyphsSet.SetBit(codepoint); + total_glyphs_count++; + } + } + + // 3. 
Unpack our bit map into a flat list (we now have all the Unicode points that we know are requested _and_ available _and_ not overlapping another) + for (int src_i = 0; src_i < src_tmp_array.Size; src_i++) + { + ImFontBuildSrcDataFT& src_tmp = src_tmp_array[src_i]; + src_tmp.GlyphsList.reserve(src_tmp.GlyphsCount); + + IM_ASSERT(sizeof(src_tmp.GlyphsSet.Storage.Data[0]) == sizeof(ImU32)); + const ImU32* it_begin = src_tmp.GlyphsSet.Storage.begin(); + const ImU32* it_end = src_tmp.GlyphsSet.Storage.end(); + for (const ImU32* it = it_begin; it < it_end; it++) + if (ImU32 entries_32 = *it) + for (ImU32 bit_n = 0; bit_n < 32; bit_n++) + if (entries_32 & ((ImU32)1 << bit_n)) + { + ImFontBuildSrcGlyphFT src_glyph; + memset(&src_glyph, 0, sizeof(src_glyph)); + src_glyph.Codepoint = (ImWchar)(((it - it_begin) << 5) + bit_n); + //src_glyph.GlyphIndex = 0; // FIXME-OPT: We had this info in the previous step and lost it.. + src_tmp.GlyphsList.push_back(src_glyph); + } + src_tmp.GlyphsSet.Clear(); + IM_ASSERT(src_tmp.GlyphsList.Size == src_tmp.GlyphsCount); + } + for (int dst_i = 0; dst_i < dst_tmp_array.Size; dst_i++) + dst_tmp_array[dst_i].GlyphsSet.Clear(); + dst_tmp_array.clear(); + + // Allocate packing character data and flag packed characters buffer as non-packed (x0=y0=x1=y1=0) + // (We technically don't need to zero-clear buf_rects, but let's do it for the sake of sanity) + ImVector buf_rects; + buf_rects.resize(total_glyphs_count); + memset(buf_rects.Data, 0, (size_t)buf_rects.size_in_bytes()); + + // Allocate temporary rasterization data buffers. + // We could not find a way to retrieve accurate glyph size without rendering them. + // (e.g. slot->metrics->width not always matching bitmap->width, especially considering the Oblique transform) + // We allocate in chunks of 256 KB to not waste too much extra memory ahead. Hopefully users of FreeType won't find the temporary allocations. 
+ const int BITMAP_BUFFERS_CHUNK_SIZE = 256 * 1024; + int buf_bitmap_current_used_bytes = 0; + ImVector buf_bitmap_buffers; + buf_bitmap_buffers.push_back((unsigned char*)IM_ALLOC(BITMAP_BUFFERS_CHUNK_SIZE)); + + // 4. Gather glyphs sizes so we can pack them in our virtual canvas. + // 8. Render/rasterize font characters into the texture + int total_surface = 0; + int buf_rects_out_n = 0; + for (int src_i = 0; src_i < src_tmp_array.Size; src_i++) + { + ImFontBuildSrcDataFT& src_tmp = src_tmp_array[src_i]; + ImFontConfig& cfg = atlas->ConfigData[src_i]; + if (src_tmp.GlyphsCount == 0) + continue; + + src_tmp.Rects = &buf_rects[buf_rects_out_n]; + buf_rects_out_n += src_tmp.GlyphsCount; + + // Compute multiply table if requested + const bool multiply_enabled = (cfg.RasterizerMultiply != 1.0f); + unsigned char multiply_table[256]; + if (multiply_enabled) + ImFontAtlasBuildMultiplyCalcLookupTable(multiply_table, cfg.RasterizerMultiply); + + // Gather the sizes of all rectangles we will need to pack + const int padding = atlas->TexGlyphPadding; + for (int glyph_i = 0; glyph_i < src_tmp.GlyphsList.Size; glyph_i++) + { + ImFontBuildSrcGlyphFT& src_glyph = src_tmp.GlyphsList[glyph_i]; + + const FT_Glyph_Metrics* metrics = src_tmp.Font.LoadGlyph(src_glyph.Codepoint); + if (metrics == NULL) + continue; + + // Render glyph into a bitmap (currently held by FreeType) + const FT_Bitmap* ft_bitmap = src_tmp.Font.RenderGlyphAndGetInfo(&src_glyph.Info); + IM_ASSERT(ft_bitmap); + + // Allocate new temporary chunk if needed + const int bitmap_size_in_bytes = src_glyph.Info.Width * src_glyph.Info.Height; + if (buf_bitmap_current_used_bytes + bitmap_size_in_bytes > BITMAP_BUFFERS_CHUNK_SIZE) + { + buf_bitmap_current_used_bytes = 0; + buf_bitmap_buffers.push_back((unsigned char*)IM_ALLOC(BITMAP_BUFFERS_CHUNK_SIZE)); + } + + // Blit rasterized pixels to our temporary buffer and keep a pointer to it. 
+ src_glyph.BitmapData = buf_bitmap_buffers.back() + buf_bitmap_current_used_bytes; + buf_bitmap_current_used_bytes += bitmap_size_in_bytes; + src_tmp.Font.BlitGlyph(ft_bitmap, src_glyph.BitmapData, src_glyph.Info.Width * 1, multiply_enabled ? multiply_table : NULL); + + src_tmp.Rects[glyph_i].w = (stbrp_coord)(src_glyph.Info.Width + padding); + src_tmp.Rects[glyph_i].h = (stbrp_coord)(src_glyph.Info.Height + padding); + total_surface += src_tmp.Rects[glyph_i].w * src_tmp.Rects[glyph_i].h; + } + } + + // We need a width for the skyline algorithm, any width! + // The exact width doesn't really matter much, but some API/GPU have texture size limitations and increasing width can decrease height. + // User can override TexDesiredWidth and TexGlyphPadding if they wish, otherwise we use a simple heuristic to select the width based on expected surface. + const int surface_sqrt = (int)ImSqrt((float)total_surface) + 1; + atlas->TexHeight = 0; + if (atlas->TexDesiredWidth > 0) + atlas->TexWidth = atlas->TexDesiredWidth; + else + atlas->TexWidth = (surface_sqrt >= 4096 * 0.7f) ? 4096 : (surface_sqrt >= 2048 * 0.7f) ? 2048 : (surface_sqrt >= 1024 * 0.7f) ? 1024 : 512; + + // 5. Start packing + // Pack our extra data rectangles first, so it will be on the upper-left corner of our texture (UV will have small values). + const int TEX_HEIGHT_MAX = 1024 * 32; + const int num_nodes_for_packing_algorithm = atlas->TexWidth - atlas->TexGlyphPadding; + ImVector pack_nodes; + pack_nodes.resize(num_nodes_for_packing_algorithm); + stbrp_context pack_context; + stbrp_init_target(&pack_context, atlas->TexWidth, TEX_HEIGHT_MAX, pack_nodes.Data, pack_nodes.Size); + ImFontAtlasBuildPackCustomRects(atlas, &pack_context); + + // 6. Pack each source font. No rendering yet, we are working with rectangles in an infinitely tall texture at this point. 
+ for (int src_i = 0; src_i < src_tmp_array.Size; src_i++) + { + ImFontBuildSrcDataFT& src_tmp = src_tmp_array[src_i]; + if (src_tmp.GlyphsCount == 0) + continue; + + stbrp_pack_rects(&pack_context, src_tmp.Rects, src_tmp.GlyphsCount); + + // Extend texture height and mark missing glyphs as non-packed so we won't render them. + // FIXME: We are not handling packing failure here (would happen if we got off TEX_HEIGHT_MAX or if a single if larger than TexWidth?) + for (int glyph_i = 0; glyph_i < src_tmp.GlyphsCount; glyph_i++) + if (src_tmp.Rects[glyph_i].was_packed) + atlas->TexHeight = ImMax(atlas->TexHeight, src_tmp.Rects[glyph_i].y + src_tmp.Rects[glyph_i].h); + } + + // 7. Allocate texture + atlas->TexHeight = (atlas->Flags & ImFontAtlasFlags_NoPowerOfTwoHeight) ? (atlas->TexHeight + 1) : ImUpperPowerOfTwo(atlas->TexHeight); + atlas->TexUvScale = ImVec2(1.0f / atlas->TexWidth, 1.0f / atlas->TexHeight); + atlas->TexPixelsAlpha8 = (unsigned char*)IM_ALLOC(atlas->TexWidth * atlas->TexHeight); + memset(atlas->TexPixelsAlpha8, 0, atlas->TexWidth * atlas->TexHeight); + + // 8. Copy rasterized font characters back into the main texture + // 9. Setup ImFont and glyphs for runtime + for (int src_i = 0; src_i < src_tmp_array.Size; src_i++) + { + ImFontBuildSrcDataFT& src_tmp = src_tmp_array[src_i]; + if (src_tmp.GlyphsCount == 0) + continue; + + // When merging fonts with MergeMode=true: + // - We can have multiple input fonts writing into a same destination font. + // - dst_font->ConfigData is != from cfg which is our source configuration. 
+ ImFontConfig& cfg = atlas->ConfigData[src_i]; + ImFont* dst_font = cfg.DstFont; + + const float ascent = src_tmp.Font.Info.Ascender; + const float descent = src_tmp.Font.Info.Descender; + ImFontAtlasBuildSetupFont(atlas, dst_font, &cfg, ascent, descent); + const float font_off_x = cfg.GlyphOffset.x; + const float font_off_y = cfg.GlyphOffset.y + IM_ROUND(dst_font->Ascent); + + const int padding = atlas->TexGlyphPadding; + for (int glyph_i = 0; glyph_i < src_tmp.GlyphsCount; glyph_i++) + { + ImFontBuildSrcGlyphFT& src_glyph = src_tmp.GlyphsList[glyph_i]; + stbrp_rect& pack_rect = src_tmp.Rects[glyph_i]; + IM_ASSERT(pack_rect.was_packed); + if (pack_rect.w == 0 && pack_rect.h == 0) + continue; + + GlyphInfo& info = src_glyph.Info; + IM_ASSERT(info.Width + padding <= pack_rect.w); + IM_ASSERT(info.Height + padding <= pack_rect.h); + const int tx = pack_rect.x + padding; + const int ty = pack_rect.y + padding; + + // Blit from temporary buffer to final texture + size_t blit_src_stride = (size_t)src_glyph.Info.Width; + size_t blit_dst_stride = (size_t)atlas->TexWidth; + unsigned char* blit_src = src_glyph.BitmapData; + unsigned char* blit_dst = atlas->TexPixelsAlpha8 + (ty * blit_dst_stride) + tx; + for (int y = info.Height; y > 0; y--, blit_dst += blit_dst_stride, blit_src += blit_src_stride) + memcpy(blit_dst, blit_src, blit_src_stride); + + // Register glyph + float x0 = info.OffsetX + font_off_x; + float y0 = info.OffsetY + font_off_y; + float x1 = x0 + info.Width; + float y1 = y0 + info.Height; + float u0 = (tx) / (float)atlas->TexWidth; + float v0 = (ty) / (float)atlas->TexHeight; + float u1 = (tx + info.Width) / (float)atlas->TexWidth; + float v1 = (ty + info.Height) / (float)atlas->TexHeight; + dst_font->AddGlyph(&cfg, (ImWchar)src_glyph.Codepoint, x0, y0, x1, y1, u0, v0, u1, v1, info.AdvanceX); + } + + src_tmp.Rects = NULL; + } + + // Cleanup + for (int buf_i = 0; buf_i < buf_bitmap_buffers.Size; buf_i++) + IM_FREE(buf_bitmap_buffers[buf_i]); + for (int src_i 
= 0; src_i < src_tmp_array.Size; src_i++) + src_tmp_array[src_i].~ImFontBuildSrcDataFT(); + + ImFontAtlasBuildFinish(atlas); + + return true; +} + +// FreeType memory allocation callbacks +static void* FreeType_Alloc(FT_Memory /*memory*/, long size) +{ + return GImGuiFreeTypeAllocFunc((size_t)size, GImGuiFreeTypeAllocatorUserData); +} + +static void FreeType_Free(FT_Memory /*memory*/, void* block) +{ + GImGuiFreeTypeFreeFunc(block, GImGuiFreeTypeAllocatorUserData); +} + +static void* FreeType_Realloc(FT_Memory /*memory*/, long cur_size, long new_size, void* block) +{ + // Implement realloc() as we don't ask user to provide it. + if (block == NULL) + return GImGuiFreeTypeAllocFunc((size_t)new_size, GImGuiFreeTypeAllocatorUserData); + + if (new_size == 0) + { + GImGuiFreeTypeFreeFunc(block, GImGuiFreeTypeAllocatorUserData); + return NULL; + } + + if (new_size > cur_size) + { + void* new_block = GImGuiFreeTypeAllocFunc((size_t)new_size, GImGuiFreeTypeAllocatorUserData); + memcpy(new_block, block, (size_t)cur_size); + GImGuiFreeTypeFreeFunc(block, GImGuiFreeTypeAllocatorUserData); + return new_block; + } + + return block; +} + +static bool ImFontAtlasBuildWithFreeType(ImFontAtlas* atlas) +{ + // FreeType memory management: https://www.freetype.org/freetype2/docs/design/design-4.html + FT_MemoryRec_ memory_rec = {}; + memory_rec.user = NULL; + memory_rec.alloc = &FreeType_Alloc; + memory_rec.free = &FreeType_Free; + memory_rec.realloc = &FreeType_Realloc; + + // https://www.freetype.org/freetype2/docs/reference/ft2-module_management.html#FT_New_Library + FT_Library ft_library; + FT_Error error = FT_New_Library(&memory_rec, &ft_library); + if (error != 0) + return false; + + // If you don't call FT_Add_Default_Modules() the rest of code may work, but FreeType won't use our custom allocator. 
+ FT_Add_Default_Modules(ft_library); + + bool ret = ImFontAtlasBuildWithFreeTypeEx(ft_library, atlas, atlas->FontBuilderFlags); + FT_Done_Library(ft_library); + + return ret; +} + +const ImFontBuilderIO* ImGuiFreeType::GetBuilderForFreeType() +{ + static ImFontBuilderIO io; + io.FontBuilder_Build = ImFontAtlasBuildWithFreeType; + return &io; +} + +void ImGuiFreeType::SetAllocatorFunctions(void* (*alloc_func)(size_t sz, void* user_data), void (*free_func)(void* ptr, void* user_data), void* user_data) +{ + GImGuiFreeTypeAllocFunc = alloc_func; + GImGuiFreeTypeFreeFunc = free_func; + GImGuiFreeTypeAllocatorUserData = user_data; +} diff --git a/cpp-projects/3d-engine/imgui/misc/freetype/imgui_freetype.h b/cpp-projects/3d-engine/imgui/misc/freetype/imgui_freetype.h new file mode 100644 index 0000000..2b953f2 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/misc/freetype/imgui_freetype.h @@ -0,0 +1,48 @@ +// dear imgui: FreeType font builder (used as a replacement for the stb_truetype builder) +// (headers) + +#pragma once + +#include "imgui.h" // IMGUI_API + +// Forward declarations +struct ImFontAtlas; +struct ImFontBuilderIO; + +// Hinting greatly impacts visuals (and glyph sizes). +// - By default, hinting is enabled and the font's native hinter is preferred over the auto-hinter. +// - When disabled, FreeType generates blurrier glyphs, more or less matches the stb_truetype.h +// - The Default hinting mode usually looks good, but may distort glyphs in an unusual way. +// - The Light hinting mode generates fuzzier glyphs but better matches Microsoft's rasterizer. +// You can set those flags globaly in ImFontAtlas::FontBuilderFlags +// You can set those flags on a per font basis in ImFontConfig::FontBuilderFlags +enum ImGuiFreeTypeBuilderFlags +{ + ImGuiFreeTypeBuilderFlags_NoHinting = 1 << 0, // Disable hinting. This generally generates 'blurrier' bitmap glyphs when the glyph are rendered in any of the anti-aliased modes. 
+ ImGuiFreeTypeBuilderFlags_NoAutoHint = 1 << 1, // Disable auto-hinter. + ImGuiFreeTypeBuilderFlags_ForceAutoHint = 1 << 2, // Indicates that the auto-hinter is preferred over the font's native hinter. + ImGuiFreeTypeBuilderFlags_LightHinting = 1 << 3, // A lighter hinting algorithm for gray-level modes. Many generated glyphs are fuzzier but better resemble their original shape. This is achieved by snapping glyphs to the pixel grid only vertically (Y-axis), as is done by Microsoft's ClearType and Adobe's proprietary font renderer. This preserves inter-glyph spacing in horizontal text. + ImGuiFreeTypeBuilderFlags_MonoHinting = 1 << 4, // Strong hinting algorithm that should only be used for monochrome output. + ImGuiFreeTypeBuilderFlags_Bold = 1 << 5, // Styling: Should we artificially embolden the font? + ImGuiFreeTypeBuilderFlags_Oblique = 1 << 6, // Styling: Should we slant the font, emulating italic style? + ImGuiFreeTypeBuilderFlags_Monochrome = 1 << 7 // Disable anti-aliasing. Combine this with MonoHinting for best results! +}; + +namespace ImGuiFreeType +{ + // This is automatically assigned when using '#define IMGUI_ENABLE_FREETYPE'. + // If you need to dynamically select between multiple builders: + // - you can manually assign this builder with 'atlas->FontBuilderIO = ImGuiFreeType::GetBuilderForFreeType()' + // - prefer deep-copying this into your own ImFontBuilderIO instance if you use hot-reloading that messes up static data. + IMGUI_API const ImFontBuilderIO* GetBuilderForFreeType(); + + // Override allocators. By default ImGuiFreeType will use IM_ALLOC()/IM_FREE() + // However, as FreeType does lots of allocations we provide a way for the user to redirect it to a separate memory heap if desired. 
+ IMGUI_API void SetAllocatorFunctions(void* (*alloc_func)(size_t sz, void* user_data), void (*free_func)(void* ptr, void* user_data), void* user_data = NULL); + + // Obsolete names (will be removed soon) + // Prefer using '#define IMGUI_ENABLE_FREETYPE' +#ifndef IMGUI_DISABLE_OBSOLETE_FUNCTIONS + bool BuildFontAtlas(ImFontAtlas* atlas, unsigned int flags = 0) { atlas->FontBuilderIO = GetBuilderForFreeType(); atlas->FontBuilderFlags = flags; return atlas->Build(); } +#endif +} diff --git a/cpp-projects/3d-engine/imgui/misc/natvis/README.txt b/cpp-projects/3d-engine/imgui/misc/natvis/README.txt new file mode 100644 index 0000000..1219db4 --- /dev/null +++ b/cpp-projects/3d-engine/imgui/misc/natvis/README.txt @@ -0,0 +1,4 @@ + +Natvis file to describe dear imgui types in the Visual Studio debugger. +With this, types like ImVector<> will be displayed nicely in the debugger. +You can include this file a Visual Studio project file, or install it in Visual Studio folder. diff --git a/cpp-projects/3d-engine/imgui/misc/natvis/imgui.natvis b/cpp-projects/3d-engine/imgui/misc/natvis/imgui.natvis new file mode 100644 index 0000000..25d72fb --- /dev/null +++ b/cpp-projects/3d-engine/imgui/misc/natvis/imgui.natvis @@ -0,0 +1,49 @@ + + + + + + + + {{Size={Size} Capacity={Capacity}}} + + + Size + Data + + + + + + {{Size={DataEnd-Data} }} + + + DataEnd-Data + Data + + + + + + {{x={x,g} y={y,g}}} + + + + {{x={x,g} y={y,g} z={z,g} w={w,g}}} + + + + {{Min=({Min.x,g} {Min.y,g}) Max=({Max.x,g} {Max.y,g}) Size=({Max.x-Min.x,g} {Max.y-Min.y,g})}} + + Min + Max + Max.x - Min.x + Max.y - Min.y + + + + + {{Name {Name,s} Active {(Active||WasActive)?1:0,d} Child {(Flags & 0x01000000)?1:0,d} Popup {(Flags & 0x04000000)?1:0,d} Hidden {(Hidden)?1:0,d}} + + + \ No newline at end of file diff --git a/cpp-projects/3d-engine/imgui/misc/single_file/imgui_single_file.h b/cpp-projects/3d-engine/imgui/misc/single_file/imgui_single_file.h new file mode 100644 index 0000000..6c1fb36 --- /dev/null +++ 
b/cpp-projects/3d-engine/imgui/misc/single_file/imgui_single_file.h @@ -0,0 +1,18 @@ +// dear imgui: single-file wrapper include +// We use this to validate compiling all *.cpp files in a same compilation unit. +// Users of that technique (also called "Unity builds") can generally provide this themselves, +// so we don't really recommend you use this in your projects. + +// Do this: +// #define IMGUI_IMPLEMENTATION +// Before you include this file in *one* C++ file to create the implementation. +// Using this in your project will leak the contents of imgui_internal.h and ImVec2 operators in this compilation unit. +#include "../../imgui.h" + +#ifdef IMGUI_IMPLEMENTATION +#include "../../imgui.cpp" +#include "../../imgui_demo.cpp" +#include "../../imgui_draw.cpp" +#include "../../imgui_tables.cpp" +#include "../../imgui_widgets.cpp" +#endif diff --git a/cpp-projects/3d-engine/resources/resources.cpp b/cpp-projects/3d-engine/resources/resources.cpp new file mode 100644 index 0000000..3fd9913 --- /dev/null +++ b/cpp-projects/3d-engine/resources/resources.cpp @@ -0,0 +1,30 @@ + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. 
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "resources.hpp" + +using namespace tool::ui; + diff --git a/cpp-projects/3d-engine/resources/resources.hpp b/cpp-projects/3d-engine/resources/resources.hpp new file mode 100644 index 0000000..115d805 --- /dev/null +++ b/cpp-projects/3d-engine/resources/resources.hpp @@ -0,0 +1,54 @@ + +/******************************************************************************* +** Toolbox-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
namespace tool::ui {

/// Placeholder for resources handling: no members yet, and its only constructor is private.
class Resources{
private:
    Resources(){}
};

/// Directory paths used by the application, computed once during static initialization.
class Paths{
public:

    Paths(){}

    /// Working directory at static initialization time, with '/' as separator.
    inline static const std::string exeDirPath{
        std::filesystem::current_path().generic_string()
    };

    /// Textures directory, expressed relatively to exeDirPath.
    inline static const std::string texturesDirPath{
        exeDirPath + "/../../../../cpp/resources/textures"
    };
};

}
+,*,/ on numerical values.\nUse +- to subtract."); +// ImGui::Unindent(); +// ImGui::BulletText("With keyboard navigation enabled:"); +// ImGui::Indent(); +// ImGui::BulletText("Arrow keys to navigate."); +// ImGui::BulletText("Space to activate a widget."); +// ImGui::BulletText("Return to input text into a widget."); +// ImGui::BulletText("Escape to deactivate a widget, close popup, exit child window."); +// ImGui::BulletText("Alt to jump to the menu layer of a window."); +// ImGui::BulletText("CTRL+Tab to select a window."); +// ImGui::Unindent(); + + // Examples Apps (accessible from the "Examples" menu) +// static bool show_app_documents = false; +// static bool show_app_main_menu_bar = false; +// static bool show_app_console = false; +// static bool show_app_log = false; +// static bool show_app_layout = false; +// static bool show_app_property_editor = false; +// static bool show_app_long_text = false; +// static bool show_app_auto_resize = false; +// static bool show_app_constrained_resize = false; +// static bool show_app_simple_overlay = false; +// static bool show_app_window_titles = false; +// static bool show_app_custom_rendering = false; + +// bool no_titlebar; +// if (ImGui::CollapsingHeader("Window options")){ +// ImGui::Checkbox("No titlebar", &no_titlebar); ImGui::SameLine(150); +// } diff --git a/cpp-projects/3d-engine/windows/base_sfml_gl_window.cpp b/cpp-projects/3d-engine/windows/base_sfml_gl_window.cpp new file mode 100644 index 0000000..5deef25 --- /dev/null +++ b/cpp-projects/3d-engine/windows/base_sfml_gl_window.cpp @@ -0,0 +1,396 @@ + +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without 
limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "base_sfml_gl_window.hpp" + +// std +#include +#include + +// sfml +#include "imgui-sfml/imgui-SFML.h" + +// imgui +#include "imgui/imgui.h" + +// base +#include "utility/benchmark.hpp" +#include "utility/logger.hpp" + +// opengl-utility +#include "opengl/utility/glew_utility.hpp" +#include "opengl/vao.hpp" + +using namespace tool::gl; +using namespace tool::geo; +using namespace tool::graphics; + +BaseSfmlGlWindow::BaseSfmlGlWindow(std::string_view title, graphics::Screen screen, std::optional context) ://, std::optional style) : + m_title(title), m_screen(screen), m_camera(&m_screen, {0,0,0}, {0,0,1}) { + + if(context.has_value()){ + glContext = context.value(); + }else{ + glContext.depthBits = 24; + glContext.stencilBits = 8; + glContext.antialiasingLevel = 4; + glContext.majorVersion = 4; + glContext.minorVersion = 6; + glContext.attributeFlags = sf::ContextSettings::Attribute::Debug; + } +} + +BaseSfmlGlWindow::~BaseSfmlGlWindow(){ + m_scene.close(); +} + +auto 
BaseSfmlGlWindow::init() -> bool{ + + // sfml + init_sfml_window(); + + // glew + if(!init_glew()){ + m_scene.close(); + return false; + } + display_glew_info(); + + // init imgui + m_scene.setActive(true); + + if(!initialize_gl()){ + return false; + } + + ImGui::SFML::Init(m_scene); + initialize_imgui(); + + resize_windows(); + + return true; +} + +auto BaseSfmlGlWindow::start() -> void{ + + if(!m_glInitialized){ + return; + } + + m_scene.setActive(); + + startL = std::chrono::high_resolution_clock::now(); + + running = true; + while(running){ + + const auto &io = ImGui::GetIO(); + imguiMouse = io.WantCaptureMouse; + imguiKeyboard = io.WantCaptureKeyboard; + + Bench::start("main_loop"); + + currentFrame = std::chrono::high_resolution_clock::now(); + + // retrieve sfml events + sf::Event event; + + while (m_scene.pollEvent(event)){ + + ImGui::SFML::ProcessEvent(event); + switch (event.type) { + case sf::Event::Closed: // The window requested to be closed + running = false; + break; + case sf::Event::Resized: // The window was resized + Logger::message("resize_event-"); + base_resize_windows(event.size); + break; + case sf::Event::KeyPressed: // A key was pressed + keyboard_keypress_event(event.key); + break; + case sf::Event::KeyReleased: // A key was released + keyboard_keyrelease_event(event.key); + break; + case sf::Event::MouseButtonPressed: // A mouse button was pressed + mouse_button_pressed_event(event.mouseButton); + break; + case sf::Event::MouseButtonReleased: // A mouse button was released + mouse_button_released_event(event.mouseButton); + break; + case sf::Event::MouseMoved: // The mouse cursor moved + mouse_moved_event(event.mouseMove); + break; + case sf::Event::MouseWheelScrolled: // The mouse wheel was scrolled + mouse_wheel_scroll_event(event.mouseWheelScroll); + break; + case sf::Event::MouseEntered: // The mouse cursor entered the area of the window + break; + case sf::Event::MouseLeft: // The mouse cursor left the area of the window + break; + 
// Main loop of the window: runs until a sf::Event::Closed event is received.
// Each iteration polls the SFML events, calls the update hooks, draws the OpenGL scene,
// then draws imgui/SFML on top and sleeps for the remaining frame time.
// Returns immediately when m_glInitialized is false.
// clean() and ImGui::SFML::Shutdown() are called once the loop has ended.
auto BaseSfmlGlWindow::start() -> void{

    if(!m_glInitialized){
        return;
    }

    m_scene.setActive();

    startL = std::chrono::high_resolution_clock::now();

    running = true;
    while(running){

        // remember if imgui wants the inputs, the event handlers below read these flags
        const auto &io = ImGui::GetIO();
        imguiMouse = io.WantCaptureMouse;
        imguiKeyboard = io.WantCaptureKeyboard;

        Bench::start("main_loop");

        currentFrame = std::chrono::high_resolution_clock::now();

        // retrieve sfml events
        sf::Event event;

        while (m_scene.pollEvent(event)){

            // every event is forwarded to imgui before being dispatched to the window handlers
            ImGui::SFML::ProcessEvent(event);
            switch (event.type) {
            case sf::Event::Closed: // The window requested to be closed
                running = false;
                break;
            case sf::Event::Resized: // The window was resized
                Logger::message("resize_event-");
                base_resize_windows(event.size);
                break;
            case sf::Event::KeyPressed: // A key was pressed
                keyboard_keypress_event(event.key);
                break;
            case sf::Event::KeyReleased: // A key was released
                keyboard_keyrelease_event(event.key);
                break;
            case sf::Event::MouseButtonPressed: // A mouse button was pressed
                mouse_button_pressed_event(event.mouseButton);
                break;
            case sf::Event::MouseButtonReleased: // A mouse button was released
                mouse_button_released_event(event.mouseButton);
                break;
            case sf::Event::MouseMoved: // The mouse cursor moved
                mouse_moved_event(event.mouseMove);
                break;
            case sf::Event::MouseWheelScrolled: // The mouse wheel was scrolled
                mouse_wheel_scroll_event(event.mouseWheelScroll);
                break;
            case sf::Event::MouseEntered: // The mouse cursor entered the area of the window
                break;
            case sf::Event::MouseLeft: // The mouse cursor left the area of the window
                break;
            case sf::Event::JoystickButtonPressed: // A joystick button was pressed
                break; // event.joystickButton
            case sf::Event::JoystickButtonReleased: // A joystick button was released
                break; // event.joystickButton
            case sf::Event::JoystickMoved: // The joystick moved along an axis
                break; // event.joystickMove
            case sf::Event::JoystickConnected: // A joystick was connected
                break; // event.joystickConnect
            case sf::Event::JoystickDisconnected: // A joystick was disconnected
                break; // event.joystickConnect
            default:
                break;
            }
            // Events not handled:
            // LostFocus, < The window lost the focus (no data)
            // GainedFocus, < The window gained the focus (no data)
            // TextEntered, < A character was entered (data in event.text)
            // TouchBegan, < A touch event began (data in event.touch)
            // TouchMoved, < A touch moved (data in event.touch)
            // TouchEnded, < A touch event ended (data in event.touch)
            // SensorChanged, < A sensor value changed (data in event.sensor)
        }

        // imgui captures the mouse: drop any mouse button state kept by the window
        if(imguiMouse){
            mouseLeftClickPressed = false;
            mouseRightClickPressed = false;
            mouseMiddleClickPressed = false;
        }
        // nothing to reset for the keyboard
        if(imguiKeyboard){

        }

        // update
        pre_update();
        pre_update_signal();
        update();
        update_signal();
        post_update();
        post_update_signal();

        m_scene.clear(sf::Color::White);

        // draw opengl
        draw_gl();
        draw_gl_signal();

        // unbind vao after drawing opengl
        VAO::unbind();

        // store gl states
        m_scene.pushGLStates();
        {
            // update sfml
            ImGui::SFML::Update(m_scene, deltaClock.restart());

            // imgui
            imguiMouse = false;
            draw_imgui();
            // the signal receives the current screen size
            draw_imgui_signal(geo::Pt2f{
                1.f*m_camera.screen()->width(),
                1.f*m_camera.screen()->height()
            });
            ImGui::EndFrame();

            // render sfml scene
            draw_sfml();
            draw_sfml_signal();
            ImGui::SFML::Render(m_scene);
            m_scene.display();
        }
        // restore gl states
        m_scene.popGLStates();


        // sleep for fps: only when the frame took less time than timePerFrame
        frameDuration = std::chrono::high_resolution_clock::now()-currentFrame;
        if((timePerFrame-frameDuration).count() > 0){
            std::this_thread::sleep_for(timePerFrame-frameDuration);
        }
        Bench::stop();
        // Bench::display();
    }

    clean();

    ImGui::SFML::Shutdown();
}
0){ + std::this_thread::sleep_for(timePerFrame-frameDuration); + } + Bench::stop(); + // Bench::display(); + } + + clean(); + + ImGui::SFML::Shutdown(); +} + + +auto BaseSfmlGlWindow::init_sfml_window() -> bool{ + + // close previously opened window + if(m_scene.isOpen()){ + m_scene.close(); + } + + // create window + m_scene.create(sf::VideoMode(m_screen.width(), m_screen.height()), m_title, sf::Style::Default, glContext); + m_scene.setFramerateLimit(framerate); + m_scene.setPosition(sf::Vector2i{m_screen.x_pos(),m_screen.y_pos()}); + + return true; +} + +auto BaseSfmlGlWindow::base_resize_windows(sf::Event::SizeEvent size) -> void{ + m_screen = graphics::Screen{size.width,size.height}; + m_camera.update_projection(); + glViewport(0, 0, static_cast(m_screen.width()), static_cast(m_screen.height())); + + resize_windows(); +} + +auto BaseSfmlGlWindow::mouse_button_pressed_event(sf::Event::MouseButtonEvent event) -> void{ + if(!imguiMouse){ + update_camera_with_mouse_button_event(event, true); + } +} + +auto BaseSfmlGlWindow::mouse_button_released_event(sf::Event::MouseButtonEvent event) -> void{ + if(!imguiMouse){ + update_camera_with_mouse_button_event(event, false); + } +} + +auto BaseSfmlGlWindow::mouse_moved_event(sf::Event::MouseMoveEvent event) -> void{ + if(!imguiMouse){ + update_camera_with_mouse_moved_event(event); + } +} + +auto BaseSfmlGlWindow::mouse_wheel_scroll_event(sf::Event::MouseWheelScrollEvent event) -> void{ + if(!imguiMouse){ + if(!mouseMiddleClickPressed){ + update_camera_with_mouse_scroll_event(event); + } + } +} + +auto BaseSfmlGlWindow::keyboard_keypress_event(sf::Event::KeyEvent event) -> void{ + + if(!imguiMouse && !imguiKeyboard){ + update_camera_with_keyboardpress_event(event); + } +} + +auto BaseSfmlGlWindow::update_camera_with_mouse_button_event(sf::Event::MouseButtonEvent event, bool pressed) -> void{ + + if(pressed){ + switch (event.button) { + case sf::Mouse::Button::Left: + mouseLeftClickPressed = true; + break; + case 
sf::Mouse::Button::Right: + mouseRightClickPressed = true; + break; + case sf::Mouse::Button::Middle: + mouseMiddleClickPressed = true; + break; + default: + break; + } + }else{ + + switch (event.button) { + case sf::Mouse::Button::Left: + mouseLeftClickPressed = false; + break; + case sf::Mouse::Button::Right: + mouseRightClickPressed = false; + break; + case sf::Mouse::Button::Middle: + mouseMiddleClickPressed = false; + break; + default: + break; + } + + lastX = -1; + lastY = -1; + } +} + +auto BaseSfmlGlWindow::update_camera_with_keyboardpress_event(sf::Event::KeyEvent event) -> void{ + + switch (event.code) { + case sf::Keyboard::Key::Up: + m_camera.move_front(cameraMovingSpeed); + break; + case sf::Keyboard::Key::Left: + m_camera.move_left(cameraMovingSpeed); + break; + case sf::Keyboard::Key::Right: + m_camera.move_right(cameraMovingSpeed); + break; + case sf::Keyboard::Key::Down: + m_camera.move_back(cameraMovingSpeed); + break; + case sf::Keyboard::Key::R: + m_camera.reset_init_values(); + m_camera.set_direction(0.,0.,0.); + break; + case sf::Keyboard::Key::O: + m_camera.set_mode(Camera::Mode::Orhtographic); + break; + case sf::Keyboard::Key::P: + m_camera.set_mode(Camera::Mode::Perspective); + break; + default: + break; + } +} + +auto BaseSfmlGlWindow::update_camera_with_mouse_scroll_event(sf::Event::MouseWheelScrollEvent event) -> void{ + + if(event.wheel == 0){ + m_camera.move_front(static_cast(event.delta) * cameraScrollSpeed); + } +} + +auto BaseSfmlGlWindow::update_camera_with_mouse_moved_event(sf::Event::MouseMoveEvent event) -> void{ + + if(lastX < 0.){ + lastX = event.x; + lastY = event.y; + } + + double xoffset = event.x - lastX; + double yoffset = event.y - lastY; + lastX = event.x; + lastY = event.y; + + double sensitivity = cameraRotationSpeed; + xoffset *= sensitivity; + yoffset *= sensitivity; + + if(mouseLeftClickPressed){ + m_camera.set_direction(xoffset,-yoffset,0.); + }else if(mouseMiddleClickPressed){ + m_camera.move_up(-0.1*yoffset); + 
m_camera.move_right(0.1*xoffset); + }else if(mouseRightClickPressed){ + m_camera.set_direction(0.,0.,xoffset); + } +} diff --git a/cpp-projects/3d-engine/windows/base_sfml_gl_window.hpp b/cpp-projects/3d-engine/windows/base_sfml_gl_window.hpp new file mode 100644 index 0000000..28dcf17 --- /dev/null +++ b/cpp-projects/3d-engine/windows/base_sfml_gl_window.hpp @@ -0,0 +1,171 @@ +/******************************************************************************* +** Toolset-3d-engine ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + + +// std +#include +#include + +// glew +#include + +// sfml +#include +#include +#include + +// base +#include "thirdparty/sigslot/signal.hpp" +#include "graphics/camera.hpp" +#include "graphics/screen.hpp" + + +namespace tool::graphics { + +class BaseSfmlGlWindow{ + +public: + + using TimePoint = std::chrono::time_point; + + BaseSfmlGlWindow( + std::string_view title, + graphics::Screen screen, + std::optional context + ); + virtual ~BaseSfmlGlWindow(); + + auto init() -> bool; + auto start() -> void; + + auto elapsed_secondes() const -> float{ + using namespace std::chrono; + return duration_cast(currentFrame-startL).count()*0.001f; + } + + auto quit() -> void{ + running = false; + } + +public: + + // signals + sigslot::signal<> pre_update_signal; + sigslot::signal<> update_signal; + sigslot::signal<> post_update_signal; + sigslot::signal<> draw_gl_signal; + sigslot::signal draw_imgui_signal; + sigslot::signal<> draw_sfml_signal; + +protected: + + // init + virtual auto initialize_gl() -> bool = 0; + virtual auto initialize_imgui() -> void{} + + // resize + virtual auto resize_windows() -> void{} + + // draw + virtual auto draw_gl() -> void{} + virtual auto draw_imgui() -> void{} + virtual auto draw_sfml() -> void{} + + // update + virtual auto pre_update() -> void{} + virtual auto update() -> void{} + virtual auto post_update() -> void{} + + // clean + virtual auto clean() -> void {}; + + // sfml events + // # mouse + virtual auto mouse_button_pressed_event(sf::Event::MouseButtonEvent event) -> void; + virtual auto mouse_button_released_event(sf::Event::MouseButtonEvent event) -> void; + virtual auto mouse_moved_event(sf::Event::MouseMoveEvent event) -> void; + virtual auto mouse_wheel_scroll_event(sf::Event::MouseWheelScrollEvent event) -> void; + // # key + virtual auto keyboard_keypress_event(sf::Event::KeyEvent event) -> void; + virtual auto 
keyboard_keyrelease_event(sf::Event::KeyEvent event) -> void {static_cast(event);} + + // camera + virtual auto update_camera_with_mouse_button_event(sf::Event::MouseButtonEvent event, bool pressed) -> void;; + virtual auto update_camera_with_keyboardpress_event(sf::Event::KeyEvent event) -> void;; + virtual auto update_camera_with_mouse_scroll_event(sf::Event::MouseWheelScrollEvent event) -> void;; + virtual auto update_camera_with_mouse_moved_event(sf::Event::MouseMoveEvent event) -> void;; + +private: + + auto init_sfml_window() -> bool; + auto base_resize_windows(sf::Event::SizeEvent size) -> void; + +protected: + + // loop + bool running = false; + + // opengl + bool m_glInitialized = false; + sf::RenderWindow m_scene; + sf::ContextSettings glContext; + + // window + std::string m_title = "Base SFML GL window"; + std::string m_imguiWindowTitle = "Default"; + graphics::Screen m_screen; + + // camera + double cameraMovingSpeed = 0.2; + double cameraScrollSpeed = 0.05; + double cameraRotationSpeed = 0.05; + graphics::Camera m_camera; + + // time + TimePoint startL; + TimePoint currentFrame; + int framerate = 60; + sf::Clock deltaClock; + + std::chrono::duration frameDuration; + std::chrono::duration timePerFrame{1000./framerate}; + + // inputs + // # mouse + bool mouseLeftClickPressed = false; + bool mouseRightClickPressed = false; + bool mouseMiddleClickPressed = false; + int lastX=-1, lastY=-1; + + bool imguiMouse = false; + bool imguiKeyboard = false; +}; + + + +} + diff --git a/cpp-projects/_build/bin/3d-engine/empty.txt b/cpp-projects/_build/bin/3d-engine/empty.txt new file mode 100644 index 0000000..e69de29 diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/1_grabber.cmd b/cpp-projects/_build/bin/k4-scaner-grabber/1_grabber.cmd new file mode 100644 index 0000000..9e0399c --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/1_grabber.cmd @@ -0,0 +1 @@ +start grabber0.cmd \ No newline at end of file diff --git 
a/cpp-projects/_build/bin/k4-scaner-grabber/2_grabbers.cmd b/cpp-projects/_build/bin/k4-scaner-grabber/2_grabbers.cmd new file mode 100644 index 0000000..3a9146f --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/2_grabbers.cmd @@ -0,0 +1,2 @@ +start grabber0.cmd +start grabber1.cmd \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/3_grabbers.cmd b/cpp-projects/_build/bin/k4-scaner-grabber/3_grabbers.cmd new file mode 100644 index 0000000..d94fd15 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/3_grabbers.cmd @@ -0,0 +1,3 @@ +start grabber0.cmd +start grabber1.cmd +start grabber2.cmd \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/4_grabbers .cmd b/cpp-projects/_build/bin/k4-scaner-grabber/4_grabbers .cmd new file mode 100644 index 0000000..61a85d2 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/4_grabbers .cmd @@ -0,0 +1,4 @@ +start grabber0.cmd +start grabber1.cmd +start grabber2.cmd +start grabber3.cmd \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/calibration/model_default.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/calibration/model_default.config new file mode 100644 index 0000000..dd33806 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/calibration/model_default.config @@ -0,0 +1,4 @@ +1 0 0 0 +0 1 0 0 +0 0 1 0 +0 0 0 1 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-HOME_G0.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-HOME_G0.config new file mode 100644 index 0000000..3bf6321 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-HOME_G0.config @@ -0,0 +1,2 @@ +udp_id_reading_interface 0 +udp_reading_port 8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-HOME_G1.config 
b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-HOME_G1.config new file mode 100644 index 0000000..8a99a8f --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-HOME_G1.config @@ -0,0 +1,2 @@ +udp_id_reading_interface 0 +udp_reading_port 8890 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-PC_G0.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-PC_G0.config new file mode 100644 index 0000000..5d5fb6e --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-PC_G0.config @@ -0,0 +1,2 @@ +udp_id_reading_interface 0 +udp_reading_port 8894 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-PC_G1.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-PC_G1.config new file mode 100644 index 0000000..8a99a8f --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-PC_G1.config @@ -0,0 +1,2 @@ +udp_id_reading_interface 0 +udp_reading_port 8890 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-PC_G2.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-PC_G2.config new file mode 100644 index 0000000..3bf6321 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-PC_G2.config @@ -0,0 +1,2 @@ +udp_id_reading_interface 0 +udp_reading_port 8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-PC_G3.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-PC_G3.config new file mode 100644 index 0000000..40805cf --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_FLORIAN-PC_G3.config @@ -0,0 +1,2 @@ 
+udp_id_reading_interface 0 +udp_reading_port 8892 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-002_G1.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-002_G1.config new file mode 100644 index 0000000..3bf6321 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-002_G1.config @@ -0,0 +1,2 @@ +udp_id_reading_interface 0 +udp_reading_port 8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-080_G1.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-080_G1.config new file mode 100644 index 0000000..3bf6321 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-080_G1.config @@ -0,0 +1,2 @@ +udp_id_reading_interface 0 +udp_reading_port 8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-081_G1.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-081_G1.config new file mode 100644 index 0000000..3bf6321 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-081_G1.config @@ -0,0 +1,2 @@ +udp_id_reading_interface 0 +udp_reading_port 8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-082_G1.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-082_G1.config new file mode 100644 index 0000000..3bf6321 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-082_G1.config @@ -0,0 +1,2 @@ +udp_id_reading_interface 0 +udp_reading_port 8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-089_G1.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-089_G1.config new file mode 
100644 index 0000000..3bf6321 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-089_G1.config @@ -0,0 +1,2 @@ +udp_id_reading_interface 0 +udp_reading_port 8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-093_G0.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-093_G0.config new file mode 100644 index 0000000..3bf6321 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-093_G0.config @@ -0,0 +1,2 @@ +udp_id_reading_interface 0 +udp_reading_port 8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-093_G1.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-093_G1.config new file mode 100644 index 0000000..8a99a8f --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-093_G1.config @@ -0,0 +1,2 @@ +udp_id_reading_interface 0 +udp_reading_port 8890 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-106_G0.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-106_G0.config new file mode 100644 index 0000000..3bf6321 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-106_G0.config @@ -0,0 +1,2 @@ +udp_id_reading_interface 0 +udp_reading_port 8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-106_G1.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-106_G1.config new file mode 100644 index 0000000..8a99a8f --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_SV-10-106_G1.config @@ -0,0 +1,2 @@ +udp_id_reading_interface 0 +udp_reading_port 8890 \ No newline at end of file diff --git 
a/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_default.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_default.config new file mode 100644 index 0000000..3bf6321 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/network/network_default.config @@ -0,0 +1,2 @@ +udp_id_reading_interface 0 +udp_reading_port 8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/color_FLORIAN-PC_G0.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/color_FLORIAN-PC_G0.config new file mode 100644 index 0000000..ad9f645 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/color_FLORIAN-PC_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/color_FLORIAN-PC_G1.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/color_FLORIAN-PC_G1.config new file mode 100644 index 0000000..ad9f645 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/color_FLORIAN-PC_G1.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/color_FLORIAN-PC_G2.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/color_FLORIAN-PC_G2.config new file mode 100644 index 0000000..ad9f645 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/color_FLORIAN-PC_G2.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/color_FLORIAN-PC_G3.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/color_FLORIAN-PC_G3.config new file mode 100644 index 0000000..ad9f645 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/color_FLORIAN-PC_G3.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/color_default.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/color_default.config new file mode 100644 
index 0000000..ad9f645 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/color_default.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-HOME_G0.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-HOME_G0.config new file mode 100644 index 0000000..f525747 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-HOME_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-HOME_G1.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-HOME_G1.config new file mode 100644 index 0000000..d379b50 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-HOME_G1.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-PC_G0.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-PC_G0.config new file mode 100644 index 0000000..f025f2c Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-PC_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-PC_G1.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-PC_G1.config new file mode 100644 index 0000000..3467419 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-PC_G1.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-PC_G2.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-PC_G2.config new file mode 100644 index 0000000..a731cea Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-PC_G2.config differ diff --git 
a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-PC_G3.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-PC_G3.config new file mode 100644 index 0000000..8ea97e0 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_FLORIAN-PC_G3.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-002_G0.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-002_G0.config new file mode 100644 index 0000000..3f30fef Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-002_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-080_G0.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-080_G0.config new file mode 100644 index 0000000..321fbfd Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-080_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-081_G0.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-081_G0.config new file mode 100644 index 0000000..8f02c3d Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-081_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-082_G0.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-082_G0.config new file mode 100644 index 0000000..321fbfd Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-082_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-089_G0.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-089_G0.config new file mode 100644 index 0000000..321fbfd Binary files /dev/null and 
b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-089_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-093_G0.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-093_G0.config new file mode 100644 index 0000000..1304e37 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-093_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-093_G1.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-093_G1.config new file mode 100644 index 0000000..d5e9d13 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-093_G1.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-106_G0.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-106_G0.config new file mode 100644 index 0000000..aa754ba Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-106_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-106_G1.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-106_G1.config new file mode 100644 index 0000000..0bbd847 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_SV-10-106_G1.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_default.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_default.config new file mode 100644 index 0000000..4a764c5 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/device_default.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_FLORIAN-PC_G0.config 
b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_FLORIAN-PC_G0.config new file mode 100644 index 0000000..1db06bc Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_FLORIAN-PC_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_FLORIAN-PC_G1.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_FLORIAN-PC_G1.config new file mode 100644 index 0000000..99fbab4 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_FLORIAN-PC_G1.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_FLORIAN-PC_G2.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_FLORIAN-PC_G2.config new file mode 100644 index 0000000..4f99f94 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_FLORIAN-PC_G2.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_FLORIAN-PC_G3.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_FLORIAN-PC_G3.config new file mode 100644 index 0000000..51b2a5b Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_FLORIAN-PC_G3.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_SV-10-081_G0.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_SV-10-081_G0.config new file mode 100644 index 0000000..f54d84a Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_SV-10-081_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_default.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_default.config new file mode 100644 index 0000000..232bcb2 Binary files /dev/null and 
b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/filters_default.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/settings_default.config b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/settings_default.config new file mode 100644 index 0000000..735cb60 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/config/settings/settings_default.config @@ -0,0 +1,11 @@ +window: 0 512 64 424 +infra_threshold: 200 65000 5 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 8 10 -1 0.0 0.0 0.0 +compression: 80 +smoothing: 0 0 +neigbhours: 1 1 +erosion: 2 2 0 +fps: 30 +color_filter: 199 0 0 10 40 40 +flags: 0 0 0 0 0 0 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/grabber0.cmd b/cpp-projects/_build/bin/k4-scaner-grabber/grabber0.cmd new file mode 100644 index 0000000..c070927 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/grabber0.cmd @@ -0,0 +1 @@ +k4-scaner-grabber.exe -i2 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/grabber1.cmd b/cpp-projects/_build/bin/k4-scaner-grabber/grabber1.cmd new file mode 100644 index 0000000..9a6faa2 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/grabber1.cmd @@ -0,0 +1 @@ +k4-scaner-grabber.exe -i1 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/grabber2.cmd b/cpp-projects/_build/bin/k4-scaner-grabber/grabber2.cmd new file mode 100644 index 0000000..00b7521 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/grabber2.cmd @@ -0,0 +1 @@ +k4-scaner-grabber.exe -i3 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-grabber/grabber3.cmd b/cpp-projects/_build/bin/k4-scaner-grabber/grabber3.cmd new file mode 100644 index 0000000..4e85463 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/grabber3.cmd @@ -0,0 +1 @@ +k4-scaner-grabber.exe -i0 \ No newline at end of file diff --git 
// ============================================================================
// resources/shaders/cloud.fs
// Fragment stage of the point cloud: outputs the interpolated vertex colour.
// ============================================================================
#version 430 core

in vec4 vFragColorVs;
layout( location = 0 ) out vec4 FragColor;

void main(){
    FragColor = vFragColorVs;
}

// ============================================================================
// resources/shaders/cloud.vs
// Vertex stage of the point cloud: transforms the point, selects its colour
// and derives the point size from the distance term computed below.
// ============================================================================
#version 430 core

layout (location = 0) in vec3 aPos;
layout (location = 1) in vec3 aColor;
layout (location = 2) in vec3 aNorm;
out vec4 vFragColorVs;

// transforms
uniform mat4 model;
uniform mat4 view;
uniform mat4 projection;

// camera
uniform vec3 camera_position;

// cloud
uniform float size_pt = 10.f;

// color
uniform bool enable_unicolor = false;
uniform float factor_unicolor = 0.3;
uniform vec4 unicolor = vec4(1,0,0,1);

void main(){

    // position after the model and view transforms
    vec4 viewPos = view * model * vec4(aPos, 1.0);
    gl_Position = projection * viewPos;

    // vertex colour, optionally blended with the uniform colour
    vec4 vertexColor = vec4(aColor, 1.0);
    if(enable_unicolor){
        vFragColorVs = mix(unicolor, vertexColor, factor_unicolor);
    }else{
        vFragColorVs = vertexColor;
    }

    // point size shrinks with the square root of the distance term
    float dist = length(viewPos.xyz - camera_position.xyz);
    gl_PointSize = size_pt / sqrt(dist);
}

// ============================================================================
// resources/shaders/solid.fs
// Fragment stage for solid geometry: a single flat colour.
// ============================================================================
#version 400

uniform vec4 Color;

layout ( location = 0 ) out vec4 FragColor;

// color
uniform vec4 unicolor = vec4(1,0,0,1);
uniform bool enable_unicolor = true;

void main()
{
    // unicolor takes precedence over Color when enabled
    if(enable_unicolor){
        FragColor = unicolor;
    }else{
        FragColor = Color;
    }
}

// ============================================================================
// resources/shaders/solid.vs
// Vertex stage for solid geometry: model, view, then projection transform.
// ============================================================================
#version 400

layout (location = 0 ) in vec3 VertexPosition;

// model
uniform mat4 model;
uniform mat4 view;
uniform mat4 projection;

void main()
{
    gl_Position = projection * (view * model * vec4(VertexPosition, 1.0));
}

// ============================================================================
// resources/shaders/voxelcloud.fs
// Fragment stage of the voxel cloud: outputs the colour received in vFragColor.
// ============================================================================
#version 430 core

in vec4 vFragColor;
layout( location = 0 ) out vec4 FragColor;

void main(){
    FragColor = vFragColor;
}
a/cpp-projects/_build/bin/k4-scaner-grabber/resources/shaders/voxelcloud.gs b/cpp-projects/_build/bin/k4-scaner-grabber/resources/shaders/voxelcloud.gs new file mode 100644 index 0000000..dc1ede9 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/resources/shaders/voxelcloud.gs @@ -0,0 +1,76 @@ +#version 430 core + + +uniform float hSize = 0.5f; // Half the width of the quad + +in vec4 vFragColorVs[]; +in mat4 mvp[]; +out vec4 vFragColor; + +// model +uniform mat4 model; +uniform mat4 view; +uniform mat4 projection; + + +layout (points) in; +layout( triangle_strip, max_vertices = 14 ) out; + +void main(){ + + vFragColor = vFragColorVs[0]; + vec4 center = gl_in[0].gl_Position; + vec4 p0 = projection* (center + vec4(hSize,hSize,hSize,1)); // 1.0, 1.0, 1.0, + vec4 p1 = projection* (center + vec4(-hSize,hSize,hSize,1)); // 0.0f, 1.0, 1.0, + vec4 p2 = projection* (center + vec4(hSize,hSize,-hSize,1)); // 1.0, 1.0, 0.0f, + vec4 p3 = projection* (center + vec4(-hSize,hSize,-hSize,1)); // 0.0f, 1.0, 0.0f, + vec4 p4 = projection* (center + vec4(hSize,-hSize,hSize,1)); // 1.0, 0.0f, 1.0, + vec4 p5 = projection* (center + vec4(-hSize,-hSize,hSize,1)); // 0.0f, 0.0f, 1.0, + vec4 p6 = projection* (center + vec4(-hSize,-hSize,-hSize,1)); // 0.0f, 0.0f, 0.0f, + vec4 p7 = projection* (center + vec4(hSize,-hSize,-hSize,1)); // 1.0, 0.0f, 0.0f + + gl_Position = p3; + EmitVertex(); + + gl_Position = p2; + EmitVertex(); + + gl_Position = p6; + EmitVertex(); + + gl_Position = p7; + EmitVertex(); + + gl_Position = p4; + EmitVertex(); + + gl_Position = p2; + EmitVertex(); + + gl_Position = p0; + EmitVertex(); + + gl_Position = p3; + EmitVertex(); + + gl_Position = p1; + EmitVertex(); + + gl_Position = p6; + EmitVertex(); + + gl_Position = p5; + EmitVertex(); + + gl_Position = p4; + EmitVertex(); + + gl_Position = p1; + EmitVertex(); + + gl_Position = p0; + EmitVertex(); + + //EndPrimitive(); +} + diff --git 
a/cpp-projects/_build/bin/k4-scaner-grabber/resources/shaders/voxelcloud.vs b/cpp-projects/_build/bin/k4-scaner-grabber/resources/shaders/voxelcloud.vs new file mode 100644 index 0000000..3fd6aa5 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-grabber/resources/shaders/voxelcloud.vs @@ -0,0 +1,19 @@ +#version 430 core + +layout (location = 0) in vec3 aPos; +layout (location = 1) in vec3 aColor; +out vec4 vFragColorVs; + +// transforms +uniform mat4 model; +uniform mat4 view; + +// color +uniform bool enable_unicolor = false; +uniform float factor_unicolor = 0.3; +uniform vec4 unicolor = vec4(1,0,0,1); + +void main(){ + gl_Position = view * model * vec4(aPos, 1.0); + vFragColorVs = enable_unicolor ? mix(unicolor,vec4(aColor, 1.0), factor_unicolor) : vec4(aColor, 1.0); +} diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_G0.config b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_G0.config new file mode 100644 index 0000000..6cb5601 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_G0.config @@ -0,0 +1,4 @@ +-0.0094245905 0.9053064 0.42465463 0 +-0.9999556 -0.00853252 -0.0040023737 0 +0 -0.4246735 0.9053466 0 +-0.012 1.707 -0.159 1 diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_G1.config b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_G1.config new file mode 100644 index 0000000..5b952fb --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_G1.config @@ -0,0 +1,4 @@ +0.4400213 -0.8958263 0.062261377 0 +-0.23311403 -0.046995573 0.9713131 0 +-0.86720186 -0.44191253 -0.22950873 0 +1.2541279 1.7522621 1.4312743 1 diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_G2.config b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_G2.config new file mode 100644 index 
0000000..9d5d1a7 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_G2.config @@ -0,0 +1,4 @@ +-0.06169157 0.86876005 -0.4913758 0 +0.9945518 0.012059829 -0.10354265 0 +-0.084027834 -0.49508646 -0.86477095 0 +-0.02593118 1.7624222 2.467019 1 diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_G3.config b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_G3.config new file mode 100644 index 0000000..85ba9df --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_G3.config @@ -0,0 +1,4 @@ +-0.40106198 -0.91593266 -0.014720619 0 +0.20829101 -0.075532675 -0.975146 0 +0.89205617 -0.39416015 0.2210738 0 +-1.3028598 1.7032228 0.86120176 1 diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_G4.config b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_G4.config new file mode 100644 index 0000000..a25671e --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_G4.config @@ -0,0 +1,4 @@ +1 0 0 0 +0 1 0 0 +0 0 1 0 +0 0 0 1 diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_all.config b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_all.config new file mode 100644 index 0000000..563ca21 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/model_FLORIAN-PC_all.config @@ -0,0 +1,16 @@ +4%%%-0.0094245905 0.9053064 0.42465463 0 +-0.9999556 -0.00853252 -0.0040023737 0 +0 -0.4246735 0.9053466 0 +-0.012 1.707 -0.159 1 +%%%0.4400213 -0.8958263 0.062261377 0 +-0.23311403 -0.046995573 0.9713131 0 +-0.86720186 -0.44191253 -0.22950873 0 +1.2541279 1.7522621 1.4312743 1 +%%%-0.06169157 0.86876005 -0.4913758 0 +0.9945518 0.012059829 -0.10354265 0 +-0.084027834 -0.49508646 -0.86477095 0 +-0.02593118 1.7624222 2.467019 1 
+%%%-0.40106198 -0.91593266 -0.014720619 0 +0.20829101 -0.075532675 -0.975146 0 +0.89205617 -0.39416015 0.2210738 0 +-1.3028598 1.7032228 0.86120176 1 diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_G0.config b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_G0.config new file mode 100644 index 0000000..6f5b37e --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_G0.config @@ -0,0 +1,4 @@ +-0.007357105 0.9034052 0.4287248 0 +-0.9999367 -0.0029958903 -0.010846437 0 +-0.008514314 -0.42877746 0.90337 0 +0 1.683 0 1 diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_G1.config b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_G1.config new file mode 100644 index 0000000..fb53b9c --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_G1.config @@ -0,0 +1,4 @@ +-0.9992029 0.00029803743 0.03991872 0 +0.017078187 -0.90065104 0.43420738 0 +0.036082245 0.43454298 0.89992803 0 +0.33825615 1.0516182 0.74492097 1 diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_G2.config b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_G2.config new file mode 100644 index 0000000..a25671e --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_G2.config @@ -0,0 +1,4 @@ +1 0 0 0 +0 1 0 0 +0 0 1 0 +0 0 0 1 diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_G3.config b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_G3.config new file mode 100644 index 0000000..a25671e --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_G3.config @@ -0,0 +1,4 @@ +1 0 0 0 +0 1 0 0 +0 0 1 0 +0 0 0 1 diff --git 
a/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_G4.config b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_G4.config new file mode 100644 index 0000000..a25671e --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_G4.config @@ -0,0 +1,4 @@ +1 0 0 0 +0 1 0 0 +0 0 1 0 +0 0 0 1 diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_all.config b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_all.config new file mode 100644 index 0000000..3b79fff --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/calibration/new/model_FLORIAN-PC_all.config @@ -0,0 +1,16 @@ +4%%%-0.007357105 0.9034052 0.4287248 0 +-0.9999367 -0.0029958903 -0.010846437 0 +-0.008514314 -0.42877746 0.90337 0 +0 1.683 0 1 +%%%-0.9992029 0.00029803743 0.03991872 0 +0.017078187 -0.90065104 0.43420738 0 +0.036082245 0.43454298 0.89992803 0 +0.33825615 1.0516182 0.74492097 1 +%%%1 0 0 0 +0 1 0 0 +0 0 1 0 +0 0 0 1 +%%%1 0 0 0 +0 1 0 0 +0 0 1 0 +0 0 0 1 diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/network/network_FLORIAN-PC.config b/cpp-projects/_build/bin/k4-scaner-manager/config/network/network_FLORIAN-PC.config new file mode 100644 index 0000000..218dc11 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/network/network_FLORIAN-PC.config @@ -0,0 +1,4 @@ +0 8889 localhost 8888 +0 8891 localhost 8890 +0 8893 localhost 8892 +0 8895 localhost 8894 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/network/network_SV-10-090.config b/cpp-projects/_build/bin/k4-scaner-manager/config/network/network_SV-10-090.config new file mode 100644 index 0000000..d306c87 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/network/network_SV-10-090.config @@ -0,0 +1,5 @@ +0 8889 SV-10-081 8888 0 +1 8891 SV-10-080 8888 0 +2 8893 
SV-10-082 8888 0 +2 8895 SV-10-002 8888 0 +3 8897 SV-10-089 8888 0 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/network/network_SV-10-093.config b/cpp-projects/_build/bin/k4-scaner-manager/config/network/network_SV-10-093.config new file mode 100644 index 0000000..9287e72 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/network/network_SV-10-093.config @@ -0,0 +1,2 @@ +0 8889 localhost 8888 0 +0 8891 localhost 8890 1 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/network/network_SV-10-106.config b/cpp-projects/_build/bin/k4-scaner-manager/config/network/network_SV-10-106.config new file mode 100644 index 0000000..9287e72 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/network/network_SV-10-106.config @@ -0,0 +1,2 @@ +0 8889 localhost 8888 0 +0 8891 localhost 8890 1 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/network/network_default.config b/cpp-projects/_build/bin/k4-scaner-manager/config/network/network_default.config new file mode 100644 index 0000000..3342e7c --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/config/network/network_default.config @@ -0,0 +1,2 @@ +0 8889 localhost 8888 +0 8891 localhost 8890 \ No newline at end of file diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/calibration_filters_FLORIAN-PC_G0.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/calibration_filters_FLORIAN-PC_G0.config new file mode 100644 index 0000000..2a6185a Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/calibration_filters_FLORIAN-PC_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/calibration_filters_FLORIAN-PC_G1.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/calibration_filters_FLORIAN-PC_G1.config new file mode 100644 index 0000000..2a6185a Binary 
files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/calibration_filters_FLORIAN-PC_G1.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/calibration_filters_FLORIAN-PC_G2.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/calibration_filters_FLORIAN-PC_G2.config new file mode 100644 index 0000000..2a6185a Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/calibration_filters_FLORIAN-PC_G2.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/calibration_filters_FLORIAN-PC_G3.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/calibration_filters_FLORIAN-PC_G3.config new file mode 100644 index 0000000..fe9d829 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/calibration_filters_FLORIAN-PC_G3.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/calibration_filters_default.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/calibration_filters_default.config new file mode 100644 index 0000000..16bcda9 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/calibration_filters_default.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/color_FLORIAN-PC_G0.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/color_FLORIAN-PC_G0.config new file mode 100644 index 0000000..19d3b2d Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/color_FLORIAN-PC_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/color_FLORIAN-PC_G1.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/color_FLORIAN-PC_G1.config new file mode 100644 index 0000000..666169e Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/color_FLORIAN-PC_G1.config differ diff --git 
a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/color_FLORIAN-PC_G2.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/color_FLORIAN-PC_G2.config new file mode 100644 index 0000000..26577eb Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/color_FLORIAN-PC_G2.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/color_FLORIAN-PC_G3.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/color_FLORIAN-PC_G3.config new file mode 100644 index 0000000..9836fd3 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/color_FLORIAN-PC_G3.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/color_FLORIAN-PC_all.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/color_FLORIAN-PC_all.config new file mode 100644 index 0000000..d9f2288 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/color_FLORIAN-PC_all.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_G0.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_G0.config new file mode 100644 index 0000000..a731cea Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_G1.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_G1.config new file mode 100644 index 0000000..3467419 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_G1.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_G2.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_G2.config new file mode 100644 index 0000000..8ea97e0 Binary files 
/dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_G2.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_G3.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_G3.config new file mode 100644 index 0000000..f025f2c Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_G3.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_G4.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_G4.config new file mode 100644 index 0000000..f66cf8b Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_G4.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_all.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_all.config new file mode 100644 index 0000000..2391a36 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_FLORIAN-PC_all.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_default.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_default.config new file mode 100644 index 0000000..f66cf8b Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/device_default.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_FLORIAN-PC_G0.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_FLORIAN-PC_G0.config new file mode 100644 index 0000000..4f99f94 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_FLORIAN-PC_G0.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_FLORIAN-PC_G1.config 
b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_FLORIAN-PC_G1.config new file mode 100644 index 0000000..99fbab4 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_FLORIAN-PC_G1.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_FLORIAN-PC_G2.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_FLORIAN-PC_G2.config new file mode 100644 index 0000000..51b2a5b Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_FLORIAN-PC_G2.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_FLORIAN-PC_G3.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_FLORIAN-PC_G3.config new file mode 100644 index 0000000..1db06bc Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_FLORIAN-PC_G3.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_FLORIAN-PC_all.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_FLORIAN-PC_all.config new file mode 100644 index 0000000..1df3499 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_FLORIAN-PC_all.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_default.config b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_default.config new file mode 100644 index 0000000..e8f1014 Binary files /dev/null and b/cpp-projects/_build/bin/k4-scaner-manager/config/settings/filters_default.config differ diff --git a/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/cloud.fs b/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/cloud.fs new file mode 100644 index 0000000..e491ed3 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/cloud.fs @@ -0,0 +1,8 @@ +#version 430 core + +layout( 
location = 0 ) out vec4 FragColor; +in vec4 vFragColorVs; + +void main(){ + FragColor = vFragColorVs; +} diff --git a/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/cloud.vs b/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/cloud.vs new file mode 100644 index 0000000..e8253ee --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/cloud.vs @@ -0,0 +1,32 @@ +#version 430 core + +layout (location = 0) in vec3 aPos; +layout (location = 1) in vec3 aColor; +out vec4 vFragColorVs; + +// transforms +uniform mat4 model; +uniform mat4 view; +uniform mat4 projection; + +// camera +uniform vec3 camera_position; + +// cloud +uniform float size_pt = 10.f; + +// color +uniform bool enable_unicolor = false; +uniform float factor_unicolor = 0.3; +uniform vec4 unicolor = vec4(1,0,0,1); + +void main(){ + + vec4 p = view * model * vec4(aPos, 1.0); + gl_Position = projection*p; + + vFragColorVs = enable_unicolor ? mix(unicolor,vec4(aColor, 1.0), factor_unicolor) : vec4(aColor, 1.0); + + float l = sqrt(length(p.xyz-camera_position.xyz)); + gl_PointSize = size_pt/(l); +} diff --git a/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/solid.fs b/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/solid.fs new file mode 100644 index 0000000..322d521 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/solid.fs @@ -0,0 +1,15 @@ +#version 400 + +uniform vec4 Color; + +layout ( location = 0 ) out vec4 FragColor; + +// color +uniform vec4 unicolor = vec4(1,0,0,1); +uniform bool enable_unicolor = true; + + +void main() +{ + FragColor = enable_unicolor ? 
unicolor : Color; +} diff --git a/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/solid.vs b/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/solid.vs new file mode 100644 index 0000000..26e3a0d --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/solid.vs @@ -0,0 +1,13 @@ +#version 400 + +layout (location = 0 ) in vec3 VertexPosition; + +// model +uniform mat4 model; +uniform mat4 view; +uniform mat4 projection; + +void main() +{ + gl_Position = projection*view * model * vec4(VertexPosition, 1.0); +} diff --git a/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/voxelcloud.fs b/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/voxelcloud.fs new file mode 100644 index 0000000..bda3be0 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/voxelcloud.fs @@ -0,0 +1,8 @@ +#version 430 core + +layout( location = 0 ) out vec4 FragColor; +in vec4 vFragColor; + +void main(){ + FragColor = vFragColor; +} diff --git a/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/voxelcloud.gs b/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/voxelcloud.gs new file mode 100644 index 0000000..dc1ede9 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/voxelcloud.gs @@ -0,0 +1,76 @@ +#version 430 core + + +uniform float hSize = 0.5f; // Half the width of the quad + +in vec4 vFragColorVs[]; +in mat4 mvp[]; +out vec4 vFragColor; + +// model +uniform mat4 model; +uniform mat4 view; +uniform mat4 projection; + + +layout (points) in; +layout( triangle_strip, max_vertices = 14 ) out; + +void main(){ + + vFragColor = vFragColorVs[0]; + vec4 center = gl_in[0].gl_Position; + vec4 p0 = projection* (center + vec4(hSize,hSize,hSize,1)); // 1.0, 1.0, 1.0, + vec4 p1 = projection* (center + vec4(-hSize,hSize,hSize,1)); // 0.0f, 1.0, 1.0, + vec4 p2 = projection* (center + vec4(hSize,hSize,-hSize,1)); // 1.0, 1.0, 0.0f, + vec4 p3 = projection* (center + 
vec4(-hSize,hSize,-hSize,1)); // 0.0f, 1.0, 0.0f, + vec4 p4 = projection* (center + vec4(hSize,-hSize,hSize,1)); // 1.0, 0.0f, 1.0, + vec4 p5 = projection* (center + vec4(-hSize,-hSize,hSize,1)); // 0.0f, 0.0f, 1.0, + vec4 p6 = projection* (center + vec4(-hSize,-hSize,-hSize,1)); // 0.0f, 0.0f, 0.0f, + vec4 p7 = projection* (center + vec4(hSize,-hSize,-hSize,1)); // 1.0, 0.0f, 0.0f + + gl_Position = p3; + EmitVertex(); + + gl_Position = p2; + EmitVertex(); + + gl_Position = p6; + EmitVertex(); + + gl_Position = p7; + EmitVertex(); + + gl_Position = p4; + EmitVertex(); + + gl_Position = p2; + EmitVertex(); + + gl_Position = p0; + EmitVertex(); + + gl_Position = p3; + EmitVertex(); + + gl_Position = p1; + EmitVertex(); + + gl_Position = p6; + EmitVertex(); + + gl_Position = p5; + EmitVertex(); + + gl_Position = p4; + EmitVertex(); + + gl_Position = p1; + EmitVertex(); + + gl_Position = p0; + EmitVertex(); + + //EndPrimitive(); +} + diff --git a/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/voxelcloud.vs b/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/voxelcloud.vs new file mode 100644 index 0000000..3fd6aa5 --- /dev/null +++ b/cpp-projects/_build/bin/k4-scaner-manager/resources/shaders/voxelcloud.vs @@ -0,0 +1,19 @@ +#version 430 core + +layout (location = 0) in vec3 aPos; +layout (location = 1) in vec3 aColor; +out vec4 vFragColorVs; + +// transforms +uniform mat4 model; +uniform mat4 view; + +// color +uniform bool enable_unicolor = false; +uniform float factor_unicolor = 0.3; +uniform vec4 unicolor = vec4(1,0,0,1); + +void main(){ + gl_Position = view * model * vec4(aPos, 1.0); + vFragColorVs = enable_unicolor ? 
mix(unicolor,vec4(aColor, 1.0), factor_unicolor) : vec4(aColor, 1.0); +} diff --git a/cpp-projects/_build/bin/nodes/empty.txt b/cpp-projects/_build/bin/nodes/empty.txt new file mode 100644 index 0000000..e69de29 diff --git a/cpp-projects/_build/bin/scaner-grabber/config/network/network_debug.config b/cpp-projects/_build/bin/scaner-grabber/config/network/network_debug.config new file mode 100644 index 0000000..ecbc017 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-grabber/config/network/network_debug.config @@ -0,0 +1 @@ +8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/scaner-grabber/config/network/network_default.config b/cpp-projects/_build/bin/scaner-grabber/config/network/network_default.config new file mode 100644 index 0000000..ecbc017 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-grabber/config/network/network_default.config @@ -0,0 +1 @@ +8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-080.config b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-080.config new file mode 100644 index 0000000..0c2bd7a --- /dev/null +++ b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-080.config @@ -0,0 +1,11 @@ +window: 99 512 64 394 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 2.5 0 -1 0 1 0.1 +compression: 80 +smoothing: 31 0 +neigbhours: 5 1 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 40 40 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-081.config b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-081.config new file mode 100644 index 0000000..f45763e --- /dev/null +++ b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-081.config @@ -0,0 +1,11 @@ +window: 99 512 64 394 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 2.6 15 -1 0 1 0.1 +compression: 80 +smoothing: 31 
0 +neigbhours: 5 1 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 40 40 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-082.config b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-082.config new file mode 100644 index 0000000..9cc7c47 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-082.config @@ -0,0 +1,11 @@ +window: 101 430 64 394 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 3.1 8 -1 0 1 0.1 +compression: 80 +smoothing: 31 0 +neigbhours: 5 1 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 40 40 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-083.config b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-083.config new file mode 100644 index 0000000..fb048f5 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-083.config @@ -0,0 +1,11 @@ +window: 80 512 64 394 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 2.7 15 -1 0 1 0.1 +compression: 80 +smoothing: 31 0 +neigbhours: 5 2 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 40 40 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-086.config b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-086.config new file mode 100644 index 0000000..4601fd7 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-086.config @@ -0,0 +1,11 @@ +window: 66 512 64 406 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 2.8 15 -1 0 1 0.1 +compression: 80 +smoothing: 31 0 +neigbhours: 5 1 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 40 40 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-087.config 
b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-087.config new file mode 100644 index 0000000..15ddb31 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-087.config @@ -0,0 +1,11 @@ +window: 99 486 64 394 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 2.8 15 -1 0 1 0.1 +compression: 80 +smoothing: 31 0 +neigbhours: 5 1 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 40 40 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-088.config b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-088.config new file mode 100644 index 0000000..5bf0730 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-088.config @@ -0,0 +1,11 @@ +window: 100 512 64 394 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 2.8 15 -1 0 1 0.1 +compression: 80 +smoothing: 31 0 +neigbhours: 5 1 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 40 40 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-089.config b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-089.config new file mode 100644 index 0000000..a0c0cb6 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-089.config @@ -0,0 +1,11 @@ +window: 128 398 62 394 +infra_threshold: 0 65000 0 65000 +color_factor: 0.79 1 1 +depth_threshold: 0.3 3.1 15 -1 0 1 0.1 +compression: 80 +smoothing: 31 0 +neigbhours: 5 1 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 40 40 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-093.config b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-093.config new file mode 100644 index 0000000..3a394c4 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_SV-10-093.config 
@@ -0,0 +1,11 @@ +window: 3 512 33 394 +infra_threshold: 200 65000 5 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 2 10 -1 0.89 -1 -1 +compression: 80 +smoothing: 0 0 +neigbhours: 2 10 +erosion: 2 2 0 +fps: 30 +color_filter: 220 21 60 98 0 0 +flags: 1 0 0 0 0 0 diff --git a/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_default.config b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_default.config new file mode 100644 index 0000000..735cb60 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_default.config @@ -0,0 +1,11 @@ +window: 0 512 64 424 +infra_threshold: 200 65000 5 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 8 10 -1 0.0 0.0 0.0 +compression: 80 +smoothing: 0 0 +neigbhours: 1 1 +erosion: 2 2 0 +fps: 30 +color_filter: 199 0 0 10 40 40 +flags: 0 0 0 0 0 0 \ No newline at end of file diff --git a/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_green_filtering.config b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_green_filtering.config new file mode 100644 index 0000000..5339249 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_green_filtering.config @@ -0,0 +1,11 @@ +window: 3 512 33 394 +infra_threshold: 200 65000 5 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 8 10 0.23 0.0 0.0 0.0 +compression: 80 +smoothing: 0 0 +neigbhours: 1 1 +erosion: 2 2 0 +fps: 30 +color_filter: 56 203 63 68 40 40 +flags: 1 0 0 0 0 0 diff --git a/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_test1.config b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_test1.config new file mode 100644 index 0000000..1e21608 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-grabber/config/settings/settings_test1.config @@ -0,0 +1,11 @@ +window: 3 512 33 394 +infra_threshold: 200 65000 5 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 8 10 -1 0.0 0.0 0.0 +compression: 80 +smoothing: 0 0 +neigbhours: 1 1 +erosion: 2 2 0 
+fps: 30 +color_filter: 200 0 0 10 40 40 +flags: 1 0 0 0 0 0 diff --git a/cpp-projects/_build/bin/scaner-manager/config/calibration/--calibration_SV-10-090.config b/cpp-projects/_build/bin/scaner-manager/config/calibration/--calibration_SV-10-090.config new file mode 100644 index 0000000..26f1c3f --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/calibration/--calibration_SV-10-090.config @@ -0,0 +1,40 @@ +# camera 0 +1 0 0 0 +0 1 0 0 +0 0 1 0 +0 0 0 1 +# camera 1 +0.4144 0.4505 0.7905 0 +-0.3149 0.8858 -0.3402 0 +-0.8536 -0.1082 0.5094 0 +1.4527 -0.2506 1.2133 1 +# camera 2 +0.9992 0.0088 0.0393 0 +-0.0048 0.9939 -0.114 0 +-0.0402 0.1131 0.993 0 +0.0156 -0.6432 0.3654 1 +# camera 3 +0.3785 0.4784 0.7927 0 +-0.5654 0.7981 -0.2113 0 +-0.7332 -0.3679 0.5717 0 +1.4753 0.4603 0.8521 1 +# camera 4 +-0.0752 -0.4945 -0.8661 0 +0.4229 0.7704 -0.4766 0 +0.9024 -0.4021 0.1509 0 +-1.6774 0.3653 2.0594 1 +# camera 5 +-0.9877 -0.0968 -0.1234 0 +0.068 0.4419 -0.8947 0 +0.1415 -0.8917 -0.4294 0 +-0.0953 1.5783 2.7978 1 +# camera 6 +-0.0882 -0.4952 -0.8645 0 +0.6032 0.6641 -0.4419 0 +0.7927 -0.5598 0.2403 0 +-1.6914 0.9693 1.73 1 +# camera 7 +-0.989 -0.0739 -0.1285 0 +0.0554 0.6222 -0.7809 0 +0.1374 -0.7794 -0.6113 0 +-0.0789 0.9688 3.173 1 diff --git a/cpp-projects/_build/bin/scaner-manager/config/calibration/-calibration_SV-10-090.config b/cpp-projects/_build/bin/scaner-manager/config/calibration/-calibration_SV-10-090.config new file mode 100644 index 0000000..eb2f05f --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/calibration/-calibration_SV-10-090.config @@ -0,0 +1,40 @@ +# camera 0 +1 0 0 0 +0 1 0 0 +0 0 1 0 +0 0 0 1 +# camera 1 +0.3878 0.4424 0.8084 0 +-0.3101 0.8884 -0.3379 0 +-0.8678 -0.1199 0.4821 0 +1.4493 -0.2345 1.2341 1 +# camera 2 +0.9986 -0.0406 0.034 0 +0.0443 0.9904 -0.134 0 +-0.0284 0.1347 0.9907 0 +-0.014 -0.6674 0.3631 1 +# camera 3 +0.4162 0.446 0.7926 0 +-0.526 0.8299 -0.1902 0 +-0.7419 -0.3376 0.5792 0 +1.4616 0.4003 0.8191 
1 +# camera 4 +-0.0752 -0.4945 -0.8661 0 +0.4229 0.7704 -0.4766 0 +0.9024 -0.4021 0.1509 0 +-1.6774 0.3653 2.0594 1 +# camera 5 +-0.9877 -0.0968 -0.1234 0 +0.068 0.4419 -0.8947 0 +0.1415 -0.8917 -0.4294 0 +-0.0953 1.5783 2.7978 1 +# camera 6 +-0.1101 -0.4816 -0.8695 0 +0.6404 0.6344 -0.4328 0 +0.7598 -0.6039 0.2387 0 +-1.6509 1.0513 1.7245 1 +# camera 7 +-0.989 -0.0739 -0.1285 0 +0.0554 0.6222 -0.7809 0 +0.1374 -0.7794 -0.6113 0 +-0.0789 0.9688 3.173 1 diff --git a/cpp-projects/_build/bin/scaner-manager/config/calibration/calibration_FLORIAN-HOME.config b/cpp-projects/_build/bin/scaner-manager/config/calibration/calibration_FLORIAN-HOME.config new file mode 100644 index 0000000..17bc7cb --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/calibration/calibration_FLORIAN-HOME.config @@ -0,0 +1,5 @@ +# camera 0 +1 0 0 0 +0 1 0 0 +0 0 1 0 +0 0 0 1 diff --git a/cpp-projects/_build/bin/scaner-manager/config/calibration/calibration_SV-10-090.config b/cpp-projects/_build/bin/scaner-manager/config/calibration/calibration_SV-10-090.config new file mode 100644 index 0000000..c6cebca --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/calibration/calibration_SV-10-090.config @@ -0,0 +1,40 @@ +# camera 0 +0.9999 -0.0036 -0.0134 0 +0.0038 0.9999 0.0141 0 +0.0133 -0.0141 0.9998 0 +-0.0231 -0.0071 -0.0041 1 +# camera 1 +0.5128 0.3693 0.7759 0 +-0.2777 0.9246 -0.2584 0 +-0.8129 -0.0829 0.5767 0 +1.4815 -0.2526 1.215 1 +# camera 2 +0.9994 0.0347 -0.0006 0 +-0.0355 0.9922 -0.1232 0 +-0.0043 0.1225 0.9925 0 +-0.0182 -0.6829 0.3926 1 +# camera 3 +0.5008 0.4193 0.7565 0 +-0.5482 0.8314 -0.0974 0 +-0.6701 -0.3665 0.6471 0 +1.4717 0.4632 0.7991 1 +# camera 4 +-0.1189 -0.5002 -0.8579 0 +0.3847 0.7728 -0.5047 0 +0.9147 -0.3901 0.1003 0 +-1.6904 0.3319 2.1313 1 +# camera 5 +-0.9894 -0.0521 -0.1344 0 +0.097 0.4449 -0.8906 0 +0.1069 -0.8939 -0.4343 0 +-0.0281 1.5837 2.8427 1 +# camera 6 +-0.1341 -0.4931 -0.8598 0 +0.6046 0.6462 -0.4656 0 +0.7849 -0.5817 0.2114 0 
+-1.6781 0.9836 1.7827 1 +# camera 7 +-0.9899 -0.0688 -0.1238 0 +0.0505 0.6481 -0.7598 0 +0.132 -0.7584 -0.638 0 +-0.0519 0.914 3.2126 1 diff --git a/cpp-projects/_build/bin/scaner-manager/config/calibration/calibration_SV-10-093.config b/cpp-projects/_build/bin/scaner-manager/config/calibration/calibration_SV-10-093.config new file mode 100644 index 0000000..17bc7cb --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/calibration/calibration_SV-10-093.config @@ -0,0 +1,5 @@ +# camera 0 +1 0 0 0 +0 1 0 0 +0 0 1 0 +0 0 0 1 diff --git a/cpp-projects/_build/bin/scaner-manager/config/calibration/calibration_default.config b/cpp-projects/_build/bin/scaner-manager/config/calibration/calibration_default.config new file mode 100644 index 0000000..17bc7cb --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/calibration/calibration_default.config @@ -0,0 +1,5 @@ +# camera 0 +1 0 0 0 +0 1 0 0 +0 0 1 0 +0 0 0 1 diff --git a/cpp-projects/_build/bin/scaner-manager/config/calibration/old-calibration_SV-10-090.config b/cpp-projects/_build/bin/scaner-manager/config/calibration/old-calibration_SV-10-090.config new file mode 100644 index 0000000..5bd8059 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/calibration/old-calibration_SV-10-090.config @@ -0,0 +1,40 @@ +# camera 0 +1 0 0 0 +0 1 0 0 +0 0 1 0 +0 0 0 1 +# camera 1 +0.4144 0.4505 0.7905 0 +-0.3149 0.8858 -0.3402 0 +-0.8536 -0.1082 0.5094 0 +1.4527 -0.2506 1.2133 1 +# camera 2 +0.9992 0.0088 0.0393 0 +-0.0048 0.9939 -0.114 0 +-0.0402 0.1131 0.993 0 +0.0156 -0.6432 0.3654 1 +# camera 3 +0.3958 0.4624 0.794 0 +-0.5571 0.8084 -0.1929 0 +-0.7305 -0.3656 0.5767 0 +1.4838 0.4501 0.8467 1 +# camera 4 +-0.0752 -0.4945 -0.8661 0 +0.4229 0.7704 -0.4766 0 +0.9024 -0.4021 0.1509 0 +-1.6774 0.3653 2.0594 1 +# camera 5 +-0.9877 -0.0968 -0.1234 0 +0.068 0.4419 -0.8947 0 +0.1415 -0.8917 -0.4294 0 +-0.0953 1.5783 2.7978 1 +# camera 6 +-0.0882 -0.4952 -0.8645 0 +0.6032 0.6641 -0.4419 0 +0.7927 -0.5598 
0.2403 0 +-1.6914 0.9693 1.73 1 +# camera 7 +-0.989 -0.0739 -0.1285 0 +0.0554 0.6222 -0.7809 0 +0.1374 -0.7794 -0.6113 0 +-0.0789 0.9688 3.173 1 diff --git a/cpp-projects/_build/bin/scaner-manager/config/network/network_FLORIAN-HOME.config b/cpp-projects/_build/bin/scaner-manager/config/network/network_FLORIAN-HOME.config new file mode 100644 index 0000000..fb9546f --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/network/network_FLORIAN-HOME.config @@ -0,0 +1 @@ +FLORIAN-HOME 0 8888 8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/scaner-manager/config/network/network_SV-10-090.config b/cpp-projects/_build/bin/scaner-manager/config/network/network_SV-10-090.config new file mode 100644 index 0000000..297b123 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/network/network_SV-10-090.config @@ -0,0 +1,8 @@ +SV-10-088 0 8888 8888 +SV-10-089 0 8888 8888 +SV-10-087 1 8888 8888 +SV-10-002 1 8888 8888 +SV-10-083 2 8888 8888 +SV-10-082 2 8888 8888 +SV-10-081 3 8888 8888 +SV-10-080 3 8888 8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/scaner-manager/config/network/network_SV-10-093.config b/cpp-projects/_build/bin/scaner-manager/config/network/network_SV-10-093.config new file mode 100644 index 0000000..14c228c --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/network/network_SV-10-093.config @@ -0,0 +1 @@ +SV-10-093 0 8888 8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/scaner-manager/config/network/network_c8.config b/cpp-projects/_build/bin/scaner-manager/config/network/network_c8.config new file mode 100644 index 0000000..078aab3 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/network/network_c8.config @@ -0,0 +1,8 @@ +SV-10-088 0 8888 8888 +SV-10-089 0 8888 8888 +SV-10-087 1 8888 8888 +SV-10-086 1 8888 8888 +SV-10-083 2 8888 8888 +SV-10-082 2 8888 8888 +SV-10-081 3 8888 8888 +SV-10-080 3 8888 8888 \ No newline at end of file diff --git 
a/cpp-projects/_build/bin/scaner-manager/config/network/network_default.config b/cpp-projects/_build/bin/scaner-manager/config/network/network_default.config new file mode 100644 index 0000000..11ec4d0 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/network/network_default.config @@ -0,0 +1 @@ +localhost 0 8888 8888 \ No newline at end of file diff --git a/cpp-projects/_build/bin/scaner-manager/config/settings/settings_common.config b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_common.config new file mode 100644 index 0000000..30ad7fc --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_common.config @@ -0,0 +1,11 @@ +window: 0 512 64 394 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 3.5 8 -1 0 1 0.1 +compression: 80 +smoothing: 31 0 +neigbhours: 1 1 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 40 40 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-manager/config/settings/settings_common_calibration.config b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_common_calibration.config new file mode 100644 index 0000000..30ad7fc --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_common_calibration.config @@ -0,0 +1,11 @@ +window: 0 512 64 394 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 3.5 8 -1 0 1 0.1 +compression: 80 +smoothing: 31 0 +neigbhours: 1 1 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 40 40 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_0.config b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_0.config new file mode 100644 index 0000000..d53e9b5 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_0.config @@ -0,0 +1,11 @@ +window: 86 512 64 413 +infra_threshold: 0 65000 0 65000 
+color_factor: 1 1 1 +depth_threshold: 0.3 2.8 15 -1 0 -1 -1 +compression: 80 +smoothing: 0 0 +neigbhours: 5 4 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 0 0 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_1.config b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_1.config new file mode 100644 index 0000000..4c6f4f3 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_1.config @@ -0,0 +1,11 @@ +window: 0 398 60 381 +infra_threshold: 0 65000 0 65000 +color_factor: 0.79 1 1 +depth_threshold: 0.3 2.5 15 -1 0 -1 -1 +compression: 80 +smoothing: 0 0 +neigbhours: 5 3 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 0 0 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_2.config b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_2.config new file mode 100644 index 0000000..85c9616 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_2.config @@ -0,0 +1,11 @@ +window: 78 503 60 369 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 2.8 15 -1 0 -1 -1 +compression: 80 +smoothing: 0 0 +neigbhours: 5 3 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 0 0 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_3.config b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_3.config new file mode 100644 index 0000000..ef43905 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_3.config @@ -0,0 +1,11 @@ +window: 62 512 64 406 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 2.8 15 -1 0 -1 -1 +compression: 80 +smoothing: 0 0 +neigbhours: 5 2 +erosion: 1 
0 255 +fps: 30 +color_filter: 246 40 40 40 0 0 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_4.config b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_4.config new file mode 100644 index 0000000..c074156 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_4.config @@ -0,0 +1,11 @@ +window: 80 420 64 298 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 2.6 15 -1 0 -1 -1 +compression: 80 +smoothing: 0 0 +neigbhours: 5 2 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 0 0 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_5.config b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_5.config new file mode 100644 index 0000000..6475db6 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_5.config @@ -0,0 +1,11 @@ +window: 0 512 72 348 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 2.6 15 -1 0 -1 -1 +compression: 80 +smoothing: 0 0 +neigbhours: 5 2 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 0 0 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_6.config b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_6.config new file mode 100644 index 0000000..422f263 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_6.config @@ -0,0 +1,11 @@ +window: 97 447 64 328 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 2.6 15 -1 0 -1 -1 +compression: 80 +smoothing: 0 0 +neigbhours: 5 3 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 0 0 +flags: 0 0 0 0 1 0 diff --git 
a/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_7.config b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_7.config new file mode 100644 index 0000000..1f77564 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-090_7.config @@ -0,0 +1,11 @@ +window: 79 512 64 310 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 2.5 0 -1 0 -1 -1 +compression: 80 +smoothing: 0 0 +neigbhours: 5 1 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 0 0 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-093_0.config b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-093_0.config new file mode 100644 index 0000000..17fb808 --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_SV-10-093_0.config @@ -0,0 +1,11 @@ +window: 0 510 64 394 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 3.5 8 -1 0 1 0.1 +compression: 80 +smoothing: 31 0 +neigbhours: 1 1 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 40 40 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_default.config b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_default.config new file mode 100644 index 0000000..30ad7fc --- /dev/null +++ b/cpp-projects/_build/bin/scaner-manager/config/settings/settings_individual_default.config @@ -0,0 +1,11 @@ +window: 0 512 64 394 +infra_threshold: 0 65000 0 65000 +color_factor: 1 1 1 +depth_threshold: 0.3 3.5 8 -1 0 1 0.1 +compression: 80 +smoothing: 31 0 +neigbhours: 1 1 +erosion: 1 0 255 +fps: 30 +color_filter: 246 40 40 40 40 40 +flags: 0 0 0 0 1 0 diff --git a/cpp-projects/_build/bin/tool-test/empty.txt b/cpp-projects/_build/bin/tool-test/empty.txt new file mode 100644 index 0000000..e69de29 
diff --git a/cpp-projects/_resources/nodes/DefaultStyle.json b/cpp-projects/_resources/nodes/DefaultStyle.json new file mode 100644 index 0000000..8375b4a --- /dev/null +++ b/cpp-projects/_resources/nodes/DefaultStyle.json @@ -0,0 +1,42 @@ +{ + "FlowViewStyle": { + "BackgroundColor": [53, 53, 53], + "FineGridColor": [60, 60, 60], + "CoarseGridColor": [25, 25, 25] + }, + "NodeStyle": { + "NormalBoundaryColor": [255, 255, 255], + "SelectedBoundaryColor": [255, 165, 0], + "GradientColor0": "gray", + "GradientColor1": [80, 80, 80], + "GradientColor2": [64, 64, 64], + "GradientColor3": [58, 58, 58], + "ShadowColor": [20, 20, 20], + "FontColor" : "white", + "FontColorFaded" : "gray", + "ConnectionPointColor": [169, 169, 169], + "FilledConnectionPointColor": "cyan", + "ErrorColor": "red", + "WarningColor": [128, 128, 0], + + "PenWidth": 1.0, + "HoveredPenWidth": 1.5, + + "ConnectionPointDiameter": 8.0, + + "Opacity": 0.8 + }, + "ConnectionStyle": { + "ConstructionColor": "gray", + "NormalColor": "darkcyan", + "SelectedColor": [100, 100, 100], + "SelectedHaloColor": "orange", + "HoveredColor": "lightcyan", + + "LineWidth": 3.0, + "ConstructionLineWidth": 2.0, + "PointDiameter": 10.0, + + "UseDataDefinedColors": false + } +} diff --git a/cpp-projects/_resources/resources.qrc b/cpp-projects/_resources/resources.qrc new file mode 100644 index 0000000..ff0fd9a --- /dev/null +++ b/cpp-projects/_resources/resources.qrc @@ -0,0 +1,20 @@ + + + shaders/depth_camera_mesh.frag.glsl + shaders/depth_camera_mesh.vert.glsl + shaders/fragment.frag.glsl + shaders/lines.frag.glsl + shaders/lines.vert.glsl + shaders/mesh_lighting.frag.glsl + shaders/mesh_lighting.vert.glsl + shaders/mesh_texture.frag.glsl + shaders/mesh_texture.vert.glsl + shaders/vertex.vert.glsl + + + nodes/DefaultStyle.json + + + splash/exvr_splash.png + + diff --git a/cpp-projects/_resources/shaders/depth_camera_mesh.frag.glsl b/cpp-projects/_resources/shaders/depth_camera_mesh.frag.glsl new file mode 100644 index 
0000000..cc91623 --- /dev/null +++ b/cpp-projects/_resources/shaders/depth_camera_mesh.frag.glsl @@ -0,0 +1,9 @@ + +#version 450 core + +out vec4 FragColor; +in vec4 color; + +void main(){ /* Pass-through: write the incoming `color` input unchanged to FragColor. */ + FragColor = color;//vec4(1,0,0,1); +} diff --git a/cpp-projects/_resources/shaders/depth_camera_mesh.vert.glsl b/cpp-projects/_resources/shaders/depth_camera_mesh.vert.glsl new file mode 100644 index 0000000..a74b075 --- /dev/null +++ b/cpp-projects/_resources/shaders/depth_camera_mesh.vert.glsl @@ -0,0 +1,20 @@ + +#version 450 core +layout (location = 0) in vec3 aPos; // the position variable has attribute position 0 +layout (location = 1) in vec4 aColor; // the color variable has attribute position 1 + +// model +uniform mat4 model; +uniform mat4 view; +uniform mat4 projection; + +// color +uniform vec4 unicolor; +uniform bool enable_unicolor = false; +out vec4 color; + +void main(){ /* Position is projection * view * model * aPos; the output colour is `unicolor` when enable_unicolor is true, otherwise the per-vertex aColor. */ + gl_Position = projection * view * model * vec4(aPos, 1.0); + color = enable_unicolor ? unicolor : aColor; +} + diff --git a/cpp-projects/_resources/shaders/fragment.frag.glsl b/cpp-projects/_resources/shaders/fragment.frag.glsl new file mode 100644 index 0000000..3591909 --- /dev/null +++ b/cpp-projects/_resources/shaders/fragment.frag.glsl @@ -0,0 +1,11 @@ + + + +#version 450 core + +out vec4 FragColor; +in vec4 color; + +void main(){ /* Pass-through: write the incoming `color` input unchanged to FragColor. */ + FragColor = color; +} diff --git a/cpp-projects/_resources/shaders/lines.frag.glsl b/cpp-projects/_resources/shaders/lines.frag.glsl new file mode 100644 index 0000000..363f8f5 --- /dev/null +++ b/cpp-projects/_resources/shaders/lines.frag.glsl @@ -0,0 +1,9 @@ + +#version 450 core + +out vec4 FragColor; +in vec4 color; + +void main(){ /* Pass-through: write the incoming `color` input unchanged to FragColor. */ + FragColor = color; +} diff --git a/cpp-projects/_resources/shaders/lines.vert.glsl b/cpp-projects/_resources/shaders/lines.vert.glsl new file mode 100644 index 0000000..c43dedd --- /dev/null +++ b/cpp-projects/_resources/shaders/lines.vert.glsl @@ -0,0 +1,20 @@ +#version 450 core +layout (location = 0) in vec3 aPos; // the 
position variable has attribute position 0 +layout (location = 1) in vec4 aColor; // the color variable has attribute position 1 + +// model +uniform mat4 model; +uniform mat4 view; +uniform mat4 projection; + +// color +uniform vec4 unicolor; +uniform bool enable_unicolor = false; +out vec4 color; + +void main(){ /* Position is projection * view * model * aPos; the output colour is `unicolor` when enable_unicolor is true, otherwise the per-vertex aColor. */ + + gl_Position = projection*view * model * vec4(aPos, 1.0); + color = enable_unicolor ? unicolor : aColor; +} + diff --git a/cpp-projects/_resources/shaders/mesh_lighting.frag.glsl b/cpp-projects/_resources/shaders/mesh_lighting.frag.glsl new file mode 100644 index 0000000..21a1fc0 --- /dev/null +++ b/cpp-projects/_resources/shaders/mesh_lighting.frag.glsl @@ -0,0 +1,70 @@ + + +// Fragment shader: +// ================ +#version 330 core +out vec4 FragColor; + +in vec3 FragPos; +in vec3 Normal; +in vec3 LightPos; // extra in variable, since we need the light position in view space we calculate this in the vertex shader + +uniform vec3 lightColor; +uniform vec3 objectColor; /* NOTE(review): only referenced by the commented-out variant at the end of main(); unused by the live code. */ + + +struct Material { + vec3 ambient; + vec3 diffuse; + vec3 specular; + float shininess; +}; +uniform Material material; + + + + +void main() /* Output colour = ambient + diffuse + specular, each term scaled by lightColor and the matching `material` field; alpha is fixed at 1.0. */ +{ + // ambient + vec3 ambient = lightColor * material.ambient; + + // diffuse + vec3 norm = normalize(Normal); + vec3 lightDir = normalize(LightPos - FragPos); + float diff = max(dot(norm, lightDir), 0.0); + vec3 diffuse = lightColor * (diff * material.diffuse); + + // specular +// vec3 viewDir = normalize(viewPos - FragPos); + vec3 viewDir = normalize(-FragPos); /* the viewer is taken to sit at the origin of the space FragPos is expressed in */ + vec3 reflectDir = reflect(-lightDir, norm); + float spec = pow(max(dot(viewDir, reflectDir), 0.0), material.shininess); + vec3 specular = lightColor * (spec * material.specular); + + vec3 result = ambient + diffuse + specular; + FragColor = vec4(result, 1.0); + + + +// // ambient +// float ambientStrength = 0.1; +// vec3 ambient = ambientStrength * lightColor; + +// // diffuse +// vec3 norm = normalize(Normal); +// vec3 lightDir = normalize(LightPos - FragPos); +// float diff = 
max(dot(norm, lightDir), 0.0); +// vec3 diffuse = diff * lightColor; + +// // specular +// float specularStrength = 0.5; +// vec3 viewDir = normalize(-FragPos); // the viewer is always at (0,0,0) in view-space, so viewDir is (0,0,0) - Position => -Position +// vec3 reflectDir = reflect(-lightDir, norm); +// float spec = pow(max(dot(viewDir, reflectDir), 0.0), 32); +// vec3 specular = specularStrength * spec * lightColor; + +// vec3 result = (ambient + diffuse + specular) * objectColor; +// FragColor = vec4(result, 1.0); +} + diff --git a/cpp-projects/_resources/shaders/mesh_lighting.vert.glsl b/cpp-projects/_resources/shaders/mesh_lighting.vert.glsl new file mode 100644 index 0000000..4dff662 --- /dev/null +++ b/cpp-projects/_resources/shaders/mesh_lighting.vert.glsl @@ -0,0 +1,50 @@ + +//#version 330 core +//layout (location = 0) in vec3 aPos; +//layout (location = 1) in vec3 aNormal; + +//uniform mat4 model; +//uniform mat4 view; +//uniform mat4 projection; +//uniform vec3 lightPos; // we now define the uniform in the vertex shader and pass the 'view space' lightpos to the fragment shader. lightPos is currently in world space. + +//out vec3 Normal; +//out vec3 FragPos; +//out vec3 LightPos; + +//void main() +//{ +// gl_Position = projection * view * model * vec4(aPos, 1.0); +// FragPos = vec3(model * vec4(aPos, 1.0)); +// Normal = mat3(transpose(inverse(model))) * aNormal; +// LightPos = vec3(view * vec4(lightPos, 1.0)); // Transform world-space light position to view-space light position +//// Normal = aNormal; +//} + + + + + +// Vertex shader: +// ================ +#version 330 core +layout (location = 0) in vec3 aPos; +layout (location = 1) in vec3 aNormal; + +out vec3 FragPos; +out vec3 Normal; +out vec3 LightPos; + +uniform vec3 lightPos; // we now define the uniform in the vertex shader and pass the 'view space' lightpos to the fragment shader. lightPos is currently in world space. 
+ +uniform mat4 model; +uniform mat4 view; +uniform mat4 projection; + +void main() /* Emits FragPos, Normal and LightPos transformed by `view` (position via view * model, normal via the inverse-transpose of view * model, light via view alone). */ +{ + gl_Position = projection * view * model * vec4(aPos, 1.0); + FragPos = vec3(view * model * vec4(aPos, 1.0)); + Normal = mat3(transpose(inverse(view * model))) * aNormal; + LightPos = vec3(view * vec4(lightPos, 1.0)); // Transform world-space light position to view-space light position +} diff --git a/cpp-projects/_resources/shaders/mesh_texture.frag.glsl b/cpp-projects/_resources/shaders/mesh_texture.frag.glsl new file mode 100644 index 0000000..cdec432 --- /dev/null +++ b/cpp-projects/_resources/shaders/mesh_texture.frag.glsl @@ -0,0 +1,17 @@ +#version 330 core +out vec4 FragColor; + +in vec3 Normal; +in vec2 TexCoord; + +uniform sampler2D texture1; +//uniform sampler2D texture2; +//uniform sampler2D texture3; +//uniform sampler2D texture4; + +void main() /* Samples texture1 at TexCoord; the Normal input is only used by commented-out debug output. */ +{ + //FragColor = vec4(TexCoord,0,1); + //FragColor = vec4(Normal,1);//*texture(ourTexture, TexCoord); + FragColor = 10*texture(texture1, TexCoord); /* NOTE(review): every channel, alpha included, is scaled by 10 - confirm this gain is intentional and not a debugging leftover. */ +} diff --git a/cpp-projects/_resources/shaders/mesh_texture.vert.glsl b/cpp-projects/_resources/shaders/mesh_texture.vert.glsl new file mode 100644 index 0000000..e2f1892 --- /dev/null +++ b/cpp-projects/_resources/shaders/mesh_texture.vert.glsl @@ -0,0 +1,19 @@ +#version 330 core +layout (location = 0) in vec3 aPos; +layout (location = 1) in vec3 aNormal; +layout (location = 2) in vec2 aTexCoord; + +out vec3 Normal; +out vec2 TexCoord; + +// model +uniform mat4 model; +uniform mat4 view; +uniform mat4 projection; + +void main(){ /* Position is projection * view * model * aPos; the normal and texture coordinate are forwarded as-is, with no matrix applied to the normal. */ + + gl_Position = projection*view * model * vec4(aPos, 1.0); + Normal = aNormal; + TexCoord = vec2(aTexCoord.x, aTexCoord.y); +} diff --git a/cpp-projects/_resources/shaders/model/model.frag.glsl b/cpp-projects/_resources/shaders/model/model.frag.glsl new file mode 100644 index 0000000..dee5cca --- /dev/null +++ b/cpp-projects/_resources/shaders/model/model.frag.glsl @@ -0,0 +1,72 @@ +#version 430 core +out vec4 FragColor; + +in vec4 normal; +in vec2 
textCoord; + +//uniform bool texture_diffuse_enabled = false; +//uniform bool texture_shininess_enabled = false; + +uniform sampler2D texture_diffuse1; +//uniform sampler2D texture_shininess1; + +float near = 0.1; /* near/far are read only by LinearizeDepth() */ +float far = 100.0; + +float LinearizeDepth(float depth) /* Maps `depth` to NDC z (depth * 2 - 1) and then to a linear value using near/far; presumably expects a [0,1] depth such as gl_FragCoord.z - confirm. Only called from commented-out code in main(). */ +{ + float z = depth * 2.0 - 1.0; // back to NDC + return (2.0 * near * far) / (far + near - z * (far - near)); +} + +void main(){ /* Live behaviour: FragColor is the texel of texture_diffuse1 at textCoord; everything else in this body is commented out, empty or unreachable. */ + + //float depth = LinearizeDepth(gl_FragCoord.z) / far; + //FragColor = vec4(vec3(depth), 1.0); + //FragColor = vec4(vec3(gl_FragCoord.z), 1.0); + //return; + //if(gl_FragCoord.x < 200) + //FragColor = vec4(textCoord.x,textCoord.y,0,1); + //return; + //else if(gl_FragCoord.x < 400) + // FragColor = vec4() + // FragColor = vec4(normal.x,normal.y,0,1); + + + vec4 textureColour = texture(texture_diffuse1, textCoord); + + //vec4(texture(texture_diffuse1, textCoord).y,1,0,1); + // + //FragColor= vec4(1-textureColour.w,0,0,1); + //return; + + if(textureColour.a < 1.0) { /* NOTE(review): this branch is empty - all of its statements are commented out. */ + //FragColor= vec4(1,0,0,1); + //return; + //discard; + } + + //FragColor= vec4(textureColour.r,0,0,1); + FragColor= textureColour; + + return; /* NOTE(review): every statement after this return is unreachable. */ + //FragColor= vec4(textureColour.a,0,0,1); + + + + + return; + + //if(textureColour. 
== 1){ + FragColor = vec4(0,1,0,1); + //}else{ + FragColor = vec4(1,0,0,1); + //} + //FragColor = vec4(1-textureColour.a, 0,0, 1);//textureColour; + //if(texture_diffuse_enabled){ + + + //} + + //FragColor = vec4(1,0,0,0.2); +} diff --git a/cpp-projects/_resources/shaders/model/model.vert.glsl b/cpp-projects/_resources/shaders/model/model.vert.glsl new file mode 100644 index 0000000..9fadee0 --- /dev/null +++ b/cpp-projects/_resources/shaders/model/model.vert.glsl @@ -0,0 +1,21 @@ +#version 430 core + +// layout +layout (location = 0) in vec3 aPos; +layout (location = 1) in vec3 aNormal; +layout (location = 2) in vec2 aTextCoord; + +// uniform +uniform mat4 model; +uniform mat4 view; +uniform mat4 projection; + +// out +out vec4 normal; +out vec2 textCoord; + +void main(){ /* Position is projection * view * model * aPos; the normal is forwarded untransformed as vec4(aNormal, 1) and the texture coordinate is passed through. */ + gl_Position = projection*view * model * vec4(aPos, 1.0); + normal = vec4(aNormal.xyz, 1); + textCoord = aTextCoord; +} \ No newline at end of file diff --git a/cpp-projects/_resources/shaders/samples/ch2/1.fs b/cpp-projects/_resources/shaders/samples/ch2/1.fs new file mode 100644 index 0000000..9bd0a91 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch2/1.fs @@ -0,0 +1,18 @@ +#version 440 core + +in vec3 TexCoord; +layout (location = 0) out vec4 FragColor; + +layout (binding = 1) uniform BlobSettings { + vec4 InnerColor; + vec4 OuterColor; + float RadiusInner; + float RadiusOuter; +} Blob; + +void main() { /* Radial blend: the distance of TexCoord.xy from (0.5, 0.5) drives a smoothstep between RadiusInner and RadiusOuter that mixes InnerColor into OuterColor. */ + float dx = TexCoord.x - 0.5; + float dy = TexCoord.y - 0.5; + float dist = sqrt(dx * dx + dy * dy); + FragColor = mix( Blob.InnerColor, Blob.OuterColor,smoothstep( Blob.RadiusInner, Blob.RadiusOuter, dist )); +} diff --git a/cpp-projects/_resources/shaders/samples/ch2/1.vs b/cpp-projects/_resources/shaders/samples/ch2/1.vs new file mode 100644 index 0000000..a9dd7bc --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch2/1.vs @@ -0,0 +1,11 @@ +#version 440 core + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VectexTexCoord; + +out vec3 
TexCoord; + +void main(){ /* Forwards the texture coordinate and uses VertexPosition directly as gl_Position (no matrix is applied). */ + TexCoord = VectexTexCoord; /* NOTE(review): "Vectex" looks like a typo for "Vertex"; a rename must also update the attribute declaration. */ + gl_Position = vec4(VertexPosition, 1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch3/diffuse.fs b/cpp-projects/_resources/shaders/samples/ch3/diffuse.fs new file mode 100644 index 0000000..b02505f --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch3/diffuse.fs @@ -0,0 +1,9 @@ +#version 440 core + +in vec3 LightIntensity; +layout(location = 0) out vec4 FragColor; + +void main() { /* Outputs the incoming LightIntensity as an opaque colour (alpha 1.0). */ + FragColor = vec4(LightIntensity, 1.0); + //FragColor = vec4(1,0,0, 1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch3/diffuse.vs b/cpp-projects/_resources/shaders/samples/ch3/diffuse.vs new file mode 100644 index 0000000..d63054a --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch3/diffuse.vs @@ -0,0 +1,27 @@ + + +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; + +out vec3 LightIntensity; + +uniform vec4 LightPosition; // Light position in eye coords. 
+uniform vec3 Kd; // Diffuse reflectivity +uniform vec3 Ld; // Diffuse light intensity + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 MVP; + +void main() +{ + vec3 tnorm = normalize( NormalMatrix * VertexNormal); + vec4 eyeCoords = ModelViewMatrix * vec4(VertexPosition,1.0); + vec3 s = normalize(vec3(LightPosition - eyeCoords)); + + LightIntensity = Ld * Kd * max( dot( s, tnorm ), 0.0 ); + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch3/discard.fs b/cpp-projects/_resources/shaders/samples/ch3/discard.fs new file mode 100644 index 0000000..7a62d72 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch3/discard.fs @@ -0,0 +1,19 @@ +#version 440 + +in vec3 Color; +in vec2 TexCoord; +layout( location = 0 ) out vec4 FragColor; + +uniform float discardV = 0.2f; + +void main() { + + const float scale = 15.0; + bvec2 toDiscard = greaterThan(fract(TexCoord * scale), vec2(discardV,discardV)); + + if(all(toDiscard)){ + discard; + } + + FragColor = vec4(Color, 1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch3/discard.vs b/cpp-projects/_resources/shaders/samples/ch3/discard.vs new file mode 100644 index 0000000..017b6fd --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch3/discard.vs @@ -0,0 +1,63 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; + +out vec3 Color; +out vec2 TexCoord; + +layout (binding = 0) uniform LightInfo { + vec4 Position; // Light position in eye coords. 
+ vec3 La; // Ambient light intensity + vec3 Ld; // Diffuse light intensity + vec3 Ls; // Specular light intensity +} Light; + + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + + +vec3 phong_model( vec3 position, vec3 n ) { + vec3 ambient = Light.La * Material.Ka; + vec3 s = normalize( Light.Position.xyz - position ); + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = Light.Ld * Material.Kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 r = reflect( -s, n ); + spec = Light.Ls * Material.Ks *pow( max( dot(r,v), 0.0 ), Material.Shininess ); + } + + return ambient + diffuse + spec; +} + +void main(){ + + TexCoord = VertexTexCoord; + + vec3 tnorm = normalize( NormalMatrix * VertexNormal); + vec3 camCoords = (ModelViewMatrix * vec4(VertexPosition,1.0)).xyz; + vec3 v = normalize(-camCoords.xyz); + + float vDotN = dot(v, tnorm); + + if( vDotN >= 0 ) { + Color = phong_model(camCoords, tnorm); + } else { + Color = phong_model(camCoords, -tnorm); + } + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch3/flat.fs b/cpp-projects/_resources/shaders/samples/ch3/flat.fs new file mode 100644 index 0000000..995d48b --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch3/flat.fs @@ -0,0 +1,8 @@ +#version 440 + +flat in vec3 LightIntensity; +layout( location = 0 ) out vec4 FragColor; + +void main() { + FragColor = vec4(LightIntensity, 1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch3/flat.vs b/cpp-projects/_resources/shaders/samples/ch3/flat.vs new file mode 100644 index 0000000..032a958 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch3/flat.vs @@ -0,0 +1,57 @@ 
+#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; + +flat out vec3 LightIntensity; + +layout (binding = 0) uniform LightInfo { + vec4 Position; // Light position in eye coords. + vec3 La; // Ambient light intensity + vec3 Ld; // Diffuse light intensity + vec3 Ls; // Specular light intensity +} Light; + + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 MVP; + +void get_cam_space( out vec3 norm, out vec3 position ) { + norm = normalize( NormalMatrix * VertexNormal); + position = (ModelViewMatrix * vec4(VertexPosition,1.0)).xyz; +} + +vec3 phong_model( vec3 position, vec3 n ) { + vec3 ambient = Light.La * Material.Ka; + vec3 s = normalize( Light.Position.xyz - position ); + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = Light.Ld * Material.Kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 r = reflect( -s, n ); + spec = Light.Ls * Material.Ks *pow( max( dot(r,v), 0.0 ), Material.Shininess ); + } + + return ambient + diffuse + spec; +} + +void main() +{ + // Get the position and normal in camera space + vec3 camNorm, camPosition; + get_cam_space(camNorm, camPosition); + + // Evaluate the reflection model + LightIntensity = phong_model( camPosition, camNorm ); + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch3/phong.fs b/cpp-projects/_resources/shaders/samples/ch3/phong.fs new file mode 100644 index 0000000..91af0f4 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch3/phong.fs @@ -0,0 +1,8 @@ +#version 440 + +in vec3 LightIntensity; +layout( location = 0 ) out vec4 FragColor; + +void main() { + FragColor = vec4(LightIntensity, 1.0); +} diff --git 
a/cpp-projects/_resources/shaders/samples/ch3/phong.vs b/cpp-projects/_resources/shaders/samples/ch3/phong.vs new file mode 100644 index 0000000..5a0ec37 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch3/phong.vs @@ -0,0 +1,59 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; + +out vec3 LightIntensity; + +//layout (binding = 0) uniform LightInfo { +uniform struct LightInfo { + vec4 Position; // Light position in eye coords. + vec3 La; // Ambient light intensity + vec3 Ld; // Diffuse light intensity + vec3 Ls; // Specular light intensity +} Light; + + +//layout (binding = 1) uniform MaterialInfo { +uniform struct MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 MVP; + +void get_cam_space( out vec3 norm, out vec3 position ) { + norm = normalize( NormalMatrix * VertexNormal); + position = (ModelViewMatrix * vec4(VertexPosition,1.0)).xyz; +} + +vec3 phong_model( vec3 position, vec3 n ) { + vec3 ambient = Light.La * Material.Ka; + vec3 s = normalize( Light.Position.xyz - position ); + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = Light.Ld * Material.Kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 r = reflect( -s, n ); + spec = Light.Ls * Material.Ks *pow( max( dot(r,v), 0.0 ), Material.Shininess ); + } + + return ambient + diffuse + spec; +} + +void main() +{ + // Get the position and normal in camera space + vec3 camNorm, camPosition; + get_cam_space(camNorm, camPosition); + + // Evaluate the reflection model + LightIntensity = phong_model( camPosition, camNorm ); + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch3/twoside.fs 
b/cpp-projects/_resources/shaders/samples/ch3/twoside.fs new file mode 100644 index 0000000..25ac213 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch3/twoside.fs @@ -0,0 +1,8 @@ +#version 440 + +in vec3 Color; +layout( location = 0 ) out vec4 FragColor; + +void main() { + FragColor = vec4(Color, 1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch3/twoside.vs b/cpp-projects/_resources/shaders/samples/ch3/twoside.vs new file mode 100644 index 0000000..b665aa5 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch3/twoside.vs @@ -0,0 +1,59 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; + +out vec3 Color; + +layout (binding = 0) uniform LightInfo { + vec4 Position; // Light position in eye coords. + vec3 La; // Ambient light intensity + vec3 Ld; // Diffuse light intensity + vec3 Ls; // Specular light intensity +} Light; + + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 MVP; + + +vec3 phong_model( vec3 position, vec3 n ) { + vec3 ambient = Light.La * Material.Ka; + vec3 s = normalize( Light.Position.xyz - position ); + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = Light.Ld * Material.Kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 r = reflect( -s, n ); + spec = Light.Ls * Material.Ks *pow( max( dot(r,v), 0.0 ), Material.Shininess ); + } + + return ambient + diffuse + spec; +} + +void main() +{ + vec3 tnorm = normalize( NormalMatrix * VertexNormal); + vec3 camCoords = (ModelViewMatrix * vec4(VertexPosition,1.0)).xyz; + vec3 v = normalize(-camCoords.xyz); + + float vDotN = dot(v, tnorm); + + if( vDotN >= 0 ) { + Color = phong_model(camCoords, tnorm); + } else { + Color = 
phong_model(camCoords, -tnorm); + //Color = mix(vec4(Color, 1.0), vec4(1.0,0.0,0.0,1.0), 0.7); + } + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch4/blinn-phong.fs b/cpp-projects/_resources/shaders/samples/ch4/blinn-phong.fs new file mode 100644 index 0000000..9256c14 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch4/blinn-phong.fs @@ -0,0 +1,44 @@ +#version 440 + +in vec3 Position; +in vec3 Normal; + +uniform struct LightInfo{ + vec4 Position; // Light position in eye coords. + vec3 La; // Ambient light intensity + vec3 L; // Diffuse and Specular light intensity +} Light; + + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + + +layout( location = 0 ) out vec4 FragColor; + +vec3 blinnphong(vec3 position, vec3 n ) { + + vec3 ambient = Light.La * Material.Ka; + vec3 s = normalize(Light.Position.xyz - position); + float sDotN = max (dot(s,n), 0.0); + vec3 diffuse = Material.Kd * sDotN; + + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 h = normalize(v+s); + spec = Material.Ks * pow( max( dot(h,n), 0.0 ), Material.Shininess ); + } + + return ambient + Light.L * (diffuse + spec); +} + + +void main() { + + FragColor = vec4(blinnphong(Position, Normal ), 1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch4/blinn-phong.vs b/cpp-projects/_resources/shaders/samples/ch4/blinn-phong.vs new file mode 100644 index 0000000..93b2ed3 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch4/blinn-phong.vs @@ -0,0 +1,20 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; + +out vec3 Position; +out vec3 Normal; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void 
main(){ + Normal = normalize(NormalMatrix * VertexNormal); + Position = (ModelViewMatrix * vec4(VertexPosition, 1.0)).xyz; + gl_Position = MVP * vec4(VertexPosition, 1.0); +} + + diff --git a/cpp-projects/_resources/shaders/samples/ch4/cartoon.fs b/cpp-projects/_resources/shaders/samples/ch4/cartoon.fs new file mode 100644 index 0000000..8add5bb --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch4/cartoon.fs @@ -0,0 +1,38 @@ +#version 440 + +in vec3 Position; +in vec3 Normal; + +uniform struct LightInfo{ + vec4 Position; // Light position in eye coords. + vec3 La; // Ambient light intensity + vec3 L; // Diffuse and Specular light intensity +} Light; + + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + +const int levels = 5; +const float scaleFactor = 1.0 / levels; + +layout( location = 0 ) out vec4 FragColor; + +vec3 toonShade() { + vec3 n = normalize(Normal); + vec3 s = normalize(Light.Position.xyz - Position); + vec3 ambient = Light.La * Material.Ka; + float sDotN = max(dot(s,n), 0.0); + vec3 diffuse = Material.Kd * floor(sDotN * levels) * scaleFactor; + return ambient + Light.L * diffuse; +} + + +void main() { + + FragColor = vec4(toonShade(), 1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch4/cartoon.vs b/cpp-projects/_resources/shaders/samples/ch4/cartoon.vs new file mode 100644 index 0000000..93b2ed3 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch4/cartoon.vs @@ -0,0 +1,20 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; + +out vec3 Position; +out vec3 Normal; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main(){ + Normal = normalize(NormalMatrix * VertexNormal); + Position = (ModelViewMatrix * vec4(VertexPosition, 
1.0)).xyz; + gl_Position = MVP * vec4(VertexPosition, 1.0); +} + + diff --git a/cpp-projects/_resources/shaders/samples/ch4/pbr.fs b/cpp-projects/_resources/shaders/samples/ch4/pbr.fs new file mode 100644 index 0000000..6bd4b7b --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch4/pbr.fs @@ -0,0 +1,97 @@ +#version 440 + +in vec3 Position; +in vec3 Normal; +in vec2 TexCoords; + +layout( location = 0 ) out vec4 FragColor; + +struct LightInfo{ + vec4 Position; // Light position in cam coords + vec3 La; + vec3 Ld; + vec3 Ls; +}; + +struct MaterialInfo{ + vec4 Color; + float Rough; + float Metal; +}; + +layout (std140, binding = 0) uniform LightsInfos { + LightInfo lights[3]; +}; + +layout (std140, binding = 1) uniform MaterialsInfos { + MaterialInfo materials[10]; +}; + +uniform int id = 0; + +const float PI = 3.1415926535897932384626433832795; +float ggxDistribution( float nDotH ) { + float alpha2 = materials[id].Rough * materials[id].Rough * materials[id].Rough * materials[id].Rough; + float d = (nDotH * nDotH) * (alpha2 - 1) + 1; + return alpha2 / (PI * d * d); +} + +float geomSmith( float dotProd ) { + float k = (materials[id].Rough + 1.0) * (materials[id].Rough + 1.0) / 8.0; + float denom = dotProd * (1 - k) + k; + return 1.0 / denom; +} + +vec3 schlickFresnel( float lDotH ) { + vec3 f0 = vec3(0.04); +// if( materials[id].Metal > 0.5) { +// f0 = materials[id].Color.rgb; +// } + f0 = materials[id].Metal * materials[id].Color.rgb; + return f0 + (1 - f0) * pow(1.0 - lDotH, 5); +} + +vec3 microfacetModel( int lightIdx, vec3 position, vec3 n ) { + vec3 diffuseBrdf = vec3(0.0); // Metallic +// if( materials[id].Metal < 0.5) { +// diffuseBrdf = materials[id].Color.rgb; +// } + diffuseBrdf = (1 - materials[id].Metal) * materials[id].Color.rgb; + + vec3 l = vec3(0.0), + lightI = lights[lightIdx].La; + if( lights[lightIdx].Position.w == 0.0 ) { // Directional light + l = normalize(lights[lightIdx].Position.xyz); + } else { // Positional light + l = 
lights[lightIdx].Position.xyz - position; + float dist = length(l); + l = normalize(l); + lightI /= (dist * dist); + } + + vec3 v = normalize( -position ); + vec3 h = normalize( v + l ); + float nDotH = dot( n, h ); + float lDotH = dot( l, h ); + float nDotL = max( dot( n, l ), 0.0 ); + float nDotV = dot( n, v ); + vec3 specBrdf = 0.25 * ggxDistribution(nDotH) * schlickFresnel(lDotH) * geomSmith(nDotL) * geomSmith(nDotV); + + return (diffuseBrdf + PI * specBrdf) * lightI * nDotL; +} + +void main() { + + vec3 sum = vec3(0); + vec3 n = normalize(Normal); + for(int ii = 0; ii < 3; ++ii){ + sum += microfacetModel(ii, Position, n); + } + + // Gamma + sum = pow(sum, vec3(1.0/2.2)); + + FragColor = vec4(sum, 1.0); +} + + diff --git a/cpp-projects/_resources/shaders/samples/ch4/pbr.vs b/cpp-projects/_resources/shaders/samples/ch4/pbr.vs new file mode 100644 index 0000000..93b2ed3 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch4/pbr.vs @@ -0,0 +1,20 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; + +out vec3 Position; +out vec3 Normal; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main(){ + Normal = normalize(NormalMatrix * VertexNormal); + Position = (ModelViewMatrix * vec4(VertexPosition, 1.0)).xyz; + gl_Position = MVP * vec4(VertexPosition, 1.0); +} + + diff --git a/cpp-projects/_resources/shaders/samples/ch4/phong-directional-light.fs b/cpp-projects/_resources/shaders/samples/ch4/phong-directional-light.fs new file mode 100644 index 0000000..f76c538 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch4/phong-directional-light.fs @@ -0,0 +1,8 @@ +#version 440 + +in vec3 Color; +layout( location = 0 ) out vec4 FragColor; + +void main() { + FragColor = vec4(Color, 1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch4/phong-directional-light.vs 
b/cpp-projects/_resources/shaders/samples/ch4/phong-directional-light.vs new file mode 100644 index 0000000..238bc35 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch4/phong-directional-light.vs @@ -0,0 +1,65 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; + +out vec3 Color; + +uniform struct LightInfo{ + vec4 Position; // Light position in eye coords. + vec3 La; // Ambient light intensity + vec3 L; // Diffuse and Specular light intensity +} Light; + + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 MVP; + +void get_cam_space( out vec3 norm, out vec3 position ) { + norm = normalize( NormalMatrix * VertexNormal); + position = (ModelViewMatrix * vec4(VertexPosition,1.0)).xyz; +} + +vec3 phong_model(vec3 position, vec3 n ) { + + vec3 ambient = Light.La * Material.Ka; + + vec3 s; + if(Light.Position.w == 0.0){ + s = normalize(Light.Position.xyz); + }else{ + s = normalize( Light.Position.xyz - position ); + } + + + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = Material.Kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 r = reflect( -s, n ); + spec = Material.Ks *pow( max( dot(r,v), 0.0 ), Material.Shininess ); + } + + return ambient + Light.L * (diffuse + spec); +} + +void main() +{ + // Get the position and normal in camera space + vec3 camNorm, camPosition; + get_cam_space(camNorm, camPosition); + + // Evaluate the reflection model for directional light + Color = phong_model(camPosition, camNorm ); + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch4/phong-multi-lights.fs b/cpp-projects/_resources/shaders/samples/ch4/phong-multi-lights.fs new 
file mode 100644 index 0000000..f76c538 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch4/phong-multi-lights.fs @@ -0,0 +1,8 @@ +#version 440 + +in vec3 Color; +layout( location = 0 ) out vec4 FragColor; + +void main() { + FragColor = vec4(Color, 1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch4/phong-multi-lights.vs b/cpp-projects/_resources/shaders/samples/ch4/phong-multi-lights.vs new file mode 100644 index 0000000..54fb2ce --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch4/phong-multi-lights.vs @@ -0,0 +1,59 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; + +out vec3 Color; + +uniform struct LightInfo{ + vec4 Position; // Light position in eye coords. + vec3 La; // Ambient light intensity + vec3 L; // Diffuse and Specular light intensity +} lights[5]; + + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 MVP; + +void get_cam_space( out vec3 norm, out vec3 position ) { + norm = normalize( NormalMatrix * VertexNormal); + position = (ModelViewMatrix * vec4(VertexPosition,1.0)).xyz; +} + +vec3 phong_model(int light, vec3 position, vec3 n ) { + vec3 ambient = lights[light].La * Material.Ka; + vec3 s = normalize( lights[light].Position.xyz - position ); + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = Material.Kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 r = reflect( -s, n ); + spec = Material.Ks *pow( max( dot(r,v), 0.0 ), Material.Shininess ); + } + + return ambient + lights[light].L * (diffuse + spec); +} + +void main() +{ + // Get the position and normal in camera space + vec3 camNorm, camPosition; + get_cam_space(camNorm, camPosition); + + // Evaluate the 
reflection model for each light + Color = vec3(0.0); + for(int ii = 0; ii < 5; ++ii){ + Color += phong_model(ii, camPosition, camNorm ); + } + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch4/phong-per-fragment.fs b/cpp-projects/_resources/shaders/samples/ch4/phong-per-fragment.fs new file mode 100644 index 0000000..09588f3 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch4/phong-per-fragment.fs @@ -0,0 +1,51 @@ +#version 440 + +in vec3 Position; +in vec3 Normal; + +uniform struct LightInfo{ + vec4 Position; // Light position in eye coords. + vec3 La; // Ambient light intensity + vec3 L; // Diffuse and Specular light intensity +} Light; + + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + + +layout( location = 0 ) out vec4 FragColor; + +vec3 phong_model(vec3 position, vec3 n ) { + + vec3 ambient = Light.La * Material.Ka; + + vec3 s; + if(Light.Position.w == 0.0){ + s = normalize(Light.Position.xyz); + }else{ + s = normalize( Light.Position.xyz - position ); + } + + + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = Material.Kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 r = reflect( -s, n ); + spec = Material.Ks *pow( max( dot(r,v), 0.0 ), Material.Shininess ); + } + + return ambient + Light.L * (diffuse + spec); +} + + +void main() { + + FragColor = vec4(phong_model(Position, Normal ), 1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch4/phong-per-fragment.vs b/cpp-projects/_resources/shaders/samples/ch4/phong-per-fragment.vs new file mode 100644 index 0000000..93b2ed3 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch4/phong-per-fragment.vs @@ -0,0 +1,20 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in 
vec3 VertexNormal; + +out vec3 Position; +out vec3 Normal; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main(){ + Normal = normalize(NormalMatrix * VertexNormal); + Position = (ModelViewMatrix * vec4(VertexPosition, 1.0)).xyz; + gl_Position = MVP * vec4(VertexPosition, 1.0); +} + + diff --git a/cpp-projects/_resources/shaders/samples/ch5/diffuse-image-based-lighting.fs b/cpp-projects/_resources/shaders/samples/ch5/diffuse-image-based-lighting.fs new file mode 100644 index 0000000..be74059 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/diffuse-image-based-lighting.fs @@ -0,0 +1,45 @@ +#version 430 +const float PI = 3.14159265358979323846; + +in vec3 Position; +in vec3 Normal; // World coords. +in vec2 TexCoord; + +uniform vec3 CamPos; +uniform float gamma = 2.2; + +layout(binding=0) uniform samplerCube DiffLightTex; +layout(binding=1) uniform sampler2D ColorTex; + +//uniform struct MaterialInfo { +// vec3 Color; +//} Material; + +layout( location = 0 ) out vec4 FragColor; + + +vec3 schlickFresnel( float dotProd ) { + vec3 f0 = vec3(0.04); + return f0 + (1 - f0) * pow(1.0 - dotProd, 5); +} + +void main() { + + vec3 n = normalize(Normal); + vec3 v = normalize( CamPos - Position ); + + // Look up incoming light from diffuse cube map + vec3 light = texture(DiffLightTex, n).rgb; + vec3 color = texture(ColorTex, TexCoord).rgb; + + color = pow(color, vec3(gamma)); + + // Uncomment to add an Fresnel approximation + color = light * color * (1.0 - schlickFresnel(dot(n, v))); + color *= light; + + // Gamma + color = pow( color, vec3(1.0/gamma)); + + FragColor = vec4( color, 1 ); +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/diffuse-image-based-lighting.vs b/cpp-projects/_resources/shaders/samples/ch5/diffuse-image-based-lighting.vs new file mode 100644 index 0000000..48103fc --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/diffuse-image-based-lighting.vs @@ 
-0,0 +1,19 @@ +#version 430 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; + +out vec3 Position; // world coords +out vec3 Normal; // In world coords. +out vec2 TexCoord; + +uniform mat4 ModelMatrix; +uniform mat4 MVP; + +void main() { + TexCoord = VertexTexCoord; + Position = (ModelMatrix * vec4(VertexPosition,1)).xyz; + Normal = normalize( ModelMatrix * vec4(VertexNormal,0) ).xyz; + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/discard-pixels.fs b/cpp-projects/_resources/shaders/samples/ch5/discard-pixels.fs new file mode 100644 index 0000000..236f928 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/discard-pixels.fs @@ -0,0 +1,63 @@ +#version 440 + +in vec3 Position; +in vec3 Normal; +in vec2 TexCoord; + +layout (binding=0) uniform sampler2D BaseTex; +layout (binding=1) uniform sampler2D AlphaTex; + +uniform struct LightInfo{ + vec4 Position; // Light position in eye coords. 
+ vec3 La; // Ambient light intensity + vec3 L; // Diffuse and Specular light intensity +} Light; + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + + +layout( location = 0 ) out vec4 FragColor; +uniform float decay_factor; + +vec3 blinnphong(vec3 position, vec3 n ) { + + vec3 texColor = texture(BaseTex, TexCoord).rgb; + vec3 ambient = Light.La * Material.Ka * texColor; + + vec3 s = normalize(Light.Position.xyz - position); + float sDotN = max (dot(s,n), 0.0); + vec3 diffuse = Material.Kd * sDotN; + + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 h = normalize(v+s); + spec = Material.Ks * pow( max( dot(h,n), 0.0 ), Material.Shininess ); + } + + return ambient + Light.L * (diffuse + spec); +} + + +void main() { + + vec4 alphaMap = texture(AlphaTex, TexCoord); + if(alphaMap.a < decay_factor){ + discard; + }else{ + if(gl_FrontFacing){ + FragColor = vec4(blinnphong(Position, normalize(Normal) ), 1.0); + }else{ + FragColor = vec4(blinnphong(Position, normalize(-Normal) ), 1.0); + } + } + + + +// FragColor = vec4(normalize(Normal).rgb,1); +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/discard-pixels.vs b/cpp-projects/_resources/shaders/samples/ch5/discard-pixels.vs new file mode 100644 index 0000000..d952e8c --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/discard-pixels.vs @@ -0,0 +1,23 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; + +out vec3 Position; +out vec3 Normal; +out vec2 TexCoord; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main(){ + Normal = normalize(NormalMatrix * VertexNormal); + Position = (ModelViewMatrix * vec4(VertexPosition, 1.0)).xyz; 
+ TexCoord = VertexTexCoord; + gl_Position = MVP * vec4(VertexPosition, 1.0); +} + + diff --git a/cpp-projects/_resources/shaders/samples/ch5/normal-map.fs b/cpp-projects/_resources/shaders/samples/ch5/normal-map.fs new file mode 100644 index 0000000..32c7638 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/normal-map.fs @@ -0,0 +1,77 @@ +#version 430 + +in vec3 LightDir; +in vec2 TexCoord; +in vec3 ViewDir; + +//in vec4 Weights2; +//in vec3 PosL2; +in vec3 TCol; + + +layout(binding=0) uniform sampler2D ColorTex; +layout(binding=1) uniform sampler2D NormalMapTex; + +uniform struct LightInfo { + vec4 Position; // Light position in cam. coords. + vec3 L; // D,S intensity + vec3 La; // Amb intensity +} Light ; + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + + + +layout( location = 0 ) out vec4 FragColor; + +vec3 blinnPhong( vec3 n ) { + vec3 texColor = texture(ColorTex, TexCoord).rgb; + + vec3 ambient = Light.La * texColor; + vec3 s = normalize( LightDir ); + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = texColor * sDotN; + + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(ViewDir); + vec3 h = normalize( v + s ); + spec = Material.Ks * pow( max( dot(h,n), 0.0 ), Material.Shininess ); + } + return ambient + Light.L * (diffuse + spec); +} + + + +void main() { + // Lookup the normal from the normal map + // vec3 normal = 2.0 * texture( NormalMapTex, TexCoord ).xyz - 1.0; + // FragColor = vec4( blinnPhong(normal), 1.0 ); + + vec3 norm = texture(NormalMapTex, TexCoord).xyz; + norm.xy = 2.0 * norm.xy - 1.0; + FragColor = vec4( blinnPhong(norm), 1.0 ); +// FragColor = vec4( TCol, 1.0 ); + +// FragColor = vec4( Weights2.x, 0., 0., 1.0 ); +// FragColor = vec4( 0, Weights2.y, 0., 1.0 ); +// FragColor = vec4( 0, 0, Weights2.w, 1.0 ); + +// float total = 
Weights2.x+Weights2.y+Weights2.z+Weights2.w; +// if(total > 0.9){ +// FragColor = vec4(1 , 0, 0, 1.0 ); +// }else if(total > 0.5){ +// FragColor = vec4(0 , 1, 0, 1.0 ); +// }else { +// FragColor = vec4(0 , 0, 1, 1.0 ); +// } +// FragColor = vec4( 1, 0, 0, 1.0 ); +// FragColor = vec4( PosL2.xyz, 1.0 ); + + +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/normal-map.vs b/cpp-projects/_resources/shaders/samples/ch5/normal-map.vs new file mode 100644 index 0000000..e871a87 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/normal-map.vs @@ -0,0 +1,184 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; +layout (location = 3) in vec4 VertexTangent; +layout (location = 4) in ivec4 BoneIDs; +layout (location = 5) in vec4 Weights; + + +uniform struct LightInfo { + vec4 Position; // Light position in cam. coords. + vec3 L; // D,S intensity + vec3 La; // Amb intensity +} Light ; + + + +out vec3 LightDir; +out vec2 TexCoord; +out vec3 ViewDir; + +//out vec4 Weights2; +//out ivec4 Id2; +//out vec3 PosL2; + +out vec3 TCol; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +//uniform mat4 BonesM; + +//uniform mat4 BonesM1; +//uniform mat4 BonesM2; +//uniform mat4 BonesM3; +//uniform mat4 BonesM4; + +const int MAX_BONES = 100; +uniform mat4 BonesM[MAX_BONES]; + + + +void main(){ + + mat4 BoneTransform =BonesM[BoneIDs[0]] * Weights[0]; + BoneTransform += BonesM[BoneIDs[1]] * Weights[1]; + BoneTransform += BonesM[BoneIDs[2]] * Weights[2]; + BoneTransform += BonesM[BoneIDs[3]] * Weights[3]; + + +// // DEBUG 0 +// // Fox: 22 bones +// mat4 m0 = BonesM[21];// * Weights[0]; +// mat4 m1 = BonesM[BoneIDs[0]];// * Weights[0]; +// mat4 m2 = BonesM[BoneIDs[1]];// * Weights[1]; +// mat4 m3 = BonesM[BoneIDs[2]] * Weights[2]; +// mat4 m4 = BonesM[BoneIDs[3]] * Weights[3]; + +// int c1 = 0; +// for(int 
ii = 0; ii < 4; ++ii){ +// for(int jj = 0; jj < 4; ++jj){ +// if(m1[ii][jj] == 0){ +// ++c1; +// } +// } +// } + +// if(c1 == 16){ +// TCol = vec3(0,0,0); +// }else{ +// TCol = vec3(1,0,0); +// } + +// BoneTransform = m2; + + +// BoneTransform = mat4; + + // DEBUG 1 + if(Weights[0] == 0){ + TCol = vec3(1,0,0); + }else if(Weights[1] == 0){ + TCol = vec3(0,1,0); + }else if(Weights[2] == 0){ + TCol = vec3(0,0,1); + }else if(Weights[3] == 0){ + TCol = vec3(1,0,1); + }else{ + TCol = vec3(1,1,1); + } + + // DEBUG 2 +// if(BoneIDs[0] == 0 && Weights[0] == 0){ +// TCol = vec3(1,0,0); +// }else if(BoneIDs[1] == 0 && Weights[1] == 0){ +// TCol = vec3(0,1,0); +// }else if(BoneIDs[2] == 0 && Weights[2] == 0){ +// TCol = vec3(0,0,1); +// }else if(BoneIDs[3] == 0 && Weights[3] == 0){ +// TCol = vec3(1,0,1); +// }else{ +// TCol = vec3(1,1,1); +// } + + // DEBUG 3 +// TCol = vec3(Weights[0], Weights[1], Weights[2]); + + + +// BoneTransform = BonesM[BoneIDs[3]];//mat4(1.0); + +// BoneTransform = BonesM[1]; + +// if(BoneIDs[0] == 0){ + +// }else if(BoneIDs[0] == 1){ + +// } + +// float total = Weights[0]+Weights[1]+Weights[2]+Weights[3]; +// float total2 = BoneIDs[0]+BoneIDs[1]+BoneIDs[2]+BoneIDs[3]; +// PosL2 = vec3(1*BoneIDs[0]/total2, 1*BoneIDs[1]/total2, 1*BoneIDs[2]/total2); +// if(total > 0.01){ +// PosL2 = vec3(1,0,0); +// }else{ +// PosL2 = vec3(0,1,0); +// } + +// +// mat4 BoneTransform = BonesM[BoneIDs[0]] * Weights[0]; +// BoneTransform += BonesM[BoneIDs[1]] * Weights[1]; +// BoneTransform += BonesM[BoneIDs[2]] * Weights[2]; +// BoneTransform += BonesM[BoneIDs[3]] * Weights[3]; + +// mat4 BoneTransform = +// identity * Weights[0]; +// BoneTransform += identity * Weights[1]; +// BoneTransform += identity * Weights[2]; +// BoneTransform += identity * Weights[3]; + + +// mat4 BoneTransform = BonesM[BoneIDs[0]] * Weights[0]; +// BoneTransform += BonesM[BoneIDs[1]] * Weights[1]; +// BoneTransform += BonesM[BoneIDs[2]] * Weights[2]; +// BoneTransform += BonesM[BoneIDs[3]] * 
Weights[3]; + +// vec4 PosL = BoneTransform * vec4(Position, 1.0); + +// vec3 PosL = VertexPosition;//(BoneTransform * vec4(VertexPosition, 1.0)).xyz; +// vec3 NormalL = VertexNormal;//(BoneTransform * vec4(VertexNormal, 0.0)).xyz; +// vec4 TangentL = VertexTangent;//BoneTransform * VertexTangent; + + vec3 PosL = (BoneTransform * vec4(VertexPosition, 1.0)).xyz; + vec3 NormalL = (BoneTransform * vec4(VertexNormal, 0.0)).xyz; + vec4 TangentL = BoneTransform * VertexTangent; + + // transform normal and tangent to eye space + vec3 norm = normalize( NormalMatrix * NormalL ); + vec3 tang = normalize( NormalMatrix * vec3(TangentL) ); + + // compute the binormal + vec3 binormal = normalize( cross( norm, tang ) ) * TangentL.w; + + // matrix for transformation to tangent space + mat3 toObjectLocal = mat3( + tang.x, binormal.x, norm.x, + tang.y, binormal.y, norm.y, + tang.z, binormal.z, norm.z ) ; + + // transform light direction and view direction to tangent space + vec3 pos = vec3( ModelViewMatrix * vec4(PosL,1.0) ); + LightDir = toObjectLocal * (Light.Position.xyz - pos); + + ViewDir = toObjectLocal * normalize(-pos); + + TexCoord = VertexTexCoord; + + gl_Position = MVP * vec4(PosL,1.0); +} + + diff --git a/cpp-projects/_resources/shaders/samples/ch5/parallax-mapping.fs b/cpp-projects/_resources/shaders/samples/ch5/parallax-mapping.fs new file mode 100644 index 0000000..b0937e9 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/parallax-mapping.fs @@ -0,0 +1,60 @@ +#version 430 + +in vec3 LightDir; +in vec2 TexCoord; +in vec3 ViewDir; + +layout(binding=0) uniform sampler2D ColorTex; +layout(binding=1) uniform sampler2D NormalMapTex; +layout(binding=2) uniform sampler2D HeightMapTex; + +uniform struct LightInfo { + vec4 Position; // Light position in cam. coords. 
+ vec3 L; // D,S intensity + vec3 La; // Amb intensity +} Light ; + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + +layout( location = 0 ) out vec4 FragColor; + + +vec3 blinnPhong( ) { // Blinn-Phong shading using a parallax-shifted texture coordinate + vec3 v = normalize(ViewDir); + vec3 s = normalize( LightDir ); + + const float bumpFactor = 0.015; // scale applied to the height-driven texture offset + float height = 1 - texture(HeightMapTex, TexCoord).r; + vec2 delta = vec2(v.x, v.y) * height * bumpFactor / v.z; + vec2 tc = TexCoord.xy - delta; // shifted coordinate used for the normal and color lookups below +// tc = TexCoord.xy; // debug override: re-enabling this discards the parallax offset computed above + + vec3 n = texture(NormalMapTex, tc).xyz; + n.xy = 2.0 * n.xy - 1.0; + n = normalize(n); + + float sDotN = max( dot(s,n), 0.0 ); + + vec3 texColor = texture(ColorTex, tc).rgb; + vec3 ambient = Light.La * texColor; + vec3 diffuse = texColor * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 h = normalize( v + s ); + spec = Material.Ks * pow( max( dot(h,n), 0.0 ), Material.Shininess ); + } + return ambient + Light.L * (diffuse + spec); +} + +void main() { + vec3 c = blinnPhong(); +// c = pow(c, vec3(1.0/2.2)); + FragColor = vec4( c, 1.0 ); +// FragColor = vec4(texture(HeightMapTex, TexCoord )); +// FragColor = vec4(texture(ColorTex, TexCoord )); +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/parallax-mapping.vs b/cpp-projects/_resources/shaders/samples/ch5/parallax-mapping.vs new file mode 100644 index 0000000..aee1477 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/parallax-mapping.vs @@ -0,0 +1,41 @@ +#version 430 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; +layout (location = 3) in vec4 VertexTangent; + +uniform struct LightInfo { + vec4 Position; // Light position in cam. coords. 
+ vec3 L; // D,S intensity + vec3 La; // Amb intensity +} Light ; + +out vec3 LightDir; +out vec2 TexCoord; +out vec3 ViewDir; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 MVP; + +void main() +{ + // Transform normal and tangent to eye space + vec3 norm = normalize( NormalMatrix * VertexNormal ); + vec3 tang = normalize( NormalMatrix * VertexTangent.xyz ); + + // Compute the binormal + vec3 binormal = normalize( cross( norm, tang ) ); + + // Matrix for transformation to tangent space + mat3 toObjectLocal = transpose( mat3( tang, binormal, norm ) ); + + // Transform light direction and view direction to tangent space + vec3 pos = vec3( ModelViewMatrix * vec4(VertexPosition,1.0) ); + LightDir = normalize( toObjectLocal * (Light.Position.xyz - pos) ); + ViewDir = toObjectLocal * normalize(-pos); + TexCoord = VertexTexCoord; + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/projected-texture.fs b/cpp-projects/_resources/shaders/samples/ch5/projected-texture.fs new file mode 100644 index 0000000..91b0f04 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/projected-texture.fs @@ -0,0 +1,57 @@ +#version 430 + +in vec3 EyeNormal; // normal in eye coordinates +in vec4 EyePosition; // position in eye coordinates +in vec4 ProjTexCoord; + +layout(binding=0) uniform sampler2D ProjectorTex; + + +layout( location = 0 ) out vec4 FragColor; + +uniform struct LightInfo{ + vec4 Position; // Light position in eye coords. 
+ vec3 La; // Ambient light intensity + vec3 L; // Diffuse and Specular light intensity +} Light; + + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + +vec3 blinnphong(vec3 position, vec3 n ) { + + vec3 ambient = Light.La * Material.Ka; + vec3 s = normalize(Light.Position.xyz - position); + float sDotN = max (dot(s,n), 0.0); + vec3 diffuse = Material.Kd * sDotN; + + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 h = normalize(v+s); + spec = Material.Ks * pow( max( dot(h,n), 0.0 ), Material.Shininess ); + } + + return ambient + Light.L * (diffuse + spec); +} + +void main() { + + vec3 color = blinnphong(EyePosition.xyz, normalize(EyeNormal)); + + vec3 projTexColor = vec3(0.0); + if(ProjTexCoord.z > 0.0){ + projTexColor = textureProj(ProjectorTex, ProjTexCoord).rgb; + } + + FragColor = vec4(color + projTexColor * 0.5, 1); + //FragColor = ProjTexCoord;//vec4(color + projTexColor * 0.5, 1); +// FragColor = vec4(projTexColor, 1); + + +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/projected-texture.vs b/cpp-projects/_resources/shaders/samples/ch5/projected-texture.vs new file mode 100644 index 0000000..2ff3b46 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/projected-texture.vs @@ -0,0 +1,25 @@ +#version 430 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; + +out vec3 EyeNormal; // normal in eye coordinates +out vec4 EyePosition; // position in eye coordinates +out vec4 ProjTexCoord; + +uniform mat4 ProjectorMatrix; + +uniform mat4 ModelViewMatrix; +uniform mat4 ModelMatrix; +uniform mat3 NormalMatrix; +uniform mat4 MVP; + +void main(){ + + vec4 pos4 = vec4(VertexPosition, 1.0); + + EyeNormal = normalize(NormalMatrix * VertexNormal); + EyePosition = ModelViewMatrix * pos4; + ProjTexCoord = 
ProjectorMatrix * (ModelMatrix * pos4); + gl_Position = MVP * pos4; +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/reflect-cubemap.fs b/cpp-projects/_resources/shaders/samples/ch5/reflect-cubemap.fs new file mode 100644 index 0000000..27fdfc0 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/reflect-cubemap.fs @@ -0,0 +1,17 @@ +#version 430 + +in vec3 ReflectDir; // direction of the reflected ray + +layout(binding=0) uniform samplerCube CubeMapTex; + +uniform float ReflectFactor; // Amount of reflection +uniform vec4 MaterialColor; // Color of the object's "Tint" + +layout( location = 0 ) out vec4 FragColor; + + +void main() { + // sample the cube map along the reflected direction + vec4 CubeMapColor = texture(CubeMapTex, ReflectDir); + FragColor = mix(MaterialColor, CubeMapColor, ReflectFactor); +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/reflect-cubemap.vs b/cpp-projects/_resources/shaders/samples/ch5/reflect-cubemap.vs new file mode 100644 index 0000000..c605528 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/reflect-cubemap.vs @@ -0,0 +1,21 @@ +#version 430 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; + +out vec3 ReflectDir; // direction of the reflected ray + +uniform vec3 WorldCameraPosition; +uniform mat4 ModelMatrix; +uniform mat4 MVP; + +void main(){ + + // compute the reflected direction in world coords + vec3 worldPos = vec3(ModelMatrix*vec4(VertexPosition,1.0)); + vec3 worldNorm = normalize(vec3(ModelMatrix*vec4(VertexNormal, 0.0))); // reflect() needs a unit-length normal; ModelMatrix may carry scale + vec3 worldView = normalize(WorldCameraPosition - worldPos); + ReflectDir = reflect(-worldView, worldNorm); + gl_Position = MVP * vec4(VertexPosition, 1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/refract-cubemap.fs b/cpp-projects/_resources/shaders/samples/ch5/refract-cubemap.fs new file mode 100644 index 0000000..3beb633 --- /dev/null +++ 
b/cpp-projects/_resources/shaders/samples/ch5/refract-cubemap.fs @@ -0,0 +1,24 @@ +#version 430 + + +layout(binding=0) uniform samplerCube CubeMapTex; + +in vec3 ReflectDir; // direction of the reflected ray +in vec3 RefractDir; // transmitted direction + +struct MaterialInfo{ + float Eta; // ratio of indices of refraction + float ReflectionFactor; // percentage of reflected light +}; +uniform MaterialInfo Material; + +layout( location = 0 ) out vec4 FragColor; + + +void main() { + // sample the cube map along the reflected and refracted directions + vec4 reflectColor = texture(CubeMapTex, ReflectDir); + vec4 refractColor = texture(CubeMapTex, RefractDir); + + FragColor = mix(refractColor, reflectColor, Material.ReflectionFactor); // mix(x,y,a) = x*(1-a) + y*a, so the factor weights the reflected color +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/refract-cubemap.vs b/cpp-projects/_resources/shaders/samples/ch5/refract-cubemap.vs new file mode 100644 index 0000000..4f9077d --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/refract-cubemap.vs @@ -0,0 +1,31 @@ +#version 430 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; + + +out vec3 ReflectDir; // reflected direction +out vec3 RefractDir; // Transmitted direction + +struct MaterialInfo{ + float Eta; // ratio of indices of refraction + float ReflectionFactor; // percentage of reflected light +}; +uniform MaterialInfo Material; + +uniform vec3 WorldCameraPosition; +uniform mat4 ModelMatrix; +uniform mat4 MVP; + +void main(){ + + // compute the reflected and refracted directions in world coords + vec3 worldPos = vec3(ModelMatrix*vec4(VertexPosition,1.0)); + vec3 worldNorm = normalize(vec3(ModelMatrix*vec4(VertexNormal, 0.0))); // reflect()/refract() need a unit-length normal; ModelMatrix may carry scale + vec3 worldView = normalize(WorldCameraPosition - worldPos); + + ReflectDir = reflect(-worldView, worldNorm); + RefractDir = refract(-worldView, worldNorm, Material.Eta); + + gl_Position = MVP * vec4(VertexPosition, 1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/render-to-texture.fs 
b/cpp-projects/_resources/shaders/samples/ch5/render-to-texture.fs new file mode 100644 index 0000000..228aeef --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/render-to-texture.fs @@ -0,0 +1,46 @@ +#version 430 + +in vec3 Position; +in vec3 Normal; +in vec2 TexCoord; + +//uniform sampler2D RenderTex; + +layout (binding=0) uniform sampler2D RenderTex; + +struct LightInfo { + vec4 Position; // Light position in eye coords. + vec3 L; // D,S intensity + vec3 La; // Amb intensity +}; +uniform LightInfo Light; + +struct MaterialInfo { + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +}; +uniform MaterialInfo Material; + +layout( location = 0 ) out vec4 FragColor; + +vec3 blinnPhong( vec3 position, vec3 n ) { + + vec3 texColor = texture(RenderTex, TexCoord).rgb; + + vec3 ambient = Light.La * texColor; + vec3 s = normalize( Light.Position.xyz - position ); + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = texColor * sDotN; + + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 h = normalize( v + s ); + spec = Material.Ks * pow( max( dot(h,n), 0.0 ), Material.Shininess ); + } + return ambient + Light.L * (diffuse + spec); +} + +void main() { + FragColor = vec4(blinnPhong(Position,normalize(Normal)), 1.0 ); +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/render-to-texture.vs b/cpp-projects/_resources/shaders/samples/ch5/render-to-texture.vs new file mode 100644 index 0000000..9b350ba --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/render-to-texture.vs @@ -0,0 +1,23 @@ +#version 410 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; + +out vec3 Position; +out vec3 Normal; +out vec2 TexCoord; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +//uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main() +{ + TexCoord = VertexTexCoord; + Normal = 
normalize( NormalMatrix * VertexNormal); + Position = vec3( ModelViewMatrix * vec4(VertexPosition,1.0) ); + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/sampler-objects.fs b/cpp-projects/_resources/shaders/samples/ch5/sampler-objects.fs new file mode 100644 index 0000000..bb9608f --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/sampler-objects.fs @@ -0,0 +1,41 @@ +#version 430 + +in vec3 Position; +in vec3 Normal; +in vec2 TexCoord; + +layout(binding=0) uniform sampler2D Tex1; + +uniform struct LightInfo { + vec4 Position; // Light position in cam. coords. + vec3 L; // D,S intensity + vec3 La; // Amb intensity +} Light ; + +uniform struct MaterialInfo { + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material ; + +layout( location = 0 ) out vec4 FragColor; + +vec3 blinnPhong( vec3 position, vec3 n ) { + vec3 texColor = texture(Tex1, TexCoord).rgb; + + vec3 ambient = Light.La * texColor; + vec3 s = normalize( Light.Position.xyz - position ); + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = texColor * sDotN; + + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 h = normalize( v + s ); + spec = Material.Ks * pow( max( dot(h,n), 0.0 ), Material.Shininess ); + } + return ambient + Light.L * (diffuse + spec); +} + +void main() { + FragColor = vec4( blinnPhong( Position, normalize(Normal) ), 1.0 ); +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/sampler-objects.vs b/cpp-projects/_resources/shaders/samples/ch5/sampler-objects.vs new file mode 100644 index 0000000..7abbb06 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/sampler-objects.vs @@ -0,0 +1,23 @@ +#version 430 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; + +out vec3 Position; +out vec3 Normal; +out vec2 TexCoord; + +uniform mat4 
ModelViewMatrix; +uniform mat3 NormalMatrix; +//uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main() +{ + TexCoord = VertexTexCoord; + Normal = normalize( NormalMatrix * VertexNormal); + Position = vec3( ModelViewMatrix * vec4(VertexPosition,1.0) ); + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/scene-multi-textures.fs b/cpp-projects/_resources/shaders/samples/ch5/scene-multi-textures.fs new file mode 100644 index 0000000..439459e --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/scene-multi-textures.fs @@ -0,0 +1,53 @@ +#version 440 + +in vec3 Position; +in vec3 Normal; +in vec2 TexCoord; + +layout (binding=0) uniform sampler2D Tex1; +layout (binding=1) uniform sampler2D Tex2; + +uniform struct LightInfo{ + vec4 Position; // Light position in eye coords. + vec3 La; // Ambient light intensity + vec3 L; // Diffuse and Specular light intensity +} Light; + + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + + +layout( location = 0 ) out vec4 FragColor; + +vec3 blinnphong(vec3 position, vec3 n ) { + + vec4 texColor1 = texture(Tex1, TexCoord); + vec4 texColor2 = texture(Tex2, TexCoord); + vec3 mixTexColor = mix(texColor1.rgb,texColor2.rgb, texColor2.a); + + vec3 ambient = Light.La * Material.Ka * mixTexColor; + + vec3 s = normalize(Light.Position.xyz - position); + float sDotN = max (dot(s,n), 0.0); + vec3 diffuse = Material.Kd * sDotN; + + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 h = normalize(v+s); + spec = Material.Ks * pow( max( dot(h,n), 0.0 ), Material.Shininess ); + } + + return ambient + Light.L * (diffuse + spec); +} + + +void main() { + + FragColor = vec4(blinnphong(Position, normalize(Normal) ), 1.0); +} diff --git 
a/cpp-projects/_resources/shaders/samples/ch5/scene-multi-textures.vs b/cpp-projects/_resources/shaders/samples/ch5/scene-multi-textures.vs new file mode 100644 index 0000000..d952e8c --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/scene-multi-textures.vs @@ -0,0 +1,23 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; + +out vec3 Position; +out vec3 Normal; +out vec2 TexCoord; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main(){ + Normal = normalize(NormalMatrix * VertexNormal); + Position = (ModelViewMatrix * vec4(VertexPosition, 1.0)).xyz; + TexCoord = VertexTexCoord; + gl_Position = MVP * vec4(VertexPosition, 1.0); +} + + diff --git a/cpp-projects/_resources/shaders/samples/ch5/scene-texture.fs b/cpp-projects/_resources/shaders/samples/ch5/scene-texture.fs new file mode 100644 index 0000000..b111145 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/scene-texture.fs @@ -0,0 +1,50 @@ +#version 440 + +in vec3 Position; +in vec3 Normal; +in vec2 TexCoord; + +layout (binding=0) uniform sampler2D Tex1; + +uniform struct LightInfo{ + vec4 Position; // Light position in eye coords. 
+ vec3 La; // Ambient light intensity + vec3 L; // Diffuse and Specular light intensity +} Light; + + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + + +layout( location = 0 ) out vec4 FragColor; + +vec3 blinnphong(vec3 position, vec3 n ) { + + vec3 texColor = texture(Tex1, TexCoord).rgb; + vec3 ambient = Light.La * Material.Ka * texColor; + + vec3 s = normalize(Light.Position.xyz - position); + float sDotN = max (dot(s,n), 0.0); + vec3 diffuse = Material.Kd * sDotN; + + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 h = normalize(v+s); + spec = Material.Ks * pow( max( dot(h,n), 0.0 ), Material.Shininess ); + } + + return ambient + Light.L * (diffuse + spec); +} + + +void main() { + + FragColor = vec4(blinnphong(Position, normalize(Normal) ), 1.0); +// FragColor = vec4(normalize(Normal).rgb,1); +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/scene-texture.vs b/cpp-projects/_resources/shaders/samples/ch5/scene-texture.vs new file mode 100644 index 0000000..d952e8c --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/scene-texture.vs @@ -0,0 +1,23 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; + +out vec3 Position; +out vec3 Normal; +out vec2 TexCoord; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main(){ + Normal = normalize(NormalMatrix * VertexNormal); + Position = (ModelViewMatrix * vec4(VertexPosition, 1.0)).xyz; + TexCoord = VertexTexCoord; + gl_Position = MVP * vec4(VertexPosition, 1.0); +} + + diff --git a/cpp-projects/_resources/shaders/samples/ch5/steep-parallax-mapping.fs b/cpp-projects/_resources/shaders/samples/ch5/steep-parallax-mapping.fs new 
file mode 100644 index 0000000..78ee599 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/steep-parallax-mapping.fs @@ -0,0 +1,99 @@ +#version 430 + +in vec3 LightDir; +in vec2 TexCoord; +in vec3 ViewDir; + +layout(binding=0) uniform sampler2D ColorTex; +layout(binding=1) uniform sampler2D NormalMapTex; +layout(binding=2) uniform sampler2D HeightMapTex; + +uniform struct LightInfo { + vec4 Position; // Light position in cam. coords. + vec3 L; // D,S intensity + vec3 La; // Amb intensity +} Light ; + +//uniform struct MaterialInfo { +// vec3 Ka; // Ambient reflectivity +// vec3 Kd; // Diffuse reflectivity +// vec3 Ks; // Specular reflectivity +// float Shininess; // Specular shininess factor +//}Material; + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + + +layout( location = 0 ) out vec4 FragColor; + +uniform float bumpScale = 0.03; + +vec2 findOffset(vec3 v, out float height) { + + const int nSteps = int(mix(60, 10, abs(v.z))); + float htStep = 1.0 / nSteps; + vec2 deltaT = (v.xy * bumpScale) / (nSteps * v.z); + float ht = 1.0; + vec2 tc = TexCoord.xy; + height = texture(HeightMapTex, tc).r; + + while( height < ht ) { + ht -= htStep; + tc -= deltaT; + height = texture(HeightMapTex, tc).r; + } + return tc; +} + +bool isOccluded(float height, vec2 tc, vec3 s) { + // Shadow ray cast + const int nShadowSteps = int(mix(60,10,abs(s.z))); + float htStep = 1.0 / nShadowSteps; + vec2 deltaT = (s.xy * bumpScale) / ( nShadowSteps * s.z ); + float ht = height + htStep * 0.1; + while( height < ht && ht < 1.0 ) { + ht += htStep; + tc += deltaT; + height = texture(HeightMapTex, tc).r; + } + + return ht < 1.0; +} + +vec3 blinnPhong( ) { + vec3 v = normalize(ViewDir); + vec3 s = normalize( LightDir ); + + float height = 1.0; + vec2 tc = findOffset(v, height); + + vec3 texColor = texture(ColorTex, 
tc).rgb; + vec3 n = texture(NormalMapTex, tc).xyz; + n.xy = 2.0 * n.xy - 1.0; + n = normalize(n); + + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = vec3(0.0), + ambient = Light.La * texColor; + vec3 spec = vec3(0.0); + + if( sDotN > 0.0 && ! isOccluded(height, tc, s) ) { + diffuse = texColor * sDotN; + vec3 h = normalize( v + s ); + spec = Material.Ks * pow( max( dot(h,n), 0.0 ), Material.Shininess ); + } + + return ambient + Light.L * (diffuse + spec); +} + +void main() { + vec3 c = blinnPhong(); +// c = pow(c, vec3(1.0/2.2)); + c = pow(c, vec3(1.0/1.2)); + FragColor = vec4( c, 1.0 ); +} diff --git a/cpp-projects/_resources/shaders/samples/ch5/steep-parallax-mapping.vs b/cpp-projects/_resources/shaders/samples/ch5/steep-parallax-mapping.vs new file mode 100644 index 0000000..985e764 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch5/steep-parallax-mapping.vs @@ -0,0 +1,41 @@ +#version 430 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; +layout (location = 3) in vec4 VertexTangent; + +uniform struct LightInfo { + vec4 Position; // Light position in cam. coords. 
+ vec3 L; // D,S intensity + vec3 La; // Amb intensity +} Light ; + +out vec3 LightDir; +out vec2 TexCoord; +out vec3 ViewDir; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 MVP; + +void main() +{ + // Transform normal and tangent to eye space + vec3 norm = normalize( NormalMatrix * VertexNormal ); + vec3 tang = normalize( NormalMatrix * VertexTangent.xyz ); + + // Compute the binormal + vec3 binormal = normalize( cross( norm, tang ) ); + + // Matrix for transformation to tangent space + mat3 toObjectLocal = transpose( mat3( tang, binormal, norm ) ); + + // Transform light direction and view direction to tangent space + vec3 pos = vec3( ModelViewMatrix * vec4(VertexPosition,1.0) ); + LightDir = normalize( toObjectLocal * (Light.Position.xyz - pos) ); + ViewDir = toObjectLocal * normalize(-pos); + TexCoord = VertexTexCoord; + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch6/deferred.fs b/cpp-projects/_resources/shaders/samples/ch6/deferred.fs new file mode 100644 index 0000000..98d0f95 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch6/deferred.fs @@ -0,0 +1,130 @@ +#version 430 + +//struct Light { +// vec3 Position; +// vec3 Color; + +// float Linear; +// float Quadratic; +// float Radius; +//}; +//const int NR_LIGHTS = 32; +//uniform Light lights[NR_LIGHTS]; + + + +in vec3 Position; +in vec3 Normal; +in vec2 TexCoord; + +layout (location = 0) out vec4 FragColor; +layout (location = 1) out vec3 PositionData; +layout (location = 2) out vec3 NormalData; +layout (location = 3) out vec3 DiffuseColorData; +layout (location = 4) out vec3 AmbiantColorData; +layout (location = 5) out vec3 SpecularColorData; + +layout(binding=0) uniform sampler2D PositionTex; +layout(binding=1) uniform sampler2D NormalTex; +layout(binding=2) uniform sampler2D DiffuseColorTex; +layout(binding=3) uniform sampler2D AmbiantColorTex; +layout(binding=4) uniform sampler2D SpecularColorTex; + 
+uniform int Pass; // Pass number +uniform int LightCount; + +uniform struct LightInfo { + vec4 Position; // Light position in eye coords. + vec3 L; // D,S intensity + vec3 La; // Ambient +} Light[50]; + + +uniform struct MaterialInfo { + vec3 Ka; + vec3 Kd; // Diffuse reflectivity + vec3 Ks; + float Shininess; +} Material; + + + +vec3 diffuseModel( vec3 pos, vec3 norm, vec3 diff ) { + vec3 s = normalize( vec3(Light[0].Position) - pos); + float sDotN = max( dot(s,norm), 0.0 ); + return Light[0].L * diff * sDotN; +} + + +vec3 blinnPhong( vec3 position, vec3 n, vec3 kd, vec3 ka, vec3 ks, int idx ) { + vec3 ambient = Light[idx].La * ka; + vec3 s = normalize( Light[idx].Position.xyz - position ); + + vec3 l = Light[idx].Position.xyz - position; + float dist = length(l); + l = normalize(l); + + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 h = normalize( v + s ); + spec = ks * + pow( max( dot(h,n), 0.0 ), Material.Shininess ); + } + + vec3 res = ambient + Light[idx].L * (diffuse + spec); + res /= (dist*dist ); + return res; +} + + +void pass1() { + + // Store position, normal, and diffuse color in textures + PositionData = Position; + NormalData = normalize(Normal); + + // Store Material + DiffuseColorData = Material.Kd; + AmbiantColorData = Material.Ka; + SpecularColorData = Material.Ks; + + // texture(Tex1, TexCoord).rgb; +} + +void pass2() { + // Retrieve position and normal information from textures + vec3 pos = vec3( texture( PositionTex, TexCoord ) ); + vec3 norm = vec3( texture( NormalTex, TexCoord ) ); + vec3 diffColor = vec3( texture( DiffuseColorTex, TexCoord) ); + vec3 ambiantColor = vec3( texture( AmbiantColorTex, TexCoord) ); + vec3 specularColor = vec3( texture( SpecularColorTex, TexCoord) ); + + //FragColor = vec4( diffuseModel(pos,norm,diffColor), 1.0 ); + //FragColor = vec4(diffColor, 1.0); + //FragColor = vec4(specularColor.xyz, 1.0); + +// 
LightCount + + vec3 sum = vec3(0); + for(int ii = 0; ii < LightCount; ++ii){ + sum += blinnPhong(pos, norm, diffColor, ambiantColor, specularColor, ii).xyz; + } + + if(norm.x == 1 && norm.y == 1 && norm.z == 1){ + FragColor = vec4(1,1,1, 1.0 ); + }else{ + FragColor = vec4(sum, 1.0); + } + + + //FragColor = vec4(blinnPhong(pos, norm, diffColor, ambiantColor, specularColor, 0).xyz, 1.0); + //FragColor = vec4(vec3(texture(PositionTex, TexCoord)),1); +} + +void main() { + if( Pass == 1) pass1(); + else if(Pass==2) pass2(); +} diff --git a/cpp-projects/_resources/shaders/samples/ch6/deferred.vs b/cpp-projects/_resources/shaders/samples/ch6/deferred.vs new file mode 100644 index 0000000..cba8a12 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch6/deferred.vs @@ -0,0 +1,21 @@ +#version 430 + +layout( location = 0 ) in vec3 VertexPosition; +layout( location = 1 ) in vec3 VertexNormal; +layout( location = 2 ) in vec2 VertexTexCoord; + +out vec3 Position; +out vec3 Normal; +out vec2 TexCoord; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main() { + Normal = normalize( NormalMatrix * VertexNormal); + Position = vec3( ModelViewMatrix * vec4(VertexPosition,1.0) ); + TexCoord = VertexTexCoord; + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch6/edge-detection-filter.fs b/cpp-projects/_resources/shaders/samples/ch6/edge-detection-filter.fs new file mode 100644 index 0000000..d04c72a --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch6/edge-detection-filter.fs @@ -0,0 +1,82 @@ +#version 430 + +in vec3 Position; +in vec3 Normal; + +layout( binding=0 ) uniform sampler2D RenderTex; + +uniform float EdgeThreshold; +uniform int Pass; + + +uniform struct LightInfo { + vec4 Position; // Light position in eye coords. 
+ vec3 L; // D,S intensity + vec3 La; // amb +} Light; + +uniform struct MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + +layout( location = 0 ) out vec4 FragColor; +const vec3 lum = vec3(0.2126, 0.7152, 0.0722); + +vec3 blinnPhong( vec3 position, vec3 n ) { + vec3 ambient = Light.La * Material.Ka; + vec3 s = normalize( Light.Position.xyz - position ); + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = Material.Kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 h = normalize( v + s ); + spec = Material.Ks * + pow( max( dot(h,n), 0.0 ), Material.Shininess ); + } + return ambient + Light.L * (diffuse + spec); +} + +vec4 pass1() +{ + return vec4(blinnPhong( Position, normalize(Normal) ),1.0); +} + +float luminance( vec3 color ) { + return dot(lum,color); +} + +vec4 pass2() +{ + ivec2 pix = ivec2(gl_FragCoord.xy); + + float s00 = luminance(texelFetchOffset(RenderTex, pix, 0, ivec2(-1,1)).rgb); + float s10 = luminance(texelFetchOffset(RenderTex, pix, 0, ivec2(-1,0)).rgb); + float s20 = luminance(texelFetchOffset(RenderTex, pix, 0, ivec2(-1,-1)).rgb); + float s01 = luminance(texelFetchOffset(RenderTex, pix, 0, ivec2(0,1)).rgb); + float s21 = luminance(texelFetchOffset(RenderTex, pix, 0, ivec2(0,-1)).rgb); + float s02 = luminance(texelFetchOffset(RenderTex, pix, 0, ivec2(1,1)).rgb); + float s12 = luminance(texelFetchOffset(RenderTex, pix, 0, ivec2(1,0)).rgb); + float s22 = luminance(texelFetchOffset(RenderTex, pix, 0, ivec2(1,-1)).rgb); + + float sx = s00 + 2 * s10 + s20 - (s02 + 2 * s12 + s22); + float sy = s00 + 2 * s01 + s02 - (s20 + 2 * s21 + s22); + + float g = sx * sx + sy * sy; + + if( g > EdgeThreshold ) + return vec4(1.0); + else + return vec4(0.0,0.0,0.0,1.0); +} + +void main() +{ + + + if( Pass == 1 ) FragColor = pass1(); + if( Pass == 2 ) FragColor = pass2(); +} diff 
--git a/cpp-projects/_resources/shaders/samples/ch6/edge-detection-filter.vs b/cpp-projects/_resources/shaders/samples/ch6/edge-detection-filter.vs new file mode 100644 index 0000000..29d292a --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch6/edge-detection-filter.vs @@ -0,0 +1,22 @@ +#version 430 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; + +out vec3 Position; +out vec3 Normal; +out vec2 TexCoord; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 MVP; + +void main(){ + + TexCoord = VertexTexCoord; + Normal = normalize( NormalMatrix * VertexNormal); + Position = vec3( ModelViewMatrix * vec4(VertexPosition,1.0) ); + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch6/gaussian-filter.fs b/cpp-projects/_resources/shaders/samples/ch6/gaussian-filter.fs new file mode 100644 index 0000000..3c7292c --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch6/gaussian-filter.fs @@ -0,0 +1,81 @@ +#version 430 + +in vec3 Position; +in vec3 Normal; + +uniform int Pass; // Pass number + +layout(binding=0) uniform sampler2D Texture0; + +uniform struct LightInfo { + vec4 Position; // Light position in eye coords. 
+ vec3 L; // D,S intensity + vec3 La; // Ambient +} Light; + +uniform struct MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + +layout( location = 0 ) out vec4 FragColor; + +uniform float Weight[5]; + +vec3 blinnPhong( vec3 position, vec3 n ) { + vec3 ambient = Light.La * Material.Ka; + vec3 s = normalize( Light.Position.xyz - position ); + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = Material.Kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 h = normalize( v + s ); + spec = Material.Ks *pow( max( dot(h,n), 0.0 ), Material.Shininess ); + } + return ambient + Light.L * (diffuse + spec); +} + +vec4 pass1() { + return vec4(blinnPhong( Position, normalize(Normal) ),1.0); +} + +vec4 pass2() { + ivec2 pix = ivec2( gl_FragCoord.xy ); + vec4 sum = texelFetch(Texture0, pix, 0) * Weight[0]; + sum += texelFetchOffset( Texture0, pix, 0, ivec2(0,1) ) * Weight[1]; + sum += texelFetchOffset( Texture0, pix, 0, ivec2(0,-1) ) * Weight[1]; + sum += texelFetchOffset( Texture0, pix, 0, ivec2(0,2) ) * Weight[2]; + sum += texelFetchOffset( Texture0, pix, 0, ivec2(0,-2) ) * Weight[2]; + sum += texelFetchOffset( Texture0, pix, 0, ivec2(0,3) ) * Weight[3]; + sum += texelFetchOffset( Texture0, pix, 0, ivec2(0,-3) ) * Weight[3]; + sum += texelFetchOffset( Texture0, pix, 0, ivec2(0,4) ) * Weight[4]; + sum += texelFetchOffset( Texture0, pix, 0, ivec2(0,-4) ) * Weight[4]; + return sum; +} + +vec4 pass3() { + ivec2 pix = ivec2( gl_FragCoord.xy ); + vec4 sum = texelFetch(Texture0, pix, 0) * Weight[0]; + sum += texelFetchOffset( Texture0, pix, 0, ivec2(1,0) ) * Weight[1]; + sum += texelFetchOffset( Texture0, pix, 0, ivec2(-1,0) ) * Weight[1]; + sum += texelFetchOffset( Texture0, pix, 0, ivec2(2,0) ) * Weight[2]; + sum += texelFetchOffset( Texture0, pix, 0, ivec2(-2,0) ) * Weight[2]; + sum += 
texelFetchOffset( Texture0, pix, 0, ivec2(3,0) ) * Weight[3]; + sum += texelFetchOffset( Texture0, pix, 0, ivec2(-3,0) ) * Weight[3]; + sum += texelFetchOffset( Texture0, pix, 0, ivec2(4,0) ) * Weight[4]; + sum += texelFetchOffset( Texture0, pix, 0, ivec2(-4,0) ) * Weight[4]; + return sum; +} + +void main() { + if( Pass == 1 ){ + FragColor = pass1(); + } else if( Pass == 2 ) { + FragColor = pass2(); + }else if( Pass == 3 ) { + FragColor = pass3(); + } +} diff --git a/cpp-projects/_resources/shaders/samples/ch6/gaussian-filter.vs b/cpp-projects/_resources/shaders/samples/ch6/gaussian-filter.vs new file mode 100644 index 0000000..ae2396d --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch6/gaussian-filter.vs @@ -0,0 +1,20 @@ +#version 430 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; + +out vec3 Position; +out vec3 Normal; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main() +{ + Normal = normalize( NormalMatrix * VertexNormal); + Position = vec3( ModelViewMatrix * vec4(VertexPosition,1.0) ); + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch6/hdr-bloom.fs b/cpp-projects/_resources/shaders/samples/ch6/hdr-bloom.fs new file mode 100644 index 0000000..c09031c --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch6/hdr-bloom.fs @@ -0,0 +1,169 @@ +#version 430 + +in vec3 Position; +in vec3 Normal; +in vec2 TexCoord; + +layout( location = 0 ) out vec4 FragColor; + +uniform int Pass; // Pass number + +layout (binding=0) uniform sampler2D HdrTex; +layout (binding=1) uniform sampler2D BlurTex1; +layout (binding=2) uniform sampler2D BlurTex2; + +uniform float LumThresh; // Luminance threshold +uniform float Gamma; // gamma correction + + +uniform struct LightInfo { + vec4 Position; // Light position in eye coords. 
+ vec3 L; // D,S intensity + vec3 La; // Ambient +} Lights[3]; + +uniform struct MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + +uniform float PixOffset[10] = float[](0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0); +uniform float Weight[10]; + +// XYZ/RGB conversion matrices from: +// http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html + +uniform mat3 rgb2xyz = mat3( + 0.4124564, 0.2126729, 0.0193339, + 0.3575761, 0.7151522, 0.1191920, + 0.1804375, 0.0721750, 0.9503041 ); + +uniform mat3 xyz2rgb = mat3( + 3.2404542, -0.9692660, 0.0556434, + -1.5371385, 1.8760108, -0.2040259, + -0.4985314, 0.0415560, 1.0572252 ); + +uniform float Exposure = 0.35; +uniform float White = 0.928; +uniform float AveLum; +uniform vec3 Lum = vec3(0.2126, 0.7152, 0.0722); + +float luminance( vec3 color ) { + return dot(Lum,color); + //return 0.2126 * color.r + 0.7152 * color.g + 0.0722 * color.b; +} + +vec3 blinnPhong( vec3 position, vec3 n, int idx ) { + vec3 ambient = Lights[idx].La * Material.Ka; + vec3 s = normalize( Lights[idx].Position.xyz - position ); + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = Material.Kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 h = normalize( v + s ); + spec = Material.Ks * + pow( max( dot(h,n), 0.0 ), Material.Shininess ); + } + return ambient + Lights[idx].L * (diffuse + spec); +} + +// The render pass +vec4 pass1() +{ + vec3 n = normalize(Normal); + vec3 color = vec3(0.0); + for( int i = 0; i < 3; i++) + color += blinnPhong(Position, n, i); + + return vec4(color,1); +} + +// Bright-pass filter (write to BlurTex1) +vec4 pass2() { + vec4 val = texture(HdrTex, TexCoord); + if( luminance(val.rgb) > LumThresh ) + return val; + else + return vec4(0.0); +} + +// First blur pass (read from BlurTex1, write to BlurTex2) +vec4 pass3() { + float dy = 1.0 / 
(textureSize(BlurTex1,0)).y; + + vec4 sum = texture(BlurTex1, TexCoord) * Weight[0]; + for( int i = 1; i < 10; i++ ) + { + sum += texture( BlurTex1, TexCoord + vec2(0.0,PixOffset[i]) * dy ) * Weight[i]; + sum += texture( BlurTex1, TexCoord - vec2(0.0,PixOffset[i]) * dy ) * Weight[i]; + } + return sum; +} + +// Second blur (read from BlurTex2, write to BlurTex1) +vec4 pass4() { + float dx = 1.0 / (textureSize(BlurTex2,0)).x; + + vec4 sum = texture(BlurTex2, TexCoord) * Weight[0]; + for( int i = 1; i < 10; i++ ) + { + sum += texture( BlurTex2, TexCoord + vec2(PixOffset[i],0.0) * dx ) * Weight[i]; + sum += texture( BlurTex2, TexCoord - vec2(PixOffset[i],0.0) * dx ) * Weight[i]; + } + return sum; +} + +// Composite pass, apply tone map to HDR image, +// then combine with the blurred bright-pass filter. +// (Read from BlurTex1 and HdrTex, write to default buffer). +vec4 pass5() { + /////////////// Tone mapping /////////////// + // Retrieve high-res color from texture + vec4 color = texture( HdrTex, TexCoord ); + + // Convert to XYZ + vec3 xyzCol = rgb2xyz * vec3(color); + + // Convert to xyY + float xyzSum = xyzCol.x + xyzCol.y + xyzCol.z; + vec3 xyYCol = vec3( xyzCol.x / xyzSum, xyzCol.y / xyzSum, xyzCol.y); + + // Apply the tone mapping operation to the luminance (xyYCol.z or xyzCol.y) + float L = (Exposure * xyYCol.z) / AveLum; + L = (L * ( 1 + L / (White * White) )) / ( 1 + L ); + + // Using the new luminance, convert back to XYZ + xyzCol.x = (L * xyYCol.x) / (xyYCol.y); + xyzCol.y = L; + xyzCol.z = (L * (1 - xyYCol.x - xyYCol.y))/xyYCol.y; + + // Convert back to RGB + vec4 toneMapColor = vec4( xyz2rgb * xyzCol, 1.0); + + ///////////// Combine with blurred texture ///////////// + // We want linear filtering on this texture access so that + // we get additional blurring. 
+ vec4 blurTex = texture(BlurTex1, TexCoord); + + // ivec2 blurSize = textureSize(BlurTex1, 0); + // if( gl_FragCoord.x < blurSize.x && gl_FragCoord.y < blurSize.y ) + // return texture( BlurTex1, vec2(gl_FragCoord.x / blurSize.x, + // gl_FragCoord.y / blurSize.y ) ); + // else + return toneMapColor + blurTex; +} + +void main() { + if(Pass == 1) FragColor = pass1(); + else if(Pass == 2) FragColor = pass2(); + else if(Pass == 3) FragColor = pass3(); + else if(Pass == 4) FragColor = pass4(); + else if(Pass == 5) { + //FragColor = pass5(); + FragColor = vec4( pow( pass5().rgb, vec3(1.0/Gamma) ), 1.0 ); + } + +} diff --git a/cpp-projects/_resources/shaders/samples/ch6/hdr-bloom.vs b/cpp-projects/_resources/shaders/samples/ch6/hdr-bloom.vs new file mode 100644 index 0000000..55834cc --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch6/hdr-bloom.vs @@ -0,0 +1,23 @@ +#version 430 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; + +out vec3 Position; +out vec3 Normal; +out vec2 TexCoord; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main() +{ + TexCoord = VertexTexCoord; + Normal = normalize( NormalMatrix * VertexNormal); + Position = vec3( ModelViewMatrix * vec4(VertexPosition,1.0) ); + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch6/hdr-lighting-tone-mapping.fs b/cpp-projects/_resources/shaders/samples/ch6/hdr-lighting-tone-mapping.fs new file mode 100644 index 0000000..0ea2f09 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch6/hdr-lighting-tone-mapping.fs @@ -0,0 +1,100 @@ +#version 430 + +in vec3 Position; +in vec3 Normal; +in vec2 TexCoord; + +layout (location = 0) out vec4 FragColor; +layout (location = 1) out vec3 HdrColor; + +uniform int Pass; // Pass number + +uniform struct LightInfo { + vec4 Position; // Light 
position in eye coords. + vec3 L; // D,S intensity + vec3 La; // Ambient +} Lights[3]; + +uniform struct MaterialInfo { + vec3 Ka; + vec3 Kd; // Diffuse reflectivity + vec3 Ks; + float Shininess; +} Material; + +uniform float AveLum; + +layout(binding=0) uniform sampler2D HdrTex; + +// XYZ/RGB conversion matrices from: +// http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html + +uniform mat3 rgb2xyz = mat3( + 0.4124564, 0.2126729, 0.0193339, + 0.3575761, 0.7151522, 0.1191920, + 0.1804375, 0.0721750, 0.9503041 ); + +uniform mat3 xyz2rgb = mat3( + 3.2404542, -0.9692660, 0.0556434, + -1.5371385, 1.8760108, -0.2040259, + -0.4985314, 0.0415560, 1.0572252 ); + +uniform float Exposure = 0.35; +uniform float White = 0.928; +uniform bool DoToneMap = true; + +vec3 blinnPhong( vec3 position, vec3 n, int idx ) { + vec3 ambient = Lights[idx].La * Material.Ka; + vec3 s = normalize( Lights[idx].Position.xyz - position ); + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = Material.Kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { + vec3 v = normalize(-position.xyz); + vec3 h = normalize( v + s ); + spec = Material.Ks * + pow( max( dot(h,n), 0.0 ), Material.Shininess ); + } + return ambient + Lights[idx].L * (diffuse + spec); +} + +void pass1() { + vec3 n = normalize(Normal); + // Compute shading and store result in high-res framebuffer + HdrColor = vec3(0.0); + for( int i = 0; i < 3; i++) + HdrColor += blinnPhong(Position, n, i); +} + +// This pass computes the sum of the luminance of all pixels +void pass2() { + // Retrieve high-res color from texture + vec4 color = texture( HdrTex, TexCoord ); + + // Convert to XYZ + vec3 xyzCol = rgb2xyz * vec3(color); + + // Convert to xyY + float xyzSum = xyzCol.x + xyzCol.y + xyzCol.z; + vec3 xyYCol = vec3( xyzCol.x / xyzSum, xyzCol.y / xyzSum, xyzCol.y); + + // Apply the tone mapping operation to the luminance (xyYCol.z or xyzCol.y) + float L = (Exposure * xyYCol.z) / AveLum; + L = (L * ( 1 + L / (White * White) )) / 
( 1 + L ); + + // Using the new luminance, convert back to XYZ + xyzCol.x = (L * xyYCol.x) / (xyYCol.y); + xyzCol.y = L; + xyzCol.z = (L * (1 - xyYCol.x - xyYCol.y))/xyYCol.y; + + // Convert back to RGB and send to output buffer + if( DoToneMap ) + FragColor = vec4( xyz2rgb * xyzCol, 1.0); + else + FragColor = color; +} + +void main() { + if( Pass == 1 ) pass1(); + else if( Pass == 2) pass2(); +} diff --git a/cpp-projects/_resources/shaders/samples/ch6/hdr-lighting-tone-mapping.vs b/cpp-projects/_resources/shaders/samples/ch6/hdr-lighting-tone-mapping.vs new file mode 100644 index 0000000..b06d033 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch6/hdr-lighting-tone-mapping.vs @@ -0,0 +1,22 @@ +#version 430 + +layout( location = 0 ) in vec3 VertexPosition; +layout( location = 1 ) in vec3 VertexNormal; +layout( location = 2 ) in vec2 VertexTexCoord; + +out vec3 Position; +out vec3 Normal; +out vec2 TexCoord; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main() +{ + Normal = normalize( NormalMatrix * VertexNormal); + Position = vec3( ModelViewMatrix * vec4(VertexPosition,1.0) ); + TexCoord = VertexTexCoord; + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch6/oit.fs b/cpp-projects/_resources/shaders/samples/ch6/oit.fs new file mode 100644 index 0000000..c996f1e --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch6/oit.fs @@ -0,0 +1,110 @@ +#version 430 + +layout (early_fragment_tests) in; + +#define MAX_FRAGMENTS 75 + +in vec3 Position; +in vec3 Normal; + +uniform vec4 LightPosition; +uniform vec3 LightIntensity; + +uniform vec4 Kd; // Diffuse reflectivity +uniform vec4 Ka; // Ambient reflectivity + +struct NodeType { + vec4 color; + float depth; + uint next; +}; + +layout( binding = 0, r32ui) uniform uimage2D headPointers; +layout( binding = 0, offset = 0) uniform atomic_uint nextNodeCounter; +layout( binding = 0, 
std430 ) buffer linkedLists { + NodeType nodes[]; +}; +uniform uint MaxNodes; + +layout( location = 0 ) out vec4 FragColor; + +subroutine void RenderPassType(); +subroutine uniform RenderPassType RenderPass; + +vec3 diffuse( ){ + vec3 s = normalize( LightPosition.xyz - Position ); + vec3 n = normalize(Normal); + return LightIntensity * ( Ka.rgb + Kd.rgb * max( dot(s, n), 0.0 ) ); +} + +subroutine(RenderPassType) +void pass1(){ + + // Get the index of the next empty slot in the buffer + uint nodeIdx = atomicCounterIncrement(nextNodeCounter); // atomically increment a counter and return the prior value + + // Is our buffer full? If so, we don't add the fragment + // to the list. + if( nodeIdx < MaxNodes ) { + + // Our fragment will be the new head of the linked list, so + // replace the value at gl_FragCoord.xy with our new node's + // index. We use imageAtomicExchange to make sure that this + // is an atomic operation. The return value is the old head + // of the list (the previous value), which will become the + // next element in the list once our node is inserted. + uint prevHead = imageAtomicExchange(headPointers, ivec2(gl_FragCoord.xy), nodeIdx); + + // Here we set the color and depth of this new node to the color + // and depth of the fragment. The next pointer, points to the + // previous head of the list. + nodes[nodeIdx].color = vec4(diffuse(), Kd.a); + nodes[nodeIdx].depth = gl_FragCoord.z; + nodes[nodeIdx].next = prevHead; + } + // FragColor = nodes[nodeIdx].color; +} + +subroutine(RenderPassType) +void pass2(){ + + NodeType frags[MAX_FRAGMENTS]; + int count = 0; + + // Get the index of the head of the list + uint n = imageLoad(headPointers, ivec2(gl_FragCoord.xy)).r; + + // Copy the linked list for this fragment into an array + while( n != 0xffffffff && count < MAX_FRAGMENTS) { + frags[count] = nodes[n]; + n = frags[count].next; + count++; + } + + // Sort the array by depth using insertion sort (largest + // to smallest). 
+ for( uint i = 1; i < count; i++ ){ + + NodeType toInsert = frags[i]; + uint j = i; + while( j > 0 && toInsert.depth > frags[j-1].depth ) { + frags[j] = frags[j-1]; + j--; + } + frags[j] = toInsert; + } + + // Traverse the array, and combine the colors using the alpha + // channel. + vec4 color = vec4(0.5, 0.5, 0.5, 1.0); + for( int i = 0; i < count; i++ ){ + color = mix( color, frags[i].color, frags[i].color.a); + } + + // Output the final color + FragColor = color; +} + +void main() { + RenderPass(); +} diff --git a/cpp-projects/_resources/shaders/samples/ch6/oit.vs b/cpp-projects/_resources/shaders/samples/ch6/oit.vs new file mode 100644 index 0000000..ae2396d --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch6/oit.vs @@ -0,0 +1,20 @@ +#version 430 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; + +out vec3 Position; +out vec3 Normal; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main() +{ + Normal = normalize( NormalMatrix * VertexNormal); + Position = vec3( ModelViewMatrix * vec4(VertexPosition,1.0) ); + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch6/ssao.fs b/cpp-projects/_resources/shaders/samples/ch6/ssao.fs new file mode 100644 index 0000000..b0d3047 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch6/ssao.fs @@ -0,0 +1,147 @@ +#version 430 + +in vec3 Position; +in vec3 Normal; +in vec2 TexCoord; + +layout (location = 0) out vec4 FragColor; +layout (location = 1) out vec3 PositionData; +layout (location = 2) out vec3 NormalData; +layout (location = 3) out vec3 ColorData; +layout (location = 4) out float AoData; + +uniform mat4 ProjectionMatrix; + +const int kernelSize = 64; + +uniform int Pass; // Pass number +uniform vec3 SampleKernel[kernelSize]; +uniform float Radius = 0.55; + +uniform struct LightInfo { + vec4 Position; // Light position in eye 
coords. + vec3 L; // D,S intensity + vec3 La; // Ambient +} Light ; + +uniform struct MaterialInfo { + vec3 Kd; // Diffuse reflectivity + bool UseTex; // Use texture +} Material; + +layout(binding=0) uniform sampler2D PositionTex; +layout(binding=1) uniform sampler2D NormalTex; +layout(binding=2) uniform sampler2D ColorTex; +layout(binding=3) uniform sampler2D AoTex; +layout(binding=4) uniform sampler2D RandTex; +layout(binding=5) uniform sampler2D DiffTex; + + +uniform vec2 randScale = vec2( 800.0 / 4.0, 600.0 / 4.0 ); // screen = 800x600 +// tile noise texture over screen, based on screen dimensions divided by noise size +// const vec2 randScale = vec2( 800.0 / 4.0, 600.0 / 4.0 ); // screen = 800x600 +//const vec2 randScale = vec2( 2560.0 / 4.0, 1444.0 / 4.0 ); // screen = 800x600 + +vec3 ambAndDiffuse( vec3 pos, vec3 norm, vec3 diff, float ao ) { + ao = pow(ao, 4); + vec3 ambient = Light.La * diff * ao; + vec3 s = normalize( vec3(Light.Position) - pos); + float sDotN = max( dot(s,norm), 0.0 ); + return ambient + Light.L * diff * sDotN; +} + +void pass1() { + // Store position, normal, and diffuse color in textures + PositionData = Position; + NormalData = normalize(Normal); + if( Material.UseTex ) { + ColorData = pow( texture(DiffTex, TexCoord.xy).rgb, vec3(2.2) ); + } + else ColorData = Material.Kd; +} + +// SSAO pass +void pass2() { + + // Create the random tangent space matrix + vec3 randDir = normalize( texture(RandTex, TexCoord.xy * randScale).xyz ); + vec3 normal = normalize( texture(NormalTex, TexCoord).xyz ); + vec3 biTangent = cross( normal, randDir ); + if( length(biTangent) < 0.0001 ){ // If n and randDir are parallel, n is in x-y plane + biTangent = cross( normal, vec3(0,0,1)); + } + biTangent = normalize(biTangent); + + vec3 tang = cross(biTangent, normal); + mat3 toCamSpace = mat3(tang, biTangent, normal); + + float occlusionSum = 0.0; + vec3 camPos = texture(PositionTex, TexCoord).xyz; + + for( int i = 0; i < kernelSize; i++ ) { + + vec3 samplePos 
= camPos + Radius * (toCamSpace * SampleKernel[i]); + + // Project point + vec4 p = ProjectionMatrix * vec4(samplePos,1); + p *= 1.0 / p.w; + p.xyz = p.xyz * 0.5 + 0.5; + + // Access camera space z-coordinate at that point + float surfaceZ = texture(PositionTex, p.xy).z; + float zDist = surfaceZ - camPos.z; + + // Count points that ARE occluded + if( zDist >= 0.0 && zDist <= Radius && surfaceZ > samplePos.z ){ + occlusionSum += 1.0; + } + + } + + float occ = occlusionSum / kernelSize; + AoData = 1.0 - occ; + //FragColor = vec4(AoData, AoData, AoData, 1); +} + + +// Blur pass +void pass3() { + ivec2 pix = ivec2( gl_FragCoord.xy ); + float sum = 0.0; + for( int x = -1; x <= 1; ++x ) { + for( int y = -1; y <= 1; y++ ) { + sum += texelFetchOffset( AoTex, pix, 0, ivec2(x,y) ).r; + } + } + + float ao = sum * (1.0 / 9.0); + AoData = ao; + // AoData = texelFetchOffset( AoTex, pix, 0, ivec2(0,0) ).r;//ao; + //FragColor = vec4(ao, ao, ao, 1); +} + +// Final color pass +void pass4() { + // Retrieve position and normal information from textures + vec3 pos = texture(PositionTex, TexCoord).xyz; + vec3 norm = texture(NormalTex, TexCoord).xyz; + vec3 diffColor = texture(ColorTex, TexCoord).rgb; + float aoVal = texture(AoTex, TexCoord).r; + + vec3 col = ambAndDiffuse(pos, norm, diffColor, aoVal); + col = pow(col, vec3(1.0/2.2)); + + if(norm.x == 1 && norm.y == 1 && norm.z == 1){ + FragColor = vec4(1,1,1, 1.0 ); + }else{ + FragColor = vec4(col, 1.0 ); + //FragColor = vec4(aoVal,aoVal,aoVal, 1.0 ); + } +} + +void main() { + if( Pass == 1) pass1(); + else if(Pass==2) pass2(); + else if(Pass == 3) pass3(); + else if(Pass == 4) pass4(); +} diff --git a/cpp-projects/_resources/shaders/samples/ch6/ssao.vs b/cpp-projects/_resources/shaders/samples/ch6/ssao.vs new file mode 100644 index 0000000..46a2bf2 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch6/ssao.vs @@ -0,0 +1,20 @@ +#version 430 + +layout( location = 0 ) in vec3 VertexPosition; +layout( location = 1 ) in vec3 
VertexNormal; +layout( location = 2 ) in vec2 VertexTexCoord; + +out vec3 Position; +out vec3 Normal; +out vec2 TexCoord; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 MVP; + +void main() { + Normal = normalize( NormalMatrix * VertexNormal); + Position = vec3( ModelViewMatrix * vec4(VertexPosition,1.0) ); + TexCoord = VertexTexCoord; + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch7/bezcurve.fs b/cpp-projects/_resources/shaders/samples/ch7/bezcurve.fs new file mode 100644 index 0000000..50683c3 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch7/bezcurve.fs @@ -0,0 +1,10 @@ +#version 400 + +uniform vec4 LineColor; + +layout ( location = 0 ) out vec4 FragColor; + +void main() +{ + FragColor = LineColor; +} diff --git a/cpp-projects/_resources/shaders/samples/ch7/bezcurve.tcs b/cpp-projects/_resources/shaders/samples/ch7/bezcurve.tcs new file mode 100644 index 0000000..22d30f8 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch7/bezcurve.tcs @@ -0,0 +1,20 @@ +#version 400 + +layout( vertices=4 ) out; + +uniform int NumSegments; +uniform int NumStrips; + +void main() +{ + // Pass along the vertex position unmodified + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; + + // Some drivers (e.g. Intel) treat these levels incorrectly. The OpenGL spec + // says that level 0 should be the number of strips and level 1 should be + // the number of segments per strip. Unfortunately, not all drivers do this. + // If this example doesn't work for you, try switching the right + // hand side of the two assignments below. 
+ gl_TessLevelOuter[0] = float(NumStrips); + gl_TessLevelOuter[1] = float(NumSegments); +} diff --git a/cpp-projects/_resources/shaders/samples/ch7/bezcurve.tes b/cpp-projects/_resources/shaders/samples/ch7/bezcurve.tes new file mode 100644 index 0000000..e873203 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch7/bezcurve.tes @@ -0,0 +1,30 @@ +#version 400 + +layout( isolines ) in; + +uniform mat4 MVP; + +void main() +{ + float u = gl_TessCoord.x; + + vec3 p0 = gl_in[0].gl_Position.xyz; + vec3 p1 = gl_in[1].gl_Position.xyz; + vec3 p2 = gl_in[2].gl_Position.xyz; + vec3 p3 = gl_in[3].gl_Position.xyz; + + float u1 = (1.0 - u); + float u2 = u * u; + + // Bernstein polynomials + float b3 = u2 * u; + float b2 = 3.0 * u2 * u1; + float b1 = 3.0 * u * u1 * u1; + float b0 = u1 * u1 * u1; + + // Cubic Bezier interpolation + vec3 p = p0 * b0 + p1 * b1 + p2 * b2 + p3 * b3; + + gl_Position = MVP * vec4(p, 1.0); + +} diff --git a/cpp-projects/_resources/shaders/samples/ch7/bezcurve.vs b/cpp-projects/_resources/shaders/samples/ch7/bezcurve.vs new file mode 100644 index 0000000..f79c96b --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch7/bezcurve.vs @@ -0,0 +1,8 @@ +#version 400 + +layout (location = 0 ) in vec2 VertexPosition; + +void main() +{ + gl_Position = vec4(VertexPosition, 0.0, 1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch7/pointsprite.fs b/cpp-projects/_resources/shaders/samples/ch7/pointsprite.fs new file mode 100644 index 0000000..94f0e99 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch7/pointsprite.fs @@ -0,0 +1,13 @@ +#version 440 + +in vec2 TexCoord; + +layout (binding=0) uniform sampler2D SpriteTex; +//uniform sampler2D SpriteTex; + +layout( location = 0 ) out vec4 FragColor; + +void main() +{ + FragColor = texture(SpriteTex, TexCoord); +} diff --git a/cpp-projects/_resources/shaders/samples/ch7/pointsprite.gs b/cpp-projects/_resources/shaders/samples/ch7/pointsprite.gs new file mode 100644 index 
0000000..15a4976 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch7/pointsprite.gs @@ -0,0 +1,33 @@ +#version 400 + +layout( points ) in; +layout( triangle_strip, max_vertices = 4 ) out; + +uniform float Size2; // Half the width of the quad + +uniform mat4 ProjectionMatrix; + +out vec2 TexCoord; + +void main() +{ + mat4 m = ProjectionMatrix; + + gl_Position = m * (vec4(-Size2,-Size2,0.0,0.0) + gl_in[0].gl_Position); + TexCoord = vec2(0.0,0.0); + EmitVertex(); + + gl_Position = m * (vec4(Size2,-Size2,0.0,0.0) + gl_in[0].gl_Position); + TexCoord = vec2(1.0,0.0); + EmitVertex(); + + gl_Position = m * (vec4(-Size2,Size2,0.0,0.0) + gl_in[0].gl_Position); + TexCoord = vec2(0.0,1.0); + EmitVertex(); + + gl_Position = m * (vec4(Size2,Size2,0.0,0.0) + gl_in[0].gl_Position); + TexCoord = vec2(1.0,1.0); + EmitVertex(); + + EndPrimitive(); +} diff --git a/cpp-projects/_resources/shaders/samples/ch7/pointsprite.vs b/cpp-projects/_resources/shaders/samples/ch7/pointsprite.vs new file mode 100644 index 0000000..0badcc4 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch7/pointsprite.vs @@ -0,0 +1,12 @@ +#version 400 + +layout (location = 0) in vec3 VertexPosition; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; + +void main() +{ + gl_Position = ModelViewMatrix * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch7/shadewire.fs b/cpp-projects/_resources/shaders/samples/ch7/shadewire.fs new file mode 100644 index 0000000..fa4b291 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch7/shadewire.fs @@ -0,0 +1,63 @@ +#version 400 + +struct LightInfo { + vec4 Position; // Light position in eye coords. 
+ vec3 Intensity; // A,D,S intensity +}; +uniform LightInfo Light; + +struct MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +}; +uniform MaterialInfo Material; + +uniform struct LineInfo { + float Width; + vec4 Color; +} Line; + +in vec3 GPosition; +in vec3 GNormal; +noperspective in vec3 GEdgeDistance; + +layout( location = 0 ) out vec4 FragColor; + +vec3 phongModel( vec3 pos, vec3 norm ) +{ + vec3 s = normalize(vec3(Light.Position) - pos); + vec3 v = normalize(-pos.xyz); + vec3 r = reflect( -s, norm ); + vec3 ambient = Light.Intensity * Material.Ka; + float sDotN = max( dot(s,norm), 0.0 ); + vec3 diffuse = Light.Intensity * Material.Kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) + spec = Light.Intensity * Material.Ks * + pow( max( dot(r,v), 0.0 ), Material.Shininess ); + + return ambient + diffuse + spec; +} + +void main() { + + vec4 color = vec4( phongModel(GPosition, GNormal), 1.0 ); + + // Find the smallest distance + float d = min( GEdgeDistance.x, GEdgeDistance.y ); + d = min( d, GEdgeDistance.z ); + + float mixVal; + if( d < Line.Width - 1 ) { + mixVal = 1.0; + } else if( d > Line.Width + 1 ) { + mixVal = 0.0; + } else { + float x = d - (Line.Width - 1); + mixVal = exp2(-2.0 * (x*x)); + } + FragColor = mix( color, Line.Color, mixVal ); + +} diff --git a/cpp-projects/_resources/shaders/samples/ch7/shadewire.gs b/cpp-projects/_resources/shaders/samples/ch7/shadewire.gs new file mode 100644 index 0000000..97ab81c --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch7/shadewire.gs @@ -0,0 +1,50 @@ +#version 400 + +layout( triangles ) in; +layout( triangle_strip, max_vertices = 3 ) out; + +out vec3 GNormal; +out vec3 GPosition; +noperspective out vec3 GEdgeDistance; + +in vec3 VNormal[]; +in vec3 VPosition[]; + +uniform mat4 ViewportMatrix; // Viewport matrix + +void main() +{ + // Transform each vertex into viewport space 
+ vec2 p0 = vec2(ViewportMatrix * (gl_in[0].gl_Position / gl_in[0].gl_Position.w)); + vec2 p1 = vec2(ViewportMatrix * (gl_in[1].gl_Position / gl_in[1].gl_Position.w)); + vec2 p2 = vec2(ViewportMatrix * (gl_in[2].gl_Position / gl_in[2].gl_Position.w)); + + float a = length(p1 - p2); + float b = length(p2 - p0); + float c = length(p1 - p0); + float alpha = acos( (b*b + c*c - a*a) / (2.0*b*c) ); + float beta = acos( (a*a + c*c - b*b) / (2.0*a*c) ); + float ha = abs( c * sin( beta ) ); + float hb = abs( c * sin( alpha ) ); + float hc = abs( b * sin( alpha ) ); + + GEdgeDistance = vec3( ha, 0, 0 ); + GNormal = VNormal[0]; + GPosition = VPosition[0]; + gl_Position = gl_in[0].gl_Position; + EmitVertex(); + + GEdgeDistance = vec3( 0, hb, 0 ); + GNormal = VNormal[1]; + GPosition = VPosition[1]; + gl_Position = gl_in[1].gl_Position; + EmitVertex(); + + GEdgeDistance = vec3( 0, 0, hc ); + GNormal = VNormal[2]; + GPosition = VPosition[2]; + gl_Position = gl_in[2].gl_Position; + EmitVertex(); + + EndPrimitive(); +} diff --git a/cpp-projects/_resources/shaders/samples/ch7/shadewire.vs b/cpp-projects/_resources/shaders/samples/ch7/shadewire.vs new file mode 100644 index 0000000..f86751f --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch7/shadewire.vs @@ -0,0 +1,19 @@ +#version 400 + +layout (location = 0 ) in vec3 VertexPosition; +layout (location = 1 ) in vec3 VertexNormal; + +out vec3 VNormal; +out vec3 VPosition; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main() +{ + VNormal = normalize( NormalMatrix * VertexNormal); + VPosition = vec3(ModelViewMatrix * vec4(VertexPosition,1.0)); + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch7/silhouette.fs b/cpp-projects/_resources/shaders/samples/ch7/silhouette.fs new file mode 100644 index 0000000..cbb12d3 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch7/silhouette.fs @@ -0,0 
+1,46 @@ +#version 400 + +struct LightInfo { + vec4 Position; // Light position in eye coords. + vec3 Intensity; // A,D,S intensity +}; +uniform LightInfo Light; + +struct MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +}; +uniform MaterialInfo Material; + +uniform vec4 LineColor; // Colour written for silhouette-edge fragments + +in vec3 GPosition; +in vec3 GNormal; + +flat in int GIsEdge; // 1 for fragments of an edge quad, 0 for the original triangle + +layout( location = 0 ) out vec4 FragColor; + +const int levels = 3; // Number of discrete diffuse bands +const float scaleFactor = 1.0 / levels; + +vec3 toonShade( ) // Ambient term and a banded diffuse term, scaled by the light intensity +{ + vec3 s = normalize( Light.Position.xyz - GPosition.xyz ); // Unit vector from the fragment toward the light + vec3 ambient = Material.Ka; + float cosine = max( dot( s, GNormal ), 0.0 ); // Clamped: a surface turned away from the light must not receive negative diffuse light + vec3 diffuse = Material.Kd * ceil( cosine * levels ) * scaleFactor; // Quantize the diffuse factor into 'levels' steps + + return Light.Intensity * (ambient + diffuse); +} + +void main() { + if( GIsEdge == 1 ) { + FragColor = LineColor; + } else { + FragColor = vec4( toonShade(), 1.0 ); + } + +} diff --git a/cpp-projects/_resources/shaders/samples/ch7/silhouette.gs b/cpp-projects/_resources/shaders/samples/ch7/silhouette.gs new file mode 100644 index 0000000..3936748 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch7/silhouette.gs @@ -0,0 +1,75 @@ +#version 400 + +layout( triangles_adjacency ) in; +layout( triangle_strip, max_vertices = 15 ) out; + +out vec3 GNormal; +out vec3 GPosition; + +// Which triangle edges are silhouette edges +flat out int GIsEdge; + +in vec3 VNormal[]; +in vec3 VPosition[]; + +uniform float EdgeWidth; +uniform float PctExtend; + +bool isFrontFacing( vec3 a, vec3 b, vec3 c ) // True when triangle (a, b, c) has positive signed area in xy, i.e. counter-clockwise winding +{ + return ((a.x * b.y - b.x * a.y) + (b.x * c.y - c.x * b.y) + (c.x * a.y - a.x * c.y)) + > 0; +} + +void emitEdgeQuad( vec3 e0, vec3 e1 ) +{ + vec2 ext = PctExtend * (e1.xy - e0.xy); // Lengthens the quad past both endpoints by this fraction of the edge + vec2 v = normalize(e1.xy - e0.xy); + vec2 n = vec2(-v.y, v.x) * EdgeWidth; // Perpendicular to the edge, scaled to the quad width + + GIsEdge = 1; // This is part of the sil. 
edge + + gl_Position = vec4( e0.xy - ext, e0.z, 1.0 ); EmitVertex(); + GIsEdge = 1; gl_Position = vec4( e0.xy - n - ext, e0.z, 1.0 ); EmitVertex(); // Output values are undefined after EmitVertex(), so the flag is re-set for every vertex + GIsEdge = 1; gl_Position = vec4( e1.xy + ext, e1.z, 1.0 ); EmitVertex(); + GIsEdge = 1; gl_Position = vec4( e1.xy - n + ext, e1.z, 1.0 ); EmitVertex(); + + EndPrimitive(); +} + +void main() +{ + vec3 p0 = gl_in[0].gl_Position.xyz / gl_in[0].gl_Position.w; // Perspective divide of each input vertex + vec3 p1 = gl_in[1].gl_Position.xyz / gl_in[1].gl_Position.w; + vec3 p2 = gl_in[2].gl_Position.xyz / gl_in[2].gl_Position.w; + vec3 p3 = gl_in[3].gl_Position.xyz / gl_in[3].gl_Position.w; + vec3 p4 = gl_in[4].gl_Position.xyz / gl_in[4].gl_Position.w; + vec3 p5 = gl_in[5].gl_Position.xyz / gl_in[5].gl_Position.w; + + if( isFrontFacing(p0, p2, p4) ) { // Vertices 0, 2, 4 form the main triangle; 1, 3, 5 complete the adjacent triangles + if( ! isFrontFacing(p0,p1,p2) ) emitEdgeQuad(p0,p2); + if( ! isFrontFacing(p2,p3,p4) ) emitEdgeQuad(p2,p4); + if( ! isFrontFacing(p4,p5,p0) ) emitEdgeQuad(p4,p0); + } + + // Output the original triangle + + GIsEdge = 0; // This triangle is not part of an edge. + + GNormal = VNormal[0]; + GPosition = VPosition[0]; + gl_Position = gl_in[0].gl_Position; + EmitVertex(); + + GIsEdge = 0; GNormal = VNormal[2]; // Flag re-set: output values are undefined after EmitVertex() + GPosition = VPosition[2]; + gl_Position = gl_in[2].gl_Position; + EmitVertex(); + + GIsEdge = 0; GNormal = VNormal[4]; // The last vertex supplies the flat-shaded flag by default, so it must be defined here + GPosition = VPosition[4]; + gl_Position = gl_in[4].gl_Position; + EmitVertex(); + + EndPrimitive(); +} diff --git a/cpp-projects/_resources/shaders/samples/ch7/silhouette.vs b/cpp-projects/_resources/shaders/samples/ch7/silhouette.vs new file mode 100644 index 0000000..f86751f --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch7/silhouette.vs @@ -0,0 +1,19 @@ +#version 400 + +layout (location = 0 ) in vec3 VertexPosition; +layout (location = 1 ) in vec3 VertexNormal; + +out vec3 VNormal; +out vec3 VPosition; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main() +{ + VNormal = normalize( NormalMatrix * VertexNormal); + VPosition = vec3(ModelViewMatrix * vec4(VertexPosition,1.0)); 
+ gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch7/solid.fs b/cpp-projects/_resources/shaders/samples/ch7/solid.fs new file mode 100644 index 0000000..34c1d61 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch7/solid.fs @@ -0,0 +1,10 @@ +#version 400 + +uniform vec4 Color; + +layout ( location = 0 ) out vec4 FragColor; + +void main() +{ + FragColor = Color; // Every fragment gets the same uniform colour +} diff --git a/cpp-projects/_resources/shaders/samples/ch7/solid.vs b/cpp-projects/_resources/shaders/samples/ch7/solid.vs new file mode 100644 index 0000000..dc95356 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch7/solid.vs @@ -0,0 +1,10 @@ +#version 400 + +layout (location = 0 ) in vec2 VertexPosition; + +uniform mat4 MVP; + +void main() +{ + gl_Position = MVP * vec4(VertexPosition, 0.0, 1.0); // The 2D input is placed on the z = 0 plane before the MVP transform +} diff --git a/cpp-projects/_resources/shaders/samples/ch8/shadowmap.fs b/cpp-projects/_resources/shaders/samples/ch8/shadowmap.fs new file mode 100644 index 0000000..c13b8b0 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch8/shadowmap.fs @@ -0,0 +1,70 @@ +#version 400 + +uniform struct LightInfo { + vec4 Position; // Light position, subtracted from the Position input below (so expected in the same space) + vec3 Intensity; // Scales the ambient, diffuse and specular terms alike +} Light; + +uniform struct MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Exponent of the specular term +} Material; + +uniform sampler2DShadow ShadowMap; + +in vec3 Position; +in vec3 Normal; +in vec4 ShadowCoord; + +layout (location = 0) out vec4 FragColor; + +vec3 phongModelDiffAndSpec() // Diffuse and specular terms only; the ambient term is not included +{ + vec3 n = Normal; // NOTE(review): the interpolated normal is used without re-normalizing + vec3 s = normalize(vec3(Light.Position) - Position); // Unit vector from the fragment toward the light + vec3 v = normalize(-Position.xyz); // Unit vector from the fragment toward the coordinate origin + vec3 r = reflect( -s, n ); + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = Light.Intensity * Material.Kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) // Specular only where the surface faces the light + spec = Light.Intensity * Material.Ks * + pow( max( dot(r,v), 0.0 ), Material.Shininess ); + + return diffuse + spec; +} + +subroutine void RenderPassType(); +subroutine uniform RenderPassType RenderPass; // Invoked from main(); selects which subroutine runs + +subroutine (RenderPassType) 
+void shadeWithShadow() +{ + vec3 ambient = Light.Intensity * Material.Ka; + vec3 diffAndSpec = phongModelDiffAndSpec(); + + float shadow = 1.0; // Value used when the map is not sampled: fully lit + if( ShadowCoord.z >= 0 ) { // The lookup is skipped for negative ShadowCoord.z + shadow = textureProj(ShadowMap, ShadowCoord); // Projective lookup; ShadowMap is a sampler2DShadow, so the result is a depth-comparison value + } + + // If the fragment is in shadow, use ambient light only. + FragColor = vec4(diffAndSpec * shadow + ambient, 1.0); + + // Gamma correct + FragColor = pow( FragColor, vec4(1.0 / 2.2) ); + +} + +subroutine (RenderPassType) +void recordDepth() +{ + // Do nothing, depth will be written automatically +} + +void main() { + // This will call either shadeWithShadow or recordDepth + RenderPass(); +} diff --git a/cpp-projects/_resources/shaders/samples/ch8/shadowmap.vs b/cpp-projects/_resources/shaders/samples/ch8/shadowmap.vs new file mode 100644 index 0000000..2b82d5f --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch8/shadowmap.vs @@ -0,0 +1,21 @@ +#version 400 + +layout (location=0) in vec3 VertexPosition; +layout (location=1) in vec3 VertexNormal; + +out vec3 Normal; +out vec3 Position; +out vec4 ShadowCoord; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 MVP; +uniform mat4 ShadowMatrix; + +void main() +{ + Position = (ModelViewMatrix * vec4(VertexPosition,1.0)).xyz; // Position after the model-view transform + Normal = normalize( NormalMatrix * VertexNormal ); + ShadowCoord = ShadowMatrix * vec4(VertexPosition,1.0); // Input position transformed by ShadowMatrix, consumed by the fragment stage + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-comp.fs b/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-comp.fs new file mode 100644 index 0000000..172d312 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-comp.fs @@ -0,0 +1,13 @@ +#version 410 + +in vec3 Position; +in vec3 Normal; + +uniform sampler2D DiffSpecTex; +layout( location = 0 ) out vec4 FragColor; + +void main() { + vec4 diffSpec = texelFetch(DiffSpecTex, ivec2(gl_FragCoord), 0); // Unfiltered fetch of the level-0 texel at this fragment's window coordinates + + FragColor = vec4(diffSpec.xyz, 1); // Alpha is forced to 1 +} diff --git 
a/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-comp.vs b/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-comp.vs new file mode 100644 index 0000000..8518e04 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-comp.vs @@ -0,0 +1,19 @@ +#version 410 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; + +out vec3 Position; +out vec3 Normal; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjMatrix; + +void main() +{ + Normal = normalize( NormalMatrix * VertexNormal); + Position = vec3( ModelViewMatrix * vec4(VertexPosition,1.0) ); // Position after the model-view transform + + gl_Position = ProjMatrix * ModelViewMatrix * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-render.fs b/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-render.fs new file mode 100644 index 0000000..745d121 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-render.fs @@ -0,0 +1,36 @@ +#version 410 + +in vec3 Position; +in vec3 Normal; +in vec2 TexCoord; + +uniform vec4 LightPosition; +uniform vec3 LightIntensity; + +uniform sampler2D Tex; + +uniform vec3 Kd; // Diffuse reflectivity +uniform vec3 Ka; // Ambient reflectivity +uniform vec3 Ks; // Specular reflectivity +uniform float Shininess; // Specular shininess factor + +layout( location = 0 ) out vec4 Ambient; // Fragment output 0: ambient term +layout( location = 1 ) out vec4 DiffSpec; // Fragment output 1: diffuse and specular terms + +void shade( ) // Writes the ambient term and the diffuse/specular term to two separate outputs +{ + vec3 s = normalize( vec3(LightPosition) - Position ); // Unit vector from the fragment toward the light + vec3 v = normalize(vec3(-Position)); // Unit vector from the fragment toward the coordinate origin + vec3 r = reflect( -s, Normal ); // NOTE(review): Normal is used as interpolated, without re-normalizing + vec4 texColor = texture(Tex, TexCoord); // Texture colour modulates both outputs + + Ambient = vec4(texColor.rgb * LightIntensity * Ka, 1.0); + DiffSpec = vec4(texColor.rgb * LightIntensity * + ( Kd * max( dot(s, Normal), 0.0 ) + + Ks * pow( max( dot(r,v), 0.0 ), Shininess ) ) , + 1.0 ); +} + +void main() { + shade(); +} diff --git a/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-render.vs 
b/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-render.vs new file mode 100644 index 0000000..fb968af --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-render.vs @@ -0,0 +1,22 @@ +#version 410 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; + +out vec3 Position; +out vec3 Normal; +out vec2 TexCoord; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjMatrix; + +void main() +{ + TexCoord = VertexTexCoord; // Passed through unchanged + Normal = normalize( NormalMatrix * VertexNormal); + Position = vec3( ModelViewMatrix * vec4(VertexPosition,1.0) ); // Position after the model-view transform + + gl_Position = ProjMatrix * ModelViewMatrix * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-vol.fs b/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-vol.fs new file mode 100644 index 0000000..ab3c1f3 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-vol.fs @@ -0,0 +1,5 @@ +#version 410 + +void main() { + // Nothing to see here, move along: this stage declares no outputs and writes nothing +} diff --git a/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-vol.gs b/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-vol.gs new file mode 100644 index 0000000..c247f4d --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-vol.gs @@ -0,0 +1,50 @@ +#version 410 + +layout( triangles_adjacency ) in; +layout( triangle_strip, max_vertices = 18 ) out; + +in vec3 VPosition[]; +in vec3 VNormal[]; + +uniform vec4 LightPosition; // Light position (eye coords) +uniform mat4 ProjMatrix; // Projection matrix + +bool facesLight( vec3 a, vec3 b, vec3 c ) // True when the light is on the front side of triangle (a, b, c) as seen from at least one vertex +{ + vec3 n = cross( b - a, c - a ); // Unnormalized face normal + vec3 da = LightPosition.xyz - a; // Vectors from each vertex toward the light + vec3 db = LightPosition.xyz - b; + vec3 dc = LightPosition.xyz - c; + + return dot(n, da) > 0 || dot(n, db) > 0 || dot(n, dc) > 0; +} + +void emitEdgeQuad( vec3 a, vec3 b ) { + gl_Position = ProjMatrix * vec4(a, 1); // Edge endpoint a at its actual position + 
EmitVertex(); + + gl_Position = ProjMatrix * vec4(a - LightPosition.xyz, 0); // w = 0: the direction from the light through a, i.e. a pushed out to infinity + EmitVertex(); + + gl_Position = ProjMatrix * vec4(b, 1); // Edge endpoint b at its actual position + EmitVertex(); + + gl_Position = ProjMatrix * vec4(b - LightPosition.xyz, 0); // w = 0: b pushed out to infinity away from the light + EmitVertex(); + EndPrimitive(); +} + +void main() +{ + // If the main triangle faces the light, check each adjacent + // triangle. If an adjacent triangle does not face the light + // we output a silhouette edge quad for the corresponding edge. + if( facesLight(VPosition[0], VPosition[2], VPosition[4]) ) { + if( ! facesLight(VPosition[0],VPosition[1],VPosition[2]) ) + emitEdgeQuad(VPosition[0],VPosition[2]); + if( ! facesLight(VPosition[2],VPosition[3],VPosition[4]) ) + emitEdgeQuad(VPosition[2],VPosition[4]); + if( ! facesLight(VPosition[4],VPosition[5],VPosition[0]) ) + emitEdgeQuad(VPosition[4],VPosition[0]); + } +} diff --git a/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-vol.vs b/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-vol.vs new file mode 100644 index 0000000..7d9ad5c --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch8/shadowvolume-vol.vs @@ -0,0 +1,19 @@ +#version 410 + +layout (location=0) in vec3 VertexPosition; +layout (location=1) in vec3 VertexNormal; +layout (location=2) in vec2 VertexTexCoord; + +out vec3 VPosition; +out vec3 VNormal; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjMatrix; + +void main() +{ + VNormal = NormalMatrix * VertexNormal; // Not normalized here, unlike the other vertex shaders in this directory + VPosition = (ModelViewMatrix * vec4(VertexPosition,1.0)).xyz; // Position after the model-view transform, consumed by the geometry stage + gl_Position = ProjMatrix * ModelViewMatrix * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/ch8/solid.fs b/cpp-projects/_resources/shaders/samples/ch8/solid.fs new file mode 100644 index 0000000..34c1d61 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch8/solid.fs @@ -0,0 +1,10 @@ +#version 400 + +uniform vec4 Color; + +layout ( location = 0 ) out vec4 FragColor; + +void main() +{ + FragColor = Color; // Every fragment gets the same uniform colour +} diff 
--git a/cpp-projects/_resources/shaders/samples/ch8/solid.vs b/cpp-projects/_resources/shaders/samples/ch8/solid.vs new file mode 100644 index 0000000..d981c47 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/ch8/solid.vs @@ -0,0 +1,10 @@ +#version 400 + +layout (location = 0 ) in vec3 VertexPosition; + +uniform mat4 MVP; + +void main() +{ + gl_Position = MVP * vec4(VertexPosition, 1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/learn/3_1_1_debug_quad.fs b/cpp-projects/_resources/shaders/samples/learn/3_1_1_debug_quad.fs new file mode 100644 index 0000000..deaa4f1 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/learn/3_1_1_debug_quad.fs @@ -0,0 +1,22 @@ +#version 430 core +out vec4 FragColor; + +in vec2 TexCoords; + +uniform sampler2D depthMap; +uniform float near_plane; +uniform float far_plane; + +// Maps a [0,1] depth value to a linear distance between near_plane and far_plane; required when using a perspective projection matrix +float LinearizeDepth(float depth) +{ + float z = depth * 2.0 - 1.0; // Back to NDC + return (2.0 * near_plane * far_plane) / (far_plane + near_plane - z * (far_plane - near_plane)); // Yields near_plane at depth 0 and far_plane at depth 1 +} + +void main() +{ + float depthValue = texture(depthMap, TexCoords).r; // The depth value is read from the red channel + // FragColor = vec4(vec3(LinearizeDepth(depthValue) / far_plane), 1.0); // perspective + FragColor = vec4(vec3(depthValue), 1.0); // orthographic +} diff --git a/cpp-projects/_resources/shaders/samples/learn/3_1_1_debug_quad.vs b/cpp-projects/_resources/shaders/samples/learn/3_1_1_debug_quad.vs new file mode 100644 index 0000000..bba53e5 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/learn/3_1_1_debug_quad.vs @@ -0,0 +1,11 @@ +#version 430 core +layout (location = 0) in vec3 aPos; +layout (location = 1) in vec2 aTexCoords; + +out vec2 TexCoords; + +void main() +{ + TexCoords = aTexCoords; + gl_Position = vec4(aPos, 1.0); // Positions are used as given: no model, view or projection transform +} diff --git a/cpp-projects/_resources/shaders/samples/learn/3_1_1_shadow_mapping.fs b/cpp-projects/_resources/shaders/samples/learn/3_1_1_shadow_mapping.fs new file mode 100644 index 
0000000..e0fedb1 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/learn/3_1_1_shadow_mapping.fs @@ -0,0 +1,97 @@ + +#version 430 core +out vec4 FragColor; + +in VS_OUT { + vec3 FragPos; + vec3 Normal; + vec2 TexCoords; + vec4 FragPosLightSpace; +} fs_in; + +layout(binding=0) uniform sampler2D diffuseTexture; +layout(binding=1) uniform sampler2D shadowMap; + +uniform vec3 lightPos; +uniform vec3 viewPos; + +float ShadowCalculation(vec4 fragPosLightSpace) // Returns the shadow factor in [0,1]: 0 = fully lit, 1 = fully shadowed +{ + // perform perspective divide + vec3 projCoords = fragPosLightSpace.xyz / fragPosLightSpace.w; + // transform to [0,1] range + projCoords = projCoords * 0.5 + 0.5; + // get closest depth value from light's perspective (using [0,1] range fragPosLight as coords) + float closestDepth = texture(shadowMap, projCoords.xy).r; // NOTE(review): only referenced by the commented-out single-sample test below + // get depth of current fragment from light's perspective + float currentDepth = projCoords.z; + // calculate bias (based on depth map resolution and slope) + vec3 normal = normalize(fs_in.Normal); + vec3 lightDir = normalize(lightPos - fs_in.FragPos); + float bias = max(0.05 * (1.0 - dot(normal, lightDir)), 0.005); // Larger bias at grazing angles, never below 0.005 + // check whether current frag pos is in shadow +// float shadow = currentDepth - bias > closestDepth ? 1.0 : 0.0; + // PCF: average the biased depth test over a 3x3 block of shadow-map texels + float shadow = 0.0; + vec2 texelSize = 1.0 / textureSize(shadowMap, 0); // Size of one level-0 texel in texture coordinates + for(int x = -1; x <= 1; ++x) + { + for(int y = -1; y <= 1; ++y) + { + float pcfDepth = texture(shadowMap, projCoords.xy + vec2(x, y) * texelSize).r; + shadow += currentDepth - bias > pcfDepth ? 1.0 : 0.0; + } + } + shadow /= 9.0; // 3 x 3 samples + + // keep the shadow at 0.0 when outside the far_plane region of the light's frustum. 
+ if(projCoords.z > 1.0) + shadow = 0.0; + +// shadow = 1.0; +// if( projCoords.z >= 0 ) { +// textureProj(shadowMap, projCoords); +// } + + return shadow; +} + +void main() +{ + vec3 color = texture(diffuseTexture, fs_in.TexCoords).rgb; + vec3 normal = normalize(fs_in.Normal); + vec3 lightColor = vec3(0.3); + // ambient + vec3 ambient = 0.3 * color; // NOTE(review): 'lighting' below multiplies by color again, so the ambient term is scaled by color twice + // diffuse + vec3 lightDir = normalize(lightPos - fs_in.FragPos); + float diff = max(dot(lightDir, normal), 0.0); + vec3 diffuse = diff * lightColor; + // specular + vec3 viewDir = normalize(viewPos - fs_in.FragPos); + vec3 reflectDir = reflect(-lightDir, normal); // NOTE(review): unused; the specular term below uses the halfway vector instead + float spec = 0.0; + vec3 halfwayDir = normalize(lightDir + viewDir); + spec = pow(max(dot(normal, halfwayDir), 0.0), 64.0); + vec3 specular = spec * lightColor; + // calculate shadow + float shadow = ShadowCalculation(fs_in.FragPosLightSpace); + vec3 lighting = (ambient + (1.0 - shadow) * (diffuse + specular)) * color; // Shadow attenuates diffuse and specular only, never ambient + + + vec2 uv = gl_FragCoord.xy;// / screenSize; + + if(uv.x < 800){ // Debug split: fragments left of window x = 800 display the raw shadow factor + //FragColor = vec4(color, 1.0); + FragColor = vec4(shadow,shadow,shadow, 1.0); + }else{ + FragColor = vec4(lighting, 1.0); + } +// }else if(uv.x < 800){ +// FragColor = vec4(diffuse, 1.0); +// }else if(uv.x < 1200){ +// FragColor = vec4(shadow,shadow,shadow, 1.0); +// }else{ +// FragColor = vec4(lighting, 1.0); +// } +} diff --git a/cpp-projects/_resources/shaders/samples/learn/3_1_1_shadow_mapping.vs b/cpp-projects/_resources/shaders/samples/learn/3_1_1_shadow_mapping.vs new file mode 100644 index 0000000..9cb93e4 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/learn/3_1_1_shadow_mapping.vs @@ -0,0 +1,26 @@ +#version 430 core + +layout (location = 0) in vec3 aPos; +layout (location = 1) in vec3 aNormal; +layout (location = 2) in vec2 aTexCoords; + +out VS_OUT { + vec3 FragPos; + vec3 Normal; + vec2 TexCoords; + vec4 FragPosLightSpace; +} vs_out; + +uniform mat4 projection; +uniform mat4 view; +uniform mat4 model; +uniform mat4 lightSpaceMatrix; + +void 
main() +{ + vs_out.FragPos = vec3(model * vec4(aPos, 1.0)); // Position after the model transform + vs_out.Normal = transpose(inverse(mat3(model))) * aNormal; // Inverse-transpose of the model matrix's upper 3x3, so normals stay perpendicular under non-uniform scale + vs_out.TexCoords = aTexCoords; + vs_out.FragPosLightSpace = lightSpaceMatrix * vec4(vs_out.FragPos, 1.0); // Same position transformed by lightSpaceMatrix for the shadow lookup + gl_Position = projection * view * vec4(vs_out.FragPos, 1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/learn/3_1_1_shadow_mapping_depth.fs b/cpp-projects/_resources/shaders/samples/learn/3_1_1_shadow_mapping_depth.fs new file mode 100644 index 0000000..0a6b4ff --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/learn/3_1_1_shadow_mapping_depth.fs @@ -0,0 +1,6 @@ +#version 430 core + +void main() +{ + // gl_FragDepth = gl_FragCoord.z; (left commented out: this shader performs no explicit depth write) +} diff --git a/cpp-projects/_resources/shaders/samples/learn/3_1_1_shadow_mapping_depth.vs b/cpp-projects/_resources/shaders/samples/learn/3_1_1_shadow_mapping_depth.vs new file mode 100644 index 0000000..5b5eccc --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/learn/3_1_1_shadow_mapping_depth.vs @@ -0,0 +1,10 @@ +#version 430 core +layout (location = 0) in vec3 aPos; + +uniform mat4 lightSpaceMatrix; +uniform mat4 model; + +void main() +{ + gl_Position = lightSpaceMatrix * model * vec4(aPos, 1.0); // Model transform followed by lightSpaceMatrix +} diff --git a/cpp-projects/_resources/shaders/samples/others/model.fs b/cpp-projects/_resources/shaders/samples/others/model.fs new file mode 100644 index 0000000..17c3d91 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/others/model.fs @@ -0,0 +1,47 @@ +#version 430 core +out vec4 FragColor; + +in vec3 Position; +in vec3 Normal; +in vec2 TexCoord; + +layout (binding=0) uniform sampler2D Tex1; +uniform float near_plane; // Near distance used by LinearizeDepth (previously an undeclared identifier) +uniform float far_plane; // Far distance used by LinearizeDepth (previously an undeclared identifier) + +float LinearizeDepth(float depth) // Maps a [0,1] depth value to a linear distance between near_plane and far_plane +{ + float z = depth * 2.0 - 1.0; // back to NDC + return (2.0 * near_plane * far_plane) / (far_plane + near_plane - z * (far_plane - near_plane)); +} + +void main(){ + + //float depth = LinearizeDepth(gl_FragCoord.z) / far_plane; + //FragColor = vec4(vec3(depth), 1.0); + //FragColor = vec4(vec3(gl_FragCoord.z), 1.0); + //return; + //if(gl_FragCoord.x < 200) + //FragColor = 
vec4(TexCoord.x,TexCoord.y,0,1); + //return; + //else if(gl_FragCoord.x < 400) + // FragColor = vec4() + // FragColor = vec4(Normal.x,Normal.y,0,1); + + + vec4 textureColour = texture(Tex1, TexCoord); // Samples the declared sampler with the declared input (the old names were never declared) + + //vec4(texture(Tex1, TexCoord).y,1,0,1); + // + //FragColor= vec4(1-textureColour.w,0,0,1); + //return; + + if(textureColour.a < 1.0) { // NOTE(review): the branch body is entirely commented out, so this test currently has no effect + //FragColor= vec4(1,0,0,1); + //return; + //discard; + } + + //FragColor= vec4(textureColour.r,0,0,1); + FragColor= textureColour; +} diff --git a/cpp-projects/_resources/shaders/samples/others/model.vs b/cpp-projects/_resources/shaders/samples/others/model.vs new file mode 100644 index 0000000..d952e8c --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/others/model.vs @@ -0,0 +1,23 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; + +out vec3 Position; +out vec3 Normal; +out vec2 TexCoord; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + +void main(){ + Normal = normalize(NormalMatrix * VertexNormal); + Position = (ModelViewMatrix * vec4(VertexPosition, 1.0)).xyz; // Position after the model-view transform + TexCoord = VertexTexCoord; + gl_Position = MVP * vec4(VertexPosition, 1.0); +} + + diff --git a/cpp-projects/_resources/shaders/samples/others/screen-quad.fs b/cpp-projects/_resources/shaders/samples/others/screen-quad.fs new file mode 100644 index 0000000..16f7ebe --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/others/screen-quad.fs @@ -0,0 +1,11 @@ + +#version 430 + +layout(binding=0) uniform sampler2D Texture0; +layout( location = 0 ) out vec4 FragColor; + +void main() { + + ivec2 pix = ivec2( gl_FragCoord.xy ); // Integer window coordinates of this fragment + FragColor = texelFetch(Texture0, pix, 0); // Unfiltered copy of the level-0 texel at those coordinates +} diff --git a/cpp-projects/_resources/shaders/samples/others/screen-quad.vs b/cpp-projects/_resources/shaders/samples/others/screen-quad.vs new file mode 100644 index 0000000..bdc9862 --- 
/dev/null +++ b/cpp-projects/_resources/shaders/samples/others/screen-quad.vs @@ -0,0 +1,10 @@ +#version 430 + +layout (location = 0) in vec3 VertexPosition; + +uniform mat4 MVP; + +void main() +{ + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/others/skybox.fs b/cpp-projects/_resources/shaders/samples/others/skybox.fs new file mode 100644 index 0000000..2ca6ef8 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/others/skybox.fs @@ -0,0 +1,12 @@ +#version 430 + +layout(binding=0) uniform samplerCube SkyBoxTex; + +in vec3 Vec; +layout( location = 0 ) out vec4 FragColor; + +void main() { + vec3 texColor = texture(SkyBoxTex, normalize(Vec)).rgb; // Cube-map lookup along the interpolated direction + texColor = pow( texColor, vec3(1.0/2.2)); // Gamma-encode with exponent 1/2.2 + FragColor = vec4(texColor,1); +} diff --git a/cpp-projects/_resources/shaders/samples/others/skybox.vs b/cpp-projects/_resources/shaders/samples/others/skybox.vs new file mode 100644 index 0000000..916f0f2 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/others/skybox.vs @@ -0,0 +1,11 @@ +#version 430 + +layout (location = 0) in vec3 VertexPosition; + +out vec3 Vec; +uniform mat4 MVP; + +void main() { + Vec = VertexPosition; // The untransformed vertex position is passed on as the cube-map lookup direction + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/samples/others/unicolor.fs b/cpp-projects/_resources/shaders/samples/others/unicolor.fs new file mode 100644 index 0000000..2b18b42 --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/others/unicolor.fs @@ -0,0 +1,10 @@ + +#version 440 + +layout( location = 0 ) out vec4 FragColor; + +uniform vec3 unicolor; + +void main() { + FragColor = vec4(unicolor, 1.0); // Opaque uniform colour for every fragment +} diff --git a/cpp-projects/_resources/shaders/samples/others/unicolor.vs b/cpp-projects/_resources/shaders/samples/others/unicolor.vs new file mode 100644 index 0000000..9f38efa --- /dev/null +++ b/cpp-projects/_resources/shaders/samples/others/unicolor.vs @@ -0,0 +1,11 @@ + +#version 440 + +layout (location = 0) in vec3 
VertexPosition; + +uniform mat4 MVP; + +void main() +{ + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/scaner/scanercloud.fs b/cpp-projects/_resources/shaders/scaner/scanercloud.fs new file mode 100644 index 0000000..c0cee99 --- /dev/null +++ b/cpp-projects/_resources/shaders/scaner/scanercloud.fs @@ -0,0 +1,17 @@ +#version 440 + +in vec3 Color; +in vec2 TexCoord; +layout( location = 0 ) out vec4 FragColor; + +void main() { + + const float scale = 15.0; // Number of lattice cells per unit of texture coordinate + bvec2 toDiscard = greaterThan(fract(TexCoord * scale), vec2(0.2,0.2)); // Per axis: true past the first 20% of the cell + + if(all(toDiscard)){ // Past that 20% on both axes: drop the fragment, leaving only a lattice of bands + discard; + } + + FragColor = vec4(Color, 1.0); +} diff --git a/cpp-projects/_resources/shaders/scaner/scanercloud.vs b/cpp-projects/_resources/shaders/scaner/scanercloud.vs new file mode 100644 index 0000000..017b6fd --- /dev/null +++ b/cpp-projects/_resources/shaders/scaner/scanercloud.vs @@ -0,0 +1,63 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; + +out vec3 Color; // Lighting is evaluated per vertex and interpolated +out vec2 TexCoord; + +layout (binding = 0) uniform LightInfo { + vec4 Position; // Light position in eye coords. 
+ vec3 La; // Ambient light intensity + vec3 Ld; // Diffuse light intensity + vec3 Ls; // Specular light intensity +} Light; + + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + + +vec3 phong_model( vec3 position, vec3 n ) { // Sum of the ambient, diffuse and specular terms for a point and its normal + vec3 ambient = Light.La * Material.Ka; + vec3 s = normalize( Light.Position.xyz - position ); // Unit vector from the point toward the light + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = Light.Ld * Material.Kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { // Specular only where the surface faces the light + vec3 v = normalize(-position.xyz); // Unit vector from the point toward the coordinate origin + vec3 r = reflect( -s, n ); + spec = Light.Ls * Material.Ks *pow( max( dot(r,v), 0.0 ), Material.Shininess ); + } + + return ambient + diffuse + spec; +} + +void main(){ + + TexCoord = VertexTexCoord; + + vec3 tnorm = normalize( NormalMatrix * VertexNormal); + vec3 camCoords = (ModelViewMatrix * vec4(VertexPosition,1.0)).xyz; // Position after the model-view transform + vec3 v = normalize(-camCoords.xyz); + + float vDotN = dot(v, tnorm); + + if( vDotN >= 0 ) { // Normal points toward the origin side: shade with it as-is + Color = phong_model(camCoords, tnorm); + } else { // Normal points away: flip it so both sides of the surface are lit + Color = phong_model(camCoords, -tnorm); + } + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/scaner/scanermesh.fs b/cpp-projects/_resources/shaders/scaner/scanermesh.fs new file mode 100644 index 0000000..c0cee99 --- /dev/null +++ b/cpp-projects/_resources/shaders/scaner/scanermesh.fs @@ -0,0 +1,17 @@ +#version 440 + +in vec3 Color; +in vec2 TexCoord; +layout( location = 0 ) out vec4 FragColor; + +void main() { + + const float scale = 15.0; // Number of lattice cells per unit of texture coordinate + bvec2 toDiscard = greaterThan(fract(TexCoord * scale), vec2(0.2,0.2)); // Per axis: true past the first 20% of the cell + + if(all(toDiscard)){ // Past that 20% on both axes: drop the fragment, leaving only a lattice of bands + discard; + } + + FragColor = vec4(Color, 1.0); +} diff --git a/cpp-projects/_resources/shaders/scaner/scanermesh.vs 
b/cpp-projects/_resources/shaders/scaner/scanermesh.vs new file mode 100644 index 0000000..017b6fd --- /dev/null +++ b/cpp-projects/_resources/shaders/scaner/scanermesh.vs @@ -0,0 +1,63 @@ +#version 440 + +layout (location = 0) in vec3 VertexPosition; +layout (location = 1) in vec3 VertexNormal; +layout (location = 2) in vec2 VertexTexCoord; + +out vec3 Color; // Lighting is evaluated per vertex and interpolated +out vec2 TexCoord; + +layout (binding = 0) uniform LightInfo { + vec4 Position; // Light position in eye coords. + vec3 La; // Ambient light intensity + vec3 Ld; // Diffuse light intensity + vec3 Ls; // Specular light intensity +} Light; + + +layout (binding = 1) uniform MaterialInfo { + vec3 Ka; // Ambient reflectivity + vec3 Kd; // Diffuse reflectivity + vec3 Ks; // Specular reflectivity + float Shininess; // Specular shininess factor +} Material; + +uniform mat4 ModelViewMatrix; +uniform mat3 NormalMatrix; +uniform mat4 ProjectionMatrix; +uniform mat4 MVP; + + +vec3 phong_model( vec3 position, vec3 n ) { // Sum of the ambient, diffuse and specular terms for a point and its normal + vec3 ambient = Light.La * Material.Ka; + vec3 s = normalize( Light.Position.xyz - position ); // Unit vector from the point toward the light + float sDotN = max( dot(s,n), 0.0 ); + vec3 diffuse = Light.Ld * Material.Kd * sDotN; + vec3 spec = vec3(0.0); + if( sDotN > 0.0 ) { // Specular only where the surface faces the light + vec3 v = normalize(-position.xyz); // Unit vector from the point toward the coordinate origin + vec3 r = reflect( -s, n ); + spec = Light.Ls * Material.Ks *pow( max( dot(r,v), 0.0 ), Material.Shininess ); + } + + return ambient + diffuse + spec; +} + +void main(){ + + TexCoord = VertexTexCoord; + + vec3 tnorm = normalize( NormalMatrix * VertexNormal); + vec3 camCoords = (ModelViewMatrix * vec4(VertexPosition,1.0)).xyz; // Position after the model-view transform + vec3 v = normalize(-camCoords.xyz); + + float vDotN = dot(v, tnorm); + + if( vDotN >= 0 ) { // Normal points toward the origin side: shade with it as-is + Color = phong_model(camCoords, tnorm); + } else { // Normal points away: flip it so both sides of the surface are lit + Color = phong_model(camCoords, -tnorm); + } + + gl_Position = MVP * vec4(VertexPosition,1.0); +} diff --git a/cpp-projects/_resources/shaders/vertex.vert.glsl b/cpp-projects/_resources/shaders/vertex.vert.glsl new file mode 100644 index 0000000..4b0bd59 --- /dev/null +++ 
b/cpp-projects/_resources/shaders/vertex.vert.glsl @@ -0,0 +1,32 @@ + + + +#version 450 core +layout (location = 0) in vec3 aPos; // the position variable has attribute position 0 +layout (location = 1) in vec4 aColor; // the color variable has attribute position 1 + +// model +uniform mat4 model; +uniform mat4 view; +uniform mat4 projection; + +// camera +uniform vec3 camera_position; + +// cloud +uniform float size_pt = 3.f; // Base point size, divided by a distance term in main() + +// color +uniform vec4 unicolor; +uniform bool enable_unicolor = false; +out vec4 color; + +void main(){ + + vec4 p = view * model * vec4(aPos, 1.0); // Vertex position after the model and view transforms + gl_Position = projection*p; + color = enable_unicolor ? unicolor : aColor; // The uniform colour replaces the per-vertex colour when enabled + float l = sqrt(length(p.xyz-camera_position.xyz)); // Square root of the distance to camera_position; NOTE(review): p is in view space here - confirm camera_position is given in the same space + gl_PointSize = size_pt/(l); // Points shrink with distance; presumably requires program point size to be enabled by the application - TODO confirm +} + diff --git a/cpp-projects/_resources/splash/exvr_splash.png b/cpp-projects/_resources/splash/exvr_splash.png new file mode 100644 index 0000000..4783749 Binary files /dev/null and b/cpp-projects/_resources/splash/exvr_splash.png differ diff --git a/cpp-projects/_resources/textures/brick1.jpg b/cpp-projects/_resources/textures/brick1.jpg new file mode 100644 index 0000000..1a01624 Binary files /dev/null and b/cpp-projects/_resources/textures/brick1.jpg differ diff --git a/cpp-projects/_resources/textures/cement.jpg b/cpp-projects/_resources/textures/cement.jpg new file mode 100644 index 0000000..f05d43a Binary files /dev/null and b/cpp-projects/_resources/textures/cement.jpg differ diff --git a/cpp-projects/_resources/textures/container.jpg b/cpp-projects/_resources/textures/container.jpg new file mode 100644 index 0000000..d07bee4 Binary files /dev/null and b/cpp-projects/_resources/textures/container.jpg differ diff --git a/cpp-projects/_resources/textures/container2.png b/cpp-projects/_resources/textures/container2.png new file mode 100644 index 0000000..596e8da Binary files /dev/null and b/cpp-projects/_resources/textures/container2.png differ diff --git a/cpp-projects/_resources/textures/container2_specular.png 
b/cpp-projects/_resources/textures/container2_specular.png new file mode 100644 index 0000000..681bf6e Binary files /dev/null and b/cpp-projects/_resources/textures/container2_specular.png differ diff --git a/cpp-projects/_resources/textures/fire.png b/cpp-projects/_resources/textures/fire.png new file mode 100644 index 0000000..79f2749 Binary files /dev/null and b/cpp-projects/_resources/textures/fire.png differ diff --git a/cpp-projects/_resources/textures/flower.png b/cpp-projects/_resources/textures/flower.png new file mode 100644 index 0000000..39a291f Binary files /dev/null and b/cpp-projects/_resources/textures/flower.png differ diff --git a/cpp-projects/_resources/textures/grace-diffuse/README b/cpp-projects/_resources/textures/grace-diffuse/README new file mode 100644 index 0000000..53a5381 --- /dev/null +++ b/cpp-projects/_resources/textures/grace-diffuse/README @@ -0,0 +1,2 @@ +Images from: http://gl.ict.usc.edu/Data/HighResProbes/ + diff --git a/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_negx.hdr b/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_negx.hdr new file mode 100644 index 0000000..0ae45a7 Binary files /dev/null and b/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_negx.hdr differ diff --git a/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_negy.hdr b/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_negy.hdr new file mode 100644 index 0000000..9c6813e Binary files /dev/null and b/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_negy.hdr differ diff --git a/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_negz.hdr b/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_negz.hdr new file mode 100644 index 0000000..f936c93 Binary files /dev/null and b/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_negz.hdr differ diff --git a/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_posx.hdr 
b/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_posx.hdr new file mode 100644 index 0000000..7fceabf Binary files /dev/null and b/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_posx.hdr differ diff --git a/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_posy.hdr b/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_posy.hdr new file mode 100644 index 0000000..289c58b Binary files /dev/null and b/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_posy.hdr differ diff --git a/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_posz.hdr b/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_posz.hdr new file mode 100644 index 0000000..bf8d2a8 Binary files /dev/null and b/cpp-projects/_resources/textures/grace-diffuse/grace-diffuse_posz.hdr differ diff --git a/cpp-projects/_resources/textures/grace/README b/cpp-projects/_resources/textures/grace/README new file mode 100644 index 0000000..53a5381 --- /dev/null +++ b/cpp-projects/_resources/textures/grace/README @@ -0,0 +1,2 @@ +Images from: http://gl.ict.usc.edu/Data/HighResProbes/ + diff --git a/cpp-projects/_resources/textures/grace/grace_negx.hdr b/cpp-projects/_resources/textures/grace/grace_negx.hdr new file mode 100644 index 0000000..147cb19 Binary files /dev/null and b/cpp-projects/_resources/textures/grace/grace_negx.hdr differ diff --git a/cpp-projects/_resources/textures/grace/grace_negy.hdr b/cpp-projects/_resources/textures/grace/grace_negy.hdr new file mode 100644 index 0000000..78267c6 Binary files /dev/null and b/cpp-projects/_resources/textures/grace/grace_negy.hdr differ diff --git a/cpp-projects/_resources/textures/grace/grace_negz.hdr b/cpp-projects/_resources/textures/grace/grace_negz.hdr new file mode 100644 index 0000000..28338f1 Binary files /dev/null and b/cpp-projects/_resources/textures/grace/grace_negz.hdr differ diff --git a/cpp-projects/_resources/textures/grace/grace_posx.hdr 
b/cpp-projects/_resources/textures/grace/grace_posx.hdr new file mode 100644 index 0000000..6f3470b Binary files /dev/null and b/cpp-projects/_resources/textures/grace/grace_posx.hdr differ diff --git a/cpp-projects/_resources/textures/grace/grace_posy.hdr b/cpp-projects/_resources/textures/grace/grace_posy.hdr new file mode 100644 index 0000000..50b9324 Binary files /dev/null and b/cpp-projects/_resources/textures/grace/grace_posy.hdr differ diff --git a/cpp-projects/_resources/textures/grace/grace_posz.hdr b/cpp-projects/_resources/textures/grace/grace_posz.hdr new file mode 100644 index 0000000..021af43 Binary files /dev/null and b/cpp-projects/_resources/textures/grace/grace_posz.hdr differ diff --git a/cpp-projects/_resources/textures/hardwood2_diffuse.jpg b/cpp-projects/_resources/textures/hardwood2_diffuse.jpg new file mode 100644 index 0000000..7cc171b Binary files /dev/null and b/cpp-projects/_resources/textures/hardwood2_diffuse.jpg differ diff --git a/cpp-projects/_resources/textures/hardwood2_roughness.jpg b/cpp-projects/_resources/textures/hardwood2_roughness.jpg new file mode 100644 index 0000000..6ce0af2 Binary files /dev/null and b/cpp-projects/_resources/textures/hardwood2_roughness.jpg differ diff --git a/cpp-projects/_resources/textures/me_textile.png b/cpp-projects/_resources/textures/me_textile.png new file mode 100644 index 0000000..37a08b6 Binary files /dev/null and b/cpp-projects/_resources/textures/me_textile.png differ diff --git a/cpp-projects/_resources/textures/moss.png b/cpp-projects/_resources/textures/moss.png new file mode 100644 index 0000000..8431ff8 Binary files /dev/null and b/cpp-projects/_resources/textures/moss.png differ diff --git a/cpp-projects/_resources/textures/mybrick-color.png b/cpp-projects/_resources/textures/mybrick-color.png new file mode 100644 index 0000000..804a80e Binary files /dev/null and b/cpp-projects/_resources/textures/mybrick-color.png differ diff --git 
a/cpp-projects/_resources/textures/mybrick-height.png b/cpp-projects/_resources/textures/mybrick-height.png new file mode 100644 index 0000000..96f962d Binary files /dev/null and b/cpp-projects/_resources/textures/mybrick-height.png differ diff --git a/cpp-projects/_resources/textures/mybrick-normal.png b/cpp-projects/_resources/textures/mybrick-normal.png new file mode 100644 index 0000000..2982010 Binary files /dev/null and b/cpp-projects/_resources/textures/mybrick-normal.png differ diff --git a/cpp-projects/_resources/textures/ogre_diffuse.png b/cpp-projects/_resources/textures/ogre_diffuse.png new file mode 100644 index 0000000..98fd5f7 Binary files /dev/null and b/cpp-projects/_resources/textures/ogre_diffuse.png differ diff --git a/cpp-projects/_resources/textures/ogre_normalmap.png b/cpp-projects/_resources/textures/ogre_normalmap.png new file mode 100644 index 0000000..edf2091 Binary files /dev/null and b/cpp-projects/_resources/textures/ogre_normalmap.png differ diff --git a/cpp-projects/_resources/textures/pisa-hdr/pisa_negx.hdr b/cpp-projects/_resources/textures/pisa-hdr/pisa_negx.hdr new file mode 100644 index 0000000..b2544dc Binary files /dev/null and b/cpp-projects/_resources/textures/pisa-hdr/pisa_negx.hdr differ diff --git a/cpp-projects/_resources/textures/pisa-hdr/pisa_negy.hdr b/cpp-projects/_resources/textures/pisa-hdr/pisa_negy.hdr new file mode 100644 index 0000000..aad4218 Binary files /dev/null and b/cpp-projects/_resources/textures/pisa-hdr/pisa_negy.hdr differ diff --git a/cpp-projects/_resources/textures/pisa-hdr/pisa_negz.hdr b/cpp-projects/_resources/textures/pisa-hdr/pisa_negz.hdr new file mode 100644 index 0000000..349621e Binary files /dev/null and b/cpp-projects/_resources/textures/pisa-hdr/pisa_negz.hdr differ diff --git a/cpp-projects/_resources/textures/pisa-hdr/pisa_posx.hdr b/cpp-projects/_resources/textures/pisa-hdr/pisa_posx.hdr new file mode 100644 index 0000000..71486fc Binary files /dev/null and 
b/cpp-projects/_resources/textures/pisa-hdr/pisa_posx.hdr differ diff --git a/cpp-projects/_resources/textures/pisa-hdr/pisa_posy.hdr b/cpp-projects/_resources/textures/pisa-hdr/pisa_posy.hdr new file mode 100644 index 0000000..059c76a Binary files /dev/null and b/cpp-projects/_resources/textures/pisa-hdr/pisa_posy.hdr differ diff --git a/cpp-projects/_resources/textures/pisa-hdr/pisa_posz.hdr b/cpp-projects/_resources/textures/pisa-hdr/pisa_posz.hdr new file mode 100644 index 0000000..d664db2 Binary files /dev/null and b/cpp-projects/_resources/textures/pisa-hdr/pisa_posz.hdr differ diff --git a/cpp-projects/_resources/textures/pisa/pisa_negx.png b/cpp-projects/_resources/textures/pisa/pisa_negx.png new file mode 100644 index 0000000..d55788c Binary files /dev/null and b/cpp-projects/_resources/textures/pisa/pisa_negx.png differ diff --git a/cpp-projects/_resources/textures/pisa/pisa_negy.png b/cpp-projects/_resources/textures/pisa/pisa_negy.png new file mode 100644 index 0000000..8b1facd Binary files /dev/null and b/cpp-projects/_resources/textures/pisa/pisa_negy.png differ diff --git a/cpp-projects/_resources/textures/pisa/pisa_negz.png b/cpp-projects/_resources/textures/pisa/pisa_negz.png new file mode 100644 index 0000000..0aeb5b9 Binary files /dev/null and b/cpp-projects/_resources/textures/pisa/pisa_negz.png differ diff --git a/cpp-projects/_resources/textures/pisa/pisa_posx.png b/cpp-projects/_resources/textures/pisa/pisa_posx.png new file mode 100644 index 0000000..87f4d1c Binary files /dev/null and b/cpp-projects/_resources/textures/pisa/pisa_posx.png differ diff --git a/cpp-projects/_resources/textures/pisa/pisa_posy.png b/cpp-projects/_resources/textures/pisa/pisa_posy.png new file mode 100644 index 0000000..8338982 Binary files /dev/null and b/cpp-projects/_resources/textures/pisa/pisa_posy.png differ diff --git a/cpp-projects/_resources/textures/pisa/pisa_posz.png b/cpp-projects/_resources/textures/pisa/pisa_posz.png new file mode 100644 index 
0000000..341b8b8 Binary files /dev/null and b/cpp-projects/_resources/textures/pisa/pisa_posz.png differ diff --git a/cpp-projects/_resources/textures/smiley.png b/cpp-projects/_resources/textures/smiley.png new file mode 100644 index 0000000..abb680c Binary files /dev/null and b/cpp-projects/_resources/textures/smiley.png differ diff --git a/cpp-projects/_resources/textures/spot_texture.png b/cpp-projects/_resources/textures/spot_texture.png new file mode 100644 index 0000000..6309448 Binary files /dev/null and b/cpp-projects/_resources/textures/spot_texture.png differ diff --git a/cpp-projects/_resources/textures/wall.jpg b/cpp-projects/_resources/textures/wall.jpg new file mode 100644 index 0000000..4963198 Binary files /dev/null and b/cpp-projects/_resources/textures/wall.jpg differ diff --git a/cpp-projects/base-app/LICENSE b/cpp-projects/base-app/LICENSE new file mode 100644 index 0000000..fc6ced1 --- /dev/null +++ b/cpp-projects/base-app/LICENSE @@ -0,0 +1,24 @@ + + +Toolbox-base +MIT License + +Copyright (c) 2018 Florian Lance + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/cpp-projects/base-app/base-app.pro b/cpp-projects/base-app/base-app.pro new file mode 100644 index 0000000..d60ab6f --- /dev/null +++ b/cpp-projects/base-app/base-app.pro @@ -0,0 +1,107 @@ + +# /******************************************************************************* +# ** Toolset-base-app ** +# ** MIT License ** +# ** Copyright (c) [2018] [Florian Lance] ** +# ** ** +# ** Permission is hereby granted, free of charge, to any person obtaining a ** +# ** copy of this software and associated documentation files (the "Software"), ** +# ** to deal in the Software without restriction, including without limitation ** +# ** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +# ** and/or sell copies of the Software, and to permit persons to whom the ** +# ** Software is furnished to do so, subject to the following conditions: ** +# ** ** +# ** The above copyright notice and this permission notice shall be included in ** +# ** all copies or substantial portions of the Software. ** +# ** ** +# ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +# ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +# ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +# ** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +# ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +# ** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +# ** DEALINGS IN THE SOFTWARE. 
** +# ** ** +# ********************************************************************************/ + +####################################### repo +TOOLBOX_REPOSITORY_DIR = $$PWD"/../.." + +####################################### PRI +# defines compiling options +include(../ts-settings.pri) +# defines projects paths and variables +include(../ts-projects.pri) +# defines thirdparty includes and libs +include(../ts-thirdparty.pri) + +####################################### TARGET +equals(CFG, "debug"){ + TARGET = based-app +} +equals(CFG, "release"){ + TARGET = base-app +} + +####################################### TEMPLATE +TEMPLATE = app +CONFIG += console + +####################################### BUILD FILES +OBJECTS_DIR = $$BASE_OBJ +DESTDIR = $$BASE_DEST + +####################################### CONFIG +CONFIG -= qt + +####################################### INCLUDES +INCLUDEPATH += \ + # tool + $$BASE_INCLUDES \ + # thirdparty + $$ASSIMP_INCLUDES \ + $$TURBOJPG_INCLUDES \ + $$FASTPFOR_INCLUDES \ + $$OPENCV_INCLUDES \ + $$KINECT2_INCLUDES \ + $$KINECT4_INCLUDES \ + $$BOOST_INCLUDES \ + $$LIBSOUNDIO_INCLUDES \ + $$LIBUSB_INCLUDES \ + $$EIGEN_INCLUDES\ + $$OPEN3D_INCLUDES \ + $$TURBOPFOR_INCLUDES \ + +####################################### LIBRARIES + +PRE_TARGETDEPS += \ + # tool + $$BASE_LIB_DEP\ + +LIBS += \ + # tool + $$BASE_LIB\ + # thirdparty + $$ASSIMP_LIBS \ + $$TURBOJPG_LIBS \ + $$FASTPFOR_LIBS \ + $$OPENCV_LIBS \ + $$KINECT2_LIBS \ + $$KINECT4_LIBS \ + $$BOOST_LIBS \ + $$LIBSOUNDIO_LIBS \ + $$LIBUSB_LIBS \ + $$EIGEN_LIBS\ + $$OPEN3D_LIBS \ + $$TURBOPFOR_LIBS \ + $$WINDOWS_LIBS \ + +####################################### PROJECT FILES + +HEADERS += \ + +SOURCES += \ + # main + base_main.cpp \ + + diff --git a/cpp-projects/base-app/base_main.cpp b/cpp-projects/base-app/base_main.cpp new file mode 100644 index 0000000..1620853 --- /dev/null +++ b/cpp-projects/base-app/base_main.cpp @@ -0,0 +1,294 @@ 
+/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +// std +#include +#include +#include + +// kinect4 +#include + +// turbojpg +#include + +// local +#include "utility/logger.hpp" +#include "utility/benchmark.hpp" +#include "utility/io_file.hpp" +#include "camera/kinect2/k2_device.hpp" +#include "camera/kinect4/k4_device.hpp" +#include "camera/kinect4/k4_device_manager.hpp" + +#include "data/integers_encoder.hpp" +#include "graphics/texture.hpp" +#include "files/cloud_io.hpp" +#include "camera/kinect4/k4_player.hpp" +#include "camera/kinect4/k4_frame_compressor.hpp" +#include "geometry/voxel_grid.hpp" + +#include "exvr/ex_experiment.hpp" + +using namespace tool; +using namespace tool::camera; + +void test_k4device(){ + + std::puts("### Init device.\n"); + tool::camera::K4Device device; + std::puts("### Open device.\n"); + if(device.open(0)){ + tool::camera::K4ConfigSettings config; + config.idDevice = 0; + config.mode = tool::camera::K4Mode::Cloud_640x576_NV12; + config.synchronizeColorAndDepth = true; + config.delayBetweenColorAndDepthUsec = 0; + config.synchMode = tool::camera::K4SynchronisationMode::Standalone; + config.subordinateDelayUsec = 0; + config.disableLED = false; + + std::puts("### Start cameras.\n"); + device.start_cameras(config); + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + std::puts("### Stop cameras.\n"); + device.stop_cameras(); + + std::puts("### Close device.\n"); + device.close(); + }else{ + std::puts("### Device opening failure.\n"); + } + + std::puts("End tests.\n"); + tool::Bench::display(); + std::puts("End tests.\n"); +} + + +void test_raw_k4device(){ + + // open device + std::puts("### Open device.\n"); + k4a_device_t handle = nullptr; + k4a_result_t result = k4a_device_open(0, &handle); + if (result == K4A_RESULT_FAILED){ + std::puts("### Fail to open device.\n"); + return; + } + + k4a_device_configuration_t configuration; + configuration.color_format = 
k4a_image_format_t::K4A_IMAGE_FORMAT_COLOR_BGRA32; + configuration.color_resolution = k4a_color_resolution_t::K4A_COLOR_RESOLUTION_720P; + configuration.depth_delay_off_color_usec = 0; + configuration.subordinate_delay_off_master_usec = 0; + configuration.synchronized_images_only = false; + configuration.camera_fps = k4a_fps_t::K4A_FRAMES_PER_SECOND_30; + configuration.wired_sync_mode = k4a_wired_sync_mode_t::K4A_WIRED_SYNC_MODE_STANDALONE; + configuration.depth_mode = k4a_depth_mode_t::K4A_DEPTH_MODE_NFOV_UNBINNED; + + std::puts("### Start cameras.\n"); + result = k4a_device_start_cameras(handle, &configuration); + if (result == K4A_RESULT_SUCCEEDED){ + + int countLoops = 100; + + + for(int ii = 0; ii < countLoops; ++ii){ + + k4a_capture_t capture = nullptr; + + std::puts("### Get capture.\n"); + auto wresult = k4a_device_get_capture(handle, &capture, 300); + if(wresult == k4a_wait_result_t::K4A_WAIT_RESULT_SUCCEEDED){ + std::puts("SUCCEEDED.\n"); + + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + std::puts("### Release capture.\n"); + k4a_capture_release(capture); + + }else if(wresult == k4a_wait_result_t::K4A_WAIT_RESULT_FAILED){ + std::puts("FAILED.\n"); + }else if(wresult == k4a_wait_result_t::K4A_WAIT_RESULT_TIMEOUT){ + std::puts("TIMEOUT.\n"); + } + + + } + + std::puts("### Stop cameras.\n"); + k4a_device_stop_cameras(handle); + + }else{ + std::puts("### Fail to start device.\n"); + } + + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + + std::puts("### Close device.\n"); + k4a_device_close(handle); + handle = nullptr; +} + + +auto test_compress_frame() -> void{ + + K4DeviceManager m; + + m.initialize(); + m.update_device_list(); + + K4DeviceSettings ds; + ds.actionsS.openCamera = true; + ds.actionsS.startDevice = true; + + ds.configS.idDevice = 2; + ds.configS.mode = K4Mode::Cloud_640x576_NV12; + ds.configS.subordinateDelayUsec = 0; + + ds.dataS.sendCloud = true; + ds.dataS.sendColor = true; + + m.update_settings(ds); + + 
K4FrameUncompressor uc; + + + m.new_compressed_frame_signal.connect([&](std::shared_ptr cFrame){ + std::cout << "cframe " << cFrame->idCapture << "\n"; + + std::vector bufferToSend; + size_t totalDataSizeBytes = cFrame->convert_to_data(bufferToSend); + +// std::cout << totalDataSizeBytes << "\n" << std::flush; + auto cFrame2 = std::make_shared(); + cFrame2->init_from_data(bufferToSend.data()); + + K4Frame frame; + uc.uncompress(cFrame2.get(), frame); + + std::cout << "frame " << frame.idCapture << "\n" << std::flush; + }); + + + for(size_t ii = 0; ii < 100; ++ii){ + std::this_thread::sleep_for(std::chrono::milliseconds(33)); + } + +} + + +void kinect2_test(){ + +// using namespace std::chrono_literals; +// camera::K2Device kinect; +// if(!kinect.open(camera::K2FrameRequest::compressed_color_mesh)){ +// std::cerr << "Cannot init kinect 2\n"; +// return; +// } + +// std::cout << "init\n"; +// std::this_thread::sleep_for(1000ms); +// std::cout << "try to grab\n"; + +// for(int ii = 0; ii < 10000; ++ii){ +// if(auto newFrame = kinect.get_kinect_data(); newFrame.has_value()){ +// std::cout << "-"; +// }else{ +// std::cout << "E"; +// } +// std::this_thread::sleep_for((1000/33)*1ms); +// } + +// std::cout << "close\n"; +// kinect.close(); +} + + + +int main(){ + + using namespace std::chrono; + + K4Player player; + player.load_from_file("D:/kvidtest.kvid"); + + + auto duration = player.video()->duration_ms(); + + + auto ffTs = duration_cast(nanoseconds(player.video()->first_frame_capture_timestamp().value())); + for(size_t idC = 0; idC < player.video()->nb_cameras(); ++idC){ + std::cout << player.video()->get_camera_data(idC)->frames.size() << "\n"; +// auto ff = duration_cast(nanoseconds(player.video()->get_camera_data(idC)->first_frame_capture_timestamp().value())); +// auto sec = duration_cast(nanoseconds(player.video()->get_camera_data(idC)->frames[1]->afterCaptureTS)); +// std::cout << "IDC " << idC << " " << ffTs << " " << ff << " " << sec << "\n"; + } + + + return 
0; + + for(size_t ii = 0; ii < 10; ++ii){ + auto time = (ii / 10.0)* duration; + + for(size_t idC = 0; idC < player.video()->nb_cameras(); ++idC){ + auto id = player.video()->closest_frame_id_from_time(idC, time); + std::cout << "time " << time << " idC " << idC << " " << id.value() << "\n"; + } + } + + + return 0; + player.load_from_file("D:/_debug/adriel2.kvid"); + + player.set_current_time(1000); + player.update_time(); + player.remove_until_current_frame(); + player.update_time(); + + player.merge_cameras(0.005f, {-20.f,-20.f,-20.f}, {+20.f,+20.f,+20.f}); + player.save_to_file("D:/--.kvid"); + + + duration = player.video()->duration_ms(); + + for(size_t ii = 0; ii < 1000; ++ii){ + auto time = (ii / 1000.0)* duration; + auto id = player.video()->closest_frame_id_from_time(0, time); + if(id.has_value()){ + if(auto frame = player.video()->get_compressed_frame(0, id.value()).lock(); frame != nullptr){ + std::cout << "time " << time << " id " << id.value() << " " << frame->cloud_vertices_size() << "\n"; + } + + }else{ + std::cout << "ERROR " << id.error() << "\n"; + } + } + + return 0; +} + + + diff --git a/cpp-projects/base-test/LICENSE b/cpp-projects/base-test/LICENSE new file mode 100644 index 0000000..2145be7 --- /dev/null +++ b/cpp-projects/base-test/LICENSE @@ -0,0 +1,24 @@ + + +tool-test +MIT License + +Copyright (c) 2018 Florian Lance + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/cpp-projects/base-test/base-test.pro b/cpp-projects/base-test/base-test.pro new file mode 100644 index 0000000..a0dfd18 --- /dev/null +++ b/cpp-projects/base-test/base-test.pro @@ -0,0 +1,118 @@ + +# /******************************************************************************* +# ** Toolset-test ** +# ** MIT License ** +# ** Copyright (c) [2018] [Florian Lance] ** +# ** ** +# ** Permission is hereby granted, free of charge, to any person obtaining a ** +# ** copy of this software and associated documentation files (the "Software"), ** +# ** to deal in the Software without restriction, including without limitation ** +# ** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +# ** and/or sell copies of the Software, and to permit persons to whom the ** +# ** Software is furnished to do so, subject to the following conditions: ** +# ** ** +# ** The above copyright notice and this permission notice shall be included in ** +# ** all copies or substantial portions of the Software. ** +# ** ** +# ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +# ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +# ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +# ** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +# ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +# ** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +# ** DEALINGS IN THE SOFTWARE. ** +# ** ** +# ********************************************************************************/ + +####################################### repo +TOOLBOX_REPOSITORY_DIR = $$PWD"/../.." + +####################################### PRI +# defines compiling options +include(../ts-settings.pri) +# defines projects paths and variables +include(../ts-projects.pri) +# defines thirdparty includes and libs +include(../ts-thirdparty.pri) + +####################################### TARGET +equals(CFG, "debug"){ + TARGET = base-testd +} +equals(CFG, "release"){ + TARGET = base-test +} + +####################################### TEMPLATE +TEMPLATE = app +CONFIG += console + +####################################### BUILD FILES +OBJECTS_DIR = $$BASE_OBJ +DESTDIR = $$BASE_DEST + +####################################### CONFIG +CONFIG -= qt + +####################################### INCLUDES +INCLUDEPATH += \ + # base + $$BASE_INCLUDES\ + # opengl-utility + $$OPENGL_UTILITY_INCLUDES\ + # thirdparty + $$GLEW_INCLUDES\ + $$GLFW_INCLUDES\ + $$SFML_INCLUDES\ + $$ASSIMP_INCLUDES\ + $$GLM_INCLUDES\ + $$KINECT4_INCLUDES\ + $$TURBOJPG_INCLUDES\ + $$FASTPFOR_INCLUDES\ + $$OPEN3D_INCLUDES \ + $$LIBSOUNDIO_INCLUDES \ + $$TURBOPFOR_INCLUDES \ + +####################################### LIBRARIES + +PRE_TARGETDEPS += \ + # tool + $$BASE_LIB_DEP\ + +LIBS +=\ + # tool + $$BASE_LIB\ + $$OPENGL_UTILITY_LIB \ + # thirdparty + $$GLEW_LIBS \ + $$GLFW_LIBS \ + $$SFML_LIBS \ + $$ASSIMP_LIBS\ + $$GLM_LIBS\ + $$KINECT4_LIBS \ + $$TURBOJPG_LIBS\ + $$FASTPFOR_LIBS\ + $$OPEN3D_LIBS \ + $$LIBSOUNDIO_LIBS \ + $$TURBOPFOR_LIBS \ + + +####################################### PROJECT FILES + +SOURCES += \ +# camera_tests.cpp \ 
+# cloud_tests.cpp \ +# exp_tests.cpp \ + main.cpp \ + geometry_tests.cpp \ + +HEADERS += \ + glm_utility.hpp + +DISTFILES += \ + CREDITS.md \ + LICENSE \ + + + + diff --git a/cpp-projects/base-test/camera_tests.cpp b/cpp-projects/base-test/camera_tests.cpp new file mode 100644 index 0000000..f5158d0 --- /dev/null +++ b/cpp-projects/base-test/camera_tests.cpp @@ -0,0 +1,801 @@ + +/******************************************************************************* +** tool-test ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/
+
+// NOTE(review): in this dump every <...> include target and most template argument
+// lists are missing (e.g. `#include`, `std::vector x`, `static_cast(...)`). They are
+// kept exactly as found; restore them from the repository before compiling.
+
+// std
+#include
+#include
+#include
+#include
+
+#include
+#include "data/FastDifferentialCoding/fastdelta.h"
+#include "TurboPFor/vp4.h"
+
+// base
+#include "thirdparty/catch/catch.hpp"
+#include "graphics/texture.hpp"
+#include "camera/kinect4/k4_data.hpp"
+#include "utility/logger.hpp"
+#include "utility/benchmark.hpp"
+#include "data/integers_encoder.hpp"
+#include "files/cloud_io.hpp"
+
+#include "camera/kinect4/k4_volumetric_full_video_manager.hpp"
+
+#include "camera/kinect4/k4_frame_compressor.hpp"
+#include "camera/kinect4/k4_frame_uncompressor.hpp"
+
+
+using namespace tool;
+using namespace tool::camera;
+
+
+// Benchmark of lossless integer codecs (TurboPFor p4n* families, scalar and 128-bit
+// SIMD) and of TurboJPEG colour compression on the frames of a recorded .kvid video.
+// For every frame the depth/cloud values are repacked in several layouts (16/32/64-bit,
+// planar "xyz", interleaved "uxyz", deltas), each layout is encoded and decoded, the
+// round trip is required to be lossless, and the mean encoded size per method is
+// printed sorted at the end.
+TEST_CASE("Kinect4 camera"){
+
+// CloudFrameCompressor cc;
+// FullFrameCompressor fc;
+// CloudFrameUncompressor cu;
+// FullFrameUncompressor fu;
+
+// CloudFrame dd;
+// CompressedCloudFrame f;
+// cu.uncompress(&f, dd);
+
+    K4VolumetricFullVideoResource video;
+
+// const std::string filePath = "E:/ttt.kvid";
+    const std::string filePath = "D:/compress_test.kvid";
+    REQUIRE(video.load_from_file(filePath));
+
+    K4VolumetricFullVideoManager manager(&video);
+
+    auto jpegCompressor = tjInitCompress();
+    tjhandle jpegUncompressor = tjInitDecompress();
+    unsigned char *tjCompressedImage = nullptr;
+    data::IntegersEncoder integerCompressor;
+    // NOTE(review): these two buffers are indexed in the frame loop below but nothing
+    // visible in this test ever resizes or fills them -- confirm where the
+    // uncompressed colour/depth data is supposed to come from (fFrame? manager?).
+    std::vector uncompressedColorFrame;
+    std::vector uncompressedDepthFrame;
+
+    // method name -> one (compression ratio, encoded byte count) entry per frame
+    std::map>> scores;
+
+    // Runs one encoder/decoder pair on `input`, timing each call with Bench under its
+    // own label, requires an exact round trip and appends (ratio, encoded bytes) to
+    // scores["<name>-<encLabel>"]. The ratio factor is 1/sizeof(element), i.e. the
+    // 0.5f / 0.25f / 0.125f constants formerly repeated by hand for 16/32/64 bits.
+    auto round_trip = [&](const std::string &name, const char *encLabel, const char *decLabel,
+                          auto encode, auto decode,
+                          auto *input, size_t insize, unsigned char *encoded, auto *decoded){
+
+        Bench::start(encLabel);
+        const size_t encodedBytesNb = encode(input, insize, encoded); Bench::stop();
+        Bench::start(decLabel);
+        const size_t decodedBytesNb = decode(encoded, insize, decoded); Bench::stop();
+
+        // encoder and decoder must report the same byte count and the data must match
+        REQUIRE(encodedBytesNb == decodedBytesNb);
+        REQUIRE(std::equal(input, input + insize, decoded));
+
+        const float rawBytesPerElementInv = 1.f/sizeof(*input);
+        scores[name + "-" + encLabel].emplace_back(rawBytesPerElementInv*encodedBytesNb/insize, encodedBytesNb);
+    };
+
+    // scalar codecs, 16-bit elements
+    // (the p4ndec16 decode used to be timed under the "p4nenc16" label)
+    auto algos_16 = [&](std::string name, std::uint16_t *inputData16, size_t insize, unsigned char *encoded, std::uint16_t *decoded16){
+        round_trip(name, "p4nenc16",   "p4ndec16",   p4nenc16,   p4ndec16,   inputData16, insize, encoded, decoded16);
+        round_trip(name, "p4ndenc16",  "p4nddec16",  p4ndenc16,  p4nddec16,  inputData16, insize, encoded, decoded16);
+        round_trip(name, "p4nd1enc16", "p4nd1dec16", p4nd1enc16, p4nd1dec16, inputData16, insize, encoded, decoded16);
+        round_trip(name, "p4nzenc16",  "p4nzdec16",  p4nzenc16,  p4nzdec16,  inputData16, insize, encoded, decoded16);
+    };
+
+    // scalar codecs, 32-bit elements
+    auto algos_32 = [&](std::string name, std::uint32_t *inputData32, size_t insize, unsigned char *encoded, std::uint32_t *decoded32){
+        round_trip(name, "p4nenc32",   "p4ndec32",   p4nenc32,   p4ndec32,   inputData32, insize, encoded, decoded32);
+        round_trip(name, "p4ndenc32",  "p4nddec32",  p4ndenc32,  p4nddec32,  inputData32, insize, encoded, decoded32);
+        round_trip(name, "p4nd1enc32", "p4nd1dec32", p4nd1enc32, p4nd1dec32, inputData32, insize, encoded, decoded32);
+        round_trip(name, "p4nzenc32",  "p4nzdec32",  p4nzenc32,  p4nzdec32,  inputData32, insize, encoded, decoded32);
+    };
+
+    // scalar codecs, 64-bit elements
+    auto algos_64 = [&](std::string name, std::uint64_t *inputData64, size_t insize, unsigned char *encoded, std::uint64_t *decoded64){
+        round_trip(name, "p4nenc64",   "p4ndec64",   p4nenc64,   p4ndec64,   inputData64, insize, encoded, decoded64);
+        round_trip(name, "p4ndenc64",  "p4nddec64",  p4ndenc64,  p4nddec64,  inputData64, insize, encoded, decoded64);
+        round_trip(name, "p4nd1enc64", "p4nd1dec64", p4nd1enc64, p4nd1dec64, inputData64, insize, encoded, decoded64);
+        round_trip(name, "p4nzenc64",  "p4nzdec64",  p4nzenc64,  p4nzdec64,  inputData64, insize, encoded, decoded64);
+    };
+
+    // 128-bit SIMD codecs, 16-bit elements
+    auto simd128_algos_16 = [&](std::string name, std::uint16_t *inputData16, size_t insize, unsigned char *encoded, std::uint16_t *decoded16){
+        round_trip(name, "p4nenc128v16",   "p4ndec128v16",   p4nenc128v16,   p4ndec128v16,   inputData16, insize, encoded, decoded16);
+        round_trip(name, "p4ndenc128v16",  "p4nddec128v16",  p4ndenc128v16,  p4nddec128v16,  inputData16, insize, encoded, decoded16);
+        round_trip(name, "p4nd1enc128v16", "p4nd1dec128v16", p4nd1enc128v16, p4nd1dec128v16, inputData16, insize, encoded, decoded16);
+        round_trip(name, "p4nzenc128v16",  "p4nzdec128v16",  p4nzenc128v16,  p4nzdec128v16,  inputData16, insize, encoded, decoded16);
+    };
+
+    // 128-bit SIMD codecs, 32-bit elements
+    auto simd128_algos_32 = [&](std::string name, std::uint32_t *inputData32, size_t insize, unsigned char *encoded, std::uint32_t *decoded32){
+        // NOTE(review): this early return was already present; it makes every call
+        // below unreachable, so no "*128v32" score is ever recorded. Presumably a
+        // deliberate temporary switch-off -- confirm before removing.
+        return;
+        round_trip(name, "p4nenc128v32",   "p4ndec128v32",   p4nenc128v32,   p4ndec128v32,   inputData32, insize, encoded, decoded32);
+        round_trip(name, "p4ndenc128v32",  "p4nddec128v32",  p4ndenc128v32,  p4nddec128v32,  inputData32, insize, encoded, decoded32);
+        round_trip(name, "p4nd1enc128v32", "p4nd1dec128v32", p4nd1enc128v32, p4nd1dec128v32, inputData32, insize, encoded, decoded32);
+        round_trip(name, "p4nzenc128v32",  "p4nzdec128v32",  p4nzenc128v32,  p4nzdec128v32,  inputData32, insize, encoded, decoded32);
+    };
+
+    // 128-bit SIMD codec, 64-bit elements
+    // (the ratio was previously scaled by the 32-bit factor 0.25f instead of 0.125f)
+    auto simd128_algos_64 = [&](std::string name, std::uint64_t *inputData64, size_t insize, unsigned char *encoded, std::uint64_t *decoded64){
+        round_trip(name, "p4nenc128v64", "p4ndec128v64", p4nenc128v64, p4ndec128v64, inputData64, insize, encoded, decoded64);
+    };
+
+// 256-bit SIMD codecs, 32-bit elements (disabled in the original)
+// auto simd256_algos_32 = [&](std::string name, std::uint32_t *inputData32, size_t insize, unsigned char *encoded, std::uint32_t *decoded32){
+//     round_trip(name, "p4nenc256v32",   "p4ndec256v32",   p4nenc256v32,   p4ndec256v32,   inputData32, insize, encoded, decoded32);
+//     round_trip(name, "p4ndenc256v32",  "p4nddec256v32",  p4ndenc256v32,  p4nddec256v32,  inputData32, insize, encoded, decoded32);
+//     round_trip(name, "p4nd1enc256v32", "p4nd1dec256v32", p4nd1enc256v32, p4nd1dec256v32, inputData32, insize, encoded, decoded32);
+//     round_trip(name, "p4nzenc256v32",  "p4nzdec256v32",  p4nzenc256v32,  p4nzdec256v32,  inputData32, insize, encoded, decoded32);
+// };
+
+
+    // Runs the scalar codecs on the data as given, then zero-pads the input to a
+    // multiple of 128 elements for the SIMD codecs, and finally restores the
+    // caller's vector to its original length.
+    auto process16 = [&](std::string name, std::vector &inputData16, std::vector &encoded, std::vector &decoded16){
+
+        size_t beforePaddingSize = inputData16.size();
+
+        size_t insize = inputData16.size();
+        encoded.resize(insize*2);
+        decoded16.resize(insize);
+        algos_16(name, inputData16.data(), insize, encoded.data(), decoded16.data());
+
+        if(inputData16.size()%128 != 0){
+            inputData16.resize(inputData16.size() + (128-(inputData16.size()%128)));
+            REQUIRE(inputData16.size()%128==0);
+            std::fill(inputData16.begin() + beforePaddingSize, inputData16.end(), 0);
+            insize = inputData16.size();
+            encoded.resize(insize*2);
+            decoded16.resize(insize);
+        }
+        simd128_algos_16(name, inputData16.data(), insize, encoded.data(), decoded16.data());
+
+        if(beforePaddingSize != inputData16.size()){
+            inputData16.resize(beforePaddingSize);
+        }
+    };
+
+    // Same as process16 for 32-bit data.
+    // NOTE(review): unlike process16/process64 the padded input is not shrunk back
+    // here (that code is commented out below), so the caller's vector stays padded.
+    auto process32 = [&](std::string name, std::vector &inputData32, std::vector &encoded, std::vector &decoded32){
+
+        size_t beforePaddingSize = inputData32.size();
+        size_t insize = inputData32.size();
+        encoded.resize(insize*4);
+        decoded32.resize(insize);
+        algos_32(name, inputData32.data(), insize, encoded.data(), decoded32.data());
+
+        if(inputData32.size()%128 != 0){
+            inputData32.resize(inputData32.size() + (128-(inputData32.size()%128)));
+            REQUIRE(inputData32.size()%128==0);
+            std::fill(inputData32.begin() + beforePaddingSize, inputData32.end(), 0);
+            insize = inputData32.size();
+            encoded.resize(insize*4);
+            decoded32.resize(insize);
+        }
+
+        simd128_algos_32(name, inputData32.data(), insize, encoded.data(), decoded32.data());
+
+// if(inputData32.size()%256 != 0){
+//     inputData32.resize(inputData32.size() + (256-(inputData32.size()%256)));
+//     REQUIRE(inputData32.size()%256==0);
+//     std::fill(inputData32.begin() + beforePaddingSize, inputData32.end(), 0);
+//     insize = inputData32.size();
+//     encoded.resize(insize*4);
+//     decoded32.resize(insize);
+// }
+// simd256_algos_32(name, inputData32.data(), insize, encoded.data(), decoded32.data());
+
+
+// if(beforePaddingSize != inputData32.size()){
+//     inputData32.resize(beforePaddingSize);
+//     encoded.resize(inputData32.size()*4+1024);
+// }
+
+// Bench::start("integerCompressor");
+// size_t sizeCompressed = integerCompressor.encode(
+//     inputData32.data(), inputData32.size(),
+//     reinterpret_cast(encoded.data()), beforePaddingSize + 1024);
+// Bench::stop();
+// REQUIRE(sizeCompressed != 0);
+// REQUIRE(sizeCompressed < inputData32.size());
+
+// scores[name + "-integerCompressor"].emplace_back(1.f *sizeCompressed/insize, sizeCompressed*4);
+    };
+
+    // Same as process16 for 64-bit data.
+    auto process64 = [&](std::string name, std::vector &inputData64, std::vector &encoded, std::vector &decoded64){
+
+        size_t beforePaddingSize = inputData64.size();
+        size_t insize = inputData64.size();
+        encoded.resize(insize*8);
+        decoded64.resize(insize);
+        algos_64(name, inputData64.data(), insize, encoded.data(), decoded64.data());
+
+        if(inputData64.size()%128 != 0){
+            inputData64.resize(inputData64.size() + (128-(inputData64.size()%128)));
+            REQUIRE(inputData64.size()%128==0);
+            std::fill(inputData64.begin() + beforePaddingSize, inputData64.end(), 0);
+            // as in process16/process32: without refreshing these three the SIMD pass
+            // still ran on the unpadded element count and the padding had no effect
+            insize = inputData64.size();
+            encoded.resize(insize*8);
+            decoded64.resize(insize);
+        }
+        simd128_algos_64(name, inputData64.data(), insize, encoded.data(), decoded64.data());
+
+        if(beforePaddingSize != inputData64.size()){
+            inputData64.resize(beforePaddingSize);
+        }
+    };
+
+    std::cout << "start " << video.nb_frames(0) << "\n";
+    K4FullFrame fFrame;
+    // at most 79 frames are benchmarked, and never more than the video contains
+    // (the loop used to run to 79 unconditionally)
+    for(size_t ii = 0; ii < 79 && ii < static_cast<size_t>(video.nb_frames(0)); ++ii){
+
+        // use only lossless for depth/cloud values
+        auto frame = video.get_full_frame(ii,0);
+
+        std::cout << "frame " << ii << " " << frame->colorWidth << " " << frame->colorHeight << "\n";
+
+        Bench::start("uncompress_frame");
+        REQUIRE (manager.uncompress_frame(frame, fFrame));
+        Bench::stop();
+
+
+        geo::Pt3 *cloud = manager.cloud_data();
+        auto cloudSize = (frame->depthWidth*frame->depthHeight);
+
+        std::cout << "cloudSize " << cloudSize << " " << frame->depthWidth << " " << frame->depthHeight << "\n";
+
+
+        // indices = 0..cloudSize-1, used as the iteration range of the for_each calls
+        std::vector indices(cloudSize);
+        std::iota(indices.begin(), indices.end(), 0);
+
+        // vIndices[0..idV) = positions of the pixels whose depth is valid
+        std::vector vIndices;
+        vIndices.resize(cloudSize);
+
+        size_t idV = 0;
+        for(size_t jj = 0; jj < cloudSize; ++jj){
+            if(uncompressedDepthFrame[jj] != k4_invalid_depth_value){
+                vIndices[idV++] = jj;
+            }
+        }
+
+        // NOTE(review): only the first idV entries are written before the whole
+        // cloudSize-long buffer is sorted.
+        std::vector packedBuffer;
+        packedBuffer.resize(cloudSize);
+
+        auto color = reinterpret_cast*>(uncompressedColorFrame.data());
+        Bench::start("pack");
+        for_each(std::execution::par_unseq, std::begin(indices), std::begin(indices) + idV, [&](size_t id){
+            packedBuffer[id] = K4PackedVoxel::pack64(cloud[vIndices[id]], color[vIndices[id]]);
+        });
+        std::sort(std::execution::par_unseq, std::begin(packedBuffer), std::end(packedBuffer));
+        Bench::stop();
+
+
+        std::vector depth16(cloudSize);
+        std::vector depth32(cloudSize);
+        std::vector ddepth32(cloudSize);
+        std::vector depth64(cloudSize);
+
+        // xyz*: planar layout (all x, then all y, then all z)
+        // uxyz*: interleaved layout (x,y,z per point); d*: deltas of the 32-bit arrays
+        std::vector xyz16, uxyz16;
+        std::vector xyz32, uxyz32, dxyz32, duxyz32;
+        std::vector xyz64, uxyz64;
+
+        std::vector inputColorData(cloudSize*3);
+        std::fill(inputColorData.begin(), inputColorData.end(), 0);
+
+        xyz16.resize(idV*3);
+        xyz32.resize(idV*3);
+        xyz64.resize(idV*3);
+
+        uxyz16.resize(idV*3);
+        uxyz32.resize(idV*3);
+        uxyz64.resize(idV*3);
+
+        dxyz32.resize(idV*3);
+        duxyz32.resize(idV*3);
+
+        Bench::start("fill arrays");
+
+        // check cloud values
+        bool validValues = true;
+        for(size_t jj= 0; jj < cloudSize; ++jj){
+
+            if(uncompressedDepthFrame[jj] < 0) [[unlikely]] {
+                validValues = false;
+                break;
+            }else if(uncompressedDepthFrame[jj] > 10000) [[unlikely]] {
+                validValues = false;
+                break;
+            }
+            depth16[jj] = uncompressedDepthFrame[jj];
+            depth32[jj] = uncompressedDepthFrame[jj];
+            depth64[jj] = uncompressedDepthFrame[jj];
+        }
+        REQUIRE(validValues);
+
+        // planar arrays: x and y are shifted by +4096 before being stored unsigned
+        validValues = true;
+        for_each(std::execution::unseq, std::begin(indices), std::begin(indices) + idV, [&](size_t id){
+
+            if(cloud[vIndices[id]].x() < -4096) [[unlikely]] {
+                validValues = false;
+                return;
+            }else if(cloud[vIndices[id]].y() < -4096) [[unlikely]] {
+                validValues = false;
+                return;
+            }else if(cloud[vIndices[id]].z() < 0) [[unlikely]] {
+                validValues = false;
+                return;
+            }
+
+            xyz16[id] = static_cast(static_cast(cloud[vIndices[id]].x())+4096);
+            xyz16[idV + id] = static_cast(static_cast(cloud[vIndices[id]].y())+4096);
+            xyz16[2*idV + id] = static_cast(cloud[vIndices[id]].z());
+
+            xyz32[id] = static_cast(static_cast(cloud[vIndices[id]].x())+4096);
+            xyz32[idV + id] = static_cast(static_cast(cloud[vIndices[id]].y())+4096);
+            xyz32[2*idV + id] = static_cast(cloud[vIndices[id]].z());
+
+            if(static_cast(cloud[vIndices[id]].x()) != static_cast(xyz32[id]) - 4096){
+                validValues = false;
+                return;
+            }
+            if(static_cast(cloud[vIndices[id]].y()) != static_cast(xyz32[idV + id]) - 4096){
+                validValues = false;
+                return;
+            }
+            if(static_cast(cloud[vIndices[id]].z()) != static_cast(xyz32[2*idV + id])){
+                validValues = false;
+                return;
+            }
+
+            xyz64[id] = static_cast(static_cast(cloud[vIndices[id]].x())+4096);
+            xyz64[idV + id] = static_cast(static_cast(cloud[vIndices[id]].y())+4096);
+            xyz64[2*idV + id] = static_cast(cloud[vIndices[id]].z());
+
+        });
+        REQUIRE(validValues);
+
+        // interleaved arrays + colour bytes of the valid points
+        for_each(std::execution::par_unseq, std::begin(indices), std::begin(indices) + idV, [&](size_t id){
+
+            uxyz16[id*3+0] = static_cast(static_cast(cloud[vIndices[id]].x())+4096);
+            uxyz16[id*3+1] = static_cast(static_cast(cloud[vIndices[id]].y())+4096);
+            uxyz16[id*3+2] = static_cast(cloud[vIndices[id]].z());
+
+            uxyz32[id*3+0] = static_cast(cloud[vIndices[id]].x()+4096);
+            uxyz32[id*3+1] = static_cast(cloud[vIndices[id]].y()+4096);
+            uxyz32[id*3+2] = static_cast(cloud[vIndices[id]].z());
+
+            uxyz64[id*3+0] = static_cast(cloud[vIndices[id]].x()+4096);
+            uxyz64[id*3+1] = static_cast(cloud[vIndices[id]].y()+4096);
+            uxyz64[id*3+2] = static_cast(cloud[vIndices[id]].z());
+
+            auto c = reinterpret_cast*>(uncompressedColorFrame.data());
+            inputColorData[id*3+0] = c[vIndices[id]].x();
+            inputColorData[id*3+1] = c[vIndices[id]].y();
+            inputColorData[id*3+2] = c[vIndices[id]].z();
+        });
+
+        compute_deltas(xyz32.data(), idV*3, dxyz32.data(), 0);
+        compute_deltas(uxyz32.data(), idV*3, duxyz32.data(), 0);
+        compute_deltas(depth32.data(), cloudSize, ddepth32.data(), 0);
+
+        Bench::stop();
+
+        // verify that the +4096 shift can be undone exactly for both layouts
+        // NOTE(review): validValues is a plain bool written from par_unseq lambdas.
+        Bench::start("check arrays");
+        validValues = true;
+        for_each(std::execution::par_unseq, std::begin(indices), std::begin(indices) + idV, [&](size_t id){
+            auto x = static_cast(static_cast(xyz32[id]) - 4096);
+            auto y = static_cast(static_cast(xyz32[idV + id]) - 4096);
+            auto z = static_cast(xyz32[2*idV + id]);
+
+            if(x != cloud[vIndices[id]].x()){
+                validValues = false;
+                return;
+            }
+            if(y != cloud[vIndices[id]].y()){
+                validValues = false;
+                return;
+            }
+            if(z != cloud[vIndices[id]].z()){
+                validValues = false;
+                return;
+            }
+
+        });
+        REQUIRE(validValues);
+
+        validValues = true;
+        for_each(std::execution::par_unseq, std::begin(indices), std::begin(indices) + idV, [&](size_t id){
+            auto x = static_cast(static_cast(uxyz32[id*3+0]) - 4096);
+            auto y = static_cast(static_cast(uxyz32[id*3+1]) - 4096);
+            auto z = static_cast(uxyz32[id*3+2]);
+            if(x != cloud[vIndices[id]].x()){
+                validValues = false;
+                return;
+            }
+            if(y != cloud[vIndices[id]].y()){
+                validValues = false;
+                return;
+            }
+            if(z != cloud[vIndices[id]].z()){
+                validValues = false;
+                return;
+            }
+        });
+        REQUIRE(validValues);
+        Bench::stop();
+
+        // data
+        std::vector encoded;
+        std::vector decoded16;
+        std::vector decoded32;
+        std::vector decoded64;
+
+        // process
+        process16("depth16", depth16, encoded, decoded16);
+        process16("xyz16", xyz16, encoded, decoded16);
+        process16("uxyz16", uxyz16, encoded, decoded16);
+        process32("depth32", depth32, encoded, decoded32);
+        process32("xyz32", xyz32, encoded, decoded32);
+
+        process32("uxyz32", uxyz32, encoded, decoded32);
+        process32("dxyz32", dxyz32, encoded, decoded32);
+        process32("duxyz32", duxyz32, encoded, decoded32);
+
+        process64("xyz64", xyz64, encoded, decoded64);
+        process64("uxyz64", uxyz64, encoded, decoded64);
+        process64("packed", packedBuffer, encoded, decoded64);
+
+        // compress color
+        graphics::Texture uncompressedTexture;
+
+
+        std::vector uncompressColorData;
+        uncompressColorData.resize(inputColorData.size());
+
+        if(tjCompressedImage == nullptr){
+            // TJFLAG_NOREALLOC requires a destination of at least tjBufSize() bytes;
+            // 4:4:4 gives the largest bound of the two subsamplings used below.
+            // The buffer is allocated once, from the first frame's dimensions.
+            tjCompressedImage = tjAlloc(int(tjBufSize(int(frame->colorWidth), int(frame->colorHeight), TJSAMP_444)));
+        }
+
+        // Compresses inputColorData with the given chroma subsampling and quality,
+        // records the result under scoreName, then decompresses it again.
+        // NOTE(review): inputColorData holds cloudSize*3 bytes (depth resolution) but
+        // is handed to TurboJPEG as a colorWidth x colorHeight BGR image -- confirm
+        // both resolutions match. The 0.25f ratio factor is kept from the original.
+        auto jpeg_round_trip = [&](int subsampling, int jpegQuality, const std::string &scoreName){
+
+            long unsigned int jpegColorSize = 0;
+            int ret = tjCompress2(jpegCompressor,
+                reinterpret_cast(inputColorData.data()),
+                int(frame->colorWidth), 0, int(frame->colorHeight),
+                TJPF_BGR,
+                &tjCompressedImage, &jpegColorSize, subsampling, jpegQuality, TJFLAG_NOREALLOC | TJFLAG_FASTDCT);
+            REQUIRE(ret != -1);
+            scores[scoreName].emplace_back(0.25f*jpegColorSize/(frame->colorWidth*frame->colorHeight*3), jpegColorSize);
+
+            ret = tjDecompress2(jpegUncompressor, tjCompressedImage, jpegColorSize, uncompressColorData.data(),
+                int(frame->colorWidth), 0, int(frame->colorHeight),TJPF_BGR, TJFLAG_NOREALLOC | TJFLAG_FASTDCT);
+            REQUIRE(ret != -1);
+// uncompressedTexture.copy_2d_data(frame->colorWidth, frame->colorHeight, 3, uncompressColorData.data());
+// uncompressedTexture.write_2d_image_file_data(std::format("E:/{}-{}.jpg", scoreName, ii));
+        };
+
+        jpeg_round_trip(TJSAMP_444, 80, "TJSAMP_444-jpeg80");
+        jpeg_round_trip(TJSAMP_444, 60, "TJSAMP_444-jpeg60");
+        jpeg_round_trip(TJSAMP_444, 50, "TJSAMP_444-jpeg50");
+        // this entry used to be measured with the quality still set to 50
+        jpeg_round_trip(TJSAMP_422, 80, "TJSAMP_422-jpeg80");
+        jpeg_round_trip(TJSAMP_422, 60, "TJSAMP_422-jpeg60");
+        jpeg_round_trip(TJSAMP_422, 50, "TJSAMP_422-jpeg50");
+
+// REQUIRE(files::CloudIO::save_cloud(std::format("E:/boring_cloud_{}_1.obj",ii), pts.data(), pts.size()));
+
+// continue;
+
+// REQUIRE(files::CloudIO::save_cloud(std::format("E:/boring_cloud_{}_2.obj", ii), cloud, pts.size()));
+    }
+
+
+    Bench::display(BenchUnit::microseconds);
+
+    // mean encoded size per method in MiB, printed smallest first
+    std::vector> results;
+    for(auto &m : scores){
+        double total = 0.;
+        size_t totalB = 0;
+        for(auto &f : m.second){
+            total += std::get<0>(f);
+            totalB += std::get<1>(f);
+        }
+        double res = ((1.*totalB/m.second.size())/1024)/1024;
+        results.emplace_back(m.first, res);
+// std::cout << "mode : " << m.first << " " << total/m.second.size()<< " " << ((1.*totalB/m.second.size())/1024)/1024 << "\n";
+    }
+
+    std::sort(std::begin(results), std::end(results), [](const auto &p1, const auto &p2) {
+        return p1.second < p2.second;
+    });
+
+    for(const auto &res : results){
+        std::cout << res.first << " " << res.second << "\n";
+    }
+
+    if(tjCompressedImage != nullptr){
+        tjFree(tjCompressedImage);
+    }
+
+
+    tjDestroy(jpegUncompressor);
+    tjDestroy(jpegCompressor);
+
+
+}
diff --git a/cpp-projects/base-test/cloud_tests.cpp b/cpp-projects/base-test/cloud_tests.cpp new file mode 100644 index 0000000..16a85a4 --- /dev/null +++ b/cpp-projects/base-test/cloud_tests.cpp @@ -0,0 +1,120 @@ + +/******************************************************************************* +** tool-test ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +// std +#include + +// local +#include "thirdparty/catch/catch.hpp" +#include "geometry/cloud.hpp" +#include "utility/logger.hpp" +#include "files/cloud_io.hpp" +#include "utility/constants.hpp" + +using namespace tool; +using namespace geo; + +TEST_CASE("Cloud"){ + + Logger::message("start Cloud\n"); + + + std::mt19937 gen(0); + std::uniform_real_distribution<> dist(-10.f, 10.f); + + size_t nbPts = 100000; + geo::ColoredCloudData cloud; + cloud.validVerticesCount = nbPts; + cloud.vertices.reserve(nbPts); + cloud.colors.reserve(nbPts); + for(size_t ii = 0; ii < nbPts; ++ii){ + cloud.vertices.emplace_back(dist(gen),dist(gen),dist(gen)); + cloud.colors.emplace_back(1.f,0.f,0.f); + } + + auto mean = cloud.mean_position(); + + const std::string directoryPath = "D:/tool-tests"; + files::CloudIO::save_cloud(directoryPath + "/mean.obj", geo::ColoredCloudData (mean, {1.f,1.f,1.f})); + files::CloudIO::save_cloud(directoryPath + "/original.obj", cloud); + cloud.remove_outliers({0,0,0}, 5.f); + cloud.fill_colors({0.f, 1.f, 0.f}); + files::CloudIO::save_cloud(directoryPath + "/without_outliers.obj", cloud); + + + + geo::ColoredCloudData sphereCloud; + sphereCloud.validVerticesCount = 100000; + sphereCloud.vertices.reserve(sphereCloud.validVerticesCount); + sphereCloud.colors.resize(sphereCloud.validVerticesCount); + + std::uniform_real_distribution uniform01(0.f, 1.f); + + + Pt3f center = {1,1,0}; + float ray = 5.f; +// float squareRay = ray*ray; + for (size_t ii = 0; ii < sphereCloud.validVerticesCount; ++ii) { + float theta = 2 * PI * uniform01(gen); + float phi = acos(1 - 2 * uniform01(gen)); + float x = sin(phi) * cos(theta); + float y = sin(phi) * sin(theta); + float z = cos(phi); + sphereCloud.vertices.push_back(center + Pt3f{x,y,z}*ray); + } + sphereCloud.fill_colors({0.5f,0.5f,0.f}); + files::CloudIO::save_cloud(directoryPath + "/sphere_cloud.obj", sphereCloud); + + + auto 
sphereCenter = sphereCloud.sphere().position; + files::CloudIO::save_cloud(directoryPath + "/sphere_center.obj", geo::ColoredCloudData(sphereCenter, {1.f,1.f,1.f})); + + + auto c1 = sphereCloud; + auto c2 = sphereCloud; + auto c3 = sphereCloud; + auto c4 = sphereCloud; + c1.cut(Plane3f{{1,0,0}, 2.f}, true); + c2.cut(Plane3f{{1,0,0}, 2.f}, false); + c3.cut(Plane3f{{1,0,0}, 4.f}, false); + c4.cut(Plane3f{{1,0,0}, 4.7f}, false); + + files::CloudIO::save_cloud(directoryPath + "/cut1.obj", c1); + files::CloudIO::save_cloud(directoryPath + "/cut2.obj", c2); + files::CloudIO::save_cloud(directoryPath + "/cut3.obj", c3); + files::CloudIO::save_cloud(directoryPath + "/cut4.obj", c4); + + files::CloudIO::save_cloud(directoryPath + "/cut1_center.obj", geo::ColoredCloudData(c1.sphere().position, {1.f,0.f,1.f})); + files::CloudIO::save_cloud(directoryPath + "/cut2_center.obj", geo::ColoredCloudData(c2.sphere().position, {0.f,1.f,1.f})); + files::CloudIO::save_cloud(directoryPath + "/cut3_center.obj", geo::ColoredCloudData(c3.sphere().position, {0.f,0.f,1.f})); + files::CloudIO::save_cloud(directoryPath + "/cut4_center.obj", geo::ColoredCloudData(c4.sphere().position, {0.f,0.f,1.f})); + + SECTION("cloud init"){ + + + // tool::Logger::message(""); + } +} diff --git a/cpp-projects/base-test/exp_tests.cpp b/cpp-projects/base-test/exp_tests.cpp new file mode 100644 index 0000000..d13717e --- /dev/null +++ b/cpp-projects/base-test/exp_tests.cpp @@ -0,0 +1,48 @@ + +/******************************************************************************* +** tool-test ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, 
and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "thirdparty/catch/catch.hpp" + +#include "exvr/ex_experiment.hpp" + +using namespace tool; + + +TEST_CASE("experiment"){ + + + SECTION("test1"){ + tool::ex::ExExperiment exp; + + REQUIRE(exp.logger == nullptr); + exp.generate_logger_no_file(); + REQUIRE(exp.logger != nullptr); +// REQUIRE(geo::equals(dm1,from_glm(glmdm1))); +// REQUIRE(dm1.determinant() == glm::determinant(glmdm1)); + } + // return; + +} diff --git a/cpp-projects/base-test/geometry_tests.cpp b/cpp-projects/base-test/geometry_tests.cpp new file mode 100644 index 0000000..8e6caa6 --- /dev/null +++ b/cpp-projects/base-test/geometry_tests.cpp @@ -0,0 +1,1063 @@ + +/******************************************************************************* +** Toolset-test ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, 
sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +// std +#include + +// base +#include "thirdparty/catch/catch.hpp" +#include "utility/logger.hpp" + +// local +#include "glm_utility.hpp" + + +using namespace tool; + + +//geo::Mat4f transform4(const geo::Vec3f &scale, const geo::Vec3f &rotation, const geo::Vec3f &translate){ +// auto tr = scale_matrix(scale); +// tr *= rotation_matrix(rotation*PI_180); +// tr *= translation_matrix(translate); +// return tr; +//} +//geo::Mat4f transform5(const geo::Vec3f &scale, const geo::Vec3f &rotation, const geo::Vec3f &translate){ +// return scale_matrix(scale)*(rotation_matrix(rotation*PI_180)*translation_matrix(translate)); +//} + +//geo::Mat4f transform6(const geo::Vec3f &scale, const geo::Vec3f &rotation, const geo::Vec3f &translate){ +// auto tr = scale_matrix(scale); +// tr = rotation_matrix(rotation*PI_180)*tr; +// tr = translation_matrix(translate)*tr; +// return tr; +//} + +//template +//geo::Matrix multiply2(const geo::Matrix &l, const geo::Matrix &r){ +// geo::Matrix res; +// for(int ii = 0; ii < l.rows(); ++ii){ +// for(int jj = 0; jj < r.cols(); ++jj){ +// 
for(int kk = 0; kk < r.rows(); ++kk){ +// res(r.cols() * ii +jj) += l(l.cols() * ii + kk) * r(r.cols() * kk + jj); +// } +// } +// } +// return res; +//} +//template +//geo::Matrix multiply3(const geo::Matrix &r, const geo::Matrix &l){ +// geo::Matrix res; +// for(int ii = 0; ii < l.rows(); ++ii){ +// for(int jj = 0; jj < r.cols(); ++jj){ +// for(int kk = 0; kk < r.rows(); ++kk){ +// res(r.cols() * ii +jj) += l(l.cols() * ii + kk) * r(r.cols() * kk + jj); +// } +// } +// } +// return res; +//} + + +TEST_CASE("geo::Point"){ Logger::message("geo::Point\n"); + + SECTION("geo::Point2 constructors"){ Logger::message("geo::Point2 constructors\n"); + + constexpr auto pt00 = geo::Point{{0.f,0.f}}; + constexpr auto pt10 = geo::Point{{1.f}}; + constexpr auto pt12 = geo::Point{{1.f,2.f}}; + + {constexpr geo::Point pt; REQUIRE(pt == pt00);} + {constexpr geo::Point pt({1.f,2.f}); REQUIRE(pt == pt12);} + {constexpr geo::Point pt{{1.f,2.f}}; REQUIRE(pt == pt12);} + {constexpr geo::Point pt = {{1.f,2.f}}; REQUIRE(pt == pt12);} + { + constexpr geo::Point pt1{{1.f,2.f}}; + constexpr geo::Point pt2(pt1.array); REQUIRE(pt2 == pt12); + constexpr geo::Point pt3(std::move(pt1.array)); REQUIRE(pt3 == pt12); + } + { + constexpr geo::Point pt1{{1.f,2.f}}; + constexpr geo::Point pt2(pt1.array); REQUIRE(pt2 == pt12); + } + + { + constexpr auto pt1 = geo::Pt2f{1.f,2.f}; + constexpr geo::Pt2f pt2(pt1); REQUIRE(pt2 == pt12); + } + { + constexpr auto pt1 = geo::Pt2f{1.f,2.f}; + constexpr geo::Pt2f pt2(std::move(pt1)); REQUIRE(pt2 == pt12); + } + {constexpr geo::Pt2f pt2; REQUIRE(pt2 == pt00);} + {constexpr geo::Pt2f pt2 = {}; REQUIRE(pt2 == pt00);} + {constexpr geo::Pt2f pt2 = {1.f}; REQUIRE(pt2 == pt10);} + {constexpr geo::Pt2f pt2 = {1.f,2.f}; REQUIRE(pt2 == pt12);} + + {constexpr geo::Pt2f pt2{}; REQUIRE(pt2 == pt00);} + {constexpr geo::Pt2f pt2{1.f}; REQUIRE(pt2 == pt10);} + {constexpr geo::Pt2f pt2{1.f,2.f}; REQUIRE(pt2 == pt12);} + {constexpr geo::Pt2f pt2(1.f); REQUIRE(pt2 == pt10);} + 
{constexpr geo::Pt2f pt2(1.f,2.f); REQUIRE(pt2 == pt12);} + {constexpr geo::Pt2f pt2(geo::Matrix{{1.f,2.f}}); REQUIRE(pt2 == pt12);} + { + constexpr geo::Matrix mat{{1.f,2.f}}; + constexpr geo::Pt2f pt2(mat); REQUIRE(pt2 == pt12); + } + // {constexpr geo::Pt2f pt2{1.f,2.f,3.f}; REQUIRE(pt2 == pt12);} // NOK + // {constexpr geo::Pt2f pt2 = {1.f,2.f,3.f}; REQUIRE(pt2 == pt12);} // NOK + } + SECTION("geo::Point2 operators"){ Logger::message("geo::Point2 operators\n"); + + constexpr auto pt12 = geo::Point{{1.f,2.f}}; + constexpr auto pt21 = geo::Point{{2.f,1.f}}; + + {constexpr auto res = pt12 + pt21; REQUIRE(res == geo::Pt2f{3,3});} + {constexpr auto res = pt12 + 1.f; REQUIRE(res == geo::Pt2f{2,3});} + {constexpr auto res = pt12 - pt21; REQUIRE(res == geo::Pt2f{-1,1});} + {constexpr auto res = pt12 - 1.f; REQUIRE(res == geo::Pt2f{0,1});} + {constexpr auto res = 2.f * pt12; REQUIRE(res == geo::Pt2f{2,4});} + {constexpr auto res = pt12 * 2.f; REQUIRE(res == geo::Pt2f{2,4});} + {constexpr auto res = pt12 / 2.f; REQUIRE(res == geo::Pt2f{0.5,1});} + + // {constexpr auto res = 1.f + pt12; REQUIRE(res == geo::Pt2f{2,3});} // NOK + // {constexpr auto res = 1.f - pt12; REQUIRE(res == geo::Pt2f{0,1});} // NOK + // {constexpr auto res = 2.f / pt12; REQUIRE(res == geo::Pt2f{2,4});} // NOK + // {constexpr auto res = pt12 * pt21;} // NOK + } + SECTION("geo::Point2 getters"){ Logger::message("geo::Point2 getters\n"); + constexpr auto pt12 = geo::Pt2f{1.f,2.f}; + pt12.x(); + pt12.y(); +// pt12.z(); // NOK +// pt12.xyz(); // NOK + } + SECTION("geo::Point2 GLM comparison"){ Logger::message("geo::Point2 GLM comparison\n"); + + constexpr glm::vec2 glmPt1(1,2); + constexpr glm::vec2 glmPt2(4,5); + constexpr geo::Pt2f pt1{1,2}; + constexpr geo::Pt2f pt2{4,5}; + // getters + REQUIRE(almost_equal(glmPt2.x,pt2.x())); + REQUIRE(almost_equal(glmPt2.y,pt2.y())); + // operators + REQUIRE(from_glm(glmPt1) == pt1); + REQUIRE(from_glm(glmPt2) == pt2); + REQUIRE(from_glm(-1.f*glmPt2) == -1.f*pt2); + 
REQUIRE(from_glm(glmPt1+glmPt2) == (pt1+pt2)); + REQUIRE(from_glm(glmPt1-glmPt2) == (pt1-pt2)); + REQUIRE(from_glm(glmPt1*glmPt2) == (pt1*pt2)); +// REQUIRE(from_glm(glmPt1/glmPt2) == (pt3/pt4)); + // functions + REQUIRE(glm::dot(glmPt1,glmPt2) == geo::dot(pt1,pt2)); + +// REQUIRE(glm::cross(glmPt1,glmPt2) == geo::cross(pt1,pt2)); + + Logger::message(std::format("dotglm {} dot {}\n", glm::dot(glmPt1,glmPt2), geo::dot(pt1,pt2))); + Logger::message(std::format("pt3 {}\n", to_string(pt1))); + Logger::message(std::format("pt4 {}\n", to_string(pt2))); + + + + REQUIRE(from_glm(-glmPt1) == invert(pt1)); + REQUIRE(tool::almost_equal(glm::length(glmPt1)*glm::length(glmPt1),geo::square_norm(pt1))); + REQUIRE(from_glm(glm::normalize(glmPt1)) == geo::normalize(pt1)); + REQUIRE(from_glm(glm::normalize(glm::vec2(4,5))) == normalize(geo::Pt2f{4,5})); + } + + + + SECTION("geo::Point3"){ Logger::message("geo::Point3\n"); + + { + constexpr auto pt000 = geo::Point{{0.f,0.f,0.f}}; + constexpr auto pt100 = geo::Point{{1.f}}; + constexpr auto pt120 = geo::Point{{1.f,2.f}}; + constexpr geo::Pt3f pt123 = {1.f,2.f,3.f}; + + {constexpr geo::Point pt; REQUIRE(pt == pt000);} + {constexpr geo::Point pt({1.f,2.f,3.f}); REQUIRE(pt == pt123);} + // {constexpr geo::Point pt{1.f,2.f,3.f}; REQUIRE(pt == pt123);} // NOK + // {constexpr geo::Point pt = {1.f,2.f,3.f}; REQUIRE(pt == pt123);} // NOK + { + constexpr geo::Point pt1{{1.f,2.f,3.f}}; + constexpr geo::Point pt2(pt1.array); REQUIRE(pt2 == pt123); + constexpr geo::Point pt3(std::move(pt1.array)); REQUIRE(pt3 == pt123); + } + { + constexpr auto pt1 = geo::Pt3f{1.f,2.f,3.f}; + constexpr geo::Pt3f pt2(pt1); REQUIRE(pt2 == pt123); + } + { + constexpr auto pt1 = geo::Pt3f{1.f,2.f,3.f}; + constexpr geo::Pt3f pt2(std::move(pt1)); REQUIRE(pt2 == pt123); + } + {constexpr geo::Pt3f pt3; REQUIRE(pt3 == pt000);} + {constexpr geo::Pt3f pt3 = {}; REQUIRE(pt3 == pt000);} + {constexpr geo::Pt3f pt3 = {1.f}; REQUIRE(pt3 == pt100);} + {constexpr geo::Pt3f pt3 = 
{1.f,2.f}; REQUIRE(pt3 == pt120);} + {constexpr geo::Pt3f pt3 = {1.f,2.f,3.f}; REQUIRE(pt3 == pt123);} + + {constexpr geo::Pt3f pt3{}; REQUIRE(pt3 == pt000);} + {constexpr geo::Pt3f pt3{1.f}; REQUIRE(pt3 == pt100);} + {constexpr geo::Pt3f pt3{1.f,2.f}; REQUIRE(pt3 == pt120);} + {constexpr geo::Pt3f pt3{1.f,2.f,3.f}; REQUIRE(pt3 == pt123);} + {constexpr geo::Pt3f pt3(1.f); REQUIRE(pt3 == pt100);} + {constexpr geo::Pt3f pt3(1.f,2.f); REQUIRE(pt3 == pt120);} + {constexpr geo::Pt3f pt3(1.f,2.f,3.f); REQUIRE(pt3 == pt123);} + {constexpr geo::Pt3f pt3(geo::Matrix{{1.f,2.f,3.f}});REQUIRE(pt3 == pt123);} + { + constexpr geo::Matrix mat{{1.f,2.f,3.f}}; + constexpr geo::Pt3f pt3(mat); REQUIRE(pt3 == pt123); + } + // {constexpr geo::Pt3f pt3 = {1.f,2.f,3.f,4.f}; REQUIRE(pt3 == pt3);} // NOK + // {constexpr geo::Pt3f pt3{1.f,2.f,3.f,4.f}; REQUIRE(pt3 == pt3);} // NOK + } + { + constexpr glm::vec3 glmPt1(1,2,3); + constexpr glm::vec3 glmPt2(4,5,6); + constexpr geo::Pt3f pt1{1,2,3}; + constexpr geo::Pt3f pt2{4,5,6}; + constexpr auto dotR = geo::dot(pt1,pt2); + constexpr auto inv = invert(pt1); + constexpr auto sqn = geo::square_norm(pt1); + + constexpr glm::vec4 glmPt3(1,2,3,1); + constexpr glm::vec4 glmPt4(4,5,6,1); + + Logger::message(std::format("cross {}\n", to_string(geo::cross(pt1,pt2)))); + // Logger::message(std::format("glm cross {}\n", to_string(from_glm(glm::cross(glmPt3,glmPt4))))); + // const glm::vec3 v(0); + // auto v2 = glm::normalize(v); + // // getters + // REQUIRE(glmPt2.x == pt2.x()); + // REQUIRE(glmPt2.y == pt2.y()); + // REQUIRE(glmPt2.z == pt2.z()); + // // operators + // REQUIRE(from_glm(glmPt1) == pt1); + // REQUIRE(from_glm(glmPt2) == pt2); + // REQUIRE(from_glm(-glmPt2) == -pt2); + // REQUIRE(from_glm(glmPt1+glmPt2) == (pt1+pt2)); + // REQUIRE(from_glm(glmPt1-glmPt2) == (pt1-pt2)); + // REQUIRE(from_glm(glmPt1*glmPt2) == (pt1*pt2)); + // REQUIRE(from_glm(glmPt1/glmPt2) == (pt1/pt2)); + // REQUIRE(from_glm(glmPt1+glmPt2) == (add(pt1,pt2))); + // 
REQUIRE(from_glm(glmPt1-glmPt2) == (substract(pt1,pt2))); + // REQUIRE(from_glm(glmPt1*glmPt2) == (multiply(pt1,pt2))); + // REQUIRE(from_glm(glmPt1/glmPt2) == (divide(pt1,pt2))); + // // functions + // REQUIRE(glm::dot(glmPt1,glmPt2) == geo::dot(pt1,pt2)); + // REQUIRE(from_glm(glm::cross(glmPt1,glmPt2)) == geo::cross(pt1,pt2)); + // REQUIRE(from_glm(-glmPt1) == invert(pt1)); + // REQUIRE(tool::almost_equal(glm::length(glmPt1)*glm::length(glmPt1),sqn)); + // REQUIRE(from_glm(glm::normalize(glmPt1)) == geo::normalize(pt1)); + // REQUIRE(from_glm(glm::normalize(glm::vec3(4,5,6))) == normalize(geo::Pt3f{4,5,6})); + Logger::message(std::format("cross {}\n", to_string(geo::cross(pt1,pt2)))); + Logger::message(std::format("glm cross {}\n", to_string(from_glm(glm::cross(glmPt1,glmPt2))))); + Logger::message(std::format("glm mul {}\n", to_string(from_glm(glmPt1*glmPt2)))); + } + } + + SECTION("Point4"){ + +// Logger::message("geo::Point operations: Point4\n"); +// constexpr glm::vec4 pt1(1,2,3,4); +// constexpr glm::vec4 pt2(5,6,7,8); +// constexpr geo::Pt4f pt3{1,2,3,4}; +// constexpr geo::Pt4f pt4{5,6,7,8}; +// constexpr auto dotR = geo::dot(pt3,pt4); +// constexpr auto inv = invert(pt3); +// constexpr auto sqn = geo::square_norm(pt3); + +// const glm::vec3 v(0); +// auto v2 = glm::normalize(v); +// // getters +// REQUIRE(pt2.x == pt4.x()); +// REQUIRE(pt2.y == pt4.y()); +// REQUIRE(pt2.z == pt4.z()); +// REQUIRE(pt2.w == pt4.w()); +// // operators +// REQUIRE(from_glm(pt1) == pt3); +// REQUIRE(from_glm(pt2) == pt4); +// REQUIRE(from_glm(-pt2) == -pt4); +// REQUIRE(from_glm(pt1+pt2) == (pt3+pt4)); +// REQUIRE(from_glm(pt1-pt2) == (pt3-pt4)); +// REQUIRE(from_glm(pt1*pt2) == (pt3*pt4)); +// REQUIRE(from_glm(pt1/pt2) == (pt3/pt4)); +// REQUIRE(from_glm(pt1+pt2) == (add(pt3,pt4))); +// REQUIRE(from_glm(pt1-pt2) == (substract(pt3,pt4))); +// REQUIRE(from_glm(pt1*pt2) == (multiply(pt3,pt4))); +// REQUIRE(from_glm(pt1/pt2) == (divide(pt3,pt4))); +// // functions +// 
REQUIRE(glm::dot(pt1,pt2) == geo::dot(pt3,pt4)); +// REQUIRE(from_glm(-pt1) == invert(pt3)); +// REQUIRE(tool::almost_equal(glm::length(pt1)*glm::length(pt1),sqn)); +// REQUIRE(from_glm(glm::normalize(pt1)) == geo::normalize(pt3)); +// REQUIRE(from_glm(glm::normalize(glm::vec4(5,6,7,8))) == normalize(geo::Pt4f{5,6,7,8})); + } +} + +TEST_CASE("geo::Matrix"){ Logger::message("geo::Matrix\n"); + + SECTION("Constructor"){ Logger::message("geo::Matrix - Constructor\n"); + + constexpr auto pt00 = geo::Point{{0.f,0.f}}; + REQUIRE(almost_equal(pt00.x(),0.f)); + REQUIRE(almost_equal(pt00.y(),0.f)); + constexpr auto pt10 = geo::Point{{1.f}}; + REQUIRE(almost_equal(pt10.x(),1.f)); + REQUIRE(almost_equal(pt10.y(),0.f)); + constexpr auto pt12 = geo::Point{{1.f,2.f}}; + REQUIRE(almost_equal(pt12.x(),1.f)); + REQUIRE(almost_equal(pt12.y(),2.f)); + + // Validity + {geo::Matrix m = {}; REQUIRE(m == pt00);} // OK + {geo::Matrix m{}; REQUIRE(m == pt00);} // OK + {geo::Matrix m{{}}; REQUIRE(m == pt00);} // OK + {geo::Matrix m({1.f}); REQUIRE(m == pt10);} // OK + {geo::Matrix m({1.f,2.f}); REQUIRE(m == pt12);} // OK + std::array a2 = {1.f,2.f}; + {geo::Matrix m(a2); REQUIRE(m == pt12);} // OK + {geo::Matrix m(std::move(a2)); REQUIRE(m == pt12);} // OK + {geo::Matrix m(std::array{}); REQUIRE(m == pt00);} // OK + {geo::Matrix m(std::array{1.f}); REQUIRE(m == pt10);} // OK + {geo::Matrix m(std::array{1.f,2.f}); REQUIRE(m == pt12);} // OK + // {geo::Matrix m = {1.f};} // NOK + // {geo::Matrix m = {1.f,2.f};} // NOK + // {geo::Matrix m = {1.f,2.f,3.f};} // NOK + // {geo::Matrix m{1.f};} // NOK + // {geo::Matrix m{1.f,2.f};} // NOK + // {geo::Matrix m{1.f,2.f,3.f};} // NOK + // {geo::Matrix m({1.f,2.f,3.f});} // NOK + // {geo::Matrix m();} // NOK (warning) + // {geo::Matrix m(1.f);} // NOK + // {geo::Matrix m(1.f,2.f);} // NOK + // {geo::Matrix m(1.f,2.f,3.f);} // NOK + // {std::array a1 = {1.f}; geo::Matrix m(a1);} // NOK + // {geo::Matrix m(std::array{1.f,2.f,3.f});} // NOK + } + + 
SECTION("Functions"){ Logger::message("geo::Matrix - Functions\n"); + +// {constexpr auto m = geo::Matrix::identity();} +// {constexpr geo::Matrix r = m1.row(0);} + + } + + SECTION("Matrix 2x2"){ Logger::message("geo::Mat2x2\n"); + + constexpr geo::Mat2f m1{ + 7.f,2, + 0,3 + }; + constexpr geo::Mat2f m2{ + 3.f,1, + -2,7 + }; + constexpr auto glmM1 = glm::mat2( + glm::vec2(7,0), + glm::vec2(2,3) + ); + constexpr auto glmM2 = glm::mat2( + glm::vec2(3,-2), + glm::vec2(1,7) + ); + constexpr auto altglmM1 = glm::mat2( + 7,0, + 2,3 + ); + REQUIRE(glmM1 == altglmM1); + + REQUIRE(compare(m1.col(0), glm::column(glmM1,0))); + REQUIRE(compare(m1.col(1), glm::column(glmM1,1))); + REQUIRE(compare(m1.row(0), glm::row(glmM1,0))); + REQUIRE(compare(m1.row(1), glm::row(glmM1,1))); + REQUIRE(compare(m1,glmM1)); + REQUIRE(compare(m1,from_glm(glmM1))); + + + + REQUIRE(compare(m2.col(0), glm::column(glmM2,0))); + REQUIRE(compare(m2.col(1), glm::column(glmM2,1))); + REQUIRE(compare(m2.row(0), glm::row(glmM2,0))); + REQUIRE(compare(m2.row(1), glm::row(glmM2,1))); + REQUIRE(compare(m2,glmM2)); + REQUIRE(compare(m2,from_glm(glmM2))); + + REQUIRE(determinant(m1) == glm::determinant(glmM1)); + REQUIRE(compare(inverse(m1),glm::inverse(glmM1))); + + REQUIRE(compare(m1*m2, glmM1*glmM2)); + REQUIRE(compare(m2*m1, glmM2*glmM1)); + + constexpr auto v1 = geo::Vec2f{1.3f,5.f}; + constexpr auto glmV1 = glm::vec2(1.3f,5.f); + REQUIRE(compare(v1, glmV1)); + REQUIRE(compare(v1*m1, glmV1*glmM1)); + REQUIRE(compare(m1*m2, multiply(m1,m2))); + REQUIRE(compare(m2*m1, multiply(m2,m1))); + REQUIRE(compare(m1*transpose(v1), glmM1*glmV1)); + + +// v1 +// m1*transpose(v1); + +// geo::Mat2f m3{ +// cos(PI/2.f), sin(PI/2.f), +// -sin(PI/2.f), cos(PI/2.f) +// }; +// geo::Mat2f m4{ +// 1,0, +// 0,-1 +// }; + +// Logger::message(to_string(m3*m4)); +// Logger::message(to_string(m4*m3)); + } + + SECTION("Matrix 3x3"){ Logger::message("geo::Mat3x3\n"); + + constexpr geo::Mat3f m1{ + 7.f,2,1, + 0,3,-1, + -3,4,-2 + }; + 
constexpr geo::Mat3f m2{ + 3.f,1,-4, + -2,7,5, + 1.2f,4,-4 + }; + constexpr auto glmM1 = glm::mat3( + glm::vec3(7,0,-3), + glm::vec3(2,3,4), + glm::vec3(1,-1,-2) + ); + constexpr auto glmM2 = glm::mat3( + glm::vec3(3,-2,1.2), + glm::vec3(1,7,4), + glm::vec3(-4,5,-4) + ); + constexpr auto altglmM1 = glm::mat3( + 7,0,-3, + 2,3,4, + 1,-1,-2 + ); + + REQUIRE(glmM1 == altglmM1); + + REQUIRE(compare(m1.col(0), glm::column(glmM1,0))); + REQUIRE(compare(m1.col(1), glm::column(glmM1,1))); + REQUIRE(compare(m1.col(2), glm::column(glmM1,2))); + REQUIRE(compare(m1.row(0), glm::row(glmM1,0))); + REQUIRE(compare(m1.row(1), glm::row(glmM1,1))); + REQUIRE(compare(m1.row(2), glm::row(glmM1,2))); + REQUIRE(compare(m1,glmM1)); + REQUIRE(compare(m1,from_glm(glmM1))); + + REQUIRE(compare(m2.col(0), glm::column(glmM2,0))); + REQUIRE(compare(m2.col(1), glm::column(glmM2,1))); + REQUIRE(compare(m2.col(2), glm::column(glmM2,2))); + REQUIRE(compare(m2.row(0), glm::row(glmM2,0))); + REQUIRE(compare(m2.row(1), glm::row(glmM2,1))); + REQUIRE(compare(m2.row(2), glm::row(glmM2,2))); + REQUIRE(compare(m2,glmM2)); + REQUIRE(compare(m2,from_glm(glmM2))); + + REQUIRE(determinant(m1) == glm::determinant(glmM1)); + REQUIRE(compare(inverse(m1),glm::inverse(glmM1))); + REQUIRE(compare(m1*m2, glmM1*glmM2)); + REQUIRE(compare(m2*m1, glmM2*glmM1)); + + constexpr auto v1 = geo::Vec3f{1.3f,5.f,-2.f}; + constexpr auto glmV1 = glm::vec3(1.3f,5.f,-2.f); + REQUIRE(compare(v1, glmV1)); + REQUIRE(compare(v1*m1, glmV1*glmM1)); + REQUIRE(compare(m1*m2, multiply(m1,m2))); + REQUIRE(compare(m2*m1, multiply(m2,m1))); + REQUIRE(compare(m1*transpose(v1), glmM1*glmV1)); + +// REQUIRE(compare(m1.multiply_point(v1), v1*m1)); + } + + SECTION("Matrix 4x4"){ Logger::message("geo::Mat4x4\n"); + + constexpr geo::Mat4f m1{ + 7.f,2,1,-4, + 0,3,-1,1, + -3,4,-2,-3, + -4,4,6.2f,1.2f + }; + constexpr geo::Mat4f m2{ + 3.f,1,-4,-5, + -2,7,5,-1, + 1.2f,4,-4,1.2f, + 3.6f,-1.5,4.2f,2 + }; + constexpr auto glmM1 = glm::mat4( + 
glm::vec4(7,0,-3,-4), + glm::vec4(2,3,4,4), + glm::vec4(1,-1,-2,6.2), + glm::vec4(-4,1,-3,1.2) + ); + constexpr auto glmM2 = glm::mat4( + glm::vec4(3,-2,1.2,3.6), + glm::vec4(1,7,4,-1.5), + glm::vec4(-4,5,-4,4.2), + glm::vec4(-5,-1,1.2,2) + ); + constexpr auto altglmM1 = glm::mat4( + 7,0,-3,-4, + 2,3,4,4, + 1,-1,-2,6.2, + -4,1,-3,1.2 + ); + + REQUIRE(glmM1 == altglmM1); + + + std::cout << "mmm:\n" << to_string(transpose(transpose(m1))) << "\n"; + std::cout << to_string(from_glm(glmM1)) << "\n"; + REQUIRE(compare(from_glm(to_gml_mat4(m1)),from_glm(glmM1))); + REQUIRE(compare(transpose(transpose(m1)),from_glm(glmM1))); + + REQUIRE(compare(m1.col(0), glm::column(glmM1,0))); + REQUIRE(compare(m1.col(1), glm::column(glmM1,1))); + REQUIRE(compare(m1.col(2), glm::column(glmM1,2))); + REQUIRE(compare(m1.col(3), glm::column(glmM1,3))); + REQUIRE(compare(m1.row(0), glm::row(glmM1,0))); + REQUIRE(compare(m1.row(1), glm::row(glmM1,1))); + REQUIRE(compare(m1.row(2), glm::row(glmM1,2))); + REQUIRE(compare(m1.row(3), glm::row(glmM1,3))); + REQUIRE(compare(m1,glmM1)); + REQUIRE(compare(m1,from_glm(glmM1))); + + REQUIRE(compare(m2.col(0), glm::column(glmM2,0))); + REQUIRE(compare(m2.col(1), glm::column(glmM2,1))); + REQUIRE(compare(m2.col(2), glm::column(glmM2,2))); + REQUIRE(compare(m2.col(3), glm::column(glmM2,3))); + REQUIRE(compare(m2.row(0), glm::row(glmM2,0))); + REQUIRE(compare(m2.row(1), glm::row(glmM2,1))); + REQUIRE(compare(m2.row(2), glm::row(glmM2,2))); + REQUIRE(compare(m2.row(3), glm::row(glmM2,3))); + REQUIRE(compare(m2,glmM2)); + REQUIRE(compare(m2,from_glm(glmM2))); + + REQUIRE(determinant(m1) == glm::determinant(glmM1)); + REQUIRE(compare(inverse(m1),glm::inverse(glmM1))); + REQUIRE(compare(m1*m2, glmM1*glmM2)); + REQUIRE(compare(m2*m1, glmM2*glmM1)); + + constexpr auto v1 = geo::Vec4f{1.3f,5.f,-2.f,2.5f}; + constexpr auto glmV1 = glm::vec4(1.3f,5.f,-2.f,2.5f); + REQUIRE(compare(v1, glmV1)); + REQUIRE(compare(v1*m1, glmV1*glmM1)); + + REQUIRE(compare(m1*m2, 
multiply(m1,m2))); + REQUIRE(compare(m2*m1, multiply(m2,m1))); + REQUIRE(compare(v1*m1, glmV1*glmM1)); + REQUIRE(compare(m1*transpose(v1), glmM1*glmV1)); + + + } + + SECTION("Look at"){ Logger::message("Look at\n"); + auto glmLookAt = glm::lookAt( + glm::vec3(-2.0f, 4.0f, -1.0f), + glm::vec3( 0.0f, 2.0f, 0.0f), + glm::vec3( 0.0f, 1.0f, 0.0f) + ); + + + auto lootAt = look_at( + geo::Pt3f{-2.0f, 4.0f, -1.0f}, + geo::Pt3f{0.0f, 2.0f, 0.0f}, + geo::Vec3f{0.0f, 1.0f, 0.0f} + ); + +// std::cout << from_glm(glmLookAt) << "\n"; +// std::cout << lootAt << "\n"; +// REQUIRE(compare(clean(glmLookAt),clean(lootAt))); + } + + + + SECTION("Transform combination 1"){Logger::message("Transform combination 1\n"); + + // scale + auto scale = geo::scale(geo::Mat4f::identity(),geo::Vec3f{0.5f, 0.5f, 0.5f}); + auto glmScale = glm::scale(glm::mat4(1.0f), glm::vec3(0.5, 0.5, 0.5));; + REQUIRE(compare(clean(scale),clean(glmScale))); + + // translate + auto tr = geo::translate(geo::Mat4f::identity(), {{-1.f, 17.f, 56.f}}); + auto glmTr = glm::translate(glm::mat4(1.0f), glm::vec3(-1.0, 17.0, 56.0)); + REQUIRE(compare(tr,glmTr)); + REQUIRE(compare(geo::translation_m4x4(geo::Vec3f{-1.f, 17.f, 56.f}),tr)); + + // rotation 1 + auto rot1 = geo::rotate(geo::Mat4f::identity(), geo::Vec3f{1.f,0.f,0.f}, 90.f); + auto glmRot1 = glm::rotate(glm::mat4(1.0f), glm::radians(90.0f), glm::vec3(1.0, 0.0, 0.0)); + REQUIRE(compare(clean(rot1),clean(glmRot1))); + + rot1 = geo::rotate(geo::Mat4f::identity(), geo::Vec3f{0.f,1.f,0.f}, 90.f); + glmRot1 = glm::rotate(glm::mat4(1.0f), glm::radians(90.0f), glm::vec3(0.0, 1.0, 0.0)); + REQUIRE(compare(clean(rot1),clean(glmRot1))); + + rot1 = geo::rotate(geo::Mat4f::identity(), geo::Vec3f{0.f,0.f,1.f}, 90.f); + glmRot1 = glm::rotate(glm::mat4(1.0f), glm::radians(90.0f), glm::vec3(0.0, 0.0, 1.0)); + REQUIRE(compare(clean(rot1),clean(glmRot1))); + + // rotation 2 + rot1 = geo::rotate(geo::Mat4f::identity(), geo::Vec3f{90.f,0.f,0.f}); + glmRot1 = 
glm::rotate(glm::mat4(1.0f), glm::radians(90.0f), glm::vec3(1.0, 0.0, 0.0)); + REQUIRE(compare(clean(rot1),clean(glmRot1))); + + rot1 = geo::rotate(geo::Mat4f::identity(), geo::Vec3f{0.f,90.f,0.f}); + glmRot1 = glm::rotate(glm::mat4(1.0f), glm::radians(90.0f), glm::vec3(0.0, 1.0, 0.0)); + REQUIRE(compare(clean(rot1),clean(glmRot1))); + + rot1 = geo::rotate(geo::Mat4f::identity(), geo::Vec3f{0.f,0.f,90.f}); + glmRot1 = glm::rotate(glm::mat4(1.0f), glm::radians(90.0f), glm::vec3(0.0, 0.0, 1.0)); + REQUIRE(compare(clean(rot1),clean(glmRot1))); + + // rotation 3 + rot1 = geo::Mat4f::identity()*geo::rotation_m4x4(geo::Vec3f{90.f*tool::PI_180,0.f,0.f}); + glmRot1 = glm::rotate(glm::mat4(1.0f), glm::radians(90.0f), glm::vec3(1.0, 0.0, 0.0)); + REQUIRE(compare(clean(rot1),clean(glmRot1))); + +// REQUIRE(clean(from_glm(trans1)) == clean(trans2)); + +// trans1 = glm::rotate(glm::mat4(1.0f), glm::radians(90.0f), glm::vec3(0.0, 1.0, 0.0)); +// trans2 = geo::Mat4f::identity()*geo::Mat4f::rotation_matrix({0.f,90.f*tool::PI_180,0.f}); +// REQUIRE(clean(from_glm(trans1)) == clean(trans2)); + +// trans1 = glm::rotate(glm::mat4(1.0f), glm::radians(90.0f), glm::vec3(0.0, 0.0, 1.0)); +// trans2 = geo::Mat4f::identity()*geo::Mat4f::rotation_matrix({0.f,0.f,90.f*tool::PI_180}); +// REQUIRE(clean(from_glm(trans1)) == clean(trans2)); + + // combination 1 + auto glmC1 = glm::scale(glm::mat4(1.0f), glm::vec3(0.5, 0.5, 0.5)); + auto c1 = geo::scale(geo::Mat4f::identity(), geo::Vec3f{0.5f, 0.5f, 0.5f}); + REQUIRE(compare(clean(c1),clean(glmC1))); + glmC1 = glm::rotate(glmC1, glm::radians(90.0f), glm::vec3(1.0, 0.0, 0.0)); + c1 = geo::rotate(c1, geo::Vec3f{1.f,0.f,0.f}, 90.f); + +// std::cout << "RR:\n"; +//// Logger::message(to_string(clean(from_glm(glmC1)))); +// Logger::message(to_string( geo::translate(geo::Mat4f::identity(), {{-1.0, 17.0, 56.0}}))); +// Logger::message(to_string( from_glm(glm::translate(glm::mat4(1.0f), glm::vec3(-1.0, 17.0, 56.0))))); +//// CHECK(compare(c1,glmC1)); + + 
glmC1 = glm::scale(glm::mat4(1.0f), glm::vec3(0.5, 0.5, 0.5)); + c1 = geo::scale(geo::Mat4f::identity(), {{0.5f, 0.5f, 0.5f}}); + CHECK(compare(c1,glmC1)); + glmC1 = glm::rotate(glmC1, glm::radians(-47.0f), glm::vec3(1.0, 0.0, 0.0)); + c1 = geo::rotate(c1, geo::Vec3f{1.f,0.f,0.f}, -47.f); + CHECK(compare(c1,glmC1)); + Logger::message(to_string( c1)); + Logger::message(to_string( from_glm(glmC1))); + glmC1 = glm::translate(glmC1, glm::vec3(-1.0, 17.0, 56.0)); + c1 = geo::translate(c1, geo::Vec3f{-1.0, 17.0, 56.0}); +// CHECK(compare(c1,glmC1)); + + auto glm_tr = glm::translate(glm::mat4(1.0f), glm::vec3(1.5f, 2.6f, -10.5)); + auto glm_sc = glm::scale(glm::mat4(1.0f), glm::vec3(1.2f,1.2f,1.2f)); + auto glm_rx = glm::rotate(glm::mat4(1.0f), glm::radians(-45.f), glm::vec3(1.0, 0.0, 0.0)); + auto glm_ry = glm::rotate(glm::mat4(1.0f), glm::radians(70.6f), glm::vec3(0.0, 1.0, 0.0)); + auto glm_rz = glm::rotate(glm::mat4(1.0f), glm::radians(7.9f), glm::vec3(0.0, 0.0, 1.0)); + auto glm_model = glm_tr * (glm_rz*glm_rx*glm_ry) * glm_sc; + + auto geo_tr = geo::translate(geo::Mat4f::identity(), geo::Vec3f{1.5f, 2.6f, -10.5f}); + auto geo_scale = geo::scale(geo::Mat4f::identity(), geo::Vec3f{1.2f,1.2f,1.2f}); + auto geo_rot = geo::rotate(geo::Mat4f::identity(), geo::Vec3f{-45.f,70.6f,7.9f}); + auto geo_model = geo_tr * geo_rot * geo_scale; + +// std::cout << "### TRC:\n " << to_string(geo_model) << "\n" << to_string(from_glm(glm_model)) << "\n"; +// CHECK(compare(clean(geo_model),clean(glm_model))); + + auto glmP = glm::perspective(60.f*PI_180, 1.4f, 0.02f, 1000.f); + auto p1 = geo::perspective(60.f*PI_180, 1.4f, 0.02f, 1000.f); +// Logger::message(to_string(clean(from_glm(glmP)))); +// Logger::message(to_string(clean(p1))); + CHECK(compare(clean(glmP),clean(p1))); + + +// c1 = geo::rotate(c1, {{-47.f,0.f,0.f}}); + + + +// std::cout << "RRé:\n"; +// Logger::message(to_string(clean(from_glm(glmC1)))); +// Logger::message(to_string(clean(c1))); + + + } + +// 
SECTION("Transformation comparison"){ + +// // rotation matrices +// trans1 = glm_x_rotation(87.f); +// trans2 = geo::Mat3f::x_rotation_matrix(87.f); +// REQUIRE(clean(from_glm(trans1)) == clean(trans2)); + +// trans1 = glm_y_rotation(87.f); +// trans2 = geo::Mat3f::y_rotation_matrix(87.f); +// REQUIRE(clean(from_glm(trans1)) == clean(trans2)); + +// trans1 = glm_z_rotation(87.f); +// trans2 = geo::Mat3f::z_rotation_matrix(87.f); +// REQUIRE(clean(from_glm(trans1)) == clean(trans2)); + +// trans1 = glm_rotate(glm::vec3(10.0, 20., -30.)); +// trans2 = geo::Mat4f::rotate(geo::Mat4f::identity(), {10.0, 20., -30.}); +// REQUIRE(clean(from_glm(trans1)) == clean(trans2)); + +// // scale matrix +// trans1 = glm::scale(glm::mat4(1.0f), glm::vec3(0.5, 0.5, 0.5)); +// trans2 = geo::Mat4f::scale(geo::Mat4f::identity(), {0.5f, 0.5f, 0.5f}); +// trans3 = geo::Mat4f::scale_matrix({0.5f, 0.5f, 0.5f}); + +// REQUIRE(clean(from_glm(trans1)) == clean(trans2)); +// REQUIRE(clean(trans2) == clean(trans3)); + +// // translate matrix +// trans1 = glm::translate(glm::mat4(1.0f), glm::vec3(-1.0, 17.0, 56.0)); +// trans2 = geo::Mat4f::translate(geo::Mat4f::identity(), {-1.f, 17.f, 56.f}); +// trans3 = geo::Mat4f::translation_matrix({-1.f, 17.f, 56.f}); + +// REQUIRE(clean(from_glm(trans1)) == clean(trans2)); +// REQUIRE(clean(trans2) == clean(trans3)); + + + + +// trans3 = geo::Mat4f::scale_matrix({0.5f, 0.5f, 0.5f}); +// trans3 = geo::Mat4f::rotation_matrix(geo::Vec3f{-47.f,0.f,0.f}*tool::PI_180)*trans3; +// trans3 = geo::Mat4f::translation_matrix({-1.0, 17.0, 56.0})* trans3; + +// REQUIRE(clean(from_glm(trans1)) == clean(trans2)); +// REQUIRE(clean(trans2) == clean(trans3)); + +// trans4 = geo::Mat4f::transform({0.5f, 0.5f, 0.5f}, {-47.f,0.f,0.f}, {-1.0, 17.0, 56.0}); +// CHECK(clean(trans3) == clean(trans4)); +// } + + + auto gq1 = glm::angleAxis(glm::radians(67.f), glm::vec3(1.f, 0.f, 0.f)); + auto gq2 = glm::angleAxis(glm::radians(135.f), glm::vec3(0.f, 1.f, 0.f)); + auto gq3 = 
glm::angleAxis(glm::radians(-54.f), glm::vec3(0.f, 0.f, 1.f)); + auto gq4 = glm::angleAxis(glm::radians(58.f), glm::normalize(glm::vec3(1.f,0.6f,-0.5f))); + + auto q1 = geo::Quatf::from_axis({1.f,0,0}, 67.f); + auto q2 = geo::Quatf::from_axis({0.f,1,0}, 135.f); + auto q3 = geo::Quatf::from_axis({0.f,0,1}, -54.f); + auto q4 = geo::Quatf::from_axis(normalize(geo::Vec3f{1.f,0.6f,-0.5f}), 58.f); + +// auto e1 = euler_angles(q1); +// auto e2 = euler_angles(q2); +// auto e3 = euler_angles(q3); +// auto e4 = euler_angles(q4); + + SECTION("Quaternion"){ Logger::message("Quaternion\n"); + + REQUIRE(from_glm(gq1) == q1); + REQUIRE(from_glm(gq2) == q2); + REQUIRE(from_glm(gq3) == q3); + + REQUIRE(angle(q1) == glm::angle(gq1)); + REQUIRE(angle(q2) == glm::angle(gq2)); + REQUIRE(angle(q3) == glm::angle(gq3)); + + REQUIRE(axis(q1) == from_glm(glm::axis(gq1))); + REQUIRE(axis(q2) == from_glm(glm::axis(gq2))); + REQUIRE(axis(q3) == from_glm(glm::axis(gq3))); + + REQUIRE(norm(q1) == glm::length(gq1)); + REQUIRE(norm(q2) == glm::length(gq2)); + REQUIRE(norm(q3) == glm::length(gq3)); + + REQUIRE((q1 + q2) == from_glm(gq1 + gq2)); + REQUIRE((q1 - q2) == from_glm(gq1 - gq2)); + REQUIRE((q1 * q2) == from_glm(gq1 * gq2)); + REQUIRE((q1 * 5.f) == from_glm(gq1 * 5.f)); + REQUIRE((q1 / 5.f) == from_glm(gq1 / 5.f)); + + REQUIRE(normalize(q1) == from_glm(glm::normalize(gq1))); + REQUIRE(normalize(q2) == from_glm(glm::normalize(gq2))); + REQUIRE(normalize(q3) == from_glm(glm::normalize(gq3))); + + REQUIRE(inverse(q1) == from_glm(glm::inverse(gq1))); + REQUIRE(inverse(q2) == from_glm(glm::inverse(gq2))); + REQUIRE(inverse(q3) == from_glm(glm::inverse(gq3))); + + REQUIRE(dot(q1,q2) == glm::dot(gq1, gq2)); + REQUIRE(dot(q2,q1) == glm::dot(gq2, gq1)); + REQUIRE(dot(q3,q2) == glm::dot(gq3, gq2)); + + REQUIRE(conjugate(q1) == from_glm(glm::conjugate(gq1))); + REQUIRE(conjugate(q2) == from_glm(glm::conjugate(gq2))); + REQUIRE(conjugate(q3) == from_glm(glm::conjugate(gq3))); + + REQUIRE(slerp(q1,q2, 
0.4f) == from_glm(glm::slerp(gq1, gq2, 0.4f))); + REQUIRE(slerp(q1,q3, 0.19f) == from_glm(glm::slerp(gq1, gq3, 0.19f))); + REQUIRE(slerp(q2,q3, 0.89f) == from_glm(glm::slerp(gq2, gq3, 0.89f))); + + REQUIRE(pitch(q1) == (glm::pitch(gq1))); + REQUIRE(pitch(q2) == (glm::pitch(gq2))); + REQUIRE(pitch(q3) == (glm::pitch(gq3))); + REQUIRE(pitch(q4) == (glm::pitch(gq4))); + + REQUIRE(yaw(q1) == (glm::yaw(gq1))); + REQUIRE(yaw(q2) == (glm::yaw(gq2))); + REQUIRE(yaw(q3) == (glm::yaw(gq3))); + REQUIRE(yaw(q4) == (glm::yaw(gq4))); + + REQUIRE(roll(q1) == (glm::roll(gq1))); + REQUIRE(roll(q2) == (glm::roll(gq2))); + REQUIRE(roll(q3) == (glm::roll(gq3))); + REQUIRE(roll(q4) == (glm::roll(gq4))); + + REQUIRE(euler_angles(q1) == from_glm(glm::eulerAngles(gq1))); + REQUIRE(euler_angles(q2) == from_glm(glm::eulerAngles(gq2))); + REQUIRE(euler_angles(q3) == from_glm(glm::eulerAngles(gq3))); + REQUIRE(euler_angles(q4) == from_glm(glm::eulerAngles(gq4))); + +// CHECK(clean(geo::Quatf::from_euler({rad_2_deg(e1.x()),rad_2_deg(e1.y()),rad_2_deg(e1.z())})) == q1); +// CHECK(clean(geo::Quatf::from_euler({rad_2_deg(e2.x()),rad_2_deg(e2.y()),rad_2_deg(e2.z())})) == q2); +// CHECK(clean(geo::Quatf::from_euler({rad_2_deg(e3.x()),rad_2_deg(e3.y()),rad_2_deg(e3.z())})) == q3); +// CHECK(clean(geo::Quatf::from_euler({rad_2_deg(e4.x()),rad_2_deg(e4.y()),rad_2_deg(e4.z())})) == q4); + +// REQUIRE(to_mat4(q1) == from_glm(glm::toMat4(gq1))); +// REQUIRE(to_mat4(q2) == from_glm(glm::toMat4(gq2))); +// REQUIRE(to_mat4(q3) == from_glm(glm::toMat4(gq3))); +// REQUIRE(to_mat4(q4) == from_glm(glm::toMat4(gq4))); + +// REQUIRE(to_quaternion(to_mat4(q1)) == from_glm(gq1)); +// REQUIRE(to_quaternion(to_mat4(q2)) == from_glm(gq2)); +// REQUIRE(to_quaternion(to_mat4(q3)) == from_glm(gq3)); +// REQUIRE(to_quaternion(to_mat4(q4)) == from_glm(gq4)); + } + +} + +#include "geometry/cloud.hpp" +#include "camera/kinect4/k4_frame.hpp" +#include "geometry/voxel_grid.hpp" + +template +constexpr auto ccc(const geo::Matrix 
&lhs, const geo::Matrix &rhs) noexcept -> bool{ + if(lhs != rhs){ + return std::lexicographical_compare(lhs.array.begin(), lhs.array.end(), rhs.array.begin(), rhs.array.end()); + } + return false; +} + +// turbopfor +#include "TurboPFor/vp4.h" + +bool comparator(const tool::geo::CVoxel& lhs, const tool::geo::CVoxel& rhs) { + auto *c1 = reinterpret_cast(&lhs); + auto *c2 = reinterpret_cast(&rhs); + return *c1 < *c2; +} + +TEST_CASE("geo::Cloud"){ Logger::message("geo::Cloud\n"); + SECTION("ColoredCloudData"){ Logger::message("geo::ColoredCloudData\n"); + geo::ColoredCloudData cloud; +// cloud.vertices.reserve(100); +// for(size_t ii = 0; ii < 100; ++ii){ +// cloud.vertices.emplace_back(100.f-ii,2.f*ii,3.f*ii); +// } + + cloud.vertices.push_back({2.f,1.f,3.f}); + cloud.vertices.push_back({-5.f, 3.f, -2.f}); + cloud.vertices.push_back({9.f, 7.f, 0.f}); + cloud.vertices.push_back({0.f, 0.f, 0.f}); + + auto pt1 = cloud.vertices[0]; + auto pt2 = cloud.vertices[1]; + auto pt3 = cloud.vertices[2]; + + cloud.vertices.resize(5); + Logger::message(std::format("<: {}\n", pt1 < pt1)); + Logger::message(std::format(">: {}\n", pt1 > pt2)); + Logger::message(std::format("<: {}\n", pt1 < pt3)); + Logger::message(std::format(">: {}\n", pt1 > pt3)); + + geo::Pt3f pt{1.f,2.f,3.f}; + Logger::message(std::format("sum: {}\n", geo::sum(pt))); + + Logger::message(std::format("sum: {}\n", geo::sum(pt1))); + Logger::message(std::format("sum: {}\n", geo::sum(pt2))); + + Logger::message(std::format("sum: {}\n", to_string(cloud.vertices.sum()))); + Logger::message(std::format("mean: {}\n", to_string(cloud.vertices.mean_position()))); + Logger::message(std::format("minx: {}\n", std::to_string(cloud.vertices.min_x()))); + Logger::message(std::format("maxx: {}\n", std::to_string(cloud.vertices.max_x()))); + Logger::message(std::format("miny: {}\n", std::to_string(cloud.vertices.min_y()))); + Logger::message(std::format("maxy: {}\n", std::to_string(cloud.vertices.max_y()))); + 
Logger::message(std::format("minz: {}\n", std::to_string(cloud.vertices.min_z()))); + Logger::message(std::format("maxz: {}\n", std::to_string(cloud.vertices.max_z()))); + Logger::message(std::format("min: {}\n", to_string(cloud.vertices.min()))); + Logger::message(std::format("max: {}\n", to_string(cloud.vertices.max()))); + + Logger::message(std::format("minx_id: {}\n", cloud.vertices.min_x_id())); + Logger::message(std::format("maxx_id: {}\n", cloud.vertices.max_x_id())); + Logger::message(std::format("miny_id: {}\n", cloud.vertices.min_y_id())); + Logger::message(std::format("maxy_id: {}\n", cloud.vertices.max_y_id())); + Logger::message(std::format("minz_id: {}\n", cloud.vertices.min_z_id())); + Logger::message(std::format("maxz_id: {}\n", cloud.vertices.max_z_id())); + Logger::message(std::format("min_id: {}\n", cloud.vertices.min_id())); + Logger::message(std::format("max_id: {}\n", cloud.vertices.max_id())); + + + camera::K4Frame f; + for(size_t ii = 0; ii < 10000; ++ii){ +// f.cloud.vertices.push_back({(rand()%1000)*0.001f,(rand()%2000)*0.001f,(rand()%3000)*0.001f}); +// f.cloud.colors.push_back({(rand()%1000)*0.001f,(rand()%1000)*0.001f,(rand()%1000)*0.001f}); + + f.cloud.vertices.push_back({(rand()%1000)*0.001f,(rand()%2000)*0.001f,(rand()%3000)*0.001f}); + f.cloud.colors.push_back({(rand()%1000)*0.001f,(rand()%1000)*0.001f,(rand()%1000)*0.001f}); + + } + + Logger::message("TEST VOXELISATION\n"); + auto minBound = tool::geo::Pt3f{-1.f,-1.f,-1.f}; + auto maxBound = tool::geo::Pt3f{1.f,1.f,1.f}; + tool::geo::VoxelGrid voxelGrid = tool::geo::VoxelGrid::create_from_point_cloud_within_bounds( + f.cloud, + 0.1f, + minBound, + maxBound + ); + + Logger::message(std::format("frame cloud: {}\n", cloud.vertices.size())); + Logger::message(std::format("minx_id: {}\n", f.cloud.vertices.min_x())); + Logger::message(std::format("maxx_id: {}\n", f.cloud.vertices.max_x())); + Logger::message(std::format("miny_id: {}\n", f.cloud.vertices.min_y())); + 
Logger::message(std::format("maxy_id: {}\n", f.cloud.vertices.max_y())); + Logger::message(std::format("minz_id: {}\n", f.cloud.vertices.min_z())); + Logger::message(std::format("maxz_id: {}\n", f.cloud.vertices.max_z())); + + Logger::message(std::format("voxel size: {}\n", sizeof(geo::Voxel))); + Logger::message(std::format("CVoxel size: {}\n", sizeof(geo::CVoxel))); + Logger::message(std::format("std::int64_t size: {}\n", sizeof(std::int64_t))); + Logger::message(std::format("std::int32_t size: {}\n", sizeof(std::int32_t))); + Logger::message(std::format("geo::Pt3 size: {}\n", sizeof(geo::Pt3))); + Logger::message(std::format("geo::Pt3f size: {}\n", sizeof(geo::Pt3f))); + + std::vector cvoxels; + cvoxels.reserve(voxelGrid.grid.size()); + for(const auto &grid : voxelGrid.grid){ + + const auto &id = grid.second.index; + const auto &col = grid.second.color; + tool::geo::CVoxel cv; + cv.red = static_cast(col.x()*255.f); + cv.green = static_cast(col.y()*255.f); + cv.blue = static_cast(col.z()*255.f); + cv.xIndex= id.x(); + cv.yIndex= id.y(); + cv.zIndex= id.z(); + cvoxels.push_back(cv); + } + + // pass it to sort: + sort(cvoxels.begin(), cvoxels.end(), &comparator); + + cvoxels.resize(cvoxels.size() + 128- cvoxels.size()%128); + + std::vector encoded; + encoded.resize(cvoxels.size()*16); + Logger::message(std::format("cvoxels size: {} : bytes {}\n", cvoxels.size(), cvoxels.size()*sizeof(tool::geo::CVoxel))); + Logger::message(std::format("encoded size: {}\n", encoded.size())); + + size_t encodedBytesNb = p4nzenc64( + reinterpret_cast(cvoxels.data()), + cvoxels.size(), + encoded.data() + ); + Logger::message(std::format("p4nzenc64: {}\n", encodedBytesNb)); + + encodedBytesNb = p4nd1enc64( + reinterpret_cast(cvoxels.data()), + cvoxels.size(), + encoded.data() + ); + Logger::message(std::format("p4nd1enc64: {}\n", encodedBytesNb)); + + encodedBytesNb = p4ndenc64( + reinterpret_cast(cvoxels.data()), + cvoxels.size(), + encoded.data() + ); + 
Logger::message(std::format("p4ndenc64: {}\n", encodedBytesNb)); + + encodedBytesNb = p4nenc64( + reinterpret_cast(cvoxels.data()), + cvoxels.size(), + encoded.data() + ); + Logger::message(std::format("p4nenc64: {}\n", encodedBytesNb)); + + encodedBytesNb = p4nenc128v64( + reinterpret_cast(cvoxels.data()), + cvoxels.size(), + encoded.data() + ); + Logger::message(std::format("p4nenc128v64: {}\n", encodedBytesNb)); + + +// p4nenc128v64 +// // compress depth buffer +// cFrame->encodedDepthData.resize(depthSize*2); + +// // depth sizes for every mode have already a 128 padded size +// size_t encodedBytesNb = p4nzenc128v16( +// depthBuffer, +// depthSize, +// cFrame->encodedDepthData.data() +// ); +// cFrame->encodedDepthData.resize(encodedBytesNb); + + // // init cloud from voxel grid + // cloud.vertices.resize(grid.grid.size()); + // cloud.colors.resize(grid.grid.size()); + + // size_t idVoxel = 0; + // for(const auto &voxel : grid.grid){ + + // const auto &id = voxel.second.index; + // cloud.vertices[idVoxel] = (minBound + + // geo::Pt3f{static_cast(id.x()), static_cast(id.y()), static_cast(id.z())}) + // *static_cast(grid.voxelSize); + // cloud.colors[idVoxel++] = voxel.second.color; + // } + + + } +} diff --git a/cpp-projects/base-test/glm_utility.hpp b/cpp-projects/base-test/glm_utility.hpp new file mode 100644 index 0000000..eb68eb3 --- /dev/null +++ b/cpp-projects/base-test/glm_utility.hpp @@ -0,0 +1,308 @@ + +#pragma once + +// std +#include + +// glm +#include +#include +#include +#include +#include + +// base +#include "geometry/point2.hpp" +#include "geometry/point3.hpp" +#include "geometry/point4.hpp" +#include "geometry/matrix2.hpp" +#include "geometry/matrix3.hpp" +#include "geometry/matrix4.hpp" +#include "geometry/quaternion.hpp" +//#include "geometry/transform.hpp" + +namespace tool { + +template +auto to_string(const geo::Matrix &m) -> std::string{ + std::stringstream str; + str << m; + return str.str(); +} + +auto from_glm(const glm::vec2 &v) -> 
geo::Vec2f{ + return geo::Vec2f{v.x, v.y}; +} + +auto from_glm(const glm::vec3 &v) -> geo::Vec3f{ + return {v.x, v.y, v.z}; +} + +auto from_glm(const glm::vec4 &v) -> geo::Vec4f{ + return {v.x, v.y, v.z, v.w}; +} + +auto from_glm(const glm::quat &q) -> geo::Quatf{ + return {q.x, q.y, q.z, q.w}; +} + +auto from_glm(const glm::mat2 &m) -> geo::Mat2f{ + return { + m[0][0], m[1][0], + m[0][1], m[1][1] + }; +} + +auto from_glm(const glm::mat3 &m) -> geo::Mat3f{ + return { + m[0][0], m[1][0], m[2][0], + m[0][1], m[1][1], m[2][1], + m[0][2], m[1][2], m[2][2], + }; +} +auto from_glm(const glm::mat4 &m) -> geo::Mat4f{ + return { + m[0][0], m[1][0], m[2][0], m[3][0], + m[0][1], m[1][1], m[2][1], m[3][1], + m[0][2], m[1][2], m[2][2], m[3][2], + m[0][3], m[1][3], m[2][3], m[3][3], + }; +} + +auto to_gml_mat4(const geo::Mat4f &m) -> glm::mat4{ + return glm::mat4( + glm::vec4(m(0,0),m(1,0),m(2,0),m(3,0)), + glm::vec4(m(0,1),m(1,1),m(2,1),m(3,1)), + glm::vec4(m(0,2),m(1,2),m(2,2),m(3,2)), + glm::vec4(m(0,3),m(1,3),m(2,3),m(3,3)) + ); +} + + +auto compare(float v1, float v2) -> bool{ + return almost_equal(v1,v2); +} + +auto compare(const glm::vec2 &v1, const geo::RowVec &v2) -> bool{ + return + compare(v1.x, v2.x()) && + compare(v1.y, v2.y()); +} +auto compare(const geo::RowVec &v1, const glm::vec2 &v2) -> bool{ + return compare(v2,v1); +} +auto compare(const glm::vec3 &v1, const geo::RowVec &v2) -> bool{ + return + compare(v1.x, v2.x()) && + compare(v1.y, v2.y()) && + compare(v1.z, v2.z()); +} +auto compare(const geo::RowVec &v1, const glm::vec3 &v2) -> bool{ + return compare(v2,v1); +} +auto compare(const glm::vec4 &v1, const geo::RowVec &v2) -> bool{ + return + compare(v1.x, v2.x()) && + compare(v1.y, v2.y()) && + compare(v1.z, v2.z()) && + compare(v1.w, v2.w()); +} +auto compare(const geo::RowVec &v1, const glm::vec4 &v2) -> bool{ + return compare(v2,v1); +} + +// +auto compare(const glm::vec2 &v1, const geo::ColVec &v2) -> bool{ + return + compare(v1.x, v2.x()) && + 
compare(v1.y, v2.y()); +} +auto compare(const geo::ColVec &v1, const glm::vec2 &v2) -> bool{ + return compare(v2,v1); +} +auto compare(const glm::vec3 &v1, const geo::ColVec &v2) -> bool{ + return + compare(v1.x, v2.x()) && + compare(v1.y, v2.y()) && + compare(v1.z, v2.z()); +} +auto compare(const geo::ColVec &v1, const glm::vec3 &v2) -> bool{ + return compare(v2,v1); +} +auto compare(const glm::vec4 &v1, const geo::ColVec &v2) -> bool{ + return + compare(v1.x, v2.x()) && + compare(v1.y, v2.y()) && + compare(v1.z, v2.z()) && + compare(v1.w, v2.w()); +} +auto compare(const geo::ColVec &v1, const glm::vec4 &v2) -> bool{ + return compare(v2,v1); +} + + +auto compare(const glm::mat2 &m1, const geo::Mat2f &m2) -> bool{ + for(int r = 0; r < 2; ++r){ + for(int c = 0; c < 2; ++c){ + if(!compare(m1[c][r], m2.at(r,c))){ + return false; + } + } + } + return true; +} + +auto compare(const geo::Mat2f &m1, const glm::mat2 &m2) -> bool{ + return compare(m2,m1); +} + +auto compare(const glm::mat3 &m1, const geo::Mat3f &m2) -> bool{ + for(int r = 0; r < 3; ++r){ + for(int c = 0; c < 3; ++c){ + if(!compare(m1[c][r], m2.at(r,c))){ + return false; + } + } + } + return true; +} + +auto compare(const geo::Mat3f &m1, const glm::mat3 &m2) -> bool{ + return compare(m2,m1); +} + +auto compare(const glm::mat4 &m1, const geo::Mat4f &m2) -> bool{ + for(int r = 0; r < 4; ++r){ + for(int c = 0; c < 4; ++c){ + if(!compare(m1[c][r], m2.at(r,c))){ + return false; + } + } + } + return true; +} + +auto compare(const geo::Mat4f &m1, const glm::mat4 &m2) -> bool{ + return compare(m2,m1); +} + +//auto compare(const geo::Vec2f &v1, const geo::Vec2f &v2) -> bool{ +// return v1 == v2; +//} + +//auto compare(const geo::Vec3f &v1, const geo::Vec3f &v2) -> bool{ +// return v1 == v2; +//} + +//auto compare(const geo::Vec4f &v1, const geo::Vec4f &v2) -> bool{ +// return v1 == v2; +//} + +//auto compare(const geo::Mat2f &m1, const geo::Mat2f &m2) -> bool{ +// return m1 == m2; +//} + +//auto compare(const 
geo::Mat3f &m1, const geo::Mat3f &m2) -> bool{
+//    return m1 == m2;
+//}
+
+//auto compare(const geo::Mat4f &m1, const geo::Mat4f &m2) -> bool{
+//    return m1 == m2;
+//}
+
+auto clean(geo::Mat2f m) -> geo::Mat2f{ // returns a copy of m in which every element with magnitude below 1e-7 is set to exactly 0
+    for(int ii = 0; ii < 4; ++ii){
+        if(m.at(ii) < 0.0000001f && m.at(ii) > -0.0000001f){ // two-sided test: a bare `< eps` check would also wipe every genuinely negative element
+            m.at(ii) = 0.f;
+        }
+    }
+    return m;
+}
+
+auto clean(geo::Mat3f m) -> geo::Mat3f{ // same near-zero snapping over the 9 elements of a 3x3 matrix
+    for(int ii = 0; ii < 9; ++ii){
+        if(m.at(ii) < 0.0000001f && m.at(ii) > -0.0000001f){ // keep real negative values, drop only noise around 0
+            m.at(ii) = 0.f;
+        }
+    }
+    return m;
+}
+
+auto clean(geo::Mat4f m) -> geo::Mat4f{ // same near-zero snapping over the 16 elements of a 4x4 matrix
+    for(int ii = 0; ii < 16; ++ii){
+        if(m.at(ii) < 0.0000001f && m.at(ii) > -0.0000001f){ // keep real negative values, drop only noise around 0
+            m.at(ii) = 0.f;
+        }
+    }
+    return m;
+}
+
+auto clean(glm::mat3 m) -> glm::mat3 { // glm counterpart: near-zero snapping over m[ii][jj] for a 3x3 matrix
+    for(int ii = 0; ii < 3; ++ii){
+        for(int jj = 0; jj < 3; ++jj){
+            if(m[ii][jj] < 0.0000001f && m[ii][jj] > -0.0000001f){ // keep real negative values, drop only noise around 0
+                m[ii][jj] = 0.f;
+            }
+        }
+    }
+    return m;
+}
+
+auto clean(glm::mat4 m) -> glm::mat4 { // glm counterpart: near-zero snapping over m[ii][jj] for a 4x4 matrix
+    for(int ii = 0; ii < 4; ++ii){
+        for(int jj = 0; jj < 4; ++jj){
+            if(m[ii][jj] < 0.0000001f && m[ii][jj] > -0.0000001f){ // keep real negative values, drop only noise around 0
+                m[ii][jj] = 0.f;
+            }
+        }
+    }
+    return m;
+}
+
+auto clean(geo::Quatf q) -> geo::Quatf{ // near-zero snapping applied to each of the four quaternion components
+    if(q.x < 0.0000001f && q.x > -0.0000001f){ // two-sided test, see the matrix overloads
+        q.x = 0.f;
+    }
+    if(q.y < 0.0000001f && q.y > -0.0000001f){
+        q.y = 0.f;
+    }
+    if(q.z < 0.0000001f && q.z > -0.0000001f){
+        q.z = 0.f;
+    }
+    if(q.w < 0.0000001f && q.w > -0.0000001f){
+        q.w = 0.f;
+    }
+    return q;
+}
+
+// TEST helpers: single-axis rotations built with glm (angle given in degrees, converted by glm::radians) and geo transform compositions
+
+auto glm_x_rotation(float value) -> glm::mat4{ // rotation of `value` degrees around the X axis, applied to the identity matrix
+    return glm::rotate(glm::mat4(1.0f), glm::radians(value), glm::vec3(1.0, 0.0, 0.0));
+}
+auto glm_y_rotation(float value) -> glm::mat4{ // rotation of `value` degrees around the Y axis
+    return glm::rotate(glm::mat4(1.0f), glm::radians(value), glm::vec3(0.0, 1.0, 0.0));
+}
+auto glm_z_rotation(float value) -> glm::mat4{ // rotation of `value` degrees around the Z axis
+    return glm::rotate(glm::mat4(1.0f), glm::radians(value), glm::vec3(0.0, 0.0, 1.0));
+}
+
+auto glm_rotate(const glm::vec3 &rotation) -> glm::mat4{ // product Z * X * Y of the three axis rotations taken from rotation.z / .x / .y
+    return glm_z_rotation(rotation.z)*glm_x_rotation(rotation.x)*glm_y_rotation(rotation.y);
+}
+
+auto transform1(const geo::Vec3f &scale, const geo::Vec3f &rotation, const geo::Vec3f &translate) -> geo::Mat4f{ // scale * rotation * translation; rotation is multiplied by PI_180 first (presumably degrees -> radians, TODO confirm)
+    return scale_m4x4(scale)*rotation_m4x4(rotation*PI_180)*translation_m4x4(translate);
+}
+auto transform2(const geo::Vec3f &scale, const geo::Vec3f &rotation, const geo::Vec3f &translate) -> geo::Mat4f{ // same three factors in the opposite order: translation * rotation * scale
+    return translation_m4x4(translate)*rotation_m4x4(rotation*PI_180)*scale_m4x4(scale);
+}
+auto transform3(const geo::Vec3f &scale, const geo::Vec3f &rotation, const geo::Vec3f &translate) -> geo::Mat4f{ // same product as transform1, accumulated one factor at a time
+    geo::Mat4f tr = scale_m4x4(scale);
+    tr = tr * rotation_m4x4(rotation*PI_180);
+    tr = tr * translation_m4x4(translate);
+    return tr;
+}
+
+
+} // namespace tool
diff --git a/cpp-projects/base-test/main.cpp b/cpp-projects/base-test/main.cpp
new file mode 100644
index 0000000..6c98d02
--- /dev/null
+++ b/cpp-projects/base-test/main.cpp
@@ -0,0 +1,78 @@
+/*******************************************************************************
+** Toolset-test **
+** MIT License **
+** Copyright (c) [2018] [Florian Lance] **
+** **
+** Permission is hereby granted, free of charge, to any person obtaining a **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation **
+** the rights to use, copy, modify, merge, publish, distribute, sublicense, **
+** and/or sell copies of the Software, and to permit persons to whom the **
+** Software is furnished to do so, subject to the following conditions: **
+** **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software. **
+** **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER **
+** DEALINGS IN THE SOFTWARE. **
+** **
+********************************************************************************/
+
+// catch
+#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do this in one cpp file
+#include "thirdparty/catch/catch.hpp"
+
+// local
+#include "utility/logger.hpp"
+
+TEST_CASE( "1: All test cases reside in other .cpp files (empty)", "[multi-file:1]" ) { // placeholder case: it only logs a banner and asserts nothing
+    tool::Logger::message("All tests\n");
+}
+
+// REQUIRE( expression ) : quit if failed
+// CHECK( expression ) : continue if failed
+// REQUIRE_FALSE( expression )
+// CHECK_FALSE( expression )
+// REQUIRE( performComputation() == Approx( 2.1 ) );
+// using namespace Catch::literals;
+// REQUIRE( performComputation() == 2.1_a );
+
+// Approx target = Approx(100).epsilon(0.01);
+// 100.0 == target; // Obviously true
+// 200.0 == target; // Obviously still false
+// 100.5 == target; // True, because we set target to allow up to 1% difference
+
+// Approx target = Approx(100).margin(5);
+// 100.0 == target; // Obviously true
+// 200.0 == target; // Obviously still false
+// 104.0 == target; // True, because we set target to allow absolute difference of at most 5
+
+// REQUIRE_NOTHROW( expression ) and
+// CHECK_NOTHROW( expression )
+// REQUIRE_THROWS( expression ) and
+// CHECK_THROWS( expression )
+// REQUIRE_THROWS_WITH( expression, string or string matcher ) and
+// CHECK_THROWS_WITH( expression, string or string matcher )
+// REQUIRE_THROWS_MATCHES( expression, exception type, matcher for given exception type ) and
+// CHECK_THROWS_MATCHES( expression, exception type, matcher for given exception type )
+//REQUIRE_NOTHROW([&](){
+//    int i = 1;
+//    int j = 2;
+//    auto k = i + j;
+//    if (k == 3) {
+//        throw 1;
+//    }
+//}());
+
+// REQUIRE_THAT( lhs, matcher expression ) and
+// CHECK_THAT( lhs, matcher expression )
+// TEST_CASE_METHOD((Fixture), "foo", "[bar]") {
+// SUCCEED();
+// }
+
+
diff --git a/cpp-projects/base/LICENSE b/cpp-projects/base/LICENSE
new file mode 100644
index 
0000000..fc6ced1 --- /dev/null +++ b/cpp-projects/base/LICENSE @@ -0,0 +1,24 @@ + + +Toolbox-base +MIT License + +Copyright (c) 2018 Florian Lance + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/cpp-projects/base/algorithms/maching_cube.hpp b/cpp-projects/base/algorithms/maching_cube.hpp new file mode 100644 index 0000000..8753ca3 --- /dev/null +++ b/cpp-projects/base/algorithms/maching_cube.hpp @@ -0,0 +1,611 @@ + + +#pragma once + +// std +#include +#include +#include +#include + +// local +#include "geometry/octree.hpp" + +namespace tool::algo { + +constexpr int edgeTable[256]={ + 0x0 , 0x109, 0x203, 0x30a, 0x406, 0x50f, 0x605, 0x70c, + 0x80c, 0x905, 0xa0f, 0xb06, 0xc0a, 0xd03, 0xe09, 0xf00, + 0x190, 0x99 , 0x393, 0x29a, 0x596, 0x49f, 0x795, 0x69c, + 0x99c, 0x895, 0xb9f, 0xa96, 0xd9a, 0xc93, 0xf99, 0xe90, + 0x230, 0x339, 0x33 , 0x13a, 0x636, 0x73f, 0x435, 0x53c, + 0xa3c, 0xb35, 0x83f, 0x936, 0xe3a, 0xf33, 0xc39, 0xd30, + 0x3a0, 0x2a9, 0x1a3, 0xaa , 0x7a6, 0x6af, 0x5a5, 0x4ac, + 0xbac, 0xaa5, 0x9af, 0x8a6, 0xfaa, 0xea3, 0xda9, 0xca0, + 0x460, 0x569, 0x663, 0x76a, 0x66 , 0x16f, 0x265, 0x36c, + 0xc6c, 0xd65, 0xe6f, 0xf66, 0x86a, 0x963, 0xa69, 0xb60, + 0x5f0, 0x4f9, 0x7f3, 0x6fa, 0x1f6, 0xff , 0x3f5, 0x2fc, + 0xdfc, 0xcf5, 0xfff, 0xef6, 0x9fa, 0x8f3, 0xbf9, 0xaf0, + 0x650, 0x759, 0x453, 0x55a, 0x256, 0x35f, 0x55 , 0x15c, + 0xe5c, 0xf55, 0xc5f, 0xd56, 0xa5a, 0xb53, 0x859, 0x950, + 0x7c0, 0x6c9, 0x5c3, 0x4ca, 0x3c6, 0x2cf, 0x1c5, 0xcc , + 0xfcc, 0xec5, 0xdcf, 0xcc6, 0xbca, 0xac3, 0x9c9, 0x8c0, + 0x8c0, 0x9c9, 0xac3, 0xbca, 0xcc6, 0xdcf, 0xec5, 0xfcc, + 0xcc , 0x1c5, 0x2cf, 0x3c6, 0x4ca, 0x5c3, 0x6c9, 0x7c0, + 0x950, 0x859, 0xb53, 0xa5a, 0xd56, 0xc5f, 0xf55, 0xe5c, + 0x15c, 0x55 , 0x35f, 0x256, 0x55a, 0x453, 0x759, 0x650, + 0xaf0, 0xbf9, 0x8f3, 0x9fa, 0xef6, 0xfff, 0xcf5, 0xdfc, + 0x2fc, 0x3f5, 0xff , 0x1f6, 0x6fa, 0x7f3, 0x4f9, 0x5f0, + 0xb60, 0xa69, 0x963, 0x86a, 0xf66, 0xe6f, 0xd65, 0xc6c, + 0x36c, 0x265, 0x16f, 0x66 , 0x76a, 0x663, 0x569, 0x460, + 0xca0, 0xda9, 0xea3, 0xfaa, 0x8a6, 0x9af, 0xaa5, 0xbac, + 0x4ac, 0x5a5, 0x6af, 0x7a6, 0xaa , 0x1a3, 0x2a9, 0x3a0, + 0xd30, 0xc39, 0xf33, 0xe3a, 0x936, 0x83f, 0xb35, 0xa3c, + 0x53c, 0x435, 0x73f, 
0x636, 0x13a, 0x33 , 0x339, 0x230, + 0xe90, 0xf99, 0xc93, 0xd9a, 0xa96, 0xb9f, 0x895, 0x99c, + 0x69c, 0x795, 0x49f, 0x596, 0x29a, 0x393, 0x99 , 0x190, + 0xf00, 0xe09, 0xd03, 0xc0a, 0xb06, 0xa0f, 0x905, 0x80c, + 0x70c, 0x605, 0x50f, 0x406, 0x30a, 0x203, 0x109, 0x0 }; + + + +constexpr int triTable[256][16] ={ + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 8, 3, 9, 8, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 2, 10, 0, 2, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 8, 3, 2, 10, 8, 10, 9, 8, -1, -1, -1, -1, -1, -1, -1}, + {3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 11, 2, 8, 11, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 9, 0, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 11, 2, 1, 9, 11, 9, 8, 11, -1, -1, -1, -1, -1, -1, -1}, + {3, 10, 1, 11, 10, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 10, 1, 0, 8, 10, 8, 11, 10, -1, -1, -1, -1, -1, -1, -1}, + {3, 9, 0, 3, 11, 9, 11, 10, 9, -1, -1, -1, -1, -1, -1, -1}, + {9, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 3, 0, 7, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 1, 9, 4, 7, 1, 7, 3, 1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 4, 7, 3, 0, 4, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1}, + {9, 2, 10, 9, 0, 2, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1}, + {2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4, -1, -1, -1, -1}, + {8, 4, 7, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 4, 7, 11, 2, 4, 2, 0, 4, -1, -1, -1, -1, -1, -1, -1}, + {9, 0, 1, 8, 4, 7, 2, 3, 11, -1, -1, -1, -1, -1, -1, 
-1}, + {4, 7, 11, 9, 4, 11, 9, 11, 2, 9, 2, 1, -1, -1, -1, -1}, + {3, 10, 1, 3, 11, 10, 7, 8, 4, -1, -1, -1, -1, -1, -1, -1}, + {1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4, -1, -1, -1, -1}, + {4, 7, 8, 9, 0, 11, 9, 11, 10, 11, 0, 3, -1, -1, -1, -1}, + {4, 7, 11, 4, 11, 9, 9, 11, 10, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 4, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 5, 4, 1, 5, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 5, 4, 8, 3, 5, 3, 1, 5, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 8, 1, 2, 10, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1}, + {5, 2, 10, 5, 4, 2, 4, 0, 2, -1, -1, -1, -1, -1, -1, -1}, + {2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8, -1, -1, -1, -1}, + {9, 5, 4, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 11, 2, 0, 8, 11, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1}, + {0, 5, 4, 0, 1, 5, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1}, + {2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5, -1, -1, -1, -1}, + {10, 3, 11, 10, 1, 3, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10, -1, -1, -1, -1}, + {5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3, -1, -1, -1, -1}, + {5, 4, 8, 5, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1}, + {9, 7, 8, 5, 7, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 3, 0, 9, 5, 3, 5, 7, 3, -1, -1, -1, -1, -1, -1, -1}, + {0, 7, 8, 0, 1, 7, 1, 5, 7, -1, -1, -1, -1, -1, -1, -1}, + {1, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 7, 8, 9, 5, 7, 10, 1, 2, -1, -1, -1, -1, -1, -1, -1}, + {10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3, -1, -1, -1, -1}, + {8, 0, 2, 8, 2, 5, 8, 5, 7, 10, 5, 2, -1, -1, -1, -1}, + {2, 10, 5, 2, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1}, + {7, 9, 5, 7, 8, 9, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 7, 9, 7, 2, 9, 2, 0, 2, 7, 11, -1, -1, -1, -1}, + {2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7, -1, -1, -1, -1}, + {11, 2, 1, 11, 1, 7, 7, 1, 5, -1, -1, -1, -1, -1, -1, -1}, + {9, 
5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11, -1, -1, -1, -1}, + {5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0, -1}, + {11, 10, 0, 11, 0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0, -1}, + {11, 10, 5, 7, 11, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 0, 1, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 8, 3, 1, 9, 8, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1}, + {1, 6, 5, 2, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 6, 5, 1, 2, 6, 3, 0, 8, -1, -1, -1, -1, -1, -1, -1}, + {9, 6, 5, 9, 0, 6, 0, 2, 6, -1, -1, -1, -1, -1, -1, -1}, + {5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8, -1, -1, -1, -1}, + {2, 3, 11, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 0, 8, 11, 2, 0, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1}, + {5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11, -1, -1, -1, -1}, + {6, 3, 11, 6, 5, 3, 5, 1, 3, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6, -1, -1, -1, -1}, + {3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9, -1, -1, -1, -1}, + {6, 5, 9, 6, 9, 11, 11, 9, 8, -1, -1, -1, -1, -1, -1, -1}, + {5, 10, 6, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 3, 0, 4, 7, 3, 6, 5, 10, -1, -1, -1, -1, -1, -1, -1}, + {1, 9, 0, 5, 10, 6, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1}, + {10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4, -1, -1, -1, -1}, + {6, 1, 2, 6, 5, 1, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 5, 5, 2, 6, 3, 0, 4, 3, 4, 7, -1, -1, -1, -1}, + {8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6, -1, -1, -1, -1}, + {7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9, -1}, + {3, 11, 2, 7, 8, 4, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1}, + {5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11, -1, -1, -1, -1}, + {0, 1, 9, 4, 7, 8, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1}, + {9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6, -1}, + {8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6, -1, -1, -1, -1}, + {5, 1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 
4, 0, 4, 11, -1}, + {0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7, -1}, + {6, 5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9, -1, -1, -1, -1}, + {10, 4, 9, 6, 4, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 10, 6, 4, 9, 10, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1}, + {10, 0, 1, 10, 6, 0, 6, 4, 0, -1, -1, -1, -1, -1, -1, -1}, + {8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10, -1, -1, -1, -1}, + {1, 4, 9, 1, 2, 4, 2, 6, 4, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 8, 1, 2, 9, 2, 4, 9, 2, 6, 4, -1, -1, -1, -1}, + {0, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 3, 2, 8, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1}, + {10, 4, 9, 10, 6, 4, 11, 2, 3, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 2, 2, 8, 11, 4, 9, 10, 4, 10, 6, -1, -1, -1, -1}, + {3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10, -1, -1, -1, -1}, + {6, 4, 1, 6, 1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1, -1}, + {9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3, -1, -1, -1, -1}, + {8, 11, 1, 8, 1, 0, 11, 6, 1, 9, 1, 4, 6, 4, 1, -1}, + {3, 11, 6, 3, 6, 0, 0, 6, 4, -1, -1, -1, -1, -1, -1, -1}, + {6, 4, 8, 11, 6, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 10, 6, 7, 8, 10, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1}, + {0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10, -1, -1, -1, -1}, + {10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0, -1, -1, -1, -1}, + {10, 6, 7, 10, 7, 1, 1, 7, 3, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 6, 7, -1, -1, -1, -1}, + {2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7, 3, 9, -1}, + {7, 8, 0, 7, 0, 6, 6, 0, 2, -1, -1, -1, -1, -1, -1, -1}, + {7, 3, 2, 6, 7, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7, -1, -1, -1, -1}, + {2, 0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7, -1}, + {1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11, -1}, + {11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1, -1, -1, -1, -1}, + {8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6, -1}, + {0, 9, 1, 11, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0, -1, -1, -1, -1}, + {7, 11, 6, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1}, + {7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 8, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 1, 9, 8, 3, 1, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1}, + {10, 1, 2, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 3, 0, 8, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1}, + {2, 9, 0, 2, 10, 9, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1}, + {6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8, -1, -1, -1, -1}, + {7, 2, 3, 6, 2, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 0, 8, 7, 6, 0, 6, 2, 0, -1, -1, -1, -1, -1, -1, -1}, + {2, 7, 6, 2, 3, 7, 0, 1, 9, -1, -1, -1, -1, -1, -1, -1}, + {1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6, -1, -1, -1, -1}, + {10, 7, 6, 10, 1, 7, 1, 3, 7, -1, -1, -1, -1, -1, -1, -1}, + {10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8, -1, -1, -1, -1}, + {0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7, -1, -1, -1, -1}, + {7, 6, 10, 7, 10, 8, 8, 10, 9, -1, -1, -1, -1, -1, -1, -1}, + {6, 8, 4, 11, 8, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 6, 11, 3, 0, 6, 0, 4, 6, -1, -1, -1, -1, -1, -1, -1}, + {8, 6, 11, 8, 4, 6, 9, 0, 1, -1, -1, -1, -1, -1, -1, -1}, + {9, 4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6, -1, -1, -1, -1}, + {6, 8, 4, 6, 11, 8, 2, 10, 1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 3, 0, 11, 0, 6, 11, 0, 4, 6, -1, -1, -1, -1}, + {4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9, -1, -1, -1, -1}, + {10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3, -1}, + {8, 2, 3, 8, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1}, + {0, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8, -1, -1, -1, -1}, + {1, 9, 4, 1, 4, 2, 2, 4, 6, -1, -1, -1, -1, -1, -1, -1}, + {8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1, -1, -1, -1, -1}, + {10, 1, 0, 10, 0, 6, 6, 0, 4, -1, -1, -1, -1, -1, -1, -1}, + {4, 6, 3, 4, 3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3, -1}, + {10, 9, 4, 6, 10, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 5, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1}, + {0, 8, 3, 4, 9, 5, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1}, + {5, 0, 1, 5, 4, 0, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1}, + {11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5, -1, -1, -1, -1}, + {9, 5, 4, 10, 1, 2, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1}, + {6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5, -1, -1, -1, -1}, + {7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2, -1, -1, -1, -1}, + {3, 4, 8, 3, 5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6, -1}, + {7, 2, 3, 7, 6, 2, 5, 4, 9, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 4, 0, 8, 6, 0, 6, 2, 6, 8, 7, -1, -1, -1, -1}, + {3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0, -1, -1, -1, -1}, + {6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8, -1}, + {9, 5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7, -1, -1, -1, -1}, + {1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4, -1}, + {4, 0, 10, 4, 10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10, -1}, + {7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10, -1, -1, -1, -1}, + {6, 9, 5, 6, 11, 9, 11, 8, 9, -1, -1, -1, -1, -1, -1, -1}, + {3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5, -1, -1, -1, -1}, + {0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11, -1, -1, -1, -1}, + {6, 11, 3, 6, 3, 5, 5, 3, 1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6, -1, -1, -1, -1}, + {0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1, 2, 10, -1}, + {11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5, -1}, + {6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3, -1, -1, -1, -1}, + {5, 8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2, -1, -1, -1, -1}, + {9, 5, 6, 9, 6, 0, 0, 6, 2, -1, -1, -1, -1, -1, -1, -1}, + {1, 5, 8, 1, 8, 0, 5, 6, 8, 3, 8, 2, 6, 2, 8, -1}, + {1, 5, 6, 2, 1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6, -1}, + {10, 1, 0, 10, 0, 6, 9, 5, 0, 5, 6, 0, -1, -1, -1, -1}, + {0, 3, 8, 5, 6, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {10, 5, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 5, 10, 7, 5, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 5, 10, 11, 7, 5, 8, 3, 0, -1, -1, -1, -1, -1, -1, -1}, + {5, 11, 7, 5, 10, 11, 1, 9, 0, -1, -1, -1, -1, -1, -1, -1}, 
+ {10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1, -1, -1, -1, -1}, + {11, 1, 2, 11, 7, 1, 7, 5, 1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11, -1, -1, -1, -1}, + {9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7, -1, -1, -1, -1}, + {7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2, -1}, + {2, 5, 10, 2, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1}, + {8, 2, 0, 8, 5, 2, 8, 7, 5, 10, 2, 5, -1, -1, -1, -1}, + {9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2, -1, -1, -1, -1}, + {9, 8, 2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2, -1}, + {1, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 7, 0, 7, 1, 1, 7, 5, -1, -1, -1, -1, -1, -1, -1}, + {9, 0, 3, 9, 3, 5, 5, 3, 7, -1, -1, -1, -1, -1, -1, -1}, + {9, 8, 7, 5, 9, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {5, 8, 4, 5, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1}, + {5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0, -1, -1, -1, -1}, + {0, 1, 9, 8, 4, 10, 8, 10, 11, 10, 4, 5, -1, -1, -1, -1}, + {10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4, -1}, + {2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8, -1, -1, -1, -1}, + {0, 4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11, -1}, + {0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5, -1}, + {9, 4, 5, 2, 11, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4, -1, -1, -1, -1}, + {5, 10, 2, 5, 2, 4, 4, 2, 0, -1, -1, -1, -1, -1, -1, -1}, + {3, 10, 2, 3, 5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9, -1}, + {5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2, -1, -1, -1, -1}, + {8, 4, 5, 8, 5, 3, 3, 5, 1, -1, -1, -1, -1, -1, -1, -1}, + {0, 4, 5, 1, 0, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5, -1, -1, -1, -1}, + {9, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 11, 7, 4, 9, 11, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11, -1, -1, -1, -1}, + {1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11, -1, -1, -1, -1}, + {3, 1, 4, 3, 4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4, -1}, + {4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2, -1, -1, -1, -1}, 
+ {9, 7, 4, 9, 11, 7, 9, 1, 11, 2, 11, 1, 0, 8, 3, -1}, + {11, 7, 4, 11, 4, 2, 2, 4, 0, -1, -1, -1, -1, -1, -1, -1}, + {11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4, -1, -1, -1, -1}, + {2, 9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9, -1, -1, -1, -1}, + {9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7, -1}, + {3, 7, 10, 3, 10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10, -1}, + {1, 10, 2, 8, 7, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 1, 4, 1, 7, 7, 1, 3, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 1, 4, 1, 7, 0, 8, 1, 8, 7, 1, -1, -1, -1, -1}, + {4, 0, 3, 7, 4, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 8, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 9, 3, 9, 11, 11, 9, 10, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 10, 0, 10, 8, 8, 10, 11, -1, -1, -1, -1, -1, -1, -1}, + {3, 1, 10, 11, 3, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 11, 1, 11, 9, 9, 11, 8, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9, -1, -1, -1, -1}, + {0, 2, 11, 8, 0, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 2, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 3, 8, 2, 8, 10, 10, 8, 9, -1, -1, -1, -1, -1, -1, -1}, + {9, 10, 2, 0, 9, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8, -1, -1, -1, -1}, + {1, 10, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 3, 8, 9, 1, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 9, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 3, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1} +}; + + +template +struct TRIANGLE +{ + geo::Point3 p[3]; +}; + +template +struct GRIDCELL +{ + geo::Point3 p[8]; + T2 val[8]; +}; + +/* + Linearly interpolate the position where an isosurface cuts + an edge between two vertices, each with their own scalar value +*/ +template +geo::Point3 VertexInterp(T isolevel, geo::Point3 p1, 
geo::Point3 p2,T valp1,T valp2){ + + T mu; + geo::Point3 p; + + if (std::abs(isolevel-valp1) < 0.00001) + return(p1); + if (std::abs(isolevel-valp2) < 0.00001) + return(p2); + if (std::abs(valp1-valp2) < 0.00001) + return(p1); + mu = (isolevel - valp1) / (valp2 - valp1); + p.x() = p1.x() + mu * (p2.x() - p1.x()); + p.y() = p1.y() + mu * (p2.y() - p1.y()); + p.z() = p1.z() + mu * (p2.z() - p1.z()); + + return(p); +} + + + +template +static int Polygonise(GRIDCELL grid,T2 isolevel,TRIANGLE *triangles){ + + int i,ntriang; + int cubeindex; + geo::Point3 vertlist[12]; + + /* + Determine the index into the edge table which + tells us which vertices are inside of the surface + */ + cubeindex = 0; + + if (grid.val[0] < isolevel) cubeindex |= 1; + if (grid.val[1] < isolevel) cubeindex |= 2; + if (grid.val[2] < isolevel) cubeindex |= 4; + if (grid.val[3] < isolevel) cubeindex |= 8; + if (grid.val[4] < isolevel) cubeindex |= 16; + if (grid.val[5] < isolevel) cubeindex |= 32; + if (grid.val[6] < isolevel) cubeindex |= 64; + if (grid.val[7] < isolevel) cubeindex |= 128; + +// std::cout << "cube index " << cubeindex << "\n"; + + /* Cube is entirely in/out of the surface */ + if (edgeTable[cubeindex] == 0) + return(0); + + /* Find the vertices where the surface intersects the cube */ + if (edgeTable[cubeindex] & 1) + vertlist[0] = + VertexInterp(isolevel,grid.p[0],grid.p[1],grid.val[0],grid.val[1]); + if (edgeTable[cubeindex] & 2) + vertlist[1] = + VertexInterp(isolevel,grid.p[1],grid.p[2],grid.val[1],grid.val[2]); + if (edgeTable[cubeindex] & 4) + vertlist[2] = + VertexInterp(isolevel,grid.p[2],grid.p[3],grid.val[2],grid.val[3]); + if (edgeTable[cubeindex] & 8) + vertlist[3] = + VertexInterp(isolevel,grid.p[3],grid.p[0],grid.val[3],grid.val[0]); + if (edgeTable[cubeindex] & 16) + vertlist[4] = + VertexInterp(isolevel,grid.p[4],grid.p[5],grid.val[4],grid.val[5]); + if (edgeTable[cubeindex] & 32) + vertlist[5] = + VertexInterp(isolevel,grid.p[5],grid.p[6],grid.val[5],grid.val[6]); 
+ if (edgeTable[cubeindex] & 64) + vertlist[6] = + VertexInterp(isolevel,grid.p[6],grid.p[7],grid.val[6],grid.val[7]); + if (edgeTable[cubeindex] & 128) + vertlist[7] = + VertexInterp(isolevel,grid.p[7],grid.p[4],grid.val[7],grid.val[4]); + if (edgeTable[cubeindex] & 256) + vertlist[8] = + VertexInterp(isolevel,grid.p[0],grid.p[4],grid.val[0],grid.val[4]); + if (edgeTable[cubeindex] & 512) + vertlist[9] = + VertexInterp(isolevel,grid.p[1],grid.p[5],grid.val[1],grid.val[5]); + if (edgeTable[cubeindex] & 1024) + vertlist[10] = + VertexInterp(isolevel,grid.p[2],grid.p[6],grid.val[2],grid.val[6]); + if (edgeTable[cubeindex] & 2048) + vertlist[11] = + VertexInterp(isolevel,grid.p[3],grid.p[7],grid.val[3],grid.val[7]); + + /* Create the triangle */ + ntriang = 0; + for (i=0;triTable[cubeindex][i]!=-1;i+=3) { + triangles[ntriang].p[0] = vertlist[triTable[cubeindex][i ]]; + triangles[ntriang].p[1] = vertlist[triTable[cubeindex][i+1]]; + triangles[ntriang].p[2] = vertlist[triTable[cubeindex][i+2]]; + ntriang++; + } + + return(ntriang); +} + + +static size_t count_neighbours(std_v3*> &nodes, geo::Pt3 id, size_t side){ + int ii = id.x(); + int jj = id.y(); + int kk = id.z(); + + size_t count = 0; + for(int ni = ii-1; ni < ii+2; ++ni){ + for(int nj = jj-1; nj < jj+2; ++nj){ + for(int nk = kk-1; nk < kk+2; ++nk){ + if(ni == ii && nj == jj && nk == kk){ + continue; + } + + if(ni > 0 && nj > 0 && nk > 0 && ni < side-1 && nj < side-1 && nk < side-1){ + if(nodes[ni][nj][nk]){ + ++count; + } + } + } + } + } + + return count; +} + +static std_v1> marching_cube(geo::VoxelOctreeNode *node, size_t depth, float iso){ + + const size_t side = static_cast(math::ipow(2, depth)); + + // sort nodes + auto sortedNodes = node->sorted_nodes(depth); + + + + + // do the marching cube + std_v1> totalTri; + for(size_t ii = 0; ii < side; ++ii){ + for(size_t jj = 0; jj < side; ++jj){ + for(size_t kk = 0; kk < side; ++kk){ + if(sortedNodes[ii][jj][kk]){ + std_v1> pts; + + Pt3 id(ii,jj,kk); + + const 
auto &b = sortedNodes[ii][jj][kk]->bounds; + GRIDCELL grid; + + for(int ii = 0; ii < 8; ++ii){ + grid.p[ii] = b.origin + mu[ii]*b.size*2.f; + grid.val[ii] = count_neighbours(sortedNodes, id + Pt3(mu[ii].x(), mu[ii].y(), mu[ii].z()), side)/26.f; + } + +// grid.p[1] = b.origin + mu[1]*b.size*2.f; +// grid.p[2] = b.origin + mu[2]*b.size*2.f; +// grid.p[3] = b.origin + mu[3]*b.size*2.f; +// grid.p[4] = b.origin + mu[4]*b.size*2.f; +// grid.p[5] = b.origin + mu[5]*b.size*2.f; +// grid.p[6] = b.origin + mu[6]*b.size*2.f; +// grid.p[7] = b.origin + mu[7]*b.size*2.f; + + std::array,5> tri; + int nbTri = Polygonise(grid, iso, tri.data()); + if(nbTri > 0){ +// std::cout << "tri " << nbTri << "\n"; + std::move(tri.begin(), tri.begin() + nbTri, std::back_inserter(totalTri)); + } + + } + } + } + } + + return totalTri; +} + + +//static std_v1> marching_cube(VoxelOctreeNode *node, size_t depth, float iso){ + +// const size_t side = static_cast(ipow(2, depth)); + +// // sort nodes +// auto sortedNodes = node->sorted_nodes(depth); + + + + +// // do the marching cube +// std_v1> totalTri; +// for(size_t ii = 0; ii < side; ++ii){ +// for(size_t jj = 0; jj < side; ++jj){ +// for(size_t kk = 0; kk < side; ++kk){ +// if(sortedNodes[ii][jj][kk]){ +// std_v1> pts; + +// for(int ni = ii-1; ni < ii+2; ++ni){ +// for(int nj = jj-1; nj < jj+2; ++nj){ +// for(int nk = kk-1; nk < kk+2; ++nk){ +// if(ni == ii && nj == jj && nk == kk){ +// continue; +// } + + +// if(ni > 0 && nj > 0 && nk > 0 && ni < side-1 && nj < side-1 && nk < side-1){ +// if(sortedNodes[ni][nj][nk]){ +// pts.emplace_back(sortedNodes[ni][nj][nk]->bounds.origin); +// } +// } +// } +// } +// } +////// std::cout << ni << " " << nj << " " << nk << "\n"; +//// if(sortedNodes[ii+ni][jj+nj][kk+nk] != nullptr){ +////// std::cout << ni << " " << nj << " " << nk << "\n"; + +////// if(ni == 1 && nj ==1 && nk == 1){ +//// ptsN.emplace_back(sortedNodes[ii+ni][jj+nj][kk+nk]->bounds.origin); + + +//// std::cout << "pts n " << pts.size() << 
"\n"; + + +// // std::cout << "node " << ii << " " <bounds; +// GRIDCELL grid; + +// // init gridcell +// for(size_t ll = 0; ll < 8; ++ll){ + +// // set grid +// grid.p[ll] = b.origin + mu[ll]*b.size; + + +//// std::cout << "ll " << ll << " " << grid.p[ll] << "\n"; + +// const auto max = 1.5*b.size.norm(); + +// size_t countClosePts = 0; + +// for(const auto &pt : pts){ + +//// std::cout << geo::vec(pt, grid.p[ll]).norm() << " " << max << "\n";// << grid.p[ll] << " " << max << geo::vec(pt, grid.p[ll]).square_norm() << "\n"; +// if(geo::vec(pt, grid.p[ll]).norm() < max){ +// countClosePts++; +// } +// } +//// pts + + +//// // compute value +//// float total = 0.f; +//// for(const auto &id : mu4[ll]){ +//// const Pt3 idCurrN(ii+id.x(), jj+id.y(), kk+id.z()); + +////// std::cout << "id " << id << " " << idCurrN << "\n"; +//// if(idCurrN.x() < 0 || idCurrN.y() < 0 || idCurrN.z() < 0 || idCurrN.x() >= side || idCurrN.y() >= side || idCurrN.z() >= side ){ +//// continue; +//// } + +//// if(sortedNodes[idCurrN.x()][idCurrN.y()][idCurrN.z()]){ +//// total += 1;// sortedNodes[idCurrN.x()][idCurrN.y()][idCurrN.z()]->ids.size(); +//// } +//// } +// grid.val[ll] = countClosePts/8.f; +//// std::cout << grid.val[ll] << " "; +// } + +// std::array,5> tri; +// int nbTri = Polygonise(grid, iso, tri.data()); +// if(nbTri > 0){ +//// std::cout << "tri " << nbTri << "\n"; +// std::move(tri.begin(), tri.begin() + nbTri, std::back_inserter(totalTri)); +// } + +// } +// } +// } +// } + +// return totalTri; +//} +} diff --git a/cpp-projects/base/base.pro b/cpp-projects/base/base.pro new file mode 100644 index 0000000..e71b508 --- /dev/null +++ b/cpp-projects/base/base.pro @@ -0,0 +1,453 @@ + +# /******************************************************************************* +# ** Toolset-base ** +# ** MIT License ** +# ** Copyright (c) [2018] [Florian Lance] ** +# ** ** +# ** Permission is hereby granted, free of charge, to any person obtaining a ** +# ** copy of this software and 
associated documentation files (the "Software"), ** +# ** to deal in the Software without restriction, including without limitation ** +# ** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +# ** and/or sell copies of the Software, and to permit persons to whom the ** +# ** Software is furnished to do so, subject to the following conditions: ** +# ** ** +# ** The above copyright notice and this permission notice shall be included in ** +# ** all copies or substantial portions of the Software. ** +# ** ** +# ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +# ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +# ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +# ** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +# ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +# ** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +# ** DEALINGS IN THE SOFTWARE. ** +# ** ** +# ********************************************************************************/ + +####################################### repo +TOOLBOX_REPOSITORY_DIR = $$PWD"/../.." 
+ +####################################### PRI +# defines compiling options +include(../ts-settings.pri) +# defines projects paths and variables +include(../ts-projects.pri) +# defines thirdparty includes and libs +include(../ts-thirdparty.pri) + +####################################### TARGET +equals(CFG, "debug"){ + TARGET = based +} +equals(CFG, "release"){ + TARGET = base +} + +####################################### TEMPLATE +TEMPLATE = lib +CONFIG += staticlib +CONFIG -= console + +####################################### BUILD FILES +OBJECTS_DIR = $$BASE_OBJ +DESTDIR = $$BASE_DEST + +####################################### CONFIG +CONFIG -= qt + +####################################### INCLUDES +INCLUDEPATH += \ + # thirdparty + $$ASSIMP_INCLUDES \ + $$TURBOJPG_INCLUDES \ + $$FASTPFOR_INCLUDES \ + $$OPENCV_INCLUDES \ + $$KINECT2_INCLUDES \ + $$KINECT4_INCLUDES \ + $$BOOST_INCLUDES \ + $$LIBSOUNDIO_INCLUDES \ + $$LIBUSB_INCLUDES \ + $$EIGEN_INCLUDES \ + $$OPEN3D_INCLUDES \ + $$TURBOPFOR_INCLUDES \ + +####################################### LIBRAIRIES +LIBS += \ + # thirdparty + $$ASSIMP_LIBS \ + $$TURBOJPG_LIBS \ + $$FASTPFOR_LIBS \ + $$OPENCV_LIBS \ + $$KINECT2_LIBS \ + $$KINECT4_LIBS \ + $$BOOST_LIBS \ + $$LIBSOUNDIO_LIBS \ + $$LIBUSB_LIBS \ + $$EIGEN_LIBS\ + $$OPEN3D_LIBS \ + $$TURBOPFOR_LIBS \ + $$WINDOWS_LIBS \ + +####################################### PROJECT FILES + +HEADERS += \ + # exvr + camera/kinect4/k4_actions_settings.hpp \ + camera/kinect4/k4_calibrator.hpp \ + camera/kinect4/k4_calibrator_settings.hpp \ + camera/kinect4/k4_color_settings.hpp \ + camera/kinect4/k4_compressed_frame.hpp \ + camera/kinect4/k4_config_settings.hpp \ + camera/kinect4/k4_data_settings.hpp \ + camera/kinect4/k4_delay.hpp \ + camera/kinect4/k4_device_settings.hpp \ + camera/kinect4/k4_display_settings.hpp \ + camera/kinect4/k4_filters.hpp \ + camera/kinect4/k4_grabber_data_processing.hpp \ + camera/kinect4/k4_model.hpp \ + camera/kinect4/k4_player_settings.hpp \ + 
camera/kinect4/k4_recorder.hpp \ + camera/kinect4/k4_recorder_settings.hpp \ + camera/kinect4/k4_volumetric_video.hpp \ + camera/kinect4/k4_player.hpp \ + camera/kinect4/k4a/k4a_body_tracking_helpers.hpp \ + exvr/ex_element.hpp \ + exvr/ex_experiment.hpp \ + exvr/ex_resource.hpp \ + exvr/ex_component.hpp \ + # files + files/binary_settings.hpp \ + files/cloud_io.hpp \ + files/assimp_loader.hpp \ + files/settings.hpp \ + # geometry + files/text_settings.hpp \ + geometry/cloud.hpp \ + geometry/geometry.hpp \ + geometry/transform.hpp \ + geometry/aabb3.hpp \ + geometry/circle.hpp \ + geometry/obb3.hpp \ + geometry/plane3.hpp \ + geometry/line2.hpp \ + geometry/line3.hpp \ + geometry/rectangle.hpp \ + geometry/sphere.hpp \ + geometry/ray3.hpp \ + geometry/triangle3.hpp \ + geometry/geometry2.hpp \ + geometry/geometry3.hpp \ + geometry/interval.hpp \ + geometry/matrix.hpp \ + geometry/matrix2.hpp \ + geometry/matrix3.hpp \ + geometry/matrix4.hpp \ + geometry/mesh.hpp \ + geometry/point.hpp \ + geometry/point2.hpp \ + geometry/point3.hpp \ + geometry/point4.hpp \ + geometry/raycast.hpp \ + geometry/dummy.hpp \ + geometry/aabb2.hpp \ + geometry/octree.hpp \ + geometry/maching_cube.hpp \ + geometry/vertices.hpp \ + geometry/voxel.hpp \ + geometry/quaternion.hpp \ + # graphics + geometry/voxel_grid.hpp \ + graphics/color.hpp \ + graphics/colors.hpp \ + graphics/screen.hpp \ + graphics/light.hpp \ + graphics/material.hpp \ + graphics/texture.hpp \ + graphics/model.hpp \ + graphics/camera.hpp \ + # input + input/joypad.hpp \ + input/mouse.hpp \ + input/keyboard.hpp \ + # network + network/kinect4/k4_client_connection.hpp \ + network/kinect4/k4_client_network_settings.hpp \ + network/kinect4/k4_server_connection.hpp \ + network/kinect4/k4_server_network.hpp \ + network/kinect4/k4_server_network_settings.hpp \ + network/kinect4/k4_udp_reader.hpp \ + network/kinect4/k4_udp_sender.hpp \ + network/network_utility.hpp \ + network/tcp_reader.hpp \ + network/tcp_sender.hpp \ + 
network/tcp_server.hpp \ + network/udp_header.hpp \ + network/udp_reader.hpp \ + network/udp_sender.hpp \ + ## kinect4 + network/kinect4/k4_network.hpp \ + # utility + thirdparty/rectpack2D/best_bin_finder.h \ + thirdparty/rectpack2D/empty_space_allocators.h \ + thirdparty/rectpack2D/empty_spaces.h \ + thirdparty/rectpack2D/finders_interface.h \ + thirdparty/rectpack2D/insert_and_split.h \ + thirdparty/rectpack2D/rect_structs.h \ + utility/algorithm.hpp \ + utility/array.hpp \ + utility/benchmark.hpp \ + utility/bit_mask.hpp \ + utility/buffer_vector.hpp \ + utility/cmd_args.hpp \ + utility/constants.hpp \ + utility/decimal.hpp \ + utility/export.hpp \ + utility/format.hpp \ + utility/id_alias_map.hpp \ + utility/io_data.hpp \ + utility/io_fstream.hpp \ + utility/math.hpp \ + utility/paths.hpp \ + utility/stop_watch.hpp \ + utility/string.hpp \ + utility/string_geo.hpp \ + utility/string_unordered_map.hpp \ + utility/thread.hpp \ + utility/types.hpp \ + utility/unordered_map.hpp \ + utility/unordered_set.hpp \ + utility/vector.hpp \ + utility/logger.hpp \ + utility/tuple_array.hpp \ + utility/utility.hpp \ + utility/view.hpp \ + utility/time.hpp \ + utility/io_file.hpp \ + # algorithms + algorithms/marching_cube.hpp \ + # camera + camera/frame.hpp \ + ## kinect2 + camera/kinect2/k2_config_files.hpp \ + camera/kinect2/k2_device.hpp \ + camera/kinect2/k2_manager.hpp \ + camera/kinect2/k2_network.hpp \ + camera/kinect2/k2_types.hpp \ + ## kinect4 + camera/kinect4/k4_data.hpp \ + camera/kinect4/k4_device.hpp \ + camera/kinect4/k4_frame.hpp \ + camera/kinect4/k4_frame_compressor.hpp \ + camera/kinect4/k4_frame_uncompressor.hpp \ + camera/kinect4/k4_types.hpp \ + camera/kinect4/k4a/k4aaudiochanneldatagraph.h \ + camera/kinect4/k4a/k4aaudiomanager.h \ + camera/kinect4/k4a/k4aaudiowindow.h \ + camera/kinect4/k4a/k4adevicecorrelator.h \ + camera/kinect4/k4a/k4amicrophone.h \ + camera/kinect4/k4a/k4amicrophonelistener.h \ + camera/kinect4/k4a/k4asoundio_util.h \ + 
camera/kinect4/k4a/k4astaticimageproperties.h \ + camera/kinect4/k4_device_manager.hpp \ + camera/kinect4/k4_server_data.hpp \ + # data + data/FastDifferentialCoding/fastdelta.h \ + data/integers_encoder.hpp \ + # tests + tests/marching_cube_test.hpp \ + # thirdparty + ## stb + thirdparty/stb/stb_image.h \ + thirdparty/stb/stb_image_resize.h \ + thirdparty/stb/stb_image_write.h \ + ## libyuv + thirdparty/libyuv/libyuv.h \ + thirdparty/libyuv/libyuv/basic_types.h \ + thirdparty/libyuv/libyuv/compare.h \ + thirdparty/libyuv/libyuv/compare_row.h \ + thirdparty/libyuv/libyuv/convert.h \ + thirdparty/libyuv/libyuv/convert_argb.h \ + thirdparty/libyuv/libyuv/convert_from.h \ + thirdparty/libyuv/libyuv/convert_from_argb.h \ + thirdparty/libyuv/libyuv/cpu_id.h \ + thirdparty/libyuv/libyuv/loongson_intrinsics.h \ + thirdparty/libyuv/libyuv/macros_msa.h \ + thirdparty/libyuv/libyuv/mjpeg_decoder.h \ + thirdparty/libyuv/libyuv/planar_functions.h \ + thirdparty/libyuv/libyuv/rotate.h \ + thirdparty/libyuv/libyuv/rotate_argb.h \ + thirdparty/libyuv/libyuv/rotate_row.h \ + thirdparty/libyuv/libyuv/row.h \ + thirdparty/libyuv/libyuv/scale.h \ + thirdparty/libyuv/libyuv/scale_argb.h \ + thirdparty/libyuv/libyuv/scale_rgb.h \ + thirdparty/libyuv/libyuv/scale_row.h \ + thirdparty/libyuv/libyuv/scale_uv.h \ + thirdparty/libyuv/libyuv/version.h \ + thirdparty/libyuv/libyuv/video_common.h \ + ## nanobench + thirdparty/nanobench/nanobench.h \ + ## binpack2D + thirdparty/BinPack2D/binpack2d.hpp \ + ## ankerl + thirdparty/ankerl/unordered_dense.h \ + ## audiofile + thirdparty/audiofile/AudioFile.h \ + ## catch + thirdparty/catch/catch.hpp \ + ## sigslot + thirdparty/sigslot/signal.hpp \ + + +SOURCES += \ +# # main +# base_main.cpp \ + # exvr + camera/kinect4/k4_actions_settings.cpp \ + camera/kinect4/k4_calibrator.cpp \ + camera/kinect4/k4_color_settings.cpp \ + camera/kinect4/k4_compressed_frame.cpp \ + camera/kinect4/k4_config_settings.cpp \ + camera/kinect4/k4_data_settings.cpp \ + 
camera/kinect4/k4_delay.cpp \ + camera/kinect4/k4_device_settings.cpp \ + camera/kinect4/k4_display_settings.cpp \ + camera/kinect4/k4_filters.cpp \ + camera/kinect4/k4_grabber_data_processing.cpp \ + camera/kinect4/k4_model.cpp \ + camera/kinect4/k4_player.cpp \ + camera/kinect4/k4_recorder.cpp \ + camera/kinect4/k4_volumetric_video.cpp \ + exvr/ex_component.cpp \ + exvr/ex_element.cpp \ + exvr/ex_experiment.cpp \ + exvr/ex_resource.cpp \ + # files + files/assimp_loader.cpp \ + files/cloud_io.cpp \ + files/settings.cpp \ + # graphics + geometry/mesh.cpp \ + geometry/vertices.cpp \ + geometry/voxel_grid.cpp \ + graphics/color.cpp \ + graphics/model.cpp \ + graphics/texture.cpp \ + # geometry + geometry/cloud.cpp \ + # utility + utility/benchmark.cpp \ + utility/cmd_args.cpp \ + utility/decimal.cpp \ + utility/io_file.cpp \ + utility/logger.cpp \ + utility/paths.cpp \ + utility/string.cpp \ + # camera + camera/frame.cpp \ + ## kinect2 + camera/kinect2/k2_config_files.cpp \ + camera/kinect2/k2_device.cpp \ + camera/kinect2/k2_manager.cpp \ + camera/kinect2/k2_types.cpp \ + ## kinect4 + camera/kinect4/k4_frame.cpp \ + camera/kinect4/k4a/k4aaudiochanneldatagraph.cpp \ + camera/kinect4/k4a/k4aaudiomanager.cpp \ + camera/kinect4/k4a/k4aaudiowindow.cpp \ + camera/kinect4/k4a/platform/windows/k4adevicecorrelator.cpp \ + camera/kinect4/k4a/k4amicrophone.cpp \ + camera/kinect4/k4a/k4amicrophonelistener.cpp \ + camera/kinect4/k4_data.cpp \ + camera/kinect4/k4_device.cpp \ + camera/kinect4/k4_frame_compressor.cpp \ + camera/kinect4/k4_frame_uncompressor.cpp \ + camera/kinect4/k4_device_manager.cpp \ + camera/kinect4/k4_server_data.cpp \ + # network + network/network_utility.cpp \ + network/tcp_reader.cpp \ + network/tcp_sender.cpp \ + network/tcp_server.cpp \ + network/udp_header.cpp \ + network/udp_reader.cpp \ + network/udp_sender.cpp \ + network/kinect4/k4_client_connection.cpp \ + network/kinect4/k4_client_network_settings.cpp \ + network/kinect4/k4_server_connection.cpp \ 
+ network/kinect4/k4_server_network.cpp \ + network/kinect4/k4_server_network_settings.cpp \ + network/kinect4/k4_udp_reader.cpp \ + network/kinect4/k4_udp_sender.cpp \ + ## kinect4 + network/kinect4/k4_network.cpp \ + # data + data/FastDifferentialCoding/fastdelta.c \ + data/integers_encoder.cpp \ + # thirdparty + ## nanobench + thirdparty/nanobench/nanobench.cpp \ + ## stb + thirdparty/stb/stb_image.cpp \ + thirdparty/stb/stb_image_resize.cpp \ + thirdparty/stb/stb_image_write.cpp \ + ## libyuv + thirdparty/libyuv/compare.cc \ + thirdparty/libyuv/compare_common.cc \ + thirdparty/libyuv/compare_gcc.cc \ + thirdparty/libyuv/compare_mmi.cc \ + thirdparty/libyuv/compare_msa.cc \ + thirdparty/libyuv/compare_neon.cc \ + thirdparty/libyuv/compare_neon64.cc \ + thirdparty/libyuv/compare_win.cc \ + thirdparty/libyuv/convert.cc \ + thirdparty/libyuv/convert_argb.cc \ + thirdparty/libyuv/convert_from.cc \ + thirdparty/libyuv/convert_from_argb.cc \ + thirdparty/libyuv/convert_jpeg.cc \ + thirdparty/libyuv/convert_to_argb.cc \ + thirdparty/libyuv/convert_to_i420.cc \ + thirdparty/libyuv/cpu_id.cc \ + thirdparty/libyuv/mjpeg_decoder.cc \ + thirdparty/libyuv/mjpeg_validate.cc \ + thirdparty/libyuv/planar_functions.cc \ + thirdparty/libyuv/rotate.cc \ + thirdparty/libyuv/rotate_any.cc \ + thirdparty/libyuv/rotate_argb.cc \ + thirdparty/libyuv/rotate_common.cc \ + thirdparty/libyuv/rotate_gcc.cc \ + thirdparty/libyuv/rotate_lsx.cc \ + thirdparty/libyuv/rotate_mmi.cc \ + thirdparty/libyuv/rotate_msa.cc \ + thirdparty/libyuv/rotate_neon.cc \ + thirdparty/libyuv/rotate_neon64.cc \ + thirdparty/libyuv/rotate_win.cc \ + thirdparty/libyuv/row_any.cc \ + thirdparty/libyuv/row_common.cc \ + thirdparty/libyuv/row_gcc.cc \ + thirdparty/libyuv/row_lasx.cc \ + thirdparty/libyuv/row_lsx.cc \ + thirdparty/libyuv/row_mmi.cc \ + thirdparty/libyuv/row_msa.cc \ + thirdparty/libyuv/row_neon.cc \ + thirdparty/libyuv/row_neon64.cc \ + thirdparty/libyuv/row_win.cc \ + thirdparty/libyuv/scale.cc \ + 
thirdparty/libyuv/scale_any.cc \ + thirdparty/libyuv/scale_argb.cc \ + thirdparty/libyuv/scale_common.cc \ + thirdparty/libyuv/scale_gcc.cc \ + thirdparty/libyuv/scale_lsx.cc \ + thirdparty/libyuv/scale_mmi.cc \ + thirdparty/libyuv/scale_msa.cc \ + thirdparty/libyuv/scale_neon.cc \ + thirdparty/libyuv/scale_neon64.cc \ + thirdparty/libyuv/scale_rgb.cc \ + thirdparty/libyuv/scale_uv.cc \ + thirdparty/libyuv/scale_win.cc \ + thirdparty/libyuv/video_common.cc \ + + + +#DISTFILES += \ + diff --git a/cpp-projects/base/base_main.cpp b/cpp-projects/base/base_main.cpp new file mode 100644 index 0000000..a5c37cd --- /dev/null +++ b/cpp-projects/base/base_main.cpp @@ -0,0 +1,609 @@ +/******************************************************************************* +** Toolbox-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +// std +#include +#include +#include + +// kinect4 +#include + +// turbojpg +#include + +// local +#include "utility/logger.hpp" +#include "utility/benchmark.hpp" +#include "utility/io_file.hpp" +#include "camera/kinect2/k2_device.hpp" +#include "camera/kinect4/k4_device.hpp" +#include "data/integers_encoder.hpp" +#include "graphics/texture.hpp" +#include "files/cloud_io.hpp" +#include "camera/kinect4/k4_player.hpp" +#include "camera/kinect4/k4_frame_compressor.hpp" +#include "geometry/voxel_grid.hpp" + +#include "exvr/ex_experiment.hpp" + +using namespace tool; +using namespace tool::camera; + +void kinect2_test(){ + +// using namespace std::chrono_literals; +// camera::K2Device kinect; +// if(!kinect.open(camera::K2FrameRequest::compressed_color_mesh)){ +// std::cerr << "Cannot init kinect 2\n"; +// return; +// } + +// std::cout << "init\n"; +// std::this_thread::sleep_for(1000ms); +// std::cout << "try to grab\n"; + +// for(int ii = 0; ii < 10000; ++ii){ +// if(auto newFrame = kinect.get_kinect_data(); newFrame.has_value()){ +// std::cout << "-"; +// }else{ +// std::cout << "E"; +// } +// std::this_thread::sleep_for((1000/33)*1ms); +// } + +// std::cout << "close\n"; +// kinect.close(); +} + +void kinect4_test(){ + +// using namespace camera; +// using namespace camera::K4; + +// bool saveDisplayFrames = true; +// bool saveCompressedFrames = false; + + +// Logger::message("Create kinect4\n"); +// Kinect4 kinect; + +// Logger::message("Open kinect4\n"); +// if(!kinect.open(0)){ +// return; +// } + +// k4a_calibration_t test1; +// std::cout << "sizeof " << sizeof(test1) << "\n"; +// std::cout << "sizeof " << sizeof(k4a_calibration_t) << "\n"; + + +// std::cout << "Main thread id id " << std::this_thread::get_id() << "\n"; + +// Config config; +//// config.mode = Mode::Only_color_1280x720; // works +//// config.mode = Mode::Only_color_1920x1080; // works +//// 
config.mode = Mode::Only_color_2048x1536; // works +// config.mode = Mode::Cloud_640x576; +//// config.mode = Mode::Cloud_512x512; +// kinect.start_cameras(config); + +// // stored frames +// std::vector> displayFrames; +// std::vector> compressedFrames; + +// // connections +// kinect.new_compressed_data_frame_signal.connect([&](std::shared_ptr frame){ +// std::cout << "receive compressed frame from thread id " << std::this_thread::get_id() << "\n"; +//// Logger::message(std::format("receive compressed frame from thread id {}\n", std::this_thread::get_id())); +// compressedFrames.emplace_back(frame); +// }); +// kinect.new_display_frame_signal.connect([&](std::shared_ptr frame){ +// std::cout << "receive display frame from thread id " << std::this_thread::get_id() << "\n"; +//// Logger::message(std::format("receive display frame from thread id {}\n", std::this_thread::get_id())); +// displayFrames.emplace_back(frame); +// }); + +// // parameters +// Parameters p; +// p.sendCompressedDataFrame = true; +// p.sendDisplayCloud = true; +// p.sendDisplayColorFrame = true; +// p.sendDisplayDepthFrame = true; +// p.sendDisplayInfraredFrame = true; +// p.filterDepthWithColor = false; +// p.jpegCompressionRate = 80; +// kinect.set_parameters(p); + +// Logger::message("Start reading.\n"); +// kinect.start_reading(); + +// Logger::message("Sleep...\n"); +// std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + +// Logger::message("Stop reading.\n"); +// kinect.stop_reading(); + +// Logger::message("Close.\n"); +// kinect.close(); + +// if(saveDisplayFrames){ + +// Logger::message("Save display frames.\n"); +// BenchGuard g("Save display frames"); + +// size_t idFrame = 0; +// for(const auto &frame : displayFrames){ + +// std::string pathColor = "./display_color_" + std::to_string(idFrame) + ".png"; +// std::string pathDepth = "./display_depth_" + std::to_string(idFrame) + ".png"; +// std::string pathInfra = "./display_infra_" + std::to_string(idFrame) + ".png"; +// 
std::string pathCloud = "./display_cloud_" + std::to_string(idFrame) + ".obj"; + + + +// auto &cf = frame->colorFrame; +// std::cout << "col: " << cf.width << " "<< cf.height << " " << cf.pixels.size() << "\n"; +// if(cf.width > 0){ +// tool::graphics::Texture texColor; +// texColor.copy_2d_data( +// cf.width, +// cf.height, +// cf.pixels +// ); +// if(!texColor.write_2d_image_file_data(pathColor)){ +// Logger::error("Failed color.\n"); +// } +// } + +// auto &df = frame->depthFrame; +// std::cout << "d: " << df.width << " "<< df.height << " " << df.pixels.size() << "\n"; +// if(df.width > 0){ +// tool::graphics::Texture texDepth; +// texDepth.copy_2d_data( +// df.width, +// df.height, +// df.pixels +// ); +// if(!texDepth.write_2d_image_file_data(pathDepth)){ +// Logger::error("Failed depth.\n"); +// } +// } + +// auto &irf = frame->infraredFrame; +// std::cout << "r: " << irf.width << " "<< irf.height << " " << irf.pixels.size() << "\n"; +// if(irf.width > 0){ +// tool::graphics::Texture texInfra; +// texInfra.copy_2d_data( +// irf.width, +// irf.height, +// irf.pixels +// ); +// if(!texInfra.write_2d_image_file_data(pathInfra)){ +// Logger::error("Failed infra.\n"); +// } +// } + +// auto &cloudF = frame->cloud; +// std::cout << "c: " << cloudF.validVerticesCount << "\n"; +// if(cloudF.validVerticesCount > 0){ + +// if(!tool::files::CloudIO::save_cloud(pathCloud, cloudF.vertices.data(), cloudF.colors.data(), cloudF.validVerticesCount)){ +// Logger::error("Failed cloud.\n"); +// } +// } + +// ++idFrame; +// } +// } + +// if(saveCompressedFrames){ + +// Logger::message("Save uncompressed frames.\n"); +// BenchGuard g("save uncompressed frame"); + +// size_t idFrame = 0; +// tjhandle jpegUncompressor = tjInitDecompress(); +// tool::data::IntegersEncoder depthCompressor; +// for(const auto &cFrame : compressedFrames){ + + +// std::string pathColor = "./uncompressed_color_" + std::to_string(idFrame) + ".png"; +// std::string pathDepth = "./uncompressed_depth_" + 
std::to_string(idFrame) + ".png"; +// std::string pathCloud = "./uncompressed_cloud_" + std::to_string(idFrame) + ".obj"; + +// std::vector uncompressedColorData; +// uncompressedColorData.resize(cFrame->colorWidth * cFrame->colorHeight*4); + +// const int decompressStatus = tjDecompress2( +// jpegUncompressor, +// cFrame->colorBuffer.data(), +// static_cast(cFrame->colorBuffer.size()), +// uncompressedColorData.data(), +// cFrame->colorWidth, +// 0, // pitch +// cFrame->colorHeight, +// TJPF_RGBA, +// TJFLAG_FASTDCT// | TJFLAG_FASTUPSAMPLE +// ); +// if(decompressStatus == -1){ +// Logger::error("Error uncompress color.\n"); +// break; +// } + +// tool::graphics::Texture texColor; +// texColor.copy_2d_data( +// cFrame->colorWidth, +// cFrame->colorHeight, +// 4, +// uncompressedColorData.data() +// ); +// texColor.write_2d_image_file_data(pathColor); + +// // depth +// std::vector uncompressedDepthData; +// uncompressedDepthData.resize(cFrame->depthWidth*cFrame->depthHeight); + +// size_t originalSize; + +// try{ +// originalSize= depthCompressor.decode( +// cFrame->depthBuffer.data(), +// cFrame->depthBuffer.size(), +// reinterpret_cast(uncompressedDepthData.data()), +// (cFrame->depthWidth*cFrame->depthHeight)/2 +// ); +// }catch(std::exception e){ +// Logger::error(std::format("Error uncompress depth {}.\n", e.what())); +// } + + +// float min=0.f,max =0.f,diff = 0.f; +// const std::vector depthGradient ={ +// {0.f,0.f,1.f}, +// {0.f,1.f,1.f}, +// {0.f,1.f,0.f}, +// {1.f,1.f,0.f}, +// {1.f,0.f,0.f}, +// }; + +// // find min/max +// const auto [pmin, pmax] = std::minmax_element(uncompressedDepthData.begin(), uncompressedDepthData.end()); +// min = static_cast(*pmin); +// max = static_cast(*pmax); +// diff = max-min; + +// std::vector uncompressedDepthImageData; +// uncompressedDepthImageData.resize(cFrame->depthWidth * cFrame->depthHeight*4); + +// for(size_t ii = 0; ii < uncompressedDepthData.size(); ++ii){ + +// float vF = 
(static_cast(uncompressedDepthData[ii]) - min)/diff; +// float intPart; +// float decPart = std::modf((vF*(depthGradient.size()-1)), &intPart); +// size_t idG = static_cast(intPart); + +// auto col = depthGradient[idG]*(1.f-decPart) + depthGradient[idG+1]*decPart; +// uncompressedDepthImageData[ii*4+0] = static_cast(255*col.x()); +// uncompressedDepthImageData[ii*4+1] = static_cast(255*col.y()); +// uncompressedDepthImageData[ii*4+2] = static_cast(255*col.z()); +// uncompressedDepthImageData[ii*4+3] = 255; +// } + +// texColor.copy_2d_data( +// cFrame->depthWidth, +// cFrame->depthWidth, +// 4, +// uncompressedDepthImageData.data() +// ); +// texColor.write_2d_image_file_data(pathDepth); + + +// Logger::message("uncompress depth\n"); +// k4a::image depthImage = k4a::image::create( +// k4a_image_format_t::K4A_IMAGE_FORMAT_DEPTH16, +// cFrame->depthWidth, cFrame->depthWidth, +// static_cast(cFrame->depthWidth * 1 * sizeof(uint16_t))); + +// try{ +// originalSize= depthCompressor.decode( +// cFrame->depthBuffer.data(), +// cFrame->depthBuffer.size(), +// reinterpret_cast(depthImage.get_buffer()), +// (cFrame->depthWidth*cFrame->depthHeight)/2 +// ); +// }catch(std::exception e){ +// Logger::error(std::format("Error uncompress depth {}.\n", e.what())); +// } + +// k4a_transformation_t tr = k4a_transformation_create(&cFrame->calibration); +// k4a::image pointCloudImage = k4a::image::create(K4A_IMAGE_FORMAT_CUSTOM, +// cFrame->depthWidth, +// cFrame->depthHeight, +// static_cast(cFrame->depthWidth * 3 * sizeof(int16_t)) +// ); + +// k4a_result_t result = k4a_transformation_depth_image_to_point_cloud( +// tr, +// depthImage.handle(), +// K4A_CALIBRATION_TYPE_DEPTH, +// pointCloudImage.handle() +// ); + +// if(result == K4A_RESULT_SUCCEEDED){ + +// ColoredCloudFrame cloud; + +// auto cloudBuffer = reinterpret_cast*>(pointCloudImage.get_buffer()); + +// const size_t width = pointCloudImage.get_width_pixels(); +// const size_t height = pointCloudImage.get_height_pixels(); 
+// const size_t size = width * height; +// cloud.vertices.resize(size); +// cloud.colors.resize(size); +// cloud.validVerticesCount = size; + +// for(size_t ii = 0; ii < size; ++ii){ +// cloud.vertices[ii] = geo::Pt3f{ +// static_cast(-cloudBuffer[ii].x()), +// static_cast(-cloudBuffer[ii].y()), +// static_cast(-cloudBuffer[ii].z()) +// }*0.01f; + +// cloud.colors[ii] = geo::Pt3f{ +// static_cast(uncompressedColorData[ii*4+0]), +// static_cast(uncompressedColorData[ii*4+1]), +// static_cast(uncompressedColorData[ii*4+2]) +// }/255.f; +// } + +// if(!tool::files::CloudIO::save_cloud(pathCloud, cloud.vertices.data(), cloud.colors.data(), cloud.validVerticesCount)){ +// Logger::error("Faild uncompressed cloud.\n"); +// } +// } + +// idFrame++; +// } +// tjDestroy(jpegUncompressor); +// } + +} + +void bench_test(){ + +// { +// const auto t1 = "t1"sv; +// tool::Bench::start(t1, true); +// std::this_thread::sleep_for(std::chrono::milliseconds(100)); +// tool::Bench::stop(); + +// { +// tool::BenchGuard g("g1", true); +// std::this_thread::sleep_for(std::chrono::milliseconds(100)); +// } + +// tool::Bench::start("t2", true); +// std::this_thread::sleep_for(std::chrono::milliseconds(100)); +// tool::Bench::stop(); + +// tool::BenchGuard g0("g0", true); +// { +// tool::BenchGuard g1("g1", true); +// std::this_thread::sleep_for(std::chrono::milliseconds(100)); + +// { +// tool::BenchGuard g2("g2", true); +// std::this_thread::sleep_for(std::chrono::milliseconds(100)); + +// } + +// tool::BenchGuard g3("g3", true); +// std::this_thread::sleep_for(std::chrono::milliseconds(100)); +// } + +// tool::Bench::start("t3"sv, true); +// std::this_thread::sleep_for(std::chrono::milliseconds(100)); +// tool::Bench::stop(); + +// tool::Bench::start("t4", true); +// std::this_thread::sleep_for(std::chrono::milliseconds(100)); +// tool::Bench::stop(); +// } + +// tool::BenchGuard g3("g4", true); + +// tool::Bench::start("base-lib start", true); +// 
Logger::init((std::filesystem::current_path() / "base-lib-logs").string(), "logs.html"); + +// auto logger = Logger::get(); +// logger->message_signal.connect([&](std::string messsage){ +// std::cout << "Message from logger: " << messsage << "\n"; +// }); + +// tool::Bench::stop(); +} + + + +int main(){ + + K4Player player; + player.load_from_file("D:/a6.kvid"); + + std::cout << "player " << player.nb_cameras() << " " << player.duration_ms() << " " << player.nb_frames(0) << "\n"; + + + std::cout << "set time\n"; +// player.set_current_time(1000.0); +// player.update(); +// std::cout << "remove " << player.current_frame_id(0) << "\n"; +// player.remove_after_current_frame(); + + std::cout << "uncompress\n"; + std::cout << "time1 : " << player.current_time_ms() << " " << player.duration_ms() << " " << player.nb_frames(0) << "\n"; + player.set_current_time(1000); + player.update(); + player.remove_until_current_frame(); + + std::cout << "time2 : " << player.current_time_ms() << " " << player.duration_ms() << " " << player.nb_frames(0) << "\n"; + player.set_current_time(500); + player.update(); +// player.remove_after_current_frame(); + +// std::cout << "time3 : " << player.current_time_ms() << " " << player.duration_ms() << " " << player.nb_frames(0) << "\n"; +// player.set_current_time(0); +// player.update(); + +// std::cout << "time4 : " << player.current_time_ms() << " " << player.duration_ms() << " " << player.nb_frames(0) << "\n"; +// player.merge_cameras(); + + +// K4Frame ff; +// player.uncompress_frame(0, 10, ff); +// files::CloudIO::save_cloud("D:/after.obj", ff.cloud); + +// std::cout << "time5 : " << player.current_time_ms() << " " << player.duration_ms() << " " << player.nb_frames(0) << "\n"; + +// std::cout << "save\n"; +// player.save_to_file("D:/after.kvid"); + +// std::cout << "end\n"; + +// geo::VoxelGrid grid(0.01f, {-10.f,-10.f,-10.f}, {10.f,10.f,10.f}); +// K4Frame frame; +// std::cout << "u0 " << player.video()->get_transform(0).conv() << "\n"; 
+// std::cout << "u1 " << player.video()->get_transform(1).conv() << "\n"; +// std::cout << "u2 " << player.video()->get_transform(2).conv() << "\n"; +// std::cout << "u3 " << player.video()->get_transform(3).conv() << "\n"; + +// for(size_t ii = 0; ii < player.min_nb_frames(); ++ii){ + +// player.uncompress_frame(0, ii, frame); +// grid.add_cloud(frame.cloud, player.video()->get_transform(0).conv()); + +// player.uncompress_frame(1, ii, frame); +// grid.add_cloud(frame.cloud, player.video()->get_transform(1).conv()); + +// player.uncompress_frame(2, ii, frame); +// grid.add_cloud(frame.cloud, player.video()->get_transform(2).conv()); + +// player.uncompress_frame(3, ii, frame); +// grid.add_cloud(frame.cloud, player.video()->get_transform(3).conv()); + +//// } +/// +/// +/// std::cout << "test\n"; + K4FrameCompressor fComp; +// fComp.test(); +/// + K4Frame frame; + +// frame.cloud.resize(30); +// for(size_t ii = 0; ii < frame.cloud.size(); ++ii){ +// frame.cloud.resize[ii] +// } +// std::cout << "uncompress1\n"; + player.uncompress_frame(0, 0, frame); +// frame.mode = K4Mode::Merged; + std::cout << "uncompresses frame has " << frame.cloud.size() << " vertices.\n"; + files::CloudIO::save_cloud("D:/f2.obj", frame.cloud); + + auto cf = fComp.compress(frame, 90); + std::cout << "compressed frame has " << cf->validVerticesCount << " vertices and " << cf->encodedCloudVerticesData.size() << " " << cf->encodedCloudColorData.size() << "\n"; + + frame = K4Frame(); + K4FrameUncompressor fUcomp; + fUcomp.uncompress(cf.get(), frame); + std::cout << "uncompresses frame has " << frame.cloud.size() << " vertices.\n"; + + files::CloudIO::save_cloud("D:/f3.obj", frame.cloud); + + + auto mergedCloud = player.video()->merge_cameras_clouds(10, 0.02f, {-20.f,-20.f,-20.f}, {20.f,20.f,20.f}); + std::cout << "mergedCloud " << mergedCloud.size() << " " << mergedCloud.vertices.size() << " " << mergedCloud.colors.size() << "\n"; + files::CloudIO::save_cloud("D:/after.obj", mergedCloud); + + 
K4Frame mergedFrame; + player.video()->merge_cameras_clouds(10, 0.02f, {-20.f,-20.f,-20.f}, {20.f,20.f,20.f}, mergedFrame); + + std::cout << "mergedFrame " << mergedFrame.cloud.size() << " " << mergedFrame.idCapture << "\n"; + files::CloudIO::save_cloud("D:/after2.obj", mergedFrame.cloud); + + camera::K4FrameCompressor compressor; + camera::K4FrameUncompressor uncompressor; + if(auto cFrame = compressor.compress(mergedFrame, 90)){ + + K4Frame uFrame; + uncompressor.uncompress(cFrame.get(), uFrame); + + files::CloudIO::save_cloud("D:/after3.obj", uFrame.cloud); + } + + std::cout << "boo " << player.nb_frames(0) << "\n"; + player.merge_cameras(); + + std::cout << "merge\n"; + player.save_to_file("D:/after3.kvid"); + + std::cout << "end save\n"; + + +// std::cout << "merge " << player.video()->nb_cameras() << " " << player.video()->get_camera_data(0)->frames.size() << "\n"; +// player.merge_cameras(); + +// files::CloudIO::save_cloud("D:/before.obj", frame.cloud); +// player.uncompress_frame(0, 0, frame); + +// std::cout << "color " << frame.colorWidth << " " << frame.colorHeight << " " << frame.imageColorData.size() << "\n"; +// std::cout << "depth " << frame.depthWidth << " " << frame.depthHeight << " " << frame.depthData.size() << " " << frame.imageDepthData.size() << "\n"; +// std::cout << "infra " << frame.infraWidth << " " << frame.infraHeight << " " << frame.infraData.size() << " " << frame.imageInfraData.size() << "\n"; +// std::cout << "cloud " << frame.cloud.size() << " " << frame.cloud.vertices.size() << " " << frame.cloud.colors.size() << "\n"; + +// files::CloudIO::save_cloud("D:/after.obj", frame.cloud); + +// Logger::message("base-lib start\n"); + +// tool::Bench::reset(); +// tool::Bench::start("kinect4_test"); +// Logger::message("kinect4_test\n"); +// kinect4_test(); +// tool::Bench::stop(); +// tool::Bench::display(); + +//// tool::Bench::reset(); +//// tool::Bench::start("bench_test"); +//// Logger::message("bench_test\n"); +//// bench_test(); 
+//// tool::Bench::stop(); +//// tool::Bench::display(); + +// Logger::message("base-lib end\n"); + + return 0; +} + + + diff --git a/cpp-projects/base/camera/frame.cpp b/cpp-projects/base/camera/frame.cpp new file mode 100644 index 0000000..763abe2 --- /dev/null +++ b/cpp-projects/base/camera/frame.cpp @@ -0,0 +1,32 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + + + +#include "frame.hpp" + +using namespace tool::camera; diff --git a/cpp-projects/base/camera/frame.hpp b/cpp-projects/base/camera/frame.hpp new file mode 100644 index 0000000..9d7c056 --- /dev/null +++ b/cpp-projects/base/camera/frame.hpp @@ -0,0 +1,44 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// std +#include + +namespace tool::camera{ + + struct Frame{ + std::int32_t idCapture = 0; + std::int64_t afterCaptureTS = 0; + +// constexpr auto catpure_time_ms(std::int64_t startTime) const noexcept{ +// return (afterCaptureTS-startTime)*0.000001; +// } + + virtual ~Frame(){} + }; +} diff --git a/cpp-projects/base/camera/kinect2/k2_config_files.cpp b/cpp-projects/base/camera/kinect2/k2_config_files.cpp new file mode 100644 index 0000000..b8941f7 --- /dev/null +++ b/cpp-projects/base/camera/kinect2/k2_config_files.cpp @@ -0,0 +1,221 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#include "k2_config_files.hpp" + +// std +#include +#include + +// local +#include "utility/format.hpp" +#include "utility/logger.hpp" + +using namespace tool; +using namespace tool::camera; + +bool K2ConfigFiles::save_grabber_settings_config_file(const K2Settings &settings, std::string path){ + + if(path.length() == 0){ + path = (std::filesystem::current_path() / "config/settings/settings_saved.config").string(); + } + + std::ofstream file; + file.open(path); + if(!file.is_open()){ + Logger::error(std::format("K2ConfigFiles: Cannot write on grabber settings config file with path: {}\n", path)); + return false; + } + + file << settings.to_string(); + + Logger::message(std::format("K2ConfigFiles: Grabber settings config file written: {}", path)); + return true; +} + +std::optional K2ConfigFiles::read_grabber_settings_config_file(std::string path){ + + std::filesystem::path fsPath; + if(path.length() == 0){ + fsPath = std::filesystem::current_path() / "config/settings/settings_default.config"; + }else{ + fsPath = path; + if(fsPath.extension() != ".config"){ + Logger::error(std::format("K2ConfigFiles: Invalid extension for settings config file {}\n", fsPath.string())); + return std::nullopt; + } + } + + std::ifstream inConfigFile(fsPath.string()); + if(!inConfigFile.is_open()){ + Logger::error(std::format("K2ConfigFiles: No settings config file found at {}\n", fsPath.string())); + return std::nullopt; + } + + std::string str; + inConfigFile.seekg(0, std::ios::end); + str.reserve(static_cast(inConfigFile.tellg())); + inConfigFile.seekg(0, std::ios::beg); + str.assign((std::istreambuf_iterator(inConfigFile)),std::istreambuf_iterator()); + + Logger::message(std::format("Grabber settings config file read: {}", fsPath.string())); + return {K2Settings::from_string(str)}; +} + +bool K2ConfigFiles::read_grabber_network_config_file(int *readingPort, std::string path){ + + 
std::filesystem::path fsPath; + if(path.length() == 0){ + fsPath = std::filesystem::current_path() / "config/network/network_default.config"; + }else{ + fsPath = path; + if(fsPath.extension() != ".config"){ + Logger::error(std::format("Invalid extension for grabber network config file {}\n",fsPath.string())); + return false; + } + } + + std::ifstream inConfigFile(fsPath.string()); + if(!inConfigFile.is_open()){ + Logger::error(std::format("Cannot open grabber network config file with path: {}\n", fsPath.string())); + return false; + } + + // retrieve reading port + std::string line; + while (std::getline(inConfigFile, line)){ + *readingPort = std::stoi(line); + } + + Logger::message(std::format("Grabber network config file read: {}", fsPath.string())); + return true; +} + +std::vector K2ConfigFiles::read_manager_network_config_file(std::string path){ + + std::filesystem::path fsPath; + if(path.length() == 0){ + fsPath = std::filesystem::current_path() / "config/network/network_default.config"; + }else{ + fsPath = path; + if(fsPath.extension() != ".config"){ + Logger::error(std::format("Invalid extension for manager network config file {}\n", fsPath.string())); + return {}; + } + } + + std::ifstream inConfigFile(fsPath.string()); + if(!inConfigFile.is_open()){ + Logger::error(std::format("Cannot open manager network config file with path: {}\n", fsPath.string())); + return {}; + } + + // retrieve infos + std::vector infos; + std::string line; + while (std::getline(inConfigFile, line)){ + auto split = tool::String::split(line, ' '); + if(split.size() != 4){ + Logger::error(std::format("Invalid manager network config file with path:: {}\n", fsPath.string())); + return {}; + } + infos.emplace_back(K2GrabberTargetInfo{split[0],std::stoi(split[1]),std::stoi(split[2]),std::stoi(split[3])}); + } + + Logger::message(std::format("Manager network config file read: {}", fsPath.string())); + return infos; +} + + +std::vector 
K2ConfigFiles::read_manager_calibration_file(std::string path){ + + std::filesystem::path fsPath; + if(path.length() == 0){ + fsPath = std::filesystem::current_path() / "config/calibration/calibration_default.config"; + }else{ + fsPath = path; + if(fsPath.extension() != ".config"){ + Logger::error(std::format("Invalid extension for manager calibration config file {}\n", fsPath.string()));; + return {}; + } + } + + std::ifstream inConfigfile(fsPath.string()); + if(!inConfigfile.is_open()){ + Logger::error(std::format("Cannot open manager calibration config file with path: {}\n", fsPath.string())); + return {}; + } + + std::stringstream buffer; + buffer << inConfigfile.rdbuf(); + inConfigfile.close(); + + + auto read_matrix = [&]()-> std::optional{ + + std::string line; + std::vector values; + for(size_t ii = 0; ii < 4; ++ii){ + + std::getline(buffer, line); + + auto split = String::split(line, ' '); + if(split.size() == 4){ + values.emplace_back(std::stof(split[0])); + values.emplace_back(std::stof(split[1])); + values.emplace_back(std::stof(split[2])); + values.emplace_back(std::stof(split[3])); + }else{ + return {}; + } + } + + return std::make_optional(values.data(), values.size()); + }; + + bool read = true; + std::vector mCalibs; + while(read){ + std::string line; + std::getline(buffer, line); + if(line.size() > 0){ + + auto matrix = read_matrix(); + if(matrix.has_value()){ + mCalibs.emplace_back(std::move(matrix.value())); + }else{ + Logger::error(std::format("Invalid manager calibration config file with path:: {}\n", fsPath.string())); + return {}; + } + + }else{ + read = false; + } + } + + Logger::message(std::format("Manager calibfration config file read: {}", fsPath.string())); + return mCalibs; +} diff --git a/cpp-projects/base/camera/kinect2/k2_config_files.hpp b/cpp-projects/base/camera/kinect2/k2_config_files.hpp new file mode 100644 index 0000000..1268400 --- /dev/null +++ b/cpp-projects/base/camera/kinect2/k2_config_files.hpp @@ -0,0 +1,58 @@ + 
+/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
**
+** **
+********************************************************************************/
+
+
+#pragma once
+
+// base
+#include "geometry/matrix4.hpp"
+
+// local
+#include "k2_network.hpp"
+
+namespace tool::camera {
+
+// Static helpers reading and writing the Kinect2 ".config" text files
+// (grabber settings, grabber/manager network, manager calibration).
+// Readers taking a defaulted `path` fall back to a file located under
+// <current working directory>/config/... when the path is empty, and reject
+// explicit paths whose extension is not ".config".
+// NOTE(review): the template arguments of the std::optional / std::vector
+// return types below were lost in this copy of the patch — restore them from
+// the original sources.
+class K2ConfigFiles{
+
+public:
+
+    // camera
+    // # grabber
+    // Writes p.to_string() to `path` (empty path: config/settings/settings_saved.config
+    // under the current directory). Returns false when the file cannot be opened.
+    static bool save_grabber_settings_config_file(const K2Settings &p, std::string path);
+    // Loads settings through K2Settings::from_string (empty path:
+    // config/settings/settings_default.config). Empty result when the file is
+    // missing or has a wrong extension.
+    static std::optional read_grabber_settings_config_file(std::string path = "");
+
+    // network
+    // # grabber
+    // Stores the port read from the file in *readingPort (empty path:
+    // config/network/network_default.config). Returns false when the file is
+    // missing or has a wrong extension.
+    static bool read_grabber_network_config_file(int *readingPort, std::string path = "");
+    // # manager
+    // Reads one grabber target per line, four space-separated fields each
+    // (empty path: config/network/network_default.config). Empty vector on error.
+    static std::vector read_manager_network_config_file(std::string path = "");
+
+    // calibration
+    // # manager
+    // Reads the calibration matrices, four rows of four values each (empty path:
+    // config/calibration/calibration_default.config). Empty vector on error.
+    static std::vector read_manager_calibration_file(std::string path = "");
+};
+
+}
diff --git a/cpp-projects/base/camera/kinect2/k2_device.cpp b/cpp-projects/base/camera/kinect2/k2_device.cpp
new file mode 100644
index 0000000..59d9f2f
--- /dev/null
+++ b/cpp-projects/base/camera/kinect2/k2_device.cpp
@@ -0,0 +1,1836 @@
+
+/*******************************************************************************
+** Toolset-base **
+** MIT License **
+** Copyright (c) [2018] [Florian Lance] **
+** **
+** Permission is hereby granted, free of charge, to any person obtaining a **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation **
+** the rights to use, copy, modify, merge, publish, distribute, sublicense, **
+** and/or sell copies of the Software, and to permit persons to whom the **
+** Software is furnished to do so, subject to the following conditions: **
+** **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software.
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "k2_device.hpp" + +// std +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// windows +#include + +// kinect +#include + +// opencv +#include "opencv2/core.hpp" +#include "opencv2/imgproc/imgproc.hpp" + +// turbojpeg +#include + +// local +#include "utility/view.hpp" +#include "utility/utility.hpp" +#include "utility/benchmark.hpp" +#include "utility/logger.hpp" +#include "data/integers_encoder.hpp" + + +using namespace tool; +using namespace tool::camera; +using namespace tool::geo; +using namespace std::chrono_literals; + + +struct K2Device::Impl{ + + bool cameraInitialized = false; + + bool colorFrameInfosInitialized = false; + float colorFovDiago = 0.f; + float colorFovHori = 0.f; + float colorFovVerti = 0.f; + int colorWidth = 0; + int colorHeight = 0; + unsigned int colorLengthInPixels; + unsigned int colorBytesPerPixel; + + bool depthFrameInfosInitialized = false; + float depthFovDiago = 0.f; + float depthFovHori = 0.f; + float depthFovVerti = 0.f; + int depthWidth = 0; + int depthHeight = 0; + unsigned int depthLengthInPixels; + unsigned int depthBytesPerPixel; + + // source + DWORD frameSource = 0; + // frames + IColorFrame *colorframe = nullptr; + IDepthFrame *depthFrame = nullptr; + IInfraredFrame *infraFrame = nullptr; + ILongExposureInfraredFrame *longExposureInfraFrame = nullptr; + IBodyFrame 
*bodyFrame = nullptr; + // frames references + IMultiSourceFrame *multiSouceFrame = nullptr; + IColorFrameReference *colorFrameRef = nullptr; + IDepthFrameReference *depthFrameRef = nullptr; + IInfraredFrameReference *infraFrameRef = nullptr; + ILongExposureInfraredFrameReference *longExposureInfraFrameRef = nullptr; + IBodyFrameReference *bodyFrameRef = nullptr; + + // kinect + K2FrameRequest mode = K2FrameRequest::undefined; + IKinectSensor* sensor = nullptr; // Kinect sensor + IMultiSourceFrameReader* reader = nullptr; // Kinect data source + ICoordinateMapper* mapper = nullptr; // Converts between depth, color, and 3d coordinate + CameraIntrinsics camIntrinsics; + + // mapping + uint32_t cameraSpaceTableLength = 0; + PointF* cameraSpaceTable = nullptr; + std::vector depth2rgb; // Maps depth pixels to rgb pixels + std::vector validityDepth2Rgb; + + // indices + std::vector indicesRawColors; + std::vector indicesDepths; + std::vector indicesDepthsWithoutBorders; + std::vector indicesDepthToColor; + + std::vector> indices2dDepths; +// std::vector indicesDepth2Rgb; + + // depth/cloud + std::vector depthMask; + std::unique_ptr> depthDataTemporal = nullptr; + cv::Mat binaryDepth; + cv::Mat erodedBinaryDepth; + data::IntegersEncoder depthCompressor; + K2CloudDataUP cloudData = nullptr; + unsigned short* depthBuffer = nullptr; + unsigned int depthBufferSize = 0; + int previousSumDepthValues = 0; + + // color + tjhandle jpegCompressor; + K2RgbDataUP colorData = nullptr; + K2RgbDataUP processedColorData = nullptr; + unsigned char *tjCompressedImage = nullptr; + + // bodiess + // std::unordered_map idBodies; + + // frames + std::uint32_t currentFrame = 0; + K2FrameUP processedFrame = nullptr; + + Impl(){ + + // init compressor + jpegCompressor = tjInitCompress(); + + depthDataTemporal = std::make_unique>(); + + + depth2rgb.resize(k2_depth_count); + validityDepth2Rgb.resize(k2_depth_count); + + indicesDepths.resize(k2_depth_count); + std::iota(std::begin(indicesDepths), 
std::end(indicesDepths), 0); + + // ? + indicesDepthsWithoutBorders.resize((k2_depth_width-2)*(k2_depth_height-2)); + size_t count = 0; + for(size_t ii = 0; ii < k2_depth_height; ++ii){ + for(size_t jj = 0; jj < k2_depth_width; ++jj){ + if(ii > 0 && ii < k2_depth_height-1 && jj > 0 && jj < k2_depth_width-1 ){ + indicesDepthsWithoutBorders.emplace_back(count); + } + ++count; + } + } + + size_t id = 0; + indices2dDepths.resize(k2_depth_count); + for(size_t ii = 0; ii < k2_depth_height; ++ii){ + for(size_t jj = 0; jj < k2_depth_width; ++jj){ + indices2dDepths[id++] = std::make_pair(jj,ii); // (0-width / 0-height) + } + } + indicesDepthToColor.resize(k2_depth_count); +// indicesDepth2Rgb.resize(kinect2_depth_count); + + depthMask.resize(k2_depth_count); + + indicesRawColors.resize(k2_raw_color_data_size/4); + std::iota(std::begin(indicesRawColors), std::end(indicesRawColors), 0); + binaryDepth = cv::Mat(cv::Size(k2_depth_width, k2_depth_height), CV_8U); + erodedBinaryDepth = cv::Mat(cv::Size(k2_depth_width, k2_depth_height), CV_8U); + + // final data + colorData = std::make_unique(); + cloudData = std::make_unique(); + processedColorData = std::make_unique(); + + // frame + processedFrame = std::make_unique(true); + } +}; + +bool check_func_sucess(HRESULT res){ + return res >= 0; +} + +K2Device::K2Device() : m_p(std::make_unique()){ + Logger::message("Init kinect2"); +} + +K2Device::~K2Device(){ + + // camera + close(); + + + if(m_p->tjCompressedImage != nullptr){ + tjFree(m_p->tjCompressedImage); + } + + // compression + tjDestroy(m_p->jpegCompressor); + + // get map table + if(m_p->cameraSpaceTable != nullptr){ + delete m_p->cameraSpaceTable; + } +} + +void K2Device::close(){ + + // close sensor + if(m_p->sensor != nullptr){ + m_p->sensor->Close(); + } + + // clean mapper + if (m_p->mapper != nullptr){ + m_p->mapper->Release(); + m_p->mapper = nullptr; + } + + // clean reader + if (m_p->reader != nullptr){ + m_p->reader->Release(); + m_p->reader = nullptr; + } + + // 
clean sensor + if (m_p->sensor != nullptr){ + m_p->sensor->Release(); + m_p->sensor = nullptr; + } + m_p->cameraInitialized = false; + m_p->mode = K2FrameRequest::undefined; +} + + + +bool K2Device::acquire_multi_sources_frame(){ + + m_p->multiSouceFrame = nullptr; + + using namespace std::chrono; + + auto timeStart = high_resolution_clock::now(); + + bool get = true; + while(get){ + auto ret = m_p->reader->AcquireLatestFrame(&m_p->multiSouceFrame); + if(ret == E_PENDING){ + auto currenTime = duration_cast(high_resolution_clock::now() - timeStart).count(); + if(currenTime > 3000){ + Logger::error("Failed: AcquireLatestFrame still pending"); + return false; + } + continue; + }else if(!check_func_sucess(ret)){ + Logger::error("Failed: AcquireLatestFrame"); + return false; + } + break; + } + + + return true; +} + +bool K2Device::acquire_color_frame(){ + + if(!check_func_sucess(m_p->multiSouceFrame->get_ColorFrameReference(&m_p->colorFrameRef))){ + Logger::warning("Fail get_ColorFrameReference"); + return false; + } + + + auto ret = m_p->colorFrameRef->AcquireFrame(&m_p->colorframe); + if(!check_func_sucess(ret)){ + return false; + } + + + if(!m_p->colorFrameInfosInitialized){ + + IFrameDescription *f= nullptr; + if(check_func_sucess(m_p->colorframe->get_FrameDescription(&f))){ + f->get_DiagonalFieldOfView(&m_p->colorFovDiago); + f->get_HorizontalFieldOfView(&m_p->colorFovHori); + f->get_VerticalFieldOfView(&m_p->colorFovVerti); + f->get_Width(&m_p->colorWidth); + f->get_Height(&m_p->colorHeight); + f->get_LengthInPixels(&m_p->colorLengthInPixels); + f->get_BytesPerPixel(&m_p->colorBytesPerPixel); + + } + m_p->colorFrameInfosInitialized = true; + // Logger::message(QSL("Color FOV: ") % QString::number(m_p->colorFovDiago) % QSL(" ") % QString::number(m_p->colorFovHori) % QSL(" ") % QString::number(m_p->colorFovVerti)); + // Logger::message(QSL("Color Lengths: ") % QString::number(m_p->colorWidth) % QSL(" ") % QString::number(m_p->colorHeight) % QSL(" ") % 
QString::number(m_p->colorLengthInPixels)% QSL(" ") % QString::number(m_p->colorBytesPerPixel)); + } + + return true; +} + +bool K2Device::acquire_depth_frame(){ + + if(!check_func_sucess(m_p->multiSouceFrame->get_DepthFrameReference(&m_p->depthFrameRef))){ + Logger::warning("Fail get_DepthFrameReference"); + return false; + } + + auto ret = m_p->depthFrameRef->AcquireFrame(&m_p->depthFrame); + if(!check_func_sucess(ret)){ + Logger::warning("Fail to acquire depth frame"); + return false; + } + + if(!m_p->depthFrameInfosInitialized){ + + IFrameDescription *f= nullptr; + if(check_func_sucess(m_p->depthFrame->get_FrameDescription(&f))){ + f->get_DiagonalFieldOfView(&m_p->depthFovDiago); + f->get_HorizontalFieldOfView(&m_p->depthFovHori); + f->get_VerticalFieldOfView(&m_p->depthFovVerti); + f->get_Width(&m_p->depthWidth); + f->get_Height(&m_p->depthHeight); + f->get_LengthInPixels(&m_p->depthLengthInPixels); + f->get_BytesPerPixel(&m_p->depthBytesPerPixel); + } + + m_p->depthFrameInfosInitialized = true; + // Logger::message(QSL("Depth FOV: ") % QString::number(m_p->depthFovDiago) % QSL(" ") % QString::number(m_p->depthFovHori) % QSL(" ") % QString::number(m_p->depthFovVerti)); + // Logger::message(QSL("Depth Lengths: ") % QString::number(m_p->depthWidth) % QSL(" ") % QString::number(m_p->depthHeight) % QSL(" ") % QString::number(m_p->depthLengthInPixels)% QSL(" ") % QString::number(m_p->colorBytesPerPixel)); + } + + + return true; +} + +bool K2Device::acquire_infra_frame(){ + + if(!check_func_sucess(m_p->multiSouceFrame->get_InfraredFrameReference(&m_p->infraFrameRef))){ + Logger::warning("Fail get_InfraredFrameReference"); + return false; + } + if(!check_func_sucess(m_p->infraFrameRef->AcquireFrame(&m_p->infraFrame))){ + return false; + } + + return true; +} + +bool K2Device::acquire_long_exposure_infra_frame(){ + + if(!check_func_sucess(m_p->multiSouceFrame->get_LongExposureInfraredFrameReference(&m_p->longExposureInfraFrameRef))){ + Logger::warning("Fail 
get_LongExposureInfraredFrameReference"); + return false; + } + if(!check_func_sucess(m_p->longExposureInfraFrameRef->AcquireFrame(&m_p->longExposureInfraFrame))){ + return false; + } + return true; +} + +bool K2Device::acquire_body_frame(){ + + if(!check_func_sucess(m_p->multiSouceFrame->get_BodyFrameReference(&m_p->bodyFrameRef))){ + Logger::warning("Fail get_BodyFrameReference"); + return false; + } + if(!check_func_sucess(m_p->bodyFrameRef->AcquireFrame(&m_p->bodyFrame))){ + return false; + } + + return true; +} + + +bool K2Device::open(K2FrameRequest m){ + + // initialize sensor + if(!m_p->cameraInitialized){ + if(!check_func_sucess(GetDefaultKinectSensor(&m_p->sensor))){ + Logger::error("Failed get default kinect sensor"); + return false; + } + + if (m_p->sensor == nullptr) { + Logger::error("Failed opening sensor"); + return false; + } + + if(!check_func_sucess(m_p->sensor->Open())){ + Logger::error("Cannot open kinect"); + return false; + } + + if(!check_func_sucess(m_p->sensor->get_CoordinateMapper(&m_p->mapper))){ + Logger::error("Fail get coordinate mapper"); + return false; + } + m_p->cameraInitialized = true; + } + + if(m_p->mode == m){ + return true; // nothing to do + }else{ + // update current mode + m_p->mode = m; + } + + // init frame source + m_p->frameSource = 0; + if(color_channel_required(m)){ + m_p->frameSource |= FrameSourceTypes::FrameSourceTypes_Color; + } + if(depth_channel_required(m)){ + m_p->frameSource |= FrameSourceTypes::FrameSourceTypes_Depth; + } + if(infra_channel_required(m)){ + m_p->frameSource |= FrameSourceTypes::FrameSourceTypes_Infrared; + } + if(long_infra_channel_required(m)){ + m_p->frameSource |= FrameSourceTypes::FrameSourceTypes_LongExposureInfrared; + } + m_p->frameSource |= FrameSourceTypes::FrameSourceTypes_Body; + + // reset data + std::fill(std::begin(*m_p->colorData), std::end(*m_p->colorData), Pt3{0,0,0}); + std::fill(std::begin(*m_p->cloudData), std::end(*m_p->cloudData), Pt3f{}); + 
std::fill(std::begin(m_p->depthMask), std::end(m_p->depthMask), false); + + // open frame source + if(!check_func_sucess(m_p->sensor->OpenMultiSourceFrameReader(m_p->frameSource, &m_p->reader))){ + Logger::error("Failed: OpenMultiSourceFrameReader"); + return false; + } + + return true; +} + + +std::optional K2Device::get_kinect_data() { + + if(m_p->reader == nullptr){ + Logger::error("Kinect not opened"); + return {}; + } + + Bench::start("Kinect2::get_kinect_data"sv); + Bench::start("Kinect2::acquire_multi_sources_frame"sv); + + // update frame mode and id + m_p->processedFrame->mode = m_p->mode; + m_p->processedFrame->frameId = m_p->currentFrame; + + // get data + if(!acquire_multi_sources_frame()){ + clean_frame(); + return {}; + } + + m_p->processedFrame->timeStampGetFrame = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + Bench::stop(); + + // store intrinsics + if(m_p->processedFrame->intrinsics.size() == 0){ + m_p->processedFrame->intrinsics = intrinsics(); + } + + Bench::start("Kinect2::get_depth_data"sv); + // # depth + if(depth_channel_required(m_p->mode)){ + if(acquire_depth_frame()){ + if(!get_depth_data()){ + clean_frame(); + return {}; + } + }else{ + clean_frame(); + return {}; + } + } + Bench::stop(); + Bench::start("Kinect2::get_color_data"sv); + // # color + if(color_channel_required(m_p->mode)){ + if(acquire_color_frame()){ + get_color_data(); + }else{ + clean_frame(); + return {}; + } + } + Bench::stop(); + Bench::start("Kinect2::get_infra_data"sv); + // # infra + if(infra_channel_required(m_p->mode)){ + if(acquire_infra_frame()){ + get_infra_data(); + }else{ + clean_frame(); + return {}; + } + } + Bench::stop(); + Bench::start("Kinect2::get_long_exposure_infra_data"sv); + // # long exposure infra + if(long_infra_channel_required(m_p->mode)){ + if(acquire_long_exposure_infra_frame()){ + get_long_exposure_infra_data(); + }else{ + clean_frame(); + return {}; + } + } + Bench::stop(); + 
Bench::start("Kinect2::get_body_data"sv); + // # body + if(body_channel_required(m_p->mode)){ + if(acquire_body_frame()){ + get_body_data(); + } + } + Bench::stop(); + Bench::start("Kinect2::post_computing_depth_data"sv); + + // post computing data + if(depth_channel_required(m_p->mode)){ + post_computing_depth_data(); + } + + Bench::stop(); + Bench::start("Kinect2::post_computing_color_data"sv); + + if(color_channel_required(m_p->mode)){ + post_computing_color_data(); + } + + Bench::stop(); + Bench::start("Kinect2::post_computing_infra_data"sv); + + if(infra_channel_required(m_p->mode) || long_infra_channel_required(m_p->mode)){ + post_computing_infra_data(); + } + + Bench::stop(); + Bench::start("Kinect2::post_computing_body_data"sv); + + if(body_channel_required(m_p->mode)){ + post_computing_body_data(); + } + + Bench::stop(); + Bench::start("Kinect2::process"sv); + + // processing + if(m_p->mode == K2FrameRequest::compressed_color_1920x1080){ + process_compressed_color_1920x1080(); + }else if(m_p->mode == K2FrameRequest::compressed_color_512x424){ + process_compressed_color_512x424(); + }else if(m_p->mode == K2FrameRequest::compressed_color_cloud ){ + process_compressed_color_cloud(); + }else if(m_p->mode == K2FrameRequest::compressed_color_mesh){ + process_compressed_color_mesh(); + }else if(m_p->mode == K2FrameRequest::depth_512x424){ + process_depth_512x424(); + }else if(m_p->mode == K2FrameRequest::infra_512x424 || m_p->mode == K2FrameRequest::long_exposure_infra_512x424){ + process_infra_512x424(); + } + Bench::stop(); + Bench::start("Kinect2::release"sv); + + // increase frame id + ++m_p->currentFrame; + + // get end process timestamp + m_p->processedFrame->timeStampEndProcessing = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + + // clean + clean_frame(); + + Bench::stop(); + + Bench::start("Kinect2::copy_frame"sv); + auto copiedFrame = m_p->processedFrame->copy_frame(); + Bench::stop(); + Bench::stop(); + + return 
std::move(copiedFrame); +} + +K2FrameRequest K2Device::mode() const{ + return m_p->mode; +} + +bool K2Device::save_camera_intrisics(std::string filePath){ + + if(filePath.size() == 0){ + return false; + } + + intrinsics(); + + std::ofstream file; + std::string name(filePath); + file.open(name); + if(file.is_open()){ + file << "Focal length:\n" << m_p->camIntrinsics.FocalLengthX << " " << m_p->camIntrinsics.FocalLengthY << "\n"; + file << "Principal point:\n" << m_p->camIntrinsics.PrincipalPointX << " " << m_p->camIntrinsics.PrincipalPointY << "\n"; + file << "Radial distortion:\n" << m_p->camIntrinsics.RadialDistortionSecondOrder << " " << m_p->camIntrinsics.RadialDistortionFourthOrder << " " << m_p->camIntrinsics.RadialDistortionSixthOrder << "\n"; + }else{ + return false; + } + file.close(); + + return true; +} + +bool K2Device::save_camera_space_depth_table(std::string filePath){ + + // get map table + if(m_p->cameraSpaceTable == nullptr){ + delete m_p->cameraSpaceTable; + m_p->cameraSpaceTable = nullptr; + } + m_p->mapper->GetDepthFrameToCameraSpaceTable(&m_p->cameraSpaceTableLength, &m_p->cameraSpaceTable); + + if(filePath.size() == 0){ + return false; + } + + std::ofstream file; + std::string name(filePath); + file.open(name); + if(file.is_open()){ + for(size_t ii = 0; ii < m_p->cameraSpaceTableLength; ++ii){ + file << m_p->cameraSpaceTable[ii].X << " " << m_p->cameraSpaceTable[ii].Y << "\n"; + } + }else{ + return false; + } + file.close(); + + return true; +} + +//void Kinect2::copy_frame(Kinect2Frame *f){ + +//// m_p->swapLocker.lock(); +// m_p->availableFrame->parameters = parameters; +// if(!m_p->availableFrame->copy_to(f)){ +// Logger::error("Available frame color size is 0"); +// } +//// m_p->swapLocker.unlock(); +// std::cout << "[COP2_" << std::this_thread::get_id() << "]"; +//} + + +std::vector K2Device::intrinsics() const{ + + m_p->mapper->GetDepthCameraIntrinsics(&m_p->camIntrinsics); + return { + m_p->camIntrinsics.FocalLengthX, + 
m_p->camIntrinsics.FocalLengthY, + m_p->camIntrinsics.PrincipalPointX, + m_p->camIntrinsics.PrincipalPointY, + m_p->camIntrinsics.RadialDistortionSecondOrder, + m_p->camIntrinsics.RadialDistortionFourthOrder, + m_p->camIntrinsics.RadialDistortionSixthOrder + }; +} + +void K2Device::get_color_data(){ + + Bench::start("Kinect2::get_color_data_0"sv); + using namespace std; + + UINT bufferSize = 0; + std::uint8_t* pBuffer = nullptr; + m_p->colorframe->AccessRawUnderlyingBuffer( &bufferSize, &pBuffer ); // YUY2 + m_p->colorframe->get_RelativeTime(&m_p->processedFrame->relativeTimeColor); + Pt4 *colors = reinterpret_cast*>(pBuffer); + + Bench::stop(); + + + if(depth_channel_required(m_p->mode)){ + + Bench::start("Kinect2::get_color_data_1"sv); + + for_each(execution::par_unseq, begin(m_p->indicesDepths), end(m_p->indicesDepths), [&](size_t &id){ + + if(!is_kinect2_depth_valid((*m_p->cloudData)[id].z()) || !m_p->validityDepth2Rgb[id]){ + (*m_p->colorData)[id] = {0,0,0}; + return; + } + + size_t idC = m_p->indicesDepthToColor[id]; + if(idC%2 == 0){ + + Pt3 &c1 = (*m_p->colorData)[idC]; + idC = idC/2; + + const Pt4 &color = colors[idC]; + + // convert to rgb + const int y0 = clamp(static_cast(parameters.yFactor*color.x()), 0, 255); + auto ci = 298 * (y0 - 16); + + const int u0 = clamp(static_cast(parameters.uFactor*color.y()), 0, 255); + const auto d = u0 - 128; + + const int v0 = clamp(static_cast(parameters.vFactor*color.w()), 0, 255); + const auto e = v0 - 128; + + c1.x() = static_cast(clamp((ci + (409 * e) + 128) >> 8, 0, 255)); // red + c1.y() = static_cast(clamp((ci + (-100 * d) - (208 * e) + 128) >> 8, 0, 255)); // green + c1.z() = static_cast(clamp((ci + (516 * d) + 128) >> 8, 0, 255)); // blue + + }else{ + Pt3 &c2 = (*m_p->colorData)[idC]; + idC = (idC-1)/2; + + const Pt4 &color = colors[idC]; + const int y0 = clamp(static_cast(parameters.yFactor*color.x()), 0, 255); + const int u0 = clamp(static_cast(parameters.uFactor*color.y()), 0, 255); + const int y1 = 
clamp(static_cast(parameters.yFactor*color.z()), 0, 255); + const int v0 = clamp(static_cast(parameters.vFactor*color.w()), 0, 255); + + // convert to rgb + auto c = y0 - 16; + const auto d = u0 - 128; + const auto e = v0 - 128; + + auto ci = 298 * c; + const auto v1 = (516 * d) + 128; + const auto v2 = (-100 * d) - (208 * e) + 128; + const auto v3 = (409 * e) + 128; + + c = y1 - 16; + ci = 298 * c; + + c2.x() = static_cast(clamp((ci + v3) >> 8, 0, 255)); // red + c2.y() = static_cast(clamp((ci + v2) >> 8, 0, 255)); // green + c2.z() = static_cast(clamp((ci + v1) >> 8, 0, 255)); // blue + } + }); + + Bench::stop(); + }else{ + + Bench::start("Kinect2::get_color_data_2"sv); + + for_each(execution::par_unseq, begin(m_p->indicesRawColors), end(m_p->indicesRawColors), [&](size_t &id){ + // get yuv + const Pt4 &color = colors[id]; + const int y0 = clamp(static_cast(parameters.yFactor*color.x()), 0, 255); + const int u0 = clamp(static_cast(parameters.uFactor*color.y()), 0, 255); + const int y1 = clamp(static_cast(parameters.yFactor*color.z()), 0, 255); + const int v0 = clamp(static_cast(parameters.vFactor*color.w()), 0, 255); + + // convert to rgb + auto c = y0 - 16; + const auto d = u0 - 128; + const auto e = v0 - 128; + + const auto idC = id*2; + Pt3 &c1 = (*m_p->colorData)[idC]; + auto ci = 298 * c; + const auto v1 = (516 * d) + 128; + const auto v2 = (-100 * d) - (208 * e) + 128; + const auto v3 = (409 * e) + 128; + c1.x() = static_cast(clamp((ci + v3) >> 8, 0, 255)); // red + c1.y() = static_cast(clamp((ci + v2) >> 8, 0, 255)); // green + c1.z() = static_cast(clamp((ci + v1) >> 8, 0, 255)); // blue + + c = y1 - 16; + ci = 298 * c; + + Pt3 &c2 = (*m_p->colorData)[idC+1]; + c2.x() = static_cast(clamp((ci + v3) >> 8, 0, 255)); // red + c2.y() = static_cast(clamp((ci + v2) >> 8, 0, 255)); // green + c2.z() = static_cast(clamp((ci + v1) >> 8, 0, 255)); // blue + }); + + Bench::stop(); + } +} + + +bool K2Device::get_depth_data() { + + 
Bench::start("Kinect2::get_depth_data_0"sv); + + // get data + m_p->depthBuffer = nullptr; + m_p->depthFrame->AccessUnderlyingBuffer(&m_p->depthBufferSize, &m_p->depthBuffer); + Bench::stop(); + + int sumDepthValues = std::accumulate(m_p->depthBuffer, m_p->depthBuffer + m_p->depthBufferSize, 0); + if(m_p->previousSumDepthValues > 0){ + if(sumDepthValues == m_p->previousSumDepthValues){ + // identical frame + Logger::error("Identical depth frame"); + return false; + } + } + m_p->previousSumDepthValues = sumDepthValues; + + Bench::start("Kinect2::get_depth_data_1"sv); + + m_p->depthFrame->get_RelativeTime(&m_p->processedFrame->relativeTimeDepth); + m_p->depthFrame->get_DepthMinReliableDistance(&m_p->processedFrame->minReliableDistance); + m_p->depthFrame->get_DepthMaxReliableDistance(&m_p->processedFrame->maxReliableDistance); + + Bench::stop(); + Bench::start("Kinect2::get_depth_data_2"sv); + + // map data + m_p->mapper->MapDepthFrameToCameraSpace( + k2_depth_count, m_p->depthBuffer, // Depth frame data and size of depth frame + k2_depth_count, reinterpret_cast(m_p->cloudData->data())); // Output CameraSpacePoint array and size + + Bench::stop(); + Bench::start("Kinect2::get_depth_data_3"sv); + + m_p->mapper->MapDepthFrameToColorSpace( + k2_depth_count, m_p->depthBuffer, // Depth frame data and size of depth frame + k2_depth_count, m_p->depth2rgb.data()); // Output ColorSpacePoint array and size + + for_each(std::execution::par_unseq, begin(m_p->indicesDepths), end(m_p->indicesDepths), [&](size_t &id){ + const auto &m = m_p->depth2rgb[id]; + m_p->validityDepth2Rgb[id] = !(m.X < 0 || m.Y < 0 || m.X > k2_color_width || m.Y > k2_color_height); + m_p->indicesDepthToColor[id] = {static_cast(m.X) + k2_color_width*(static_cast(m.Y))}; + + }); + + + Bench::stop(); + return true; +} + +void K2Device::get_infra_data(){ + + // get data + unsigned int size; + unsigned short* buffer = nullptr; + m_p->infraFrame->AccessUnderlyingBuffer(&size, &buffer); + 
m_p->infraFrame->get_RelativeTime(&m_p->processedFrame->relativeTimeInfra); + std::copy(buffer, buffer + size, std::begin(*m_p->processedFrame->infraData)); +} + +void K2Device::get_long_exposure_infra_data(){ + + // get data + unsigned int size; + unsigned short* buffer = nullptr; + m_p->longExposureInfraFrame->AccessUnderlyingBuffer(&size, &buffer); + m_p->longExposureInfraFrame->get_RelativeTime(&m_p->processedFrame->relativeTimeLongInfra); + std::copy(buffer, buffer + size, std::begin(*m_p->processedFrame->infraData)); +} + +void K2Device::get_body_data(){ + + Bench::start("Kinect2::get_body_data_0"sv); + + std::array bodies = {nullptr,nullptr,nullptr,nullptr,nullptr,nullptr}; + if(!check_func_sucess(m_p->bodyFrame->GetAndRefreshBodyData(static_cast(bodies.size()), bodies.data()))){ + return; + } + m_p->bodyFrame->get_RelativeTime(&m_p->processedFrame->relativeTimeBody); + + Bench::stop(); + Bench::start("Kinect2::get_body_data_1"sv); + + for (size_t ii = 0; ii < bodies.size(); ii++) { + + auto body = bodies[ii]; + auto &data = m_p->processedFrame->bodiesData[ii]; + std::uint64_t trackingId = 0; + if(S_OK == body->get_TrackingId(&trackingId)){ // Gets the tracking ID. + data.id = trackingId; + } + + BOOLEAN isTracked = false; + if(S_OK == body->get_IsTracked(&isTracked)){ // Retrieves a boolean value that indicates if the body is tracked. + data.tracked = isTracked; + } + BOOLEAN isRestricted = false; + if(S_OK == body->get_IsRestricted(&isRestricted)){ // Retrieves a boolean value that indicates if the body is restricted from a full range of motion. + data.restricted = isRestricted; + } + DetectionResult engaged; + if(S_OK == body->get_Engaged(&engaged)){ // Gets the level of user engagement. + data.engaged = static_cast(engaged); + } + HandState leftHandState; + if(S_OK == body->get_HandLeftState(&leftHandState)){ // Gets the left hand state. 
+ data.leftHandState = static_cast(leftHandState); + } + TrackingConfidence leftHandConfidence; + if(S_OK == body->get_HandLeftConfidence(&leftHandConfidence)){ // Gets the tracking confidence for the left hand. + data.leftHandHightConfidence = leftHandConfidence == TrackingConfidence::TrackingConfidence_High; + } + HandState rightHandState; + if(S_OK == body->get_HandRightState(&rightHandState)){ // Gets the right hand state. + data.rightHandState = static_cast(rightHandState); + } + TrackingConfidence rightHandConfidence; + if(S_OK == body->get_HandRightConfidence(&rightHandConfidence)){ // Gets the tracking confidence for the right hand. + data.rightHandHightConfidence = rightHandConfidence == TrackingConfidence::TrackingConfidence_High; + } + PointF lean; + if(S_OK == body->get_Lean(&lean)){ // Gets the amount a body is leaning, which is a number between -1 (leaning left or back) and 1 (leaning right or front) + data.lean = {lean.X,lean.Y}; + } + TrackingState leanTracking; + if(S_OK == body->get_LeanTrackingState(&leanTracking)){ // Gets the lean tracking state, which indicates if the body is tracked. + data.leanTracking = static_cast(leanTracking); + } + + DWORD clippedEdges; + if(S_OK == body->get_ClippedEdges(&clippedEdges)){ // Gets the clipped edges. + // ... 
+ } + + std::array joints; + if(S_OK == body->GetJoints(JointType_Count, joints.data())){ + for(auto &joint : joints){ + auto type = static_cast(joint.JointType); + if(data.joints.count(type)==0){ + data.joints[type] = K2BodyJoint{}; + } + data.joints[type].state = static_cast(joint.TrackingState); + const auto &p = joint.Position; + data.joints[type].pos = {p.X,p.Y,p.Z}; + } + } + + std::array jointsOrientations; + if(S_OK == body->GetJointOrientations(JointType_Count, jointsOrientations.data())){ + + for(auto &jointOrientation : jointsOrientations){ + + auto type = static_cast(jointOrientation.JointType); + if(data.joints.count(type)==0){ + data.joints[type] = K2BodyJoint{}; + } + const auto &q =jointOrientation.Orientation; + data.joints[type].rotQuaternion = {q.x,q.y,q.z,q.w}; + } + } + + // body->GetActivityDetectionResults() // Gets the activity detection results from IBody. + // body->GetAppearanceDetectionResults() // Gets the appearance. + // body->GetExpressionDetectionResults(); // always return null, (cross platform with xbox) + body->Release(); + } + + Bench::stop(); + +// auto d1 = std::chrono::duration_cast(t2-t1); +// auto d2 = std::chrono::duration_cast(t3-t2); +// std::cout << "get_body: " << d1.count() << " " << d2.count() <<"\n"; + +// m_p->bodyFrame->get_FloorClipPlane(); +// m_p->bodyFrame->get_BodyFrameSource(); +} + +void K2Device::post_computing_color_data(){ + + using namespace std; + + if(parameters.filterDepthWithColor){ + + auto &depth = *m_p->cloudData.get(); + const auto &p = parameters; + + for_each(execution::par_unseq, begin(m_p->indicesDepths), end(m_p->indicesDepths), [&](size_t &id){ + + if(is_kinect2_depth_valid(depth[id].z()) && m_p->validityDepth2Rgb[id]){ + + auto color = (*m_p->colorData)[m_p->indicesDepthToColor[id]]; + + // euclidian distance + auto delta = sqrt( + (color(0)-p.filterColor(0))*(color(0)-p.filterColor(0)) + + (color(1)-p.filterColor(1))*(color(1)-p.filterColor(1)) + + 
(color(2)-p.filterColor(2))*(color(2)-p.filterColor(2)) + ); + if(delta > p.maxDiffColor.x()){ + depth[id].z() = k2_invalid_value; + return; + } + } + }); + } + +} + + +void K2Device::post_computing_depth_data(){ + + using namespace std; + + Bench::start("Kinect2::post_computing_depth_data_0"sv); + + + + auto binaryDepthData = m_p->binaryDepth.ptr(); + auto erodedBinaryDepthData = m_p->erodedBinaryDepth.ptr(); + + for_each(execution::par_unseq, begin(m_p->indicesDepths), end(m_p->indicesDepths), [&](size_t &id){ + + auto &depth = (*m_p->cloudData)[id]; + + // reset binary mat + binaryDepthData[id] = 0; + + // remove inf pts + if(!isnormal(depth.x()) || !isnormal(depth.y()) || !isnormal(depth.z())){ + depth = k2_invalid_point; + return; + } + + // width/height filter + const auto &id2d = m_p->indices2dDepths[id]; + if(id2d.first < parameters.minWidth || id2d.first > parameters.maxWidth || id2d.second < parameters.minHeight || id2d.second > parameters.maxHeight){ + depth = k2_invalid_point; + return; + } + + // depth filter + if(depth.z() < parameters.minDepthValue || depth.z() > parameters.maxDepthValue){ + depth = k2_invalid_point; + return; + } + + // binary mat + binaryDepthData[id] = 255; + }); + + Bench::stop(); + Bench::start("Kinect2::post_computing_depth_data_1"sv); + + // erosion + if(parameters.doErosion){ + + cv::Mat elementErode = cv::getStructuringElement( + parameters.erosionType, + cv::Size( 2*parameters.erosionSize + 1, 2*parameters.erosionSize+1), + cv::Point( parameters.erosionSize, parameters.erosionSize) + ); + cv::erode( m_p->binaryDepth, m_p->erodedBinaryDepth, elementErode); + + for_each(execution::par_unseq, begin(m_p->indicesDepths), end(m_p->indicesDepths), [&](size_t &id){ + + if(erodedBinaryDepthData[id] < parameters.minErosionValue){ + (*m_p->cloudData)[id].z() = k2_invalid_value; + } + }); + } + + Bench::stop(); + Bench::start("Kinect2::post_computing_depth_data_2"sv); + + // local depth + constexpr size_t widthPlusOne = k2_depth_width 
+1; + constexpr size_t widthMinusOne = k2_depth_width -1; + + // neighbours number + const bool doMinNeighboursFiltering = parameters.minNeighboursLoops > 0; + if(doMinNeighboursFiltering){ + + for(size_t numLoop = 0; numLoop < parameters.minNeighboursLoops; ++numLoop){ + + for_each(execution::par_unseq, begin(m_p->indicesDepthsWithoutBorders), end(m_p->indicesDepthsWithoutBorders), [&](size_t &id){ + + auto depth = (*m_p->cloudData)[id]; + const auto depthZ = depth.z(); + if(!is_kinect2_depth_valid(depthZ)){ + return; + } + + size_t count = 0; + const size_t idA = id - widthPlusOne; + const size_t idB = idA + 1; + const size_t idC = idB + 1; + const size_t idD = id - 1; + const size_t idE = id + 1; + const size_t idF = id + widthMinusOne; + const size_t idG = idF + 1; + const size_t idH = idG + 1; + + if(is_kinect2_depth_valid((*m_p->cloudData)[idA].z())){ + ++count; + } + if(is_kinect2_depth_valid((*m_p->cloudData)[idB].z())){ + ++count; + } + if(is_kinect2_depth_valid((*m_p->cloudData)[idC].z())){ + ++count; + } + if(is_kinect2_depth_valid((*m_p->cloudData)[idD].z())){ + ++count; + } + if(is_kinect2_depth_valid((*m_p->cloudData)[idE].z())){ + ++count; + } + if(is_kinect2_depth_valid((*m_p->cloudData)[idF].z())){ + ++count; + } + if(is_kinect2_depth_valid((*m_p->cloudData)[idG].z())){ + ++count; + } + if(is_kinect2_depth_valid((*m_p->cloudData)[idH].z())){ + ++count; + } + + m_p->depthMask[id] = count < parameters.nbMinNeighboursNb; + }); + + for_each(execution::par_unseq, begin(m_p->indicesDepthsWithoutBorders), end(m_p->indicesDepthsWithoutBorders), [&](size_t &id){ + if(m_p->depthMask[id]){ + (*m_p->cloudData)[id].z() = k2_invalid_value; + } + m_p->depthMask[id] = false; + }); + } + } + + + Bench::stop(); + Bench::start("Kinect2::post_computing_depth_data_3"sv); + + + const bool doLocalDiffFiltering = parameters.maxLocalDiff > 0.f; + if(doLocalDiffFiltering){ + + const float mLocal = parameters.maxLocalDiff/1000.f; + for_each(execution::par_unseq, 
begin(m_p->indicesDepthsWithoutBorders), end(m_p->indicesDepthsWithoutBorders), [&](size_t &id){ + + m_p->depthMask[id] = false; + + const auto depthZ = (*m_p->cloudData)[id].z(); + if(!is_kinect2_depth_valid(depthZ)){ + return; + } + + float meanDiff = 0.f; + size_t count = 0; + const size_t idA = id - widthPlusOne; + const size_t idB = idA + 1; + const size_t idC = idB + 1; + const size_t idD = id - 1; + const size_t idE = id + 1; + const size_t idF = id + widthMinusOne; + const size_t idG = idF + 1; + const size_t idH = idG + 1; + + if(is_kinect2_depth_valid((*m_p->cloudData)[idA].z())){ + meanDiff += abs((*m_p->cloudData)[idA].z()-depthZ); + ++count; + } + if(is_kinect2_depth_valid((*m_p->cloudData)[idB].z())){ + meanDiff += abs((*m_p->cloudData)[idB].z()-depthZ); + ++count; + } + if(is_kinect2_depth_valid((*m_p->cloudData)[idC].z())){ + meanDiff += abs((*m_p->cloudData)[idC].z()-depthZ); + ++count; + } + if(is_kinect2_depth_valid((*m_p->cloudData)[idD].z())){ + meanDiff += abs((*m_p->cloudData)[idD].z()-depthZ); + ++count; + } + if(is_kinect2_depth_valid((*m_p->cloudData)[idE].z())){ + meanDiff += abs((*m_p->cloudData)[idE].z()-depthZ); + ++count; + } + if(is_kinect2_depth_valid((*m_p->cloudData)[idF].z())){ + meanDiff += abs((*m_p->cloudData)[idF].z()-depthZ); + ++count; + } + if(is_kinect2_depth_valid((*m_p->cloudData)[idG].z())){ + meanDiff += abs((*m_p->cloudData)[idG].z()-depthZ); + ++count; + } + if(is_kinect2_depth_valid((*m_p->cloudData)[idH].z())){ + meanDiff += abs((*m_p->cloudData)[idH].z()-depthZ); + ++count; + } + + m_p->depthMask[id] = (count == 0) ? 
false : (meanDiff/count > mLocal); + }); + + for_each(execution::par_unseq, begin(m_p->indicesDepthsWithoutBorders), end(m_p->indicesDepthsWithoutBorders), [&](size_t &id){ + if(m_p->depthMask[id]){ + (*m_p->cloudData)[id].z() = k2_invalid_value; + } + m_p->depthMask[id] = false; + }); + } + + Bench::stop(); + Bench::start("Kinect2::post_computing_depth_data_4"sv); + + if(parameters.doTemporalFilter){ + for_each(std::execution::par_unseq, begin(m_p->indicesDepths), end(m_p->indicesDepths), [&](size_t &id){ + if(is_kinect2_depth_valid(m_p->cloudData->at(id).z()) && is_kinect2_depth_valid(m_p->depthDataTemporal->at(id).z())){ + m_p->depthDataTemporal->at(id).z() = 0.5f*(m_p->cloudData->at(id).z()+m_p->depthDataTemporal->at(id).z()); + } + }); + std::swap(m_p->cloudData,m_p->depthDataTemporal); + } + + Bench::stop(); + Bench::start("Kinect2::post_computing_depth_data_5"sv); + + if(parameters.offsetAfterMin > 0.f){ + float min = std::numeric_limits::max(); + for_each(execution::seq, begin(m_p->indicesDepths), end(m_p->indicesDepths), [&](size_t &id){ + + const auto depthZ = (*m_p->cloudData)[id].z(); + if(!is_kinect2_depth_valid(depthZ)){ + return; + } + if(depthZ < min){ + min = depthZ; + } + + }); + for_each(execution::par_unseq, begin(m_p->indicesDepths), end(m_p->indicesDepths), [&](size_t &id){ + if((*m_p->cloudData)[id].z() > min + parameters.offsetAfterMin){ + (*m_p->cloudData)[id].z() = k2_invalid_value; + } + }); + } + + + if(parameters.vmin > 0.f){ + const auto squareMaxDist = parameters.vmin*parameters.vmin; + Pt3f mean = {}; + size_t count = 0; + for_each(std::execution::unseq, begin(m_p->indicesDepths), end(m_p->indicesDepths), [&](size_t &id){ + + const auto depthZ = (*m_p->cloudData)[id].z(); + if(!is_kinect2_depth_valid(depthZ)){ + return; + } + + mean += (*m_p->cloudData)[id]; + count++; + }); + if(count > 0){ + mean /= static_cast(count); + + for_each(execution::par_unseq, begin(m_p->indicesDepths), end(m_p->indicesDepths), [&](size_t &id){ + 
if(square_norm(Vec3f{geo::vec((*m_p->cloudData)[id], mean)}) > squareMaxDist){ + (*m_p->cloudData)[id].z() = k2_invalid_value; + } + }); + } + } + + Bench::stop(); +} + +void K2Device::post_computing_infra_data(){ + + using namespace std; + uint16_t validV = parameters.infraInvert ? 0 : 1; + uint16_t invalidV = parameters.infraBinary ? (parameters.infraInvert ? 1 : 0) : 0; + + transform(execution::par_unseq, begin(*m_p->processedFrame->infraData), end(*m_p->processedFrame->infraData), begin(*m_p->processedFrame->infraData),[&](uint16_t inf){ + if(inf < parameters.minInfra || inf > parameters.maxInfra){ + return invalidV; + }else if(parameters.infraBinary){ + return validV; + } + return inf; + }); + + auto minMax = minmax_element(begin(*m_p->processedFrame->infraData),end(*m_p->processedFrame->infraData)); + auto min = (*m_p->processedFrame->infraData)[static_cast((minMax.first - begin(*m_p->processedFrame->infraData)))]; + auto max = (*m_p->processedFrame->infraData)[static_cast((minMax.second - begin(*m_p->processedFrame->infraData)))]; + + auto diff = max - min; + if(diff==0){ + diff = 1; + } + + auto diffRange = parameters.maxInfraRange-parameters.minInfraRange; + auto minV = static_cast(numeric_limits::min()); + auto maxV = static_cast(numeric_limits::max()); + transform(execution::par_unseq, begin(*m_p->processedFrame->infraData), end(*m_p->processedFrame->infraData), begin(*m_p->processedFrame->infraData),[&](uint16_t inf){ + return static_cast(clamp(((1.*inf -min)/diff)*diffRange + parameters.minInfraRange, minV, maxV)); + }); +} + +void K2Device::post_computing_body_data(){ + + for(auto &body : m_p->processedFrame->bodiesData){ + for(auto &joint : body.joints){ + const auto &pos = joint.second.pos; + if(pos.z() < parameters.minDepthValue || pos.z() > parameters.maxDepthValue){ + joint.second.state = K2TrackingStateT::not_tracked; + } + } + } +} + +void K2Device::process_depth_512x424(){ + + // copy depth + std::copy(m_p->depthBuffer, m_p->depthBuffer + 
m_p->depthBufferSize, m_p->processedFrame->depthData->data()); + + // reverse depth + for(size_t ii = 0; ii < k2_depth_height; ++ii){ + auto *col = &(*m_p->processedFrame->depthData)[ii*k2_depth_width]; + std::reverse(col, col + k2_depth_width-1); + } +} + +void K2Device::process_infra_512x424(){ + // reverse infra + for(size_t ii = 0; ii < k2_infrared_height; ++ii){ + auto *col = &(*m_p->processedFrame->infraData)[ii*k2_infrared_width]; + std::reverse(col, col + k2_infrared_width-1); + } +} + +void K2Device::process_compressed_color_1920x1080(){ + + // reverse colors + for(size_t ii = 0; ii < k2_color_height; ++ii){ + auto *col = &(*m_p->colorData)[ii*k2_color_width]; + std::reverse(col, col + k2_color_width-1); + } + + const int jpegQuality = parameters.jpegCompressionRate; + unsigned char *buffer = reinterpret_cast(m_p->colorData->data()); + const int width = k2_color_width; + const int height = k2_color_height; + + // compress color (hight cost) + int ret = tjCompress2(m_p->jpegCompressor, buffer, width, 0, height, TJPF_RGB, + &m_p->tjCompressedImage, &m_p->processedFrame->jpegColorSize, TJSAMP_444, jpegQuality, TJFLAG_FASTDCT); + if(ret == -1){ + Logger::error(("tjCompress2 error with code: ") + std::to_string(tjGetErrorCode(m_p->jpegCompressor))); + return; + } + + if(m_p->processedFrame->compressedImage.size() < m_p->processedFrame->jpegColorSize){ + m_p->processedFrame->compressedImage.resize(m_p->processedFrame->jpegColorSize); + } + std::copy(m_p->tjCompressedImage, m_p->tjCompressedImage + m_p->processedFrame->jpegColorSize, std::begin(m_p->processedFrame->compressedImage)); +} + +void K2Device::process_compressed_color_512x424(){ + + std::for_each(std::execution::par_unseq, std::begin(m_p->indicesDepths), std::end(m_p->indicesDepths), [&](size_t &id){ + + if(is_kinect2_depth_valid((*m_p->cloudData)[id].z()) && m_p->validityDepth2Rgb[id]){ + // color +// const auto &m = m_p->depth2rgb[id]; + size_t idX =m_p->indicesDepthToColor[id];// {static_cast(m.X) + 
kinect2_color_width*(static_cast(m.Y))}; + (*m_p->processedColorData)[id] = {(*m_p->colorData)[idX].x(),(*m_p->colorData)[idX].y(),(*m_p->colorData)[idX].z()}; + }else{ + (*m_p->processedColorData)[id] = {0,0,0}; + } + }); + + // reverse colors + for(size_t ii = 0; ii < k2_depth_height; ++ii){ + auto *col = &(*m_p->processedColorData)[ii*k2_depth_width]; + std::reverse(col, col + k2_depth_width-1); + } + + const int jpegQuality = parameters.jpegCompressionRate; + unsigned char *buffer = reinterpret_cast(m_p->processedColorData->data()); + const int width = k2_depth_width; + const int height = k2_depth_height; + + int ret = tjCompress2(m_p->jpegCompressor, buffer, width, 0, height, TJPF_RGB, + &m_p->tjCompressedImage, &m_p->processedFrame->jpegColorSize, TJSAMP_444, jpegQuality, TJFLAG_FASTDCT); + if(ret == -1){ + Logger::error(("tjCompress2 error with code: ") + std::to_string(tjGetErrorCode(m_p->jpegCompressor))); + return; + } + + if(m_p->processedFrame->compressedImage.size() < m_p->processedFrame->jpegColorSize){ + m_p->processedFrame->compressedImage.resize(m_p->processedFrame->jpegColorSize); + } + std::copy(m_p->tjCompressedImage, m_p->tjCompressedImage + m_p->processedFrame->jpegColorSize, std::begin(m_p->processedFrame->compressedImage)); +} + + +void K2Device::process_compressed_color_cloud(){ + + using namespace std; + + for_each(execution::par_unseq, begin(m_p->indicesDepths), end(m_p->indicesDepths), [&](size_t &id){ + + if(is_kinect2_depth_valid((*m_p->cloudData)[id].z()) && m_p->validityDepth2Rgb[id]){ + // color + const auto &m = m_p->depth2rgb[id]; + size_t idX = {static_cast(m.X) + k2_color_width*(static_cast(m.Y))}; + (*m_p->processedColorData)[id] = {(*m_p->colorData)[idX].x(),(*m_p->colorData)[idX].y(),(*m_p->colorData)[idX].z()}; + (*m_p->processedFrame->depthData)[id] = static_cast(std::clamp((*m_p->cloudData)[id].z(),0.f,8.f)*1000.f); + + }else{ + (*m_p->processedColorData)[id] = geo::Pt3{0,0,0}; + (*m_p->processedFrame->depthData)[id] = 0; 
+ } + }); + +// // reverse colors +// for(size_t ii = 0; ii < kinect2_depth_height; ++ii){ +// auto *col = &(*m_p->processedColorData)[ii*kinect2_depth_width]; +// reverse(col, col + kinect2_depth_width-1); +// } + +// // reverse depth +// for(size_t ii = 0; ii < kinect2_depth_height; ++ii){ +// auto *d = &(*m_p->processedFrame->depthData)[ii*kinect2_depth_width]; +// reverse(d, d + kinect2_depth_width-1); +// } + + + const int jpegQuality = parameters.jpegCompressionRate; + unsigned char *buffer = reinterpret_cast(m_p->processedColorData->data()); + const int width = k2_depth_width; + const int height = k2_depth_height; + + int ret = tjCompress2(m_p->jpegCompressor, buffer, width, 0, height, TJPF_RGB, + &m_p->tjCompressedImage, &m_p->processedFrame->jpegColorSize, TJSAMP_444, jpegQuality, TJFLAG_FASTDCT); + if(ret == -1){ + Logger::error(("tjCompress2 error with code: ") + std::to_string(tjGetErrorCode(m_p->jpegCompressor))); + return; + } + + if(m_p->processedFrame->compressedImage.size() < m_p->processedFrame->jpegColorSize){ + m_p->processedFrame->compressedImage.resize(m_p->processedFrame->jpegColorSize); + } + std::copy(m_p->tjCompressedImage, m_p->tjCompressedImage + m_p->processedFrame->jpegColorSize, std::begin(m_p->processedFrame->compressedImage)); + + // compress depth integers casted in 32 bits + m_p->processedFrame->compressedDepthSize = m_p->depthCompressor.encode( + reinterpret_cast(m_p->processedFrame->depthData->data()), k2_depth_count/2, + reinterpret_cast(m_p->processedFrame->compressedDepthData.data()), k2_depth_count/2 + 1024 + ); +} + +void K2Device::process_compressed_color_mesh(){ + + using namespace std; + + for_each(std::execution::par_unseq, begin(m_p->indicesDepths), end(m_p->indicesDepths), [&](size_t &id){ + + if(is_kinect2_depth_valid((*m_p->cloudData)[id].z()) && m_p->validityDepth2Rgb[id]){ + // color + const auto &m = m_p->depth2rgb[id]; + size_t idX = {static_cast(m.X) + k2_color_width*(static_cast(m.Y))}; + 
(*m_p->processedColorData)[id] = {(*m_p->colorData)[idX].x(),(*m_p->colorData)[idX].y(),(*m_p->colorData)[idX].z()}; + (*m_p->processedFrame->depthData)[id] = static_cast(std::clamp((*m_p->cloudData)[id].z(),0.f,8.f)*1000.f); + + }else{ + (*m_p->processedColorData)[id] = geo::Pt3{0,0,0}; + (*m_p->processedFrame->depthData)[id] = 0; + } + }); + +// // reverse colors +// for(size_t ii = 0; ii < kinect2_depth_height; ++ii){ +// auto *col = &(*m_p->processedColorData)[ii*kinect2_depth_width]; +// reverse(col, col + kinect2_depth_width-1); +// } + +// // reverse depth +// for(size_t ii = 0; ii < kinect2_depth_height; ++ii){ +// auto *d = &(*m_p->processedFrame->depthData)[ii*kinect2_depth_width]; +// reverse(d, d + kinect2_depth_width-1); +// } + + const int jpegQuality = parameters.jpegCompressionRate; + unsigned char *buffer= reinterpret_cast(m_p->processedColorData->data()); + const int width = k2_depth_width; + const int height = k2_depth_height; + + int ret = tjCompress2(m_p->jpegCompressor, buffer, width, 0, height, TJPF_RGB, + &m_p->tjCompressedImage, &m_p->processedFrame->jpegColorSize, TJSAMP_444, jpegQuality, TJFLAG_FASTDCT); + if(ret == -1){ + Logger::error(("tjCompress2 error with code: ") + std::to_string(tjGetErrorCode(m_p->jpegCompressor))); + return; + } + + if(m_p->processedFrame->compressedImage.size() < m_p->processedFrame->jpegColorSize){ + m_p->processedFrame->compressedImage.resize(m_p->processedFrame->jpegColorSize); + } + std::copy(m_p->tjCompressedImage, m_p->tjCompressedImage + m_p->processedFrame->jpegColorSize, std::begin(m_p->processedFrame->compressedImage)); + + // compress depth integers casted in 32 bits + m_p->processedFrame->compressedDepthSize = m_p->depthCompressor.encode( + reinterpret_cast(m_p->processedFrame->depthData->data()), k2_depth_count/2, + reinterpret_cast(m_p->processedFrame->compressedDepthData.data()), k2_depth_count/2 + 1024 + ); +} + +void K2Device::release_color_data(){ + if (m_p->colorframe != nullptr){ + 
m_p->colorframe->Release(); + m_p->colorframe = nullptr; + } + if (m_p->colorFrameRef != nullptr){ + m_p->colorFrameRef->Release(); + m_p->colorFrameRef = nullptr; + } +} + +void K2Device::release_depth_data(){ + if (m_p->depthFrame != nullptr){ + m_p->depthFrame->Release(); + m_p->depthFrame = nullptr; + } + if (m_p->depthFrameRef != nullptr){ + m_p->depthFrameRef->Release(); + m_p->depthFrameRef = nullptr; + } +} + +void K2Device::release_infra_data(){ + if (m_p->infraFrame != nullptr){ + m_p->infraFrame->Release(); + m_p->infraFrame = nullptr; + } + if (m_p->infraFrameRef != nullptr){ + m_p->infraFrameRef->Release(); + m_p->infraFrameRef = nullptr; + } +} + +void K2Device::release_long_exposure_infra_data(){ + if (m_p->longExposureInfraFrame != nullptr){ + m_p->longExposureInfraFrame->Release(); + m_p->longExposureInfraFrame = nullptr; + } + if (m_p->longExposureInfraFrameRef != nullptr){ + m_p->longExposureInfraFrameRef->Release(); + m_p->longExposureInfraFrameRef = nullptr; + } +} + +void K2Device::release_body_data(){ + + if (m_p->bodyFrame != nullptr){ + m_p->bodyFrame->Release(); + m_p->bodyFrame = nullptr; + } + if (m_p->bodyFrameRef != nullptr){ + m_p->bodyFrameRef->Release(); + m_p->bodyFrameRef = nullptr; + } +} + +void K2Device::release_multi_source_data(){ + + if (m_p->multiSouceFrame != nullptr){ + m_p->multiSouceFrame->Release(); + m_p->multiSouceFrame = nullptr; + } +} + +void K2Device::clean_frame(){ + release_depth_data(); + release_color_data(); + release_infra_data(); + release_long_exposure_infra_data(); + release_body_data(); + release_multi_source_data(); +} + + + + +//Pt4 rgb_to_hsl(const Pt4 &rgba){ + +// float H,S,L; + +// float r = (rgba[0] / 255.0f); +// float g = (rgba[1] / 255.0f); +// float b = (rgba[2] / 255.0f); + +// float min = std::min(std::min(r, g), b); +// float max = std::max(std::max(r, g), b); +// float delta = max - min; + +// L = (max + min) / 2; +// if (almost_equal(delta,0.f)){ +// H = 0.f; +// S = 0.0f; +// }else{ +// 
S = (L <= 0.5f) ? (delta / (max + min)) : (delta / (2 - max - min)); + +// float hue; + +// if (almost_equal(r,max)){ +// hue = ((g - b) / 6.f) / delta; +// }else if (almost_equal(g,max)){ +// hue = (1.0f / 3) + ((b - r) / 6.f) / delta; +// } +// else{ +// hue = (2.0f / 3.f) + ((r - g) / 6.f) / delta; +// } + +// if (hue < 0.f){ +// hue += 1.f; +// } +// if (hue > 1.f){ +// hue -= 1.f; +// } + +// H = hue * 360.f; +// } +// return Pt4{H,S,L,1.f*rgba.w()}; +//} + +//float hue_to_rgb(float v1, float v2, float vH) { + +// if (vH < 0.f){ +// vH += 1.f; +// } + +// if (vH > 1.f){ +// vH -= 1.f; +// } + +// if ((6.f * vH) < 1.f){ +// return (v1 + (v2 - v1) * 6.f * vH); +// } + +// if ((2.f * vH) < 1.f){ +// return v2; +// } + +// if ((3.f * vH) < 2.f){ +// return (v1 + (v2 - v1) * ((2.0f / 3.f) - vH) * 6.f); +// } + +// return v1; +//} + +//Pt4 hsl_to_rgb(Pt4 hsva){ + +// float H = hsva.x(); +// float S = hsva.y(); +// float L = hsva.z(); + +// if (almost_equal(S,0.f)){ +// auto v = static_cast(L * 255.f); +// return Pt4{v,v,v,static_cast(hsva.w())}; +// }else{ +// float v1, v2; +// float hue = H / 360.f; + +// v2 = (L < 0.5f) ? 
(L * (1 + S)) : ((L + S) - (L * S)); +// v1 = 2 * L - v2; + +// return Pt4{static_cast(255 * hue_to_rgb(v1, v2, hue + (1.0f / 3.f))), +// static_cast(255 * hue_to_rgb(v1, v2, hue)), +// static_cast(255 * hue_to_rgb(v1, v2, hue - (1.0f / 3.f))),static_cast(hsva.w())}; +// } +//} + + + +//void Kinect2::process_color_1920x1080(){ +// // reverse colors +// for(size_t ii = 0; ii < kinect2_color_height; ++ii){ +// auto *col = &(*colorData)[ii*kinect2_color_width]; +// std::reverse(col, col + kinect2_color_width-1); +// } +//} + +//void Kinect2::process_color_512x424(){ + +// std::for_each(std::execution::par_unseq, std::begin(m_p->indicesDepths), std::end(m_p->indicesDepths), [&](size_t &id){ + +// if(!m_p->validityDepth2Rgb[id]){ +// (*processedColorData)[id] = {0,0,0,255}; +// }else{ +// // color +// const auto &m = m_p->depth2rgb[id]; +// size_t idX = {static_cast(m.X) + kinect2_color_width*(static_cast(m.Y))}; +// (*processedColorData)[id] = {(*colorData)[idX].x(),(*colorData)[idX].y(),(*colorData)[idX].z(), 255}; +// } +// }); + +// // reverse colors +// for(size_t ii = 0; ii < kinect2_depth_height; ++ii){ +// auto *col = &(*processedColorData)[ii*kinect2_depth_width]; +// std::reverse(col, col + kinect2_depth_width-1); +// } +//} + +//void Kinect2::process_cloud(){ + + +// currentCloudSize = 0; +// std::for_each(std::execution::seq, std::begin(m_p->indicesDepths), std::end(m_p->indicesDepths), [&](size_t &id){ + +// if(is_kinect2_depth_valid((*cloudData)[id].z()) && m_p->validityDepth2Rgb[id]){ +// // pos +// (*processedCloudData)[currentCloudSize] = (*cloudData)[id]; +// // color +// const auto &m = m_p->depth2rgb[id]; +// size_t idx = (static_cast(m.X)) + kinect2_color_width*(static_cast(m.Y)); +// (*processedColorData)[currentCloudSize] = (*colorData)[idx]; +// // cloud size +// currentCloudSize++; +// }else{ +// // color +// (*processedColorData)[currentCloudSize] = {0,0,0,255}; +// } +// }); +//} + +//void Kinect2::process_mesh(){ + +// std::uint32_t idT = 0; 
+// currentCloudSize = 0; +// std::for_each(std::execution::seq, std::begin(m_p->indicesDepths), std::end(m_p->indicesDepths), [&](size_t &id){ +// if(is_kinect2_depth_valid((*cloudData)[id].z()) && m_p->validityDepth2Rgb[id]){ + +// // pos +// (*processedCloudData)[currentCloudSize] = (*cloudData)[id]; +// // color +// const auto &m = m_p->depth2rgb[id]; +// size_t idx = (static_cast(m.X)) + kinect2_color_width*(static_cast(m.Y)); +// (*processedColorData)[currentCloudSize] = (*colorData)[idx]; +// // cloud size +// currentCloudSize++; +// // triangle id validity +// m_p->validIdPerPointMesh[id] = idT++; +// }else{ +// // color +// (*processedColorData)[id] = {0,0,0,255}; +// // triangle id validity +// m_p->validIdPerPointMesh[id] = idT; +// } +// }); + +// idT = 0; +// for(std::uint32_t ii = 0; ii < kinect2_depth_width-1; ++ii){ +// for(std::uint32_t jj = 0; jj < kinect2_depth_height-1; ++jj){ + +// const std::uint32_t idTopLeft = jj*kinect2_depth_width+ii; +// const std::uint32_t idTopRight = (jj)*kinect2_depth_width+(ii+1); +// const std::uint32_t idBottomLeft = (jj+1)*kinect2_depth_width+(ii); +// const std::uint32_t idBottomRight = (jj+1)*kinect2_depth_width+(ii+1); + +// const bool idTopLeftValid = is_kinect2_depth_valid((*cloudData)[idTopLeft].z()) && m_p->validityDepth2Rgb[idTopLeft]; +// const bool idTopRightValid = is_kinect2_depth_valid((*cloudData)[idTopRight].z()) && m_p->validityDepth2Rgb[idTopRight]; +// const bool idBottomLeftValid = is_kinect2_depth_valid((*cloudData)[idBottomLeft].z()) && m_p->validityDepth2Rgb[idBottomLeft]; +// const bool idBottomRightValid = is_kinect2_depth_valid((*cloudData)[idBottomRight].z()) && m_p->validityDepth2Rgb[idBottomRight]; + +// int invalidCount = (idTopLeftValid? 1 : 0) + (idTopRightValid? 1 : 0) + (idBottomLeftValid? 1 : 0) + (idBottomRightValid? 
1 : 0); +// if(invalidCount < 3){ +// continue; +// } + +// if(invalidCount == 4){ +// if(std::abs((*cloudData)[idTopLeft].z()-(*cloudData)[idBottomRight].z()) < std::abs((*cloudData)[idTopRight].z()-(*cloudData)[idBottomLeft].z())){ +// (*processedtrianglesData)[idT++] = Pt3{m_p->validIdPerPointMesh[idTopLeft], +// m_p->validIdPerPointMesh[idBottomRight], +// m_p->validIdPerPointMesh[idBottomLeft]}; +// (*processedtrianglesData)[idT++] = Pt3{m_p->validIdPerPointMesh[idTopLeft], +// m_p->validIdPerPointMesh[idTopRight], +// m_p->validIdPerPointMesh[idBottomRight]}; +// }else{ +// (*processedtrianglesData)[idT++] = Pt3{m_p->validIdPerPointMesh[idTopLeft], +// m_p->validIdPerPointMesh[idTopRight], +// m_p->validIdPerPointMesh[idBottomLeft]}; +// (*processedtrianglesData)[idT++] = Pt3{m_p->validIdPerPointMesh[idTopRight], +// m_p->validIdPerPointMesh[idBottomRight], +// m_p->validIdPerPointMesh[idBottomLeft]}; +// } +// continue; +// } + +// if(!idTopLeftValid){ +// (*processedtrianglesData)[idT++] = Pt3{m_p->validIdPerPointMesh[idTopRight], +// m_p->validIdPerPointMesh[idBottomRight], +// m_p->validIdPerPointMesh[idBottomLeft]}; +// } +// if(!idTopRightValid){ +// (*processedtrianglesData)[idT++] = Pt3{m_p->validIdPerPointMesh[idTopLeft], +// m_p->validIdPerPointMesh[idBottomRight], +// m_p->validIdPerPointMesh[idBottomLeft]}; +// } +// if(!idBottomLeftValid){ +// (*processedtrianglesData)[idT++] = Pt3{m_p->validIdPerPointMesh[idTopLeft], +// m_p->validIdPerPointMesh[idTopRight], +// m_p->validIdPerPointMesh[idBottomRight]}; +// } +// if(!idBottomRightValid){ +// (*processedtrianglesData)[idT++] = Pt3{m_p->validIdPerPointMesh[idTopLeft], +// m_p->validIdPerPointMesh[idTopRight], +// m_p->validIdPerPointMesh[idBottomLeft]}; +// } +// } +// } + +//} + diff --git a/cpp-projects/base/camera/kinect2/k2_device.hpp b/cpp-projects/base/camera/kinect2/k2_device.hpp new file mode 100644 index 0000000..3191181 --- /dev/null +++ b/cpp-projects/base/camera/kinect2/k2_device.hpp 
/**
 * @brief Check validity of an input depth value.
 *
 * A depth is valid when it is strictly greater than -5. K2Device marks rejected
 * depths by writing k2_invalid_value into them; presumably that constant lies at
 * or below this threshold (TODO confirm against its definition).
 * NaN compares false and is therefore reported as invalid.
 *
 * @param v depth value to test
 * @return true when v > -5.f, false otherwise
 */
[[nodiscard]] constexpr bool is_kinect2_depth_valid(float v) noexcept{
    return v > -5.f;
}
+ */ + bool open(K2FrameRequest mode); + + /** + * @brief Close camera sensors + */ + void close(); + + /** + * @brief Will fetch a new frame for the camera using current mode. + */ + std::optional get_kinect_data(); + + /** + * @brief Return current camera mode + */ + K2FrameRequest mode() const; + + /** + * @brief Save camera intrinsics to input file path. Return true if succes. + */ + bool save_camera_intrisics(std::string filePath); + + /** + * @brief Save camera depth space table to input file path. Return true if succes. + */ + bool save_camera_space_depth_table(std::string filePath); + + + K2Settings parameters; // Kinect2 current camera parameters + + +private: + + /** + * @brief Return camera intrinsics values (Focal length {x,y}, Principal point {x,y}, Radial distortion {second,fourth, sixth} order. + */ + std::vector intrinsics() const; + + bool acquire_multi_sources_frame(); + bool acquire_color_frame(); + bool acquire_depth_frame(); + bool acquire_infra_frame(); + bool acquire_long_exposure_infra_frame(); + bool acquire_body_frame(); + + void get_color_data(); + bool get_depth_data(); + void get_infra_data(); + void get_long_exposure_infra_data(); + void get_body_data(); + + void post_computing_color_data(); + void post_computing_depth_data(); + void post_computing_infra_data(); + void post_computing_body_data(); + + void process_compressed_color_1920x1080(); + void process_compressed_color_512x424(); + void process_compressed_color_cloud(); + void process_compressed_color_mesh(); + void process_depth_512x424(); + void process_infra_512x424(); + + void release_color_data(); + void release_depth_data(); + void release_infra_data(); + void release_long_exposure_infra_data(); + void release_body_data(); + void release_multi_source_data(); + void clean_frame(); + + struct Impl; + std::unique_ptr m_p; +}; +} diff --git a/cpp-projects/base/camera/kinect2/k2_manager.cpp b/cpp-projects/base/camera/kinect2/k2_manager.cpp new file mode 100644 index 
0000000..90a097a --- /dev/null +++ b/cpp-projects/base/camera/kinect2/k2_manager.cpp @@ -0,0 +1,78 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#include "k2_manager.hpp" + +// std +#include +#include + +// base +#include "utility/benchmark.hpp" + +using namespace std::chrono; +using namespace tool::camera; + +K2Manager::K2Manager(){ +} + +bool K2Manager::open_kinect(K2FrameRequest mode){ + return initialized = kinect.open(mode); +} + +void K2Manager::close_kinect(){ + initialized = false; + kinect.close(); +} + +std::int64_t K2Manager::get_data(){ + + if(!initialized){ + return -1; + } + + auto timeStart = high_resolution_clock::now(); + timeStampFrame = timeStart.time_since_epoch().count(); + + + Bench::reset(); + if(auto newFrame = kinect.get_kinect_data(); newFrame.has_value()){ + + frame = std::make_shared(std::move(newFrame.value())); +// if(rand()%100 == 0){ +// Bench::display(BenchUnit::microseconds,1); +// } +// std::cout << "d" << duration_cast(high_resolution_clock::now() - timeStart).count() << " "; + return duration_cast(high_resolution_clock::now() - timeStart).count(); + } + + return -1; +} + +void K2Manager::update_parameters(K2Settings parameters){ + kinect.parameters = std::move(parameters); +} + diff --git a/cpp-projects/base/camera/kinect2/k2_manager.hpp b/cpp-projects/base/camera/kinect2/k2_manager.hpp new file mode 100644 index 0000000..778d4b4 --- /dev/null +++ b/cpp-projects/base/camera/kinect2/k2_manager.hpp @@ -0,0 +1,62 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** 
+** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "k2_device.hpp" + +namespace tool::camera{ + +class K2Manager{ + +public: + + K2Manager(); + + bool open_kinect(camera::K2FrameRequest id); + void close_kinect(); + + std::int64_t get_data(); + void update_parameters(camera::K2Settings parameters); + + inline camera::K2FrameRequest mode() const{return kinect.mode();} + + inline bool is_initialized() const { + return initialized; + } + + camera::K2Device kinect; + std::shared_ptr frame = nullptr; + std::mutex lock; + +private: + + std::int64_t timeStampFrame = 0; + std::atomic_bool initialized = false; +}; + +} diff --git a/cpp-projects/base/camera/kinect2/k2_network.hpp b/cpp-projects/base/camera/kinect2/k2_network.hpp new file mode 100644 index 0000000..2417a9c --- /dev/null +++ b/cpp-projects/base/camera/kinect2/k2_network.hpp @@ -0,0 +1,112 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the 
"Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "k2_types.hpp" + +namespace tool::camera { + +// tcp +enum class K2TcpCommand : uint8_t{ + Init, + OpenCamera, + CloseCamera, + UpdateParameters, + AskNewFrame, + ShutdownComputer, + RestartComputer, + AskToSaveSettingsFile +}; + +struct K2TcpPacket{ + std::uint8_t idGrabber; // 8 + K2TcpCommand cmd; // 8 + K2FrameRequest frameMode; // 8 + K2Settings p; + int64_t t; // 64 + uint16_t sizeUdpPackets; // 16 + int writingPort; // 32 + std::array writingIpv6Address; // 45*8 +}; + +struct K2UdpHeader{ + + std::int64_t timeStamp = 0; // 64 + std::uint64_t frameId : 32, offset : 32; // 64 + std::uint64_t sizeFullData : 32, size1: 32; // 64 + std::uint64_t size2 : 32, totalNbPackets : 16, sizePacketData : 16; // 64 + std::float_t intrinsics1; // 32 + std::float_t intrinsics2; // 32 + std::float_t intrinsics3; // 32 + std::float_t intrinsics4; // 32 + uint16_t idPacket = 0; 
// 16 + uint8_t idGrabber = 0; // 8 + K2FrameRequest frameMode = K2FrameRequest::compressed_color_512x424; // 8 +}; + +//struct K2UdpFrameData2{ + +// K2UdpHeader firstHeader; +// size_t sizePts = 0; +// size_t sizeTris = 0; +// // more infos +// // ... +//}; + +struct K2UdpFrameData{ + + K2UdpFrameData() : data(std::make_unique()), bufferData(std::make_unique()){} + + std::mutex swapLock; + K2FrameDataUP data = nullptr; + K2FrameDataUP bufferData = nullptr; + + size_t sizePts = 0; + size_t sizeTris = 0; + + void swap(){ + swapLock.lock(); + std::swap(data, bufferData); + swapLock.unlock(); + } + + // enum class LastUpdated {normal,colored,mesh,none}; + // LastUpdated lastUpdate = LastUpdated::none; + + K2UdpHeader firstHeader; +}; + +struct K2GrabberTargetInfo{ + std::string name; + int targetPreferredInterface; + int targetReadingPort; + int targetWritingPort; +}; + + +} diff --git a/cpp-projects/base/camera/kinect2/k2_types.cpp b/cpp-projects/base/camera/kinect2/k2_types.cpp new file mode 100644 index 0000000..6d9a110 --- /dev/null +++ b/cpp-projects/base/camera/kinect2/k2_types.cpp @@ -0,0 +1,225 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. 
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ +#include "k2_types.hpp" + +// std +#include + +// turbojpeg +#include + +using namespace std; + +using namespace tool; +using namespace tool::camera; + +std::string K2Settings::to_string() const{ + std::ostringstream ss; + ss << "window: " << minWidth << " " << maxWidth << " " << minHeight << " " << maxHeight << "\n"; + ss << "infra_threshold: " << minInfra << " " << maxInfra << " " << minInfraRange << " " << maxInfraRange << "\n"; + ss << "color_factor: " << yFactor << " " << uFactor << " " << vFactor << "\n"; + ss << "depth_threshold: " << minDepthValue << " " << maxDepthValue << " " << maxLocalDiff << " " << offsetAfterMin << " " << vmin << " " << vmax << " " << gmin << "\n"; + ss << "compression: " << static_cast(jpegCompressionRate) << "\n"; + ss << "smoothing: " << static_cast(smoothingKernelSize )<< " " << static_cast(smoothingMethod) << "\n"; + ss << "neigbhours: " << static_cast(nbMinNeighboursNb) << " " << static_cast(minNeighboursLoops) << "\n"; + ss << "erosion: " << static_cast(erosionSize) << " " << static_cast(erosionType) << " " << static_cast(minErosionValue) << "\n"; + ss << "fps: " << static_cast(fps) << "\n"; + ss << "color_filter: " << static_cast(filterColor.x()) << " " << static_cast(filterColor.y()) << " " << static_cast(filterColor.z()) << " " + << static_cast(maxDiffColor.x()) << " " << static_cast(maxDiffColor.y()) << " " << 
static_cast(maxDiffColor.z()) << "\n"; + ss << "flags: " <(std::stoi(split[1])); + p.maxInfra = static_cast(std::stoi(split[2])); + p.minInfraRange = static_cast(std::stoi(split[3])); + p.maxInfraRange = static_cast(std::stoi(split[4])); + } + + std::getline(in, line); + split = String::split(line, ' '); + if(split.size() == 4){ + p.yFactor = std::stof(split[1]); + p.uFactor = std::stof(split[2]); + p.vFactor = std::stof(split[3]); + } + + std::getline(in, line); + split = String::split(line, ' '); + if(split.size() == 8){ + p.minDepthValue = std::stof(split[1]); + p.maxDepthValue = std::stof(split[2]); + p.maxLocalDiff = std::stof(split[3]); + p.offsetAfterMin = std::stof(split[4]); + p.vmin = std::stof(split[5]); + p.vmax = std::stof(split[6]); + p.gmin = std::stof(split[7]); + } + + std::getline(in, line); + split = String::split(line, ' '); + if(split.size() == 2){ + p.jpegCompressionRate = static_cast(std::stoi(split[1])); + } + + std::getline(in, line); + split = String::split(line, ' '); + if(split.size() == 3){ + p.smoothingKernelSize = static_cast(std::stoi(split[1])); + p.smoothingMethod = static_cast(std::stoi(split[2])); + } + + std::getline(in, line); + split = String::split(line, ' '); + if(split.size() == 3){ + p.nbMinNeighboursNb = static_cast(std::stoi(split[1])); + p.minNeighboursLoops = static_cast(std::stoi(split[2])); + } + + std::getline(in, line); + split = String::split(line, ' '); + if(split.size() == 4){ + p.erosionSize = static_cast(std::stoi(split[1])); + p.erosionType = static_cast(std::stoi(split[2])); + p.minErosionValue = static_cast(std::stoi(split[3])); + } + + std::getline(in, line); + split = String::split(line, ' '); + if(split.size() == 2){ + p.fps = static_cast(std::stoi(split[1])); + } + + std::getline(in, line); + split = String::split(line, ' '); + if(split.size() >= 7){ + p.filterColor = {static_cast(std::stoi(split[1])), + static_cast(std::stoi(split[2])), + static_cast(std::stof(split[3])) + }; + + p.maxDiffColor = 
{static_cast(std::stoi(split[4])), + static_cast(std::stoi(split[5])), + static_cast(std::stof(split[6])) + }; + } + + std::getline(in, line); + split = String::split(line, ' '); + if(split.size() >= 7){ + p.filterDepthWithColor = (std::stoi(split[1])==1); + p.infraInvert = (std::stoi(split[2])==1); + p.infraBinary = (std::stoi(split[3])==1); + p.smoothingEnabled = (std::stoi(split[4])==1); + p.doErosion = (std::stoi(split[5])==1); + p.doTemporalFilter = (std::stoi(split[6])==1); + } + + return p; +} + + + +K2Frame::K2Frame(bool initData){ + + if(initData){ + infraData = std::make_unique(); + depthData = std::make_unique(); + compressedDepthData.resize(2*k2_depth_count+1024); + } +} + + +K2Frame K2Frame::copy_frame(){ + + K2Frame frameCopy(false); + + // copy infos + frameCopy.parameters = parameters; + frameCopy.intrinsics = intrinsics; + frameCopy.mode = mode; + frameCopy.frameId = frameId; + frameCopy.relativeTimeColor = relativeTimeColor; + frameCopy.relativeTimeDepth = relativeTimeDepth; + frameCopy.relativeTimeInfra = relativeTimeInfra; + frameCopy.relativeTimeLongInfra = relativeTimeLongInfra; + frameCopy.relativeTimeBody = relativeTimeBody; + frameCopy.minReliableDistance = minReliableDistance; + frameCopy.maxReliableDistance = maxReliableDistance; + frameCopy.timeStampGetFrame = timeStampGetFrame; + frameCopy.timeStampEndProcessing = timeStampEndProcessing; + + // copy color data + if(frameCopy.mode == K2FrameRequest::compressed_color_1920x1080 || frameCopy.mode == K2FrameRequest::compressed_color_512x424 || + frameCopy.mode == K2FrameRequest::compressed_color_cloud || frameCopy.mode == K2FrameRequest::compressed_color_mesh){ + + frameCopy.jpegColorSize = jpegColorSize; + frameCopy.compressedImage = compressedImage; + } + + if(frameCopy.mode == K2FrameRequest::compressed_color_cloud || frameCopy.mode == K2FrameRequest::compressed_color_mesh){ + + frameCopy.compressedDepthSize = compressedDepthSize; + frameCopy.compressedDepthData = compressedDepthData; + 
frameCopy.bodiesData = bodiesData; + + }else if(frameCopy.mode == K2FrameRequest::depth_512x424){ + + frameCopy.depthData = std::make_unique(*depthData); + + }else if(frameCopy.mode == K2FrameRequest::infra_512x424 || frameCopy.mode == K2FrameRequest::long_exposure_infra_512x424){ + + if(frameCopy.infraData == nullptr){ + frameCopy.infraData = std::make_unique(); + } + + copy(begin(*infraData), end(*infraData), begin(*frameCopy.infraData)); + } + + return frameCopy; +} + + + diff --git a/cpp-projects/base/camera/kinect2/k2_types.hpp b/cpp-projects/base/camera/kinect2/k2_types.hpp new file mode 100644 index 0000000..681779a --- /dev/null +++ b/cpp-projects/base/camera/kinect2/k2_types.hpp @@ -0,0 +1,553 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// std +#include +#include +#include + +// base +#include "geometry/point2.hpp" +#include "geometry/point3.hpp" +#include "geometry/point4.hpp" +#include "geometry/voxel.hpp" +#include "utility/vector.hpp" +#include "utility/string.hpp" +#include "utility/tuple_array.hpp" + +namespace tool::camera{ + +using namespace std::literals::string_view_literals; + +enum class K2Detection : std::uint8_t{ + unknow= 0, + no, + maybe, + yes, +}; + +enum class K2HandStateT : std::uint8_t{ + unknow= 0, + not_tracked, + open, + closed, + lasso +}; + +enum class K2TrackingStateT : std::uint8_t{ + not_tracked=0, // body tracked + inferred, + tracked +}; + +enum class K2BodyJointType : std::uint8_t{ + spine_base = 0, + spine_mid, + neck, + head, + shoulder_left, + elbow_left, + wrist_left, + hand_left, + shoulder_right, + elbow_right, + wrist_right, + hand_right, + hip_left, + knee_left, + ankle_left, + foot_left, + hip_right, + knee_right, + ankle_right, + foot_right, + spine_shoulder, + hand_tip_left, + thumb_left, + hand_tip_right, + thumb_right, + SizeEnum +}; + + +using SV = std::string_view; +using K2JointName = SV; +using K2TJoint = std::tuple< + K2BodyJointType, K2JointName>; +[[maybe_unused]] static constexpr TupleArray k2JointsM{{ + K2TJoint{K2BodyJointType::spine_base, "spine_base"sv}, + K2TJoint{K2BodyJointType::spine_mid, "spine_mid"sv}, + K2TJoint{K2BodyJointType::neck, "neck"sv}, + K2TJoint{K2BodyJointType::head, "head"sv}, + K2TJoint{K2BodyJointType::shoulder_left, "shoulder_left"sv}, + K2TJoint{K2BodyJointType::elbow_left, "elbow_left"sv}, + 
K2TJoint{K2BodyJointType::wrist_left, "wrist_left"sv}, + K2TJoint{K2BodyJointType::hand_left, "hand_left"sv}, + K2TJoint{K2BodyJointType::shoulder_right, "shoulder_right"sv}, + K2TJoint{K2BodyJointType::elbow_right, "elbow_right"sv}, + K2TJoint{K2BodyJointType::wrist_right, "wrist_right"sv}, + K2TJoint{K2BodyJointType::hand_right, "hand_right"sv}, + K2TJoint{K2BodyJointType::hip_left, "hip_left"sv}, + K2TJoint{K2BodyJointType::knee_left, "knee_left"sv}, + K2TJoint{K2BodyJointType::ankle_left, "ankle_left"sv}, + K2TJoint{K2BodyJointType::foot_left, "foot_left"sv}, + K2TJoint{K2BodyJointType::hip_right, "hip_right"sv}, + K2TJoint{K2BodyJointType::knee_right, "knee_right"sv}, + K2TJoint{K2BodyJointType::ankle_right, "ankle_right"sv}, + K2TJoint{K2BodyJointType::foot_right, "foot_right"sv}, + K2TJoint{K2BodyJointType::spine_shoulder, "spine_shoulder"sv}, + K2TJoint{K2BodyJointType::hand_tip_left, "hand_tip_left"sv}, + K2TJoint{K2BodyJointType::thumb_left, "thumb_left"sv}, + K2TJoint{K2BodyJointType::hand_tip_right, "hand_tip_right"sv}, + K2TJoint{K2BodyJointType::thumb_right, "thumb_right"sv}, +}}; + + +enum class K2FrameRequest : uint8_t { + compressed_color_cloud=0, + compressed_color_mesh, + compressed_color_512x424, + compressed_color_1920x1080, + depth_512x424, // 512 * 424 * (sizeof(float)) -> d + infra_512x424, // 512 * 424 * (sizeof(ushort)) -> v + long_exposure_infra_512x424, // 512 * 424 * (sizeof(ushort)) -> v + undefined, + SizeEnum +}; + +enum K2MorphShapes : unsigned char{ + MORPH_RECT = 0, + MORPH_CROSS = 1, + MORPH_ELLIPSE = 2 +}; + +// constants / aliases +// # color (Pt3 uint8 / Pt4 uint8) +constexpr std::uint32_t k2_color_width = 1920; +constexpr std::uint32_t k2_color_height = 1080; +constexpr std::uint32_t k2_color_count = k2_color_width * k2_color_height; +[[maybe_unused]] constexpr std::uint32_t k2_color_data_size = k2_color_count * 4; +[[maybe_unused]] constexpr std::uint32_t k2_raw_color_data_size = k2_color_count * 2; +using K2RgbData = 
std::array,k2_color_count>; +using K2RgbDataUP = std::unique_ptr; +using K2RgbaData = std::array,k2_color_count>; +using K2RgbaDataUP = std::unique_ptr; + +// # infrared +constexpr std::uint32_t k2_infrared_width = 512; +constexpr std::uint32_t k2_infrared_height = 424; +constexpr std::uint32_t k2_infrared_count = k2_infrared_width * k2_infrared_height; +[[maybe_unused]] constexpr std::uint32_t k2_infrared_data_size = k2_infrared_count * 2; +using K2InfraData = std::array; +using K2InfraDataUP = std::unique_ptr; + +// # depth (Int16) +constexpr std::uint32_t k2_depth_width = 512; +constexpr std::uint32_t k2_depth_height = 424; +constexpr std::uint32_t k2_depth_count = k2_depth_width * k2_depth_height; +[[maybe_unused]] constexpr std::uint32_t k2_depth_data_size = k2_depth_count * 2; +using K2DepthData = std::array; +using K2DepthDataUP = std::unique_ptr; + +// # cloud (Pt3f) +using K2CloudData = std::array; +using K2CloudDataUP = std::unique_ptr; + +// # colored cloud (Voxel) +//using K2ColoredCloudData = std::array; +//using K2ColoredCloudDataUP = std::unique_ptr; + +// # triangles id +constexpr std::uint32_t k2_triangles_count = (k2_depth_width-1) * (k2_depth_height-1) * 2; +[[maybe_unused]] constexpr std::uint32_t k2_triangles_id_count = k2_triangles_count * 3; + +using K2TrianglesData = std::array, k2_triangles_count>; +using K2TrianglesDataUP = std::unique_ptr; + +// # bodies joint +constexpr std::uint32_t k2_body_joint_data_size64 = 4; +constexpr std::uint32_t k2_body_joints_count = 25; +constexpr std::uint32_t k2_bodies_count = 6; +constexpr std::uint32_t k2_bodies_joints_count = k2_bodies_count * k2_body_joints_count; +constexpr std::uint32_t k2_bodies_joints_data_size64 = k2_bodies_joints_count * k2_body_joint_data_size64; +[[maybe_unused]] constexpr std::uint32_t k2_bodies_joints_data_size8 = k2_bodies_joints_data_size64 * 8; + +constexpr std::uint32_t k2_voxels_data_size64 = k2_depth_count; +constexpr std::uint32_t k2_triangles_data_size64 = 
k2_triangles_count; + +constexpr float k2_invalid_value = -10.f; +[[maybe_unused]] constexpr geo::Pt3f k2_invalid_point = {k2_invalid_value,k2_invalid_value,k2_invalid_value}; + +// mode mapping +// # for each component category : name +using K2Requirment = bool; +using K2ColorR = K2Requirment; +using K2DepthR = K2Requirment; +using K2InfraR = K2Requirment; +using K2LongInfraR = K2Requirment; +using K2BodyR = K2Requirment; +using K2WidthFrame = int; +using K2HeightFrame = int; +using K2FrameName = SV; +using K2FR = K2FrameRequest; + +using K2TFrame = std::tuple< + K2FR, K2ColorR, K2DepthR, K2InfraR, K2LongInfraR, K2BodyR, K2WidthFrame, K2HeightFrame, K2FrameName>; +static constexpr TupleArray frames= {{ + K2TFrame{K2FR::compressed_color_cloud, true, true, false, false, true, k2_depth_width, k2_depth_height, "compressed_color_cloud"sv}, + K2TFrame{K2FR::compressed_color_mesh, true, true, false, false, true, k2_depth_width, k2_depth_height, "compressed_color_mesh"sv}, + K2TFrame{K2FR::compressed_color_512x424, true, true, false, false, false, k2_depth_width, k2_depth_height, "compressed_image_color_512x424"sv}, + K2TFrame{K2FR::compressed_color_1920x1080, true, false, false, false, false, k2_color_width, k2_color_height, "compressed_image_color_1920x1080"sv}, + K2TFrame{K2FR::depth_512x424, false, true, false, false, false, k2_depth_width, k2_depth_height, "image_depth_512x424"sv}, + K2TFrame{K2FR::infra_512x424, false, false, true, false, false, k2_infrared_width, k2_infrared_height, "image_infra_512x424"sv}, + K2TFrame{K2FR::long_exposure_infra_512x424, false, false, false, true, false, k2_infrared_width, k2_infrared_height, "image_long_exposure_infra_512x424"sv}, + K2TFrame{K2FR::undefined, false, false, false, false, false, 0, 0, "undefined"sv}, +}}; + +[[maybe_unused]] static bool color_channel_required(K2FrameRequest r) { + return frames.at<0,1>(r); +} +[[maybe_unused]] static bool depth_channel_required(K2FrameRequest r) { + return frames.at<0,2>(r); +} 
+[[maybe_unused]] static bool infra_channel_required(K2FrameRequest r) { + return frames.at<0,3>(r); +} +[[maybe_unused]] static bool long_infra_channel_required(K2FrameRequest r) { + return frames.at<0,4>(r); +} +[[maybe_unused]] static bool body_channel_required(K2FrameRequest r) { + return frames.at<0,5>(r); +} +[[maybe_unused]] static auto all_requests_names(){ + return frames.elements_not_matching_columns_values<0,8>(K2FR::undefined); +} + +[[maybe_unused]] static constexpr K2FrameRequest index_to_mode(size_t id){ + if(id < static_cast(K2FR::undefined) && id >= 0){ + return static_cast(id); + } + return K2FR::undefined; +} + +[[maybe_unused]] static constexpr uint8_t mode_to_index(K2FrameRequest r){ + return static_cast(r); +} + +using K2CloudInt64Data = std::array; +using K2CloudInt64DataUP = std::unique_ptr; + +using K2MeshInt64Data = std::array; +using K2MeshInt64DataUP = std::unique_ptr; + +// network types +struct K2Joint4x64{ + std::uint64_t jointPosX : 32, jointPosY : 32; + std::uint64_t jointPosZ : 32, jointQuaX : 32; + std::uint64_t jointQuaY : 32, jointQuaZ : 32; + std::uint64_t jointQuaW : 32, jointState : 2, jointid : 5, + bodyId : 17, bodyTracked : 1, bodyRestricted : 1, + leftHandState : 2, rightHandState : 2, leftHandConfidence : 1, rightHandConfidence : 1; +}; + +struct K2BodyJoint{ + geo::Pt3f pos; + geo::Pt4f rotQuaternion; + K2TrackingStateT state = K2TrackingStateT::not_tracked; +}; + +struct K2BodyInfos{ + std::uint64_t id = 0; + bool tracked = false; + bool restricted = false; + K2Detection engaged = K2Detection::unknow; + K2HandStateT leftHandState = K2HandStateT::unknow; + K2HandStateT rightHandState = K2HandStateT::unknow; + bool leftHandHightConfidence = false; + bool rightHandHightConfidence = false; + geo::Pt2f lean; + K2TrackingStateT leanTracking = K2TrackingStateT::not_tracked; + std::unordered_map joints; +}; + +[[maybe_unused]] static K2Joint4x64 joint_to_int(const K2BodyInfos &b, const K2BodyJointType type, const K2BodyJoint 
&j){ + K2Joint4x64 ji; + ji.bodyId = b.id; + ji.bodyTracked = b.tracked; + ji.jointid = static_cast(type); + ji.jointState = static_cast(j.state); + ji.leftHandState = static_cast(b.leftHandState); + ji.rightHandState = static_cast(b.rightHandState); + ji.leftHandConfidence = b.leftHandHightConfidence ? 1 : 0; + ji.rightHandConfidence = b.rightHandHightConfidence ? 1 : 0; + + std::vector p = {j.pos.x(),j.pos.y(),j.pos.z()}; + std::vector pi = {0,0,0}; + std::copy(reinterpret_cast(p.data()), reinterpret_cast(p.data())+3, pi.data()); + ji.jointPosX = pi[0]; + ji.jointPosY = pi[1]; + ji.jointPosZ = pi[2]; + + std::vector q = {j.rotQuaternion.x(),j.rotQuaternion.y(),j.rotQuaternion.z(), j.rotQuaternion.w()}; + std::vector qi = {0,0,0,0}; + std::copy(reinterpret_cast(q.data()), reinterpret_cast(q.data())+4, qi.data()); + ji.jointQuaX = qi[0]; + ji.jointQuaY = qi[1]; + ji.jointQuaZ = qi[2]; + ji.jointQuaW = qi[3]; + + return ji; +} + +// body id / body tracked / body restricted +[[maybe_unused]] static std::tuple int_to_joint(const K2Joint4x64 &ji){ + + K2BodyJoint j; + j.state = static_cast(ji.jointState); + + std::vector pi = { + static_cast(ji.jointPosX), + static_cast(ji.jointPosY), + static_cast(ji.jointPosZ) + }; + std::copy(std::begin(pi), std::end(pi), reinterpret_cast(j.pos.array.data())); + + std::vector qi = { + static_cast(ji.jointQuaX), + static_cast(ji.jointQuaY), + static_cast(ji.jointQuaZ), + static_cast(ji.jointQuaW) + }; + std::copy(std::begin(qi), std::end(qi), reinterpret_cast(j.rotQuaternion.array.data())); + + return {ji.bodyId, ji.bodyTracked, ji.bodyRestricted, static_cast(ji.jointid), j}; +} + + +// display data +struct K2ImageDisplayData{ + + K2ImageDisplayData(){ + // TODO + } + + std::vector colors; + size_t sizeColors; + std::mutex dataLocker; +}; + +struct K2CloudDisplayData{ + + K2CloudDisplayData(){ + points = std::make_unique(); + normals = std::make_unique(); + colors = std::make_unique(); + } + + K2CloudDataUP points = nullptr; + 
K2CloudDataUP colors = nullptr; + K2CloudDataUP normals = nullptr; + + size_t sizePts; + std::mutex dataLocker; +}; + +struct K2MeshDisplayData{ + + K2MeshDisplayData(){ + points = std::make_unique(); + normals = std::make_unique(); + colors = std::make_unique(); + triangles = std::make_unique(); + } + + K2CloudDataUP points = nullptr; + K2CloudDataUP colors = nullptr; + K2CloudDataUP normals = nullptr; + K2TrianglesDataUP triangles = nullptr; + + size_t sizePts; + size_t sizeTris; + std::mutex dataLocker; +}; + +struct K2BodiesDisplayData{ + std::array bodies; + std::mutex dataLocker; +}; + + +// frame data +struct K2FrameData{ + + // infos +// std::uint32_t currentFrame; +// std::vector intrinsics; + // timestamp + // times + // frame id + // sizes + + K2FrameData(){ + depth = std::make_unique(); + infra = std::make_unique(); + } + + K2DepthDataUP depth = nullptr; + K2InfraDataUP infra = nullptr; + + std::vector compressedColor; + std::vector compressedCloud; +}; +using K2FrameDataUP = std::unique_ptr; + + +struct K2SavedData{ + + std::int64_t timeStamp = 0; + + size_t frameId = 0; + + float focalLengthX=0.f; + float focalLengthY=0.f; + float principalPointX=0.f; + float principalPointY=0.f; + + size_t sizeDepths=0; + size_t sizeColors=0; + std::vector depths; + std::vector colors; + + std::array bodies; +}; + + +struct K2Settings{ + + // # width / height + unsigned int minWidth = 0; + unsigned int maxWidth = k2_depth_width; + unsigned int minHeight = 0; + unsigned int maxHeight =k2_depth_height; + + // infra + unsigned short minInfra = 0; + unsigned short maxInfra = 10000; + unsigned short minInfraRange = 0; + unsigned short maxInfraRange = 255; + + // # color + float yFactor = 1.f; + float uFactor = 1.f; + float vFactor = 1.f; + + // # depth + float minDepthValue = 0.3f; + float maxDepthValue = 8.f; + float maxLocalDiff = 0.1f; + float offsetAfterMin = -1.f; + float vmin = 0.f; + float vmax = 0.5f; + float gmin = 0.1f; + + // compression + unsigned char 
jpegCompressionRate = 80; + + // smoothing + unsigned char smoothingKernelSize; + unsigned char smoothingMethod; + + // # neigbhours + unsigned char nbMinNeighboursNb = 1; + unsigned char minNeighboursLoops = 1; + + // erosion + unsigned char erosionSize = 2; + K2MorphShapes erosionType = MORPH_ELLIPSE; // opencv + unsigned char minErosionValue = 255; + + // fps + unsigned char fps = 30; + + // filtered color + geo::Pt3 filterColor = geo::Pt3(255,0,0); + geo::Pt3 maxDiffColor = geo::Pt3(10,40,40); + + // flogs + bool filterDepthWithColor = false; + bool infraInvert = false; + bool infraBinary = false; + bool smoothingEnabled = false; + bool doErosion = false; + bool doTemporalFilter = false; + + std::string to_string() const; + static K2Settings from_string(const std::string ¶metersStr); +}; + + +struct K2Frame{ + + K2Frame(bool initData); + + K2Frame copy_frame(); + + K2FrameRequest mode; + K2Settings parameters; + std::vector intrinsics; + + // output formated kinect data + // # infos + std::int64_t timeStampGetFrame = 0; + std::int64_t timeStampEndProcessing = 0; + std::int64_t relativeTimeColor = 0; + std::int64_t relativeTimeDepth = 0; + std::int64_t relativeTimeInfra = 0; + std::int64_t relativeTimeLongInfra = 0; + std::int64_t relativeTimeBody = 0; + std::uint16_t minReliableDistance = 0; // 500 constant? 
+ std::uint16_t maxReliableDistance = 0; // 4500 + std::uint32_t frameId = 0; + long unsigned int jpegColorSize = 0; + size_t compressedDepthSize = 0; + // # color + std::vector compressedImage; + // # depth + std::vector compressedDepthData; + // # infra + K2InfraDataUP infraData = nullptr; // (V) + // # depth + K2DepthDataUP depthData = nullptr; // (V) + // # bodies + std::array bodiesData; // 6 bodies, 25 joints + +}; +using K2FrameUP = std::unique_ptr; + + +} diff --git a/cpp-projects/base/camera/kinect4/k4_actions_settings.cpp b/cpp-projects/base/camera/kinect4/k4_actions_settings.cpp new file mode 100644 index 0000000..bfce556 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_actions_settings.cpp @@ -0,0 +1,58 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "k4_actions_settings.hpp" + +// local +#include "utility/io_data.hpp" + +using namespace tool::camera; + +auto K4ActionsSettings::default_init_for_grabber() -> K4ActionsSettings{ + K4ActionsSettings actions; + actions.startDevice = false; + actions.openCamera = false; + return actions; +} + +auto K4ActionsSettings::default_init_for_manager() -> K4ActionsSettings{ + K4ActionsSettings actions; + actions.startDevice = false; + actions.openCamera = false; + return actions; +} + +auto K4ActionsSettings::init_from_data(std::int8_t *data) -> void{ + size_t offset = 0; + read(startDevice, data, offset); + read(openCamera, data, offset); +} + +auto K4ActionsSettings::convert_to_data(std::int8_t *data) const -> void{ + size_t offset = 0; + write(startDevice, data, offset); + write(openCamera, data, offset); +} diff --git a/cpp-projects/base/camera/kinect4/k4_actions_settings.hpp b/cpp-projects/base/camera/kinect4/k4_actions_settings.hpp new file mode 100644 index 0000000..6da0ee5 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_actions_settings.hpp @@ -0,0 +1,49 @@ +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the 
Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "files/binary_settings.hpp" + +namespace tool::camera { + +struct K4ActionsSettings : files::SubBinarySettings{ + + // device + bool startDevice = true; + bool openCamera = true; + + static auto default_init_for_grabber() -> K4ActionsSettings; + static auto default_init_for_manager() -> K4ActionsSettings; + + // i/o + auto init_from_data(std::int8_t *data) -> void override; + auto convert_to_data(std::int8_t *data) const -> void override; + auto total_data_size() const noexcept-> size_t override{ + return sizeof(startDevice) + sizeof(openCamera); + } +}; +} diff --git a/cpp-projects/base/camera/kinect4/k4_calibrator.cpp b/cpp-projects/base/camera/kinect4/k4_calibrator.cpp new file mode 100644 index 0000000..8b18b04 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_calibrator.cpp @@ -0,0 +1,537 @@ + +/******************************************************************************* +** Toolset-k4-scaner-manager ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** 
copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation  **
+** the rights to use, copy, modify, merge, publish, distribute, sublicense,   **
+** and/or sell copies of the Software, and to permit persons to whom the      **
+** Software is furnished to do so, subject to the following conditions:       **
+**                                                                            **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software.                        **
+**                                                                            **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING    **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER        **
+** DEALINGS IN THE SOFTWARE.                                                  **
+**                                                                            **
+********************************************************************************/
+
+#include "k4_calibrator.hpp"
+
+// std
+// NOTE(review): the original include target was lost in extraction; these are
+// the standard headers this file visibly relies on - confirm against the repo.
+#include <algorithm>
+#include <format>
+#include <random>
+#include <tuple>
+
+// open3D
+#include "open3d/pipelines/registration/Registration.h"
+#include "open3d/pipelines/registration/Feature.h"
+#include "open3d/geometry/PointCloud.h"
+
+// base
+#include "utility/time.hpp"
+#include "utility/logger.hpp"
+#include "utility/string_geo.hpp"
+#include "camera/kinect4/k4_model.hpp"
+
+using namespace tool::camera;
+using namespace tool::geo;
+
+// Converts an Eigen 4x4 double matrix into a Mat4f.
+// Row/column indices are swapped during the copy (element (jj,ii) of the input
+// is written at (ii,jj) of the output) and values are narrowed to float.
+auto from_eigen_mat(const Eigen::Matrix4d_u &eMat) -> Mat4f {
+    Mat4f mat = Mat4f::identity();
+    for(int ii = 0; ii < 4; ++ii){
+        for(int jj = 0; jj < 4; ++jj){
+            mat.at(ii,jj) = static_cast<float>(eMat(jj,ii));
+        }
+    }
+    return mat;
+}
+
+// Private implementation: owns the settings, the per-grabber clouds and the
+// Open3D point clouds / FPFH features of the current model and source.
+struct K4Calibrator::Impl{
+
+    auto process_cloud(const geo::ColoredCloudData &cloud) -> geo::ColoredCloudData;
+    auto set_model_cloud(size_t idModel, const K4Model &modelTr) -> bool;
+    auto set_source_cloud(size_t idSource, const K4Model &sourceTr) -> bool;
+    auto do_RANSAC(unsigned int seed) const -> open3d::pipelines::registration::RegistrationResult;
+    auto do_ICP() const -> open3d::pipelines::registration::RegistrationResult;
+
+    static auto best_registration_results(const std::vector<std::tuple<open3d::pipelines::registration::RegistrationResult, double>> &results) -> std::tuple<open3d::pipelines::registration::RegistrationResult, double>;
+    static auto compute_new_model(const open3d::pipelines::registration::RegistrationResult &result, const geo::Mat4f &model) -> geo::Mat4f;
+    static auto convert_to_opend3d_pc(const ColoredCloudData &cloud) -> std::shared_ptr<open3d::geometry::PointCloud>;
+
+    K4CalibratorSettings settings;
+    std::vector<K4CalibratorGrabberData> grabbersData;
+    geo::ColoredCloudData modelCloud;
+    geo::ColoredCloudData sourceCloud;
+
+    // Sums the squared values returned by ComputePointCloudDistance between the
+    // current model cloud and a copy of the current source cloud transformed by `model`.
+    // Requires set_model_cloud() and set_source_cloud() to have succeeded.
+    auto compute_charmfer_distance_between_with_current_source(const geo::Mat4f &model){
+
+        // work on a copy: the stored source cloud must stay untransformed for the next try
+        auto sc = sourceCloud;
+        sc.vertices.apply_transformation(model);
+
+        auto pcSourceCloud = convert_to_opend3d_pc(sc);
+        double sum = 0.0;
+        const auto distances = m_modelPcCloud->ComputePointCloudDistance(*pcSourceCloud);
+        for(const auto distance : distances){
+            sum += distance*distance;
+        }
+        return sum;
+    }
+
+    // generator used to draw a different seed for each RANSAC try
+    std::mt19937 gen;
+
+private:
+
+    auto preprocess_features(const ColoredCloudData &cloud) -> std::tuple<std::shared_ptr<open3d::geometry::PointCloud>, std::shared_ptr<open3d::pipelines::registration::Feature>>;
+
+    std::shared_ptr<open3d::geometry::PointCloud> m_modelPcCloud = nullptr;
+    std::shared_ptr<open3d::pipelines::registration::Feature> m_modelFpfh = nullptr;
+    std::shared_ptr<open3d::geometry::PointCloud> m_sourcePcCloud = nullptr;
+    std::shared_ptr<open3d::pipelines::registration::Feature> m_sourceFpfh = nullptr;
+};
+
+// Selects the model cloud of grabber `idModel` (processed or raw calibration
+// cloud depending on settings.useProcessed), transforms it with `modelTr` and
+// computes its normals and FPFH features.
+// Returns false when the id is out of range or the cloud holds no point.
+auto K4Calibrator::Impl::set_model_cloud(size_t idModel, const K4Model &modelTr) -> bool{
+
+    m_modelPcCloud = nullptr;
+    m_modelFpfh    = nullptr;
+
+    if(idModel >= grabbersData.size()){
+        return false;
+    }
+
+    // retrieve model cloud
+    modelCloud = settings.useProcessed ?
+        grabbersData[idModel].processedCloud :
+        grabbersData[idModel].calibrationCloud;
+
+    // an empty cloud has no normals, feature extraction is not expected to cope with it
+    if(modelCloud.size() == 0){
+        return false;
+    }
+
+    // apply transformation
+    modelCloud.vertices.apply_transformation(modelTr.compute_full_transformation());
+
+    // process features
+    std::tie(m_modelPcCloud, m_modelFpfh) = preprocess_features(modelCloud);
+    return (m_modelPcCloud != nullptr) && m_modelPcCloud->HasPoints();
+}
+
+// Same as set_model_cloud, for the source cloud of grabber `idSource`.
+auto K4Calibrator::Impl::set_source_cloud(size_t idSource, const K4Model &sourceTr) -> bool{
+
+    m_sourcePcCloud = nullptr;
+    m_sourceFpfh    = nullptr;
+
+    if(idSource >= grabbersData.size()){
+        return false;
+    }
+
+    // retrieve source cloud
+    sourceCloud = settings.useProcessed ?
+        grabbersData[idSource].processedCloud :
+        grabbersData[idSource].calibrationCloud;
+
+    // an empty cloud has no normals, feature extraction is not expected to cope with it
+    if(sourceCloud.size() == 0){
+        return false;
+    }
+
+    // apply transformation
+    sourceCloud.vertices.apply_transformation(sourceTr.compute_full_transformation());
+
+    // process features
+    std::tie(m_sourcePcCloud, m_sourceFpfh) = preprocess_features(sourceCloud);
+    return (m_sourcePcCloud != nullptr) && m_sourcePcCloud->HasPoints();
+}
+
+// Builds the processed version of one frame cloud according to the settings:
+// optional outlier removal around the mean position, optional voxel down
+// sampling, optional reduction to a single point (mean position with z set to
+// min z + ballRay). Returns an empty cloud when the input has no vertices.
+auto K4Calibrator::Impl::process_cloud(const geo::ColoredCloudData &cloud) -> ColoredCloudData{
+
+    if(!cloud.has_vertices()){
+        return {};
+    }
+
+    // process cloud frame
+    auto processedCloud = cloud;
+    if(settings.removeOutliers){
+        processedCloud.remove_outliers(processedCloud.vertices.mean_position(), settings.maxDistanceOutlier);
+    }
+
+    if(settings.downSample){
+
+        const auto pcdDown = convert_to_opend3d_pc(processedCloud)->VoxelDownSample(settings.downSampleVoxelSize);
+        // only the vertices are rewritten from the down sampled cloud
+        processedCloud.resize(pcdDown->points_.size());
+        for(size_t ii = 0; ii < pcdDown->points_.size(); ++ii){
+            processedCloud.vertices[ii] = {
+                static_cast<float>(pcdDown->points_[ii].x()),
+                static_cast<float>(pcdDown->points_[ii].y()),
+                static_cast<float>(pcdDown->points_[ii].z())
+            };
+        }
+    }
+
+    // the previous steps may have left nothing: colors[0] below must exist
+    if(settings.computeSphereCenter && (processedCloud.size() != 0)){
+        auto mean = processedCloud.vertices.mean_position();
+        mean.z() = processedCloud.vertices.min_z() + settings.ballRay;
+        processedCloud = ColoredCloudData(mean, processedCloud.colors[0]);
+    }
+
+    return processedCloud;
+}
+
+
+// Converts `cloud` to an Open3D point cloud, estimates its normals and
+// computes its FPFH features using the radii / neighbour counts of the settings.
+auto K4Calibrator::Impl::preprocess_features(const ColoredCloudData &cloud) ->
+
+    std::tuple<std::shared_ptr<open3d::geometry::PointCloud>, std::shared_ptr<open3d::pipelines::registration::Feature>>{
+
+    // create point cloud
+    auto pcd = Impl::convert_to_opend3d_pc(cloud);
+
+    // normals
+    pcd->EstimateNormals(open3d::geometry::KDTreeSearchParamHybrid(settings.normalRadius, settings.normalNeighbours));
+
+    // compute features
+    auto pcd_fpfh = open3d::pipelines::registration::ComputeFPFHFeature(
+        *pcd,
+        open3d::geometry::KDTreeSearchParamHybrid(
+            settings.ransac.FPFHFeatureRadius,
+            settings.ransac.FPFHFeatureMaxNeghbours
+        )
+    );
+
+    return std::make_tuple(pcd, pcd_fpfh);
+}
+
+
+// Runs one feature-matching RANSAC registration of the current source onto the
+// current model, seeded with `seed`.
+auto K4Calibrator::Impl::do_RANSAC(unsigned int seed) const -> open3d::pipelines::registration::RegistrationResult{
+
+    // set registration parameters
+    auto correspondence_checker_edge_length = open3d::pipelines::registration::CorrespondenceCheckerBasedOnEdgeLength(settings.ransac.similaritiesThreshold);
+    auto correspondence_checker_distance    = open3d::pipelines::registration::CorrespondenceCheckerBasedOnDistance(settings.ransac.distanceThreshold);
+    std::vector<std::reference_wrapper<const open3d::pipelines::registration::CorrespondenceChecker>> correspondence_checker ={
+        correspondence_checker_edge_length,
+        correspondence_checker_distance
+    };
+
+    return open3d::pipelines::registration::RegistrationRANSACBasedOnFeatureMatching(
+        *m_sourcePcCloud, *m_modelPcCloud, *m_sourceFpfh, *m_modelFpfh,
+        settings.ransac.mutualFilter,
+        settings.ransac.maxDistanceCorr,
+        open3d::pipelines::registration::TransformationEstimationPointToPoint(false),
+        settings.ransac.N,
+        correspondence_checker,
+        open3d::pipelines::registration::RANSACConvergenceCriteria(settings.ransac.maxIteration, settings.ransac.confidence),
+        seed
+    );
+}
+
+// Runs a point-to-point ICP registration of the current source onto the
+// current model, starting from the identity transformation.
+auto K4Calibrator::Impl::do_ICP() const -> open3d::pipelines::registration::RegistrationResult{
+    return open3d::pipelines::registration::RegistrationICP(
+        *m_sourcePcCloud,
+        *m_modelPcCloud,
+        settings.icpMaxDistanceCorr,
+        Eigen::Matrix4d::Identity(),
+        open3d::pipelines::registration::TransformationEstimationPointToPoint(false),
+        open3d::pipelines::registration::ICPConvergenceCriteria(settings.icpRelativeFitness, settings.icpRelativeRmse, settings.icpMaxIteration)
+    );
+}
+
+// Picks the (registration, distance) pair with the lowest strictly positive
+// inlier RMSE; pairs with an almost equal RMSE are ranked by lowest distance.
+// When no pair has a positive RMSE the first one is returned; an empty input
+// yields a default registration result with a distance of 0.
+auto K4Calibrator::Impl::best_registration_results(const std::vector<std::tuple<open3d::pipelines::registration::RegistrationResult, double>> &results) -> std::tuple<open3d::pipelines::registration::RegistrationResult, double>{
+
+    if(results.empty()){
+        return std::make_tuple(open3d::pipelines::registration::RegistrationResult(), 0.0);
+    }
+
+    auto bestResult    = results[0];
+    double rmseMin     = 10000.0;
+    double distanceMin = 10000.0;
+
+    for(const auto &result : results){
+        // references: a registration result carries its correspondence set
+        const auto &regRes   = std::get<0>(result);
+        const auto distance  = std::get<1>(result);
+
+        if(regRes.inlier_rmse_ > 0.0){
+            if(almost_equal(regRes.inlier_rmse_, rmseMin)){
+                if(distance < distanceMin){
+                    distanceMin = distance;
+                    bestResult  = result;
+                }
+            }else if(regRes.inlier_rmse_ < rmseMin){
+                rmseMin     = regRes.inlier_rmse_;
+                distanceMin = distance;
+                bestResult  = result;
+            }
+        }
+    }
+    return bestResult;
+}
+
+// Left-multiplies `model` by the transformation found by the registration.
+auto K4Calibrator::Impl::compute_new_model(const open3d::pipelines::registration::RegistrationResult &result, const Mat4f &model) -> Mat4f{
+    return from_eigen_mat(result.transformation_) * model;
+}
+
+// Copies the vertices of `cloud` (positions only) into a new Open3D point cloud.
+auto K4Calibrator::Impl::convert_to_opend3d_pc(const ColoredCloudData &cloud) -> std::shared_ptr<open3d::geometry::PointCloud>{
+
+    auto pcd = std::make_shared<open3d::geometry::PointCloud>();
+    pcd->points_.reserve(cloud.size());
+    for(size_t ii = 0; ii < cloud.size(); ++ii){
+        pcd->points_.emplace_back(
+            static_cast<double>(cloud.vertices[ii].x()),
+            static_cast<double>(cloud.vertices[ii].y()),
+            static_cast<double>(cloud.vertices[ii].z())
+        );
+    }
+    return pcd;
+}
+
+K4Calibrator::K4Calibrator(): m_p(std::make_unique<Impl>()){}
+
+K4Calibrator::~K4Calibrator(){}
+
+// Sizes every per-grabber container for `nbGrabbers` grabbers.
+// Safe to call more than once: containers are resized, not appended to.
+auto K4Calibrator::initialize(size_t nbGrabbers) -> void{
+
+    m_p->grabbersData.resize(nbGrabbers);
+    for(size_t ii = 0; ii < m_p->grabbersData.size(); ++ii){
+        m_p->grabbersData[ii].id = ii;
+    }
+    m_calibrations.resize(nbGrabbers);
+    m_states.nbFramesRegistered.resize(nbGrabbers);
+}
+
+// Number of frames stored for grabber `idGrabber`, 0 for an unknown id.
+auto K4Calibrator::nb_frames_registered(size_t idGrabber) const noexcept -> size_t{
+    if(idGrabber >= m_p->grabbersData.size()){
+        return 0;
+    }
+    return m_p->grabbersData[idGrabber].frames.size();
+}
+
+// Total number of points of all the calibration clouds.
+auto K4Calibrator::size_all_calibration_cloud() const noexcept -> size_t{
+    size_t total = 0;
+    for(const auto &grabberData : m_p->grabbersData){
+        total += grabberData.calibrationCloud.size();
+    }
+    return total;
+}
+
+// Data of grabber `idGrabber`, nullptr for an unknown id.
+auto K4Calibrator::calibration_grabber_data(size_t idGrabber) const -> const K4CalibratorGrabberData*{
+    if(idGrabber < m_p->grabbersData.size()){
+        return &m_p->grabbersData[idGrabber];
+    }
+    return nullptr;
+}
+
+// Stores `frame` for grabber `idCloud` and merges its cloud (raw and processed)
+// into the grabber clouds. Ignored when not registering, during the initial
+// waiting delay, for a null/empty frame, an unknown grabber id or a capture
+// id identical to the previous stored frame.
+// NOTE(review): the pointee type was lost in extraction, K4Frame is a reconstruction - confirm.
+auto K4Calibrator::add_frame(size_t idCloud, std::shared_ptr<K4Frame> frame) -> void{
+
+    if(!m_states.isRegistering){
+        return;
+    }
+
+    if(m_states.elapsedTime.count() <= m_p->settings.timeToWaitBeforeRegisteringMs){
+        return;
+    }
+
+    if((frame == nullptr) || (idCloud >= m_p->grabbersData.size()) || (idCloud >= m_states.nbFramesRegistered.size())){
+        return;
+    }
+
+    // retrieve cloud
+    const ColoredCloudData &cloud = frame->cloud;
+    if(cloud.size() == 0){
+        return;
+    }
+
+    auto &grabberData = m_p->grabbersData[idCloud];
+    if(!grabberData.frames.empty()){
+        if(grabberData.frames.back()->idCapture == frame->idCapture){
+            Logger::error("Previous cloud already added.\n");
+            return;
+        }
+    }
+
+    m_states.nbFramesRegistered[idCloud]++;
+    grabberData.frames.push_back(frame);
+
+    add_to_calibration_cloud(idCloud, cloud);
+    add_to_proccessed_cloud(idCloud, m_p->process_cloud(cloud));
+    send_data_updated_signal();
+}
+
+// Rebuilds the calibration and processed clouds of every grabber from its
+// stored frames (to be called after a processing setting changed).
+auto K4Calibrator::process_all_frames() -> void{
+
+    for(auto &grabberData : m_p->grabbersData){
+        grabberData.calibrationCloud.clear();
+        grabberData.processedCloud.clear();
+
+        for(const auto &frame : grabberData.frames){
+            add_to_calibration_cloud(grabberData.id, frame->cloud);
+            add_to_proccessed_cloud(grabberData.id, m_p->process_cloud(frame->cloud));
+        }
+    }
+    send_data_updated_signal();
+}
+
+// Registers every selected source cloud onto the selected model cloud (several
+// RANSAC tries or one ICP, depending on the settings) and emits
+// new_calibration_signal for each calibrated source.
+// `models` holds the current transformation of each grabber.
+// Returns true when at least one source has been calibrated.
+auto K4Calibrator::calibrate(const std::vector<K4Model> &models) -> bool{
+
+    const auto &settings = m_p->settings;
+
+    // every per-grabber container is indexed below: keep ids inside all of them
+    const size_t nbGrabbers = std::min({
+        models.size(), m_p->grabbersData.size(), m_states.nbFramesRegistered.size(), m_calibrations.size()
+    });
+
+    if((settings.modelSelectionId < 0) || (static_cast<size_t>(settings.modelSelectionId) >= nbGrabbers)){
+        Logger::error(std::format("Invalid model id [{}], calibration aborted.\n", settings.modelSelectionId));
+        return false;
+    }
+
+    const size_t idModel = static_cast<size_t>(settings.modelSelectionId);
+    if(m_states.nbFramesRegistered[idModel] == 0){
+        Logger::error(std::format("No frames registered for model with id [{}], calibration aborted.\n",idModel));
+        return false;
+    }
+
+    std::vector<size_t> idSources;
+    if(static_cast<size_t>(settings.sourceSelectionId) == settings.models.size()){
+
+        // a source id equal to the number of entries means "all the sources"
+        const size_t nbCandidates = std::min(settings.models.size(), nbGrabbers);
+        for(size_t ii = 0; ii < nbCandidates; ++ii){
+            if(ii != idModel){
+                if(m_states.nbFramesRegistered[ii] == 0){
+                    Logger::warning(std::format("No frames registered for source with id [{}], source removed from calibration list.\n", ii));
+                    continue;
+                }
+                idSources.push_back(ii);
+            }
+        }
+    }else{
+
+        if((settings.sourceSelectionId < 0) || (static_cast<size_t>(settings.sourceSelectionId) >= nbGrabbers)){
+            Logger::error(std::format("Invalid source id [{}], calibration aborted.\n", settings.sourceSelectionId));
+            return false;
+        }
+
+        if(static_cast<size_t>(settings.sourceSelectionId) != idModel){
+            idSources.push_back(static_cast<size_t>(settings.sourceSelectionId));
+        }else{
+            Logger::error("Source must be different from model, calibration aborted.\n");
+            return false;
+        }
+    }
+
+    if(idSources.empty()){
+        Logger::error("No source set, calibration aborted.\n");
+        return false;
+    }
+
+    Logger::message(std::format("model {}\n", idModel));
+    for(const auto s : idSources){
+        Logger::message(std::format("source {}\n", s));
+    }
+
+    if(!m_p->set_model_cloud(idModel, models[idModel])){
+        Logger::error("Model cloud is empty, calibration aborted.\n");
+        return false;
+    }
+
+    Logger::message("Start calibration.\n");
+    for(auto &calibration : m_calibrations){
+        calibration = std::nullopt;
+    }
+
+    size_t nbCalibrated = 0;
+    for(const auto idSource : idSources){
+
+        if(idSource == idModel){
+            Logger::error("Id model and source are the same, source ignored.\n");
+            continue;
+        }
+
+        if(!m_p->set_source_cloud(idSource, models[idSource])){
+            // as stated by the message: skip this source, keep calibrating the others
+            Logger::error("Source cloud is empty, source ignored.\n");
+            continue;
+        }
+
+        // do registration
+        Logger::message(std::format("Register cloud [{}]\n",idSource));
+        std::vector<std::tuple<open3d::pipelines::registration::RegistrationResult, double>> results;
+        if(settings.doRansac){
+            // RANSAC
+            std::uniform_int_distribution<> dist(0, 100000);
+            for(int ii = 0; ii < settings.ransac.nbTries; ++ii){
+                auto result   = m_p->do_RANSAC(dist(m_p->gen));
+                auto resMat   = from_eigen_mat(result.transformation_);
+                auto distance = m_p->compute_charmfer_distance_between_with_current_source(resMat);
+                results.emplace_back(result, distance);
+                Logger::message(std::format("RANSAC [ID TRY:{}] [Fitness:{:.4f}] [RMSE{:.4f}] [Dist:{:.4f}] [Res:{}]\n", ii, result.fitness_, result.inlier_rmse_, distance, tool::to_string(resMat)));
+            }
+        }else{ // ICP
+            auto result   = m_p->do_ICP();
+            auto resMat   = from_eigen_mat(result.transformation_);
+            auto distance = m_p->compute_charmfer_distance_between_with_current_source(resMat);
+            results.emplace_back(result, distance);
+            Logger::message(std::format("ICP [Fitness:{:.4f}] [RMSE{:.4f}] [Dist:{:.4f}] [Res:{}]\n", result.fitness_, result.inlier_rmse_, distance, tool::to_string(resMat)));
+        }
+
+        // no try done (number of tries set to 0): nothing to choose from
+        if(results.empty()){
+            Logger::error(std::format("No registration result for cloud [{}], source ignored.\n", idSource));
+            continue;
+        }
+
+        // retrieve best calibration
+        const auto bestResult = Impl::best_registration_results(results);
+        Logger::message(std::format("Best [Fitness:{:.4f}] [RMSE{:.4f}] [Dist:{:.4f}] [Res:{}]\n", std::get<0>(bestResult).fitness_, std::get<0>(bestResult).inlier_rmse_, std::get<1>(bestResult), tool::to_string(from_eigen_mat(std::get<0>(bestResult).transformation_))));
+
+        // update models with new calibration
+        K4Model newM;
+        newM.transformation = models[idSource].compute_full_transformation() * from_eigen_mat(std::get<0>(bestResult).transformation_);
+
+        // send calibration
+        m_calibrations[idSource] = newM;
+        new_calibration_signal(idSource, m_calibrations[idSource].value());
+        ++nbCalibrated;
+
+        Logger::message(std::format("Calibration updated for cloud [{}]\n", idSource));
+    }
+    Logger::message("End calibration.\n");
+
+    return nbCalibrated > 0;
+}
+
+// Emits validated_calibration_signal for every grabber holding a calibration.
+auto K4Calibrator::validate_calibration() -> void{
+
+    for(size_t ii = 0; ii < m_calibrations.size(); ++ii){
+        if(m_calibrations[ii].has_value()){
+            validated_calibration_signal(ii, m_calibrations[ii].value());
+        }
+    }
+}
+
+// Replaces the current settings.
+auto K4Calibrator::update_settings(const K4CalibratorSettings &calibrationS) -> void{
+    m_p->settings = calibrationS;
+}
+
+// Emits data_updated_signal with the selected model/source ids and the grabbers data.
+auto K4Calibrator::send_data_updated_signal() -> void{
+    data_updated_signal(m_p->settings.modelSelectionId, m_p->settings.sourceSelectionId, &m_p->grabbersData);
+}
+
+// Starts (or resumes) the registering, unless the registering duration has
+// already been reached. The start time is shifted by the time already elapsed.
+auto K4Calibrator::start_registering() -> void{
+
+    if((m_states.startTime == std::chrono::milliseconds(0)) || (m_states.elapsedTime.count() < m_p->settings.durationMs)){
+        m_states.startTime     = tool::Time::milliseconds_since_epoch() - m_states.elapsedTime;
+        m_states.isRegistering = true;
+        states_updated_signal(m_states);
+    }
+}
+
+// Pauses the registering.
+auto K4Calibrator::stop_registering() -> void{
+    m_states.isRegistering = false;
+    states_updated_signal(m_states);
+}
+
+// Drops every stored frame, cloud and calibration and resets the timer.
+auto K4Calibrator::reset_registering() -> void{
+
+    // reset data
+    for(auto &grabberData : m_p->grabbersData){
+        grabberData.clean();
+    }
+    for(auto &calibration : m_calibrations){
+        calibration = std::nullopt;
+    }
+    send_data_updated_signal();
+
+    // reset states
+    m_states.startTime   = tool::Time::milliseconds_since_epoch();
+    m_states.elapsedTime = std::chrono::milliseconds(0);
+
+    std::fill(std::begin(m_states.nbFramesRegistered), std::end(m_states.nbFramesRegistered), 0);
+    states_updated_signal(m_states);
+}
+
+// Updates the elapsed time and stops the registering once the configured
+// duration or the maximum total calibration cloud size is exceeded.
+auto K4Calibrator::update_time() -> void{
+
+    // upper bound of the summed sizes of the calibration clouds
+    constexpr size_t maxCalibrationCloudSize = 5000000;
+
+    if(m_states.isRegistering){
+        m_states.elapsedTime = (tool::Time::milliseconds_since_epoch() - m_states.startTime);
+    }
+    if(m_states.elapsedTime.count() > m_p->settings.durationMs){
+        m_states.isRegistering = false;
+    }
+
+    if(m_states.isRegistering && (size_all_calibration_cloud() > maxCalibrationCloudSize)){
+        m_states.isRegistering = false;
+        Logger::message("Max calibration cloud size reached.\n");
+    }
+
+    states_updated_signal(m_states);
+}
+
+
+// Merges `cloud` into the calibration cloud of grabber `idCloud`.
+auto K4Calibrator::add_to_calibration_cloud(size_t idCloud, const geo::ColoredCloudData &cloud) -> void {
+    m_p->grabbersData[idCloud].calibrationCloud.merge(cloud);
+}
+
+// Merges `cloud` into the processed cloud of grabber `idCloud`.
+auto K4Calibrator::add_to_proccessed_cloud(size_t idCloud, const geo::ColoredCloudData &cloud) -> void{
+    m_p->grabbersData[idCloud].processedCloud.merge(cloud);
+}
+
diff --git a/cpp-projects/base/camera/kinect4/k4_calibrator.hpp b/cpp-projects/base/camera/kinect4/k4_calibrator.hpp
new file mode 100644
index 0000000..2d57384
--- /dev/null
+++ b/cpp-projects/base/camera/kinect4/k4_calibrator.hpp
@@ -0,0 +1,86 @@
+
+/*******************************************************************************
+** Toolset-k4-scaner-manager                                                  **
+** MIT License                                                                **
+** Copyright (c) [2018] [Florian Lance]                                       **
+**                                                                            **
+** Permission is hereby granted, free of charge, to any person obtaining a    **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation  **
+** the rights to use, copy, modify, merge, publish, distribute, sublicense,   **
+** and/or sell copies of the Software, and to permit persons to whom the      **
+** Software is furnished to do so, subject to the following conditions:       **
+**                                                                            **
+** The above copyright
notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software.                        **
+**                                                                            **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING    **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER        **
+** DEALINGS IN THE SOFTWARE.                                                  **
+**                                                                            **
+********************************************************************************/
+
+#pragma once
+
+// std
+#include <cstddef>
+#include <memory>
+#include <optional>
+#include <vector>
+
+// local
+#include "thirdparty/sigslot/signal.hpp"
+#include "camera/kinect4/k4_model.hpp"
+#include "camera/kinect4/k4_calibrator_settings.hpp"
+
+namespace tool::camera {
+
+// Accumulates the clouds of several grabbers during a registering session and
+// computes, for each source grabber, the transformation aligning its cloud on
+// the cloud of a model grabber. Results and state changes are reported through signals.
+class K4Calibrator{
+public:
+
+    K4Calibrator();
+    // defined in the .cpp, where Impl is a complete type
+    ~K4Calibrator();
+
+    // sizes the per-grabber containers, must be called before any other function
+    auto initialize(size_t nbGrabbers) -> void;
+
+    // getters
+    auto nb_frames_registered(size_t idGrabber) const noexcept -> size_t;
+    auto size_all_calibration_cloud() const noexcept -> size_t;
+    // returns nullptr when idGrabber is not a valid grabber id
+    auto calibration_grabber_data(size_t idGrabber) const -> const K4CalibratorGrabberData*;
+
+    // registration
+    auto is_registering() const -> bool {return m_states.isRegistering;}
+    auto update_time() -> void;
+    auto start_registering() -> void;
+    auto stop_registering() -> void;
+    auto reset_registering() -> void;
+
+    // frames
+    // NOTE(review): the pointee type was lost in extraction, K4Frame is a reconstruction - confirm.
+    auto add_frame(size_t idCloud, std::shared_ptr<K4Frame> frame) -> void;
+    auto process_all_frames() -> void;
+
+    // calibrate
+    // models: current transformation of each grabber, indexed by grabber id
+    auto calibrate(const std::vector<K4Model> &models) -> bool;
+    auto validate_calibration() -> void;
+
+    // settings
+    auto update_settings(const K4CalibratorSettings &calibrationS) -> void;
+
+    // signals
+    // NOTE(review): signal argument lists were lost in extraction and are
+    // reconstructed from the arguments passed at the emission sites - confirm.
+    auto send_data_updated_signal() -> void;
+    sigslot::signal<K4CalibratorStates> states_updated_signal;
+    sigslot::signal<size_t, K4Model> new_calibration_signal;
+    sigslot::signal<size_t, K4Model> validated_calibration_signal;
+    sigslot::signal<int, int, std::vector<K4CalibratorGrabberData>*> data_updated_signal;
+
+private :
+
+    auto add_to_calibration_cloud(size_t idCloud, const geo::ColoredCloudData &cloud) -> void;
+    auto add_to_proccessed_cloud(size_t idCloud, const geo::ColoredCloudData &cloud) -> void;
+
+    K4CalibratorStates m_states;
+    // one entry per grabber, set once a calibration has been computed for it
+    std::vector<std::optional<K4Model>> m_calibrations;
+
+    struct Impl;
+    std::unique_ptr<Impl> m_p;
+};
+
+}
diff --git a/cpp-projects/base/camera/kinect4/k4_calibrator_settings.hpp b/cpp-projects/base/camera/kinect4/k4_calibrator_settings.hpp
new file mode 100644
index 0000000..b519822
--- /dev/null
+++ b/cpp-projects/base/camera/kinect4/k4_calibrator_settings.hpp
@@ -0,0 +1,154 @@
+
+/*******************************************************************************
+** Toolset-base                                                               **
+** MIT License                                                                **
+** Copyright (c) [2018] [Florian Lance]                                       **
+**                                                                            **
+** Permission is hereby granted, free of charge, to any person obtaining a    **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation  **
+** the rights to use, copy, modify, merge, publish, distribute, sublicense,   **
+** and/or sell copies of the Software, and to permit persons to whom the      **
+** Software is furnished to do so, subject to the following conditions:       **
+**                                                                            **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software.                        **
+**                                                                            **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING    **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER        **
+** DEALINGS IN THE SOFTWARE.
**
+**                                                                            **
+********************************************************************************/
+
+#pragma once
+
+// std
+// NOTE(review): the three include targets below were lost in extraction
+// (this header uses std::chrono, std::int64_t and std::vector) - restore from the repo.
+#include
+#include
+#include
+
+// local
+#include "camera/kinect4/k4_frame.hpp"
+
+namespace tool::camera {
+
+// Parameters of the feature-matching RANSAC registration.
+struct RANSACSettings{
+
+    int nbTries = 100; // number of RANSAC registrations run per source, each with its own seed
+    double confidence = 0.999; // Desired probability of success. Used for estimating early termination.
+    int N = 3; // ransac with `ransac_n` correspondences.
+    int maxIteration = 100000; // Maximum iteration before iteration stops.
+    double maxDistanceCorr = 0.075; // Maximum correspondence points-pair distance
+    double similaritiesThreshold = 0.9; // specify the threshold within which 2 arbitrary edges are similar
+    double distanceThreshold = 0.075; // Distance threshold for the check.
+    bool mutualFilter = true; // Enables mutual filter such that the correspondence of the source point's correspondence is itself.
+
+    double FPFHFeatureRadius = 0.25; // Specifies the radius of the search.
+    int FPFHFeatureMaxNeghbours = 100; // Specifies the max neighbors to be searched.
+};
+
+// Runtime state of the calibrator: registering timer, number of frames stored
+// per grabber and one-shot action flags.
+struct K4CalibratorStates{
+
+    // # time
+    bool isRegistering = false;
+    std::chrono::milliseconds startTime = std::chrono::milliseconds(0);
+    std::chrono::milliseconds elapsedTime = std::chrono::milliseconds(0);
+    // one counter per grabber
+    // NOTE(review): the element type was lost in extraction - restore from the repo.
+    std::vector nbFramesRegistered;
+
+    // # actions
+    bool recomputeRegisteringProcessing = false;
+    bool resetCalibration = false;
+    bool startCalibration = false;
+    bool stopCalibration = false;
+    bool calibrate = false;
+    bool validateCalibration = false;
+    bool updateDisplaySettings = false;
+    bool updateFilteringMode = false;
+    int filteringMode = 0;
+
+    // Clears every action flag (filteringMode and the time fields are left untouched).
+    auto reset_actions() -> void{
+        recomputeRegisteringProcessing = false;
+        resetCalibration = false;
+        startCalibration = false;
+        stopCalibration = false;
+        calibrate = false;
+        validateCalibration = false;
+        updateDisplaySettings = false;
+        updateFilteringMode = false;
+    }
+};
+
+// User settings of the calibrator: grabbers selection, registering timing,
+// cloud processing and registration parameters.
+struct K4CalibratorSettings{
+
+    // # grabbers to use
+    // NOTE(review): the element types of the two vectors were lost in extraction - restore from the repo.
+    std::vector models;
+    std::vector sources;
+    int modelSelectionId = 0;
+    int sourceSelectionId = 0; // a value equal to models.size() selects all the sources
+
+    // # registering
+    std::int64_t durationMs = 5000; // registering stops once the elapsed time exceeds this duration
+    std::int64_t timeToWaitBeforeRegisteringMs = 0; // frames received before this delay are ignored
+    int maxFramesToRecord = 1000;
+
+    // # processing
+    bool useProcessed = true; // calibrate with the processed clouds instead of the raw calibration clouds
+    bool removeOutliers = true;
+    bool computeSphereCenter = true;
+    bool downSample = false;
+    float maxDistanceOutlier = 0.15f; // distance from the mean position used by the outliers removal
+    double downSampleVoxelSize = 0.05;
+    float ballRay = 0.025f; // offset added to the min z of the cloud when computing the sphere center
+
+    // # calibration
+    bool gridSearch = false; // ignore calibration parameters and use a grid search for fitting them
+    bool advancedParameters = false;
+    bool doRansac = true; // if true uses RANSAC, else ICP
+    // ## normal
+    double normalRadius = 0.1; // radius of the search
+    int normalNeighbours = 30; // max neighbours to be searched
+    // ## ransac
+    RANSACSettings ransac;
+    // ## icp
+    double icpRelativeFitness = 0.000001; // If relative change (difference) of fitness score is lower than relative_fitness, the iteration stops.
+    double icpRelativeRmse = 0.000001; // If relative change (difference) of inlier RMSE score is lower than relative_rmse, the iteration stops.
+    double icpMaxDistanceCorr = 0.075; // max_correspondence_distance Maximum correspondence points-pair distance
+    int icpMaxIteration = 30; // Maximum iteration before iteration stops.
+};
+
+
+// Data accumulated for one grabber during a registering session.
+struct K4CalibratorGrabberData{
+
+    size_t id; // grabber index, set by the calibrator at initialization
+    // NOTE(review): the element type was lost in extraction (shared pointers on frames) - restore from the repo.
+    std::vector> frames;
+    geo::ColoredCloudData calibrationCloud; // merge of the raw clouds of the stored frames
+    geo::ColoredCloudData processedCloud; // merge of the processed clouds of the stored frames
+
+    // Drops the stored frames and empties both clouds (id is kept).
+    auto clean() -> void{
+        frames.clear();
+        calibrationCloud.clear();
+        processedCloud.clear();
+    }
+};
+
+
+}
+
+namespace tool::graphics{
+
+// Display toggles for the calibration clouds.
+struct K4CalibratorDrawerSettings{
+//    enum class Mode : int{
+//        Normal = 0,
+//        Calibration
+//    };
+
+//    Mode displayMode = Mode::Normal;
+    bool displayCalibrationCloud = true;
+    bool displayProcessedCloud = true;
+};
+
+}
diff --git a/cpp-projects/base/camera/kinect4/k4_color_settings.cpp b/cpp-projects/base/camera/kinect4/k4_color_settings.cpp
new file mode 100644
index 0000000..6fed25c
--- /dev/null
+++ b/cpp-projects/base/camera/kinect4/k4_color_settings.cpp
@@ -0,0 +1,65 @@
+
+
+/*******************************************************************************
+** Toolset-base                                                               **
+** MIT License                                                                **
+** Copyright (c) [2018] [Florian Lance]                                       **
+**                                                                            **
+** Permission is hereby granted, free of charge, to any person obtaining a    **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation  **
+** the rights to use, copy, modify, merge, publish, distribute, sublicense,   **
+** and/or sell copies of the Software, and to permit persons to whom the      **
+** Software is furnished to do so, subject to the following conditions:       **
+**                                                                            **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software.
**
+**                                                                            **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING    **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER        **
+** DEALINGS IN THE SOFTWARE.                                                  **
+**                                                                            **
+********************************************************************************/
+
+#include "k4_color_settings.hpp"
+
+// local
+#include "utility/io_data.hpp"
+
+using namespace tool::camera;
+
+// Loads every setting from the binary buffer `data`.
+// Fields are read in the exact order used by convert_to_data; the buffer must
+// hold at least total_data_size() bytes. A null buffer leaves the settings untouched.
+auto K4ColorSettings::init_from_data(std::int8_t *data) -> void {
+
+    if(data == nullptr){
+        return;
+    }
+
+    size_t offset = 0;
+    read(autoExposureTime, data, offset);
+    // the two trailing arguments are the documented bounds of the field
+    // (presumably used by read to clamp the value - confirm in io_data.hpp)
+    read(exposureTimeAbsolute, data, offset, 0, 5);
+    read(brightness, data, offset);
+    read(contrast, data, offset, 0, 10);
+    read(saturation, data, offset, 0, 63);
+    read(sharpness, data, offset, 0, 4);
+    read(autoWhiteBalance, data, offset);
+    read(whiteBalance, data, offset, 2500, 12500);
+    read(backlightCompensation, data, offset);
+    read(gain, data, offset);
+
+    // the powerline frequency is stored as one signed byte
+    std::int8_t plf = 0;
+    read(plf, data, offset);
+    powerlineFrequency = static_cast<K4PowerlineFrequency>(plf);
+}
+
+// Writes every setting to the binary buffer `data`, in the order expected by
+// init_from_data. The buffer must hold at least total_data_size() bytes.
+// A null buffer is ignored.
+auto K4ColorSettings::convert_to_data(std::int8_t *data) const -> void{
+
+    if(data == nullptr){
+        return;
+    }
+
+    size_t offset = 0;
+    write(autoExposureTime, data, offset);
+    write(exposureTimeAbsolute, data, offset);
+    write(brightness, data, offset);
+    write(contrast, data, offset);
+    write(saturation, data, offset);
+    write(sharpness, data, offset);
+    write(autoWhiteBalance, data, offset);
+    write(whiteBalance, data, offset);
+    write(backlightCompensation, data, offset);
+    write(gain, data, offset);
+    // the powerline frequency is stored as one signed byte
+    write(static_cast<std::int8_t>(powerlineFrequency), data, offset);
+}
diff --git a/cpp-projects/base/camera/kinect4/k4_color_settings.hpp b/cpp-projects/base/camera/kinect4/k4_color_settings.hpp
new file mode 100644
index
0000000..15fb018 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_color_settings.hpp @@ -0,0 +1,66 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
**
+**                                                                            **
+********************************************************************************/
+
+#pragma once
+
+// local
+#include "k4_types.hpp"
+#include "files/binary_settings.hpp"
+
+namespace tool::camera {
+
+// Color camera settings, serializable to / from a flat binary buffer of
+// total_data_size() bytes.
+struct K4ColorSettings : files::BinaryFileSettings{
+
+    std::uint16_t whiteBalance = 4500; // 2500 - 12500
+    std::uint8_t brightness = 128; // 0 - 255
+    std::uint8_t contrast = 5; // 0 - 10
+    std::uint8_t saturation = 32; // 0 - 63
+    std::uint8_t sharpness = 4; // 0 - 4
+    std::uint8_t gain = 128; // 0 - 255
+    std::int8_t exposureTimeAbsolute = 5; // 0 - 5
+    K4PowerlineFrequency powerlineFrequency = K4PowerlineFrequency::F60; // serialized as one signed byte
+
+    bool backlightCompensation = false;
+    bool autoExposureTime = true;
+    bool autoWhiteBalance = true;
+
+    K4ColorSettings() = default;
+    // builds the settings from a buffer written by convert_to_data
+    // (qualified call: no virtual dispatch from a constructor)
+    K4ColorSettings(std::int8_t *data){K4ColorSettings::init_from_data(data);}
+
+    // i/o
+    auto init_from_data(std::int8_t *data) -> void override;
+    auto convert_to_data(std::int8_t *data) const -> void override;
+    // size in bytes of the serialized settings: one entry per member above,
+    // the powerline frequency being counted with the std::int8_t fields
+    auto total_data_size() const noexcept -> size_t override{
+        return
+            sizeof(std::uint16_t)*1+
+            sizeof(std::uint8_t)*5+
+            sizeof(std::int8_t)*2+
+            sizeof(bool)*3;
+    }
+    auto type() const noexcept -> std::int32_t override {return static_cast<std::int32_t>(SettingsType::Color);}
+    auto file_description() const noexcept -> std::string_view override {return settings_name(static_cast<SettingsType>(type()));}
+};
+}
diff --git a/cpp-projects/base/camera/kinect4/k4_compressed_frame.cpp b/cpp-projects/base/camera/kinect4/k4_compressed_frame.cpp
new file mode 100644
index 0000000..5ada7df
--- /dev/null
+++ b/cpp-projects/base/camera/kinect4/k4_compressed_frame.cpp
@@ -0,0 +1,495 @@
+
+
+/*******************************************************************************
+** Toolset-base                                                               **
+** MIT License                                                                **
+** Copyright (c) [2018] [Florian Lance]                                       **
+**                                                                            **
+** Permission is hereby granted, free of charge, to any person obtaining a    **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal
in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "k4_compressed_frame.hpp" + +// local +#include "utility/io_data.hpp" +#include "utility/io_fstream.hpp" + +using namespace tool::camera; + +auto K4CompressedFrame::init_from_file_stream(std::ifstream &file) -> void{ + + size_t encColorDataSize = 0; + size_t encDepthDataSize = 0; + size_t encInfraDataSize = 0; + size_t encCloudVertexDataSize = 0; + size_t encCloudColorDataSize = 0; + size_t nbAudioFrames = 0; + + // init + // # infos + read(idCapture, file); + read(afterCaptureTS, file); + read(mode, file); + + // # color + read(colorWidth, file); + read(colorHeight, file); + read(encColorDataSize, file); + if(encColorDataSize > 0){ + encodedColorData.resize(encColorDataSize); + read_array(encodedColorData.data(), file, encColorDataSize); + } + // # depth + read(depthWidth, file); + read(depthHeight, file); + read(encDepthDataSize, file); + if(encDepthDataSize > 0){ + encodedDepthData.resize(encDepthDataSize); + 
read_array(encodedDepthData.data(), file, encDepthDataSize); + } + // # infra + read(infraWidth, file); + read(infraHeight, file); + read(encInfraDataSize, file); + if(encInfraDataSize > 0){ + encodedInfraData.resize(encInfraDataSize); + read_array(encodedInfraData.data(), file, encInfraDataSize); + } + + // # cloud + // ## vertex + read(validVerticesCount, file); + read(encCloudVertexDataSize, file); + if(encCloudVertexDataSize > 0){ + encodedCloudVerticesData.resize(encCloudVertexDataSize); + read_array(encodedCloudVerticesData.data(), file, encCloudVertexDataSize); + } + // ## color + read(cloudColorWidth, file); + read(cloudColorHeight, file); + read(encCloudColorDataSize, file); + if(encCloudColorDataSize > 0){ + encodedCloudColorData.resize(encCloudColorDataSize); + read_array(encodedCloudColorData.data(), file, encCloudColorDataSize); + } + + bool hasCalibration = false; + read(hasCalibration, file); + if(hasCalibration){ + k4a_calibration_t rCalibration; + read(rCalibration, file); + calibration = rCalibration; + } + + // # imu + bool hasIMU = false; + read(hasIMU, file); + if(hasIMU){ + K4ImuSample rImuSample; + read(rImuSample, file); + imuSample = rImuSample; + } + + // # audio + read(nbAudioFrames, file); + if(nbAudioFrames > 0){ + audioFrames.resize(nbAudioFrames); + read_array(audioFrames.data()->data(), file, nbAudioFrames*7); + } + + // # bodies + bool hasBodies = false; + read(hasBodies, file); + if(hasBodies){ + // TODO + } + + // extra + +} + +auto K4CompressedFrame::write_to_file_stream(std::ofstream &file) -> void{ + + size_t encColorDataSize = encodedColorData.size(); + size_t encDepthDataSize = encodedDepthData.size(); + size_t encInfraDataSize = encodedInfraData.size(); + size_t encCloudVertexDataSize = encodedCloudVerticesData.size(); + size_t encCloudColorDataSize = encodedCloudColorData.size(); + size_t nbAudioFrames = audioFrames.size(); + + // write + // # infos + write(idCapture, file); + write(afterCaptureTS, file); + write(mode, 
file); + + // # color + write(colorWidth, file); + write(colorHeight, file); + write(encColorDataSize, file); + if(encColorDataSize > 0){ + write_array(encodedColorData.data(), file, encColorDataSize); + } + + // # depth + write(depthWidth, file); + write(depthHeight, file); + write(encDepthDataSize, file); + if(encDepthDataSize > 0){ + write_array(encodedDepthData.data(), file, encDepthDataSize); + } + + // # infra + write(infraWidth, file); + write(infraHeight, file); + write(encInfraDataSize, file); + if(encInfraDataSize > 0){ + write_array(encodedInfraData.data(), file, encInfraDataSize); + } + + // # cloud + // ## vertex + write(validVerticesCount, file); + write(encCloudVertexDataSize, file); + if(encCloudVertexDataSize > 0){ + write_array(encodedCloudVerticesData.data(), file, encCloudVertexDataSize); + } + + // ## color + write(cloudColorWidth, file); + write(cloudColorHeight, file); + write(encCloudColorDataSize, file); + if(encCloudColorDataSize > 0){ + write_array(encodedCloudColorData.data(), file, encCloudColorDataSize); + } + + // ## calibration + write(calibration.has_value(), file); + if(calibration.has_value()){ + write(calibration.value(), file); + } + + // # imu + write(imuSample.has_value(), file); + if(imuSample.has_value()){ + write(imuSample.value(), file); + } + + // # audio + write(nbAudioFrames, file); + if(nbAudioFrames > 0){ + write_array(audioFrames.data()->data(), file, nbAudioFrames*7); + } + + // # bodies + bool hasBodies = false; + write(hasBodies, file); + // TODO +} + +#include +auto K4CompressedFrame::init_legacy_cloud_frame_file_stream(std::ifstream &file) -> void{ + + // # read info + read(idCapture, file); + read(afterCaptureTS, file); + + // # read cloud + std::int32_t validVerticesC; + read(validVerticesC, file); + validVerticesCount = validVerticesC; + std::int32_t cloudBufferSize; + read(cloudBufferSize, file); + encodedCloudVerticesData.resize(cloudBufferSize); + if(!encodedCloudVerticesData.empty()){ + 
read_array(encodedCloudVerticesData.data(), file, encodedCloudVerticesData.size()); + } + + // # read color + std::int16_t colorW, colorH; + std::int32_t colorBufferSize; + read(colorW, file); + read(colorH, file); + read(colorBufferSize, file); + colorWidth = colorW; + colorHeight = colorH; +// std::cout << colorBufferSize << " " << colorWidth << " " << colorHeight << " " << colorWidth*colorHeight << "\n"; +// encodedColorData.resize(colorBufferSize); + encodedCloudColorData.resize(colorBufferSize); + if(!encodedCloudColorData.empty()){ + read_array(encodedCloudColorData.data(), file, encodedCloudColorData.size()); +// read_array(encodedColorData.data(), file, encodedColorData.size()); + } + + // # read audio + std::int32_t audioBufferSize; + read(audioBufferSize, file); + + audioFrames.resize(audioBufferSize); + if(!audioFrames.empty()){ + read_array(reinterpret_cast(audioFrames.data()), file, audioBufferSize*7); + } + // # read imu + read_array(file, reinterpret_cast(&imuSample), sizeof (K4ImuSample)); + +} + +auto K4CompressedFrame::init_legacy_full_frame_file_stream(std::ifstream &file) -> void{ + + // # read info + read(afterCaptureTS, file); + read(mode, file); + + read(calibration, file); + //read_array(reinterpret_cast(&calibration), file, sizeof (k4a_calibration_t)); + std::int32_t validVerticesC; + read(validVerticesC, file); + validVerticesCount = validVerticesC; + // # read color + std::int16_t colorW, colorH; + std::int32_t colorBufferSize; + read(colorW, file); + read(colorH, file); + read(colorBufferSize, file); + colorWidth = colorW; + colorHeight = colorH; + encodedColorData.resize(colorBufferSize); + if(!encodedColorData.empty()){ + read_array(encodedColorData.data(), file, encodedColorData.size()); + } + // # read depth + std::int16_t depthW, depthH; + std::int32_t depthBufferSize; + read(depthW, file); + read(depthH, file); + read(depthBufferSize, file); + depthWidth = depthW; + depthHeight = depthH; + encodedDepthData.resize(depthBufferSize); 
+ if(!encodedDepthData.empty()){ + read_array(encodedDepthData.data(), file, encodedDepthData.size()); + } + // # read infra + std::int16_t infraW, infraH; + std::int32_t infraBufferSize; + read(infraW, file); + read(infraH, file); + read(infraBufferSize, file); + infraWidth = infraW; + infraHeight = infraH; + encodedInfraData.resize(infraBufferSize); + if(!encodedInfraData.empty()){ + read_array(encodedInfraData.data(), file, encodedInfraData.size()); + } + + // # read audio + std::int32_t audioBufferSize; + read(audioBufferSize, file); + audioFrames.resize(audioBufferSize); + if(!audioFrames.empty()){ + read_array(reinterpret_cast(audioFrames.data()), file, audioBufferSize*7); + } + // # read imu + read_array(file, reinterpret_cast(&imuSample), sizeof (K4ImuSample)); +} + +auto K4CompressedFrame::init_from_data(int8_t *data) -> void{ + + size_t offset = 0; + size_t encColorDataSize = 0; + size_t encDepthDataSize = 0; + size_t encInfraDataSize = 0; + size_t encCloudVertexDataSize = 0; + size_t encCloudColorDataSize = 0; + size_t nbAudioFrames = 0; + + // init + // # infos + read(idCapture, data, offset); + read(afterCaptureTS, data, offset); + read(mode, data, offset); + + // # color + read(colorWidth, data, offset); + read(colorHeight, data, offset); + read(encColorDataSize, data, offset); + if(encColorDataSize > 0){ + encodedColorData.resize(encColorDataSize); + read_array(encodedColorData.data(), data, encColorDataSize, offset); + } + + // # depth + read(depthWidth, data, offset); + read(depthHeight, data, offset); + read(encDepthDataSize, data, offset); + if(encDepthDataSize > 0){ + encodedDepthData.resize(encDepthDataSize); + read_array(encodedDepthData.data(), data, encDepthDataSize, offset); + } + // # infra + read(infraWidth, data, offset); + read(infraHeight, data, offset); + read(encInfraDataSize, data, offset); + if(encInfraDataSize > 0){ + encodedInfraData.resize(encInfraDataSize); + read_array(encodedInfraData.data(), data, encInfraDataSize, offset); 
+ } + + // # cloud + // ## vertex + read(validVerticesCount, data, offset); + read(encCloudVertexDataSize, data, offset); + if(encCloudVertexDataSize > 0){ + encodedCloudVerticesData.resize(encCloudVertexDataSize); + read_array(encodedCloudVerticesData.data(), data, encCloudVertexDataSize, offset); + } + // ## color + read(cloudColorWidth, data, offset); + read(cloudColorHeight, data, offset); + read(encCloudColorDataSize, data, offset); + if(encCloudColorDataSize > 0){ + encodedCloudColorData.resize(encCloudColorDataSize); + read_array(encodedCloudColorData.data(), data, encCloudColorDataSize, offset); + } + + // # calibration + bool hasCalibration = false; + read(hasCalibration, data, offset); + if(hasCalibration){ + k4a_calibration_t rCalibration; + read(rCalibration, data, offset); + calibration = rCalibration; + } + + // # imu + bool hasIMU = false; + read(hasIMU, data, offset); + if(hasIMU){ + K4ImuSample rImuSample; + read(rImuSample, data, offset); + imuSample = rImuSample; + } + + // # audio + read(nbAudioFrames, data, offset); + if(nbAudioFrames > 0){ + audioFrames.resize(nbAudioFrames); + read_array(audioFrames.data()->data(), data, nbAudioFrames*7, offset); + } + + // # bodies + bool hasBodies = false; + read(hasBodies, data, offset); + if(hasBodies){ + // TODO + } +} + + +auto K4CompressedFrame::convert_to_data(std::vector &data) -> size_t{ + + size_t encColorDataSize = encodedColorData.size(); + size_t encDepthDataSize = encodedDepthData.size(); + size_t encInfraDataSize = encodedInfraData.size(); + size_t encCloudVertexDataSize = encodedCloudVerticesData.size(); + size_t encCloudColorDataSize = encodedCloudColorData.size(); + size_t nbAudioFrames = audioFrames.size(); + size_t totalDataSize = total_data_size(); + + size_t offset = 0; + if(data.size() < totalDataSize){ + data.resize(totalDataSize); + } + + auto dataP = data.data(); + + // write + // # infos + write(idCapture, dataP, offset); + write(afterCaptureTS, dataP, offset); + write(mode, dataP, 
offset); + + // # color + write(colorWidth, dataP, offset); + write(colorHeight, dataP, offset); + write(encColorDataSize, dataP, offset); + if(encColorDataSize > 0){ + write_array(encodedColorData.data(), dataP, encColorDataSize, offset); + } + + // # depth + write(depthWidth, dataP, offset); + write(depthHeight, dataP, offset); + write(encDepthDataSize, dataP, offset); + if(encDepthDataSize > 0){ + write_array(encodedDepthData.data(), dataP, encDepthDataSize, offset); + } + + // # infra + write(infraWidth, dataP, offset); + write(infraHeight, dataP, offset); + write(encInfraDataSize, dataP, offset); + if(encInfraDataSize > 0){ + write_array(encodedInfraData.data(), dataP, encInfraDataSize, offset); + } + + // # cloud + // ## vertex + write(validVerticesCount, dataP, offset); + write(encCloudVertexDataSize, dataP, offset); + if(encCloudVertexDataSize > 0){ + write_array(encodedCloudVerticesData.data(), dataP, encCloudVertexDataSize, offset); + } + // ## color + write(cloudColorWidth, dataP, offset); + write(cloudColorHeight, dataP, offset); + write(encCloudColorDataSize, dataP, offset); + if(encCloudColorDataSize > 0){ + write_array(encodedCloudColorData.data(), dataP, encCloudColorDataSize, offset); + } + + // # calibration + write(calibration.has_value(), dataP, offset); + if(calibration.has_value()){ + write(calibration.value(), dataP, offset); + } + + // # imu + write(imuSample.has_value(), dataP, offset); + if(imuSample.has_value()){ + write(imuSample.value(), dataP, offset); + } + + // # audio + write(nbAudioFrames, dataP, offset); + if(nbAudioFrames > 0){ + write_array(audioFrames.data()->data(), dataP, nbAudioFrames*7, offset); + } + + // # bodies + write(false, dataP, offset); + // TODO + + + + return totalDataSize; +} + + diff --git a/cpp-projects/base/camera/kinect4/k4_compressed_frame.hpp b/cpp-projects/base/camera/kinect4/k4_compressed_frame.hpp new file mode 100644 index 0000000..33c7df5 --- /dev/null +++ 
b/cpp-projects/base/camera/kinect4/k4_compressed_frame.hpp @@ -0,0 +1,135 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "camera/frame.hpp" +#include "k4_types.hpp" + +namespace tool::camera{ + + +struct K4CompressedFrame : Frame{ + + K4Mode mode; + + // color + size_t colorWidth = 0; + size_t colorHeight = 0; + std::vector encodedColorData; + + // depth + size_t depthWidth = 0; + size_t depthHeight = 0; + std::vector encodedDepthData; + + // infrared + size_t infraWidth = 0; + size_t infraHeight = 0; + std::vector encodedInfraData; + + // cloud + // # vertices + size_t validVerticesCount = 0; + std::vector encodedCloudVerticesData; + // # colors + size_t cloudColorWidth = 0; + size_t cloudColorHeight = 0; + std::vector encodedCloudColorData; + // # calibration + std::optional calibration = std::nullopt; + + // imu + std::optional imuSample = std::nullopt; + + // audio + std::vector> audioFrames; + + // sizes + auto infos_size() const noexcept{ + return + sizeof(std::int32_t) + // id capture + sizeof(std::int64_t) + // after capture TS + sizeof(K4Mode); + } + + auto color_size() const noexcept -> size_t{ + return encodedColorData.size() + + sizeof(size_t) * 3; // colorWidth, colorHeight, encoded size + } + auto depth_size() const noexcept -> size_t{ + return encodedDepthData.size() + + sizeof(size_t) * 3; // depthWidth, depthHeight, encoded size + } + auto infra_size() const noexcept -> size_t{ + return encodedInfraData.size() + + sizeof(size_t) * 3; // infraWidth, infraHeight, encoded size + } + + auto cloud_vertices_size() const noexcept -> size_t{ + return encodedCloudVerticesData.size() + + sizeof(size_t)*2; // validVerticesCount, encoded size + } + auto cloud_color_size() const noexcept -> size_t{ + return encodedCloudColorData.size() + + sizeof(size_t) * 3; // cloudColorWidth, cloudColorHeight, encoded size + } + auto calibration_size() const noexcept -> size_t{ + return (calibration.has_value() ? 
sizeof(k4a_calibration_t) : 0) + + sizeof(bool); // has calibration + } + + auto imu_sample_size() const noexcept -> size_t{ + return (imuSample.has_value() ? sizeof(K4ImuSample) : 0) + + sizeof(bool); // has IMU + } + auto audio_size() const noexcept -> size_t{ + return audioFrames.size()*7*sizeof(float) + + sizeof(size_t); // nb audio frames + } + auto bodies_size() const noexcept -> size_t{ + return sizeof(bool); // has body // TODO + } + + auto total_data_size() const -> size_t{ + return + infos_size() + + color_size() + cloud_color_size() + depth_size() + infra_size() + cloud_vertices_size() + + calibration_size() + imu_sample_size() + audio_size() + bodies_size(); + } + + // file stream + auto init_from_file_stream(std::ifstream &file) -> void; + auto write_to_file_stream(std::ofstream &file) -> void; + // # legacy + auto init_legacy_cloud_frame_file_stream(std::ifstream &file) -> void; + auto init_legacy_full_frame_file_stream(std::ifstream &file) -> void; + // data + auto init_from_data(std::int8_t *data) -> void; + auto convert_to_data(std::vector &data) -> size_t; +}; +} diff --git a/cpp-projects/base/camera/kinect4/k4_config_settings.cpp b/cpp-projects/base/camera/kinect4/k4_config_settings.cpp new file mode 100644 index 0000000..1fd1fe6 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_config_settings.cpp @@ -0,0 +1,73 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following 
conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "k4_config_settings.hpp" + +// local +#include "utility/io_data.hpp" + + +using namespace tool::camera; + +auto K4ConfigSettings::default_init_for_grabber() -> K4ConfigSettings{ + K4ConfigSettings config; + // TODO + return config; +} + +auto K4ConfigSettings::default_init_for_manager() -> K4ConfigSettings{ + K4ConfigSettings config; + // TODO + return config; +} + +auto K4ConfigSettings::init_from_data(std::int8_t *data) -> void{ + size_t offset = 0; + read(idDevice, data, offset); + read(mode, data, offset); + read(synchronizeColorAndDepth, data, offset); + read(delayBetweenColorAndDepthUsec, data, offset); + read(synchMode, data, offset); + read(subordinateDelayUsec, data, offset); + read(disableLED, data, offset); + read(btOrientation, data, offset); + read(btProcessingMode, data, offset); + read(btGPUId, data, offset); +} + +auto K4ConfigSettings::convert_to_data(std::int8_t *data) const -> void{ + size_t offset = 0; + write(idDevice, data, offset); + write(mode, data, offset); + write(synchronizeColorAndDepth, data, offset); + write(delayBetweenColorAndDepthUsec, data, offset); + write(synchMode, data, offset); + write(subordinateDelayUsec, data, offset); + write(disableLED, data, offset); + 
write(btOrientation, data, offset); + write(btProcessingMode, data, offset); + write(btGPUId, data, offset); +} diff --git a/cpp-projects/base/camera/kinect4/k4_config_settings.hpp b/cpp-projects/base/camera/kinect4/k4_config_settings.hpp new file mode 100644 index 0000000..73346d3 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_config_settings.hpp @@ -0,0 +1,72 @@ +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "k4_types.hpp" +#include "files/binary_settings.hpp" + +namespace tool::camera { + +struct K4ConfigSettings : files::SubBinarySettings{ + + std::uint32_t idDevice = 0; + K4Mode mode = K4Mode::Cloud_640x576_MJPEG; + bool synchronizeColorAndDepth = true; + int delayBetweenColorAndDepthUsec = 0; + K4SynchronisationMode synchMode = K4SynchronisationMode::Standalone; + int subordinateDelayUsec = 0; + bool disableLED = false; + + // abt + K4BTSensorOrientation btOrientation = K4BTSensorOrientation::default_orientation; + K4BTProcessingMode btProcessingMode = K4BTProcessingMode::GPU_DIRECTML; + std::int8_t btGPUId = 0; + + static auto default_init_for_grabber() -> K4ConfigSettings; + static auto default_init_for_manager() -> K4ConfigSettings; + + // i/o + auto init_from_data(std::int8_t *data) -> void override; + auto convert_to_data(std::int8_t *data) const -> void override; + auto total_data_size() const noexcept-> size_t override{ + return + sizeof(idDevice) + + sizeof(mode) + + sizeof(synchronizeColorAndDepth) + + sizeof(delayBetweenColorAndDepthUsec) + + sizeof(synchMode) + + sizeof(subordinateDelayUsec) + + sizeof(disableLED) + + sizeof(btOrientation) + + sizeof(btProcessingMode) + + sizeof(btGPUId); + } +}; + + + +} diff --git a/cpp-projects/base/camera/kinect4/k4_data.cpp b/cpp-projects/base/camera/kinect4/k4_data.cpp new file mode 100644 index 0000000..7516a85 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_data.cpp @@ -0,0 +1,407 @@ + + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including 
without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "k4_data.hpp" + +using namespace tool::geo; +using namespace tool::camera; + +std::tuple K4PackedVoxel::pack(const geo::Pt3 &pos, const geo::Pt4 &col) noexcept{ + + const int x = static_cast(pos.x())+4096; + const int y = static_cast(pos.y())+4096; + const int z = static_cast(pos.z()); + + std::uint32_t p1 = + /** 0-0 */((x & 0b1)) | + /** 1-0 */((y & 0b1) << 1) | + /** 2-0 */((z & 0b1) << 2) | + /** 3-1 */((x & 0b10) << 2) | + /** 4-1 */((y & 0b10) << 3) | + /** 5-1 */((z & 0b10) << 4) | + /** 6-2 */((x & 0b100) << 4) | + /** 7-2 */((y & 0b100) << 5) | + /** 8-2 */((z & 0b100) << 6) | + /** 9-3 */((x & 0b1000) << 6) | + /** 10-3 */((y & 0b1000) << 7) | + /** 11-3 */((z & 0b1000) << 8) | + /** 12-4 */((x & 0b10000) << 8) | + /** 13-4 */((y & 0b10000) << 9) | + /** 14-4 */((z & 0b10000) << 10) | + /** 15-5 */((x & 0b100000) << 10) | + /** 16-5 */((y & 0b100000) << 11) | + /** 17-5 */((z & 0b100000) << 12) | + /** 18-6 */((x & 0b1000000) << 12) | + /** 19-6 */((y & 
0b1000000) << 13) | + /** 20-6 */((z & 0b1000000) << 14) | + /** 21-7 */((x & 0b10000000) << 14) | + /** 22-7 */((y & 0b10000000) << 15) | + /** 23-7 */((z & 0b10000000) << 16) | + /** 24-8 */((x & 0b100000000) << 16) | + /** 25-8 */((y & 0b100000000) << 17) | + /** 26-8 */((z & 0b100000000) << 18) | + /** 27-9 */((x & 0b1000000000) << 18) | + /** 28-9 */((y & 0b1000000000) << 19) | + /** 29-9 */((z & 0b1000000000) << 20) | + /** 30-10 */((x & 0b10000000000) << 20) | + /** 31-10 */((y & 0b10000000000) << 21); + + const std::uint8_t r = col.x(); + const std::uint8_t g = col.y(); + const std::uint8_t b = col.z(); + std::uint32_t p2 = + /** 0-0 */ ((r & 0b1)) | + /** 1-0 */ ((g & 0b1) << 1) | + /** 2-0 */ ((b & 0b1) << 2) | + /** 3-1 */ ((r & 0b10) << 2) | + /** 4-1 */ ((g & 0b10) << 3) | + /** 5-1 */ ((b & 0b10) << 4) | + /** 6-2 */ ((r & 0b100) << 4) | + /** 7-2 */ ((g & 0b100) << 5) | + /** 8-2 */ ((b & 0b100) << 6) | + /** 9-3 */ ((r & 0b1000) << 6) | + /** 10-3 */ ((g & 0b1000) << 7) | + /** 11-3 */ ((b & 0b1000) << 8) | + /** 12-4 */ ((r & 0b10000) << 8) | + /** 13-4 */ ((g & 0b10000) << 9) | + /** 14-4 */ ((b & 0b10000) << 10) | + /** 15-5 */ ((r & 0b100000) << 10) | + /** 16-5 */ ((g & 0b100000) << 11) | + /** 17-5 */ ((b & 0b100000) << 12) | + /** 18-6 */ ((r & 0b1000000) << 12) | + /** 19-6 */ ((g & 0b1000000) << 13) | + /** 20-6 */ ((b & 0b1000000) << 14) | + /** 21-7 */ ((r & 0b10000000) << 14) | + /** 22-7 */ ((g & 0b10000000) << 15) | + /** 23-7 */ ((b & 0b10000000) << 16) | + + /** 24-10 */ ((z & 0b10000000000) << 14) | + /** 25-11 */ ((x & 0b100000000000) << 14) | + /** 26-11 */ ((y & 0b100000000000) << 15) | + /** 27-11 */ ((z & 0b100000000000) << 16) | + /** 28-12 */ ((x & 0b1000000000000) << 16) | + /** 29-12 */ ((y & 0b1000000000000) << 17) | + /** 30-12 */ ((z & 0b1000000000000) << 18) | + /** 31-13 */ ((z & 0b10000000000000) << 18); + + return {p1,p2}; +} + +void K4PackedVoxel::unpack(std::uint32_t p1, std::uint32_t p2, geo::Pt3 &pos, geo::Pt3 
&col) noexcept{ + + pos.x() = static_cast( + /* 0 */ ((p1 & 0b1)) | + /* 1 */ ((p1 & 0b1000) >> 2) | + /* 2 */ ((p1 & 0b1000000) >> 4) | + /* 3 */ ((p1 & 0b1000000000) >> 6) | + /* 4 */ ((p1 & 0b1000000000000) >> 8) | + /* 5 */ ((p1 & 0b1000000000000000) >> 10) | + /* 6 */ ((p1 & 0b1000000000000000000) >> 12) | + /* 7 */ ((p1 & 0b1000000000000000000000) >> 14) | + /* 8 */ ((p1 & 0b1000000000000000000000000) >> 16) | + /* 9 */ ((p1 & 0b1000000000000000000000000000) >> 18) | + /* 10 */ ((p1 & 0b1000000000000000000000000000000) >> 20) | + /* 11 */ ((p2 & 0b10000000000000000000000000) >> 14) | + /* 12 */ ((p2 & 0b10000000000000000000000000000) >> 16) - 4096); + + pos.y() = static_cast( + /* 0 */ ((p1 & 0b10) >> 1) | + /* 1 */ ((p1 & 0b10000) >> 3) | + /* 2 */ ((p1 & 0b10000000) >> 5) | + /* 3 */ ((p1 & 0b10000000000) >> 7) | + /* 4 */ ((p1 & 0b10000000000000) >> 9) | + /* 5 */ ((p1 & 0b10000000000000000) >> 11) | + /* 6 */ ((p1 & 0b10000000000000000000) >> 13) | + /* 7 */ ((p1 & 0b10000000000000000000000) >> 15) | + /* 8 */ ((p1 & 0b10000000000000000000000000) >> 17) | + /* 9 */ ((p1 & 0b10000000000000000000000000000) >> 19) | + /* 10 */ ((p1 & 0b10000000000000000000000000000000) >> 21) | + /* 11 */ ((p2 & 0b100000000000000000000000000) >> 15) | + /* 12 */ ((p2 & 0b100000000000000000000000000000) >> 17) - 4096); + + pos.z() = static_cast( + /* 0 */ ((p1 & 0b100) >> 2) | + /* 1 */ ((p1 & 0b100000) >> 4) | + /* 2 */ ((p1 & 0b100000000) >> 6) | + /* 3 */ ((p1 & 0b100000000000) >> 8) | + /* 4 */ ((p1 & 0b100000000000000) >> 10) | + /* 5 */ ((p1 & 0b100000000000000000) >> 12) | + /* 6 */ ((p1 & 0b100000000000000000000) >> 14) | + /* 7 */ ((p1 & 0b100000000000000000000000) >> 16) | + /* 8 */ ((p1 & 0b100000000000000000000000000) >> 18) | + /* 9 */ ((p1 & 0b100000000000000000000000000000) >> 20) | + /* 10 */ ((p2 & 0b1000000000000000000000000) >> 14) | + /* 11 */ ((p2 & 0b1000000000000000000000000000) >> 16) | + /* 12 */ ((p2 & 0b1000000000000000000000000000000) >> 18) | + /* 
13 */ ((p2 & 0b10000000000000000000000000000000) >> 19)); + + col.x() = static_cast( + /* 0 */ ((p2 & 0b1)) | + /* 1 */ ((p2 & 0b1000) >> 2) | + /* 2 */ ((p2 & 0b1000000) >> 4) | + /* 3 */ ((p2 & 0b1000000000) >> 6) | + /* 4 */ ((p2 & 0b1000000000000) >> 8) | + /* 5 */ ((p2 & 0b1000000000000000) >> 10) | + /* 6 */ ((p2 & 0b1000000000000000000) >> 12) | + /* 7 */ ((p2 & 0b1000000000000000000000) >> 14)); + + col.y() = static_cast( + /* 0 */ ((p2 & 0b10) >> 1) | + /* 1 */ ((p2 & 0b10000) >> 3) | + /* 2 */ ((p2 & 0b10000000) >> 5) | + /* 3 */ ((p2 & 0b10000000000) >> 7) | + /* 4 */ ((p2 & 0b10000000000000) >> 9) | + /* 5 */ ((p2 & 0b10000000000000000) >> 11) | + /* 6 */ ((p2 & 0b10000000000000000000) >> 13) | + /* 7 */ ((p2 & 0b10000000000000000000000) >> 15)); + + col.z() = static_cast( + /* 0 */ ((p2 & 0b100) >> 2) | + /* 1 */ ((p2 & 0b100000) >> 4) | + /* 2 */ ((p2 & 0b100000000) >> 6) | + /* 3 */ ((p2 & 0b100000000000) >> 8) | + /* 4 */ ((p2 & 0b100000000000000) >> 10) | + /* 5 */ ((p2 & 0b100000000000000000) >> 12) | + /* 6 */ ((p2 & 0b100000000000000000000) >> 14) | + /* 7 */ ((p2 & 0b100000000000000000000000) >> 16)); +} + +std::uint64_t K4PackedVoxel::pack64(const geo::Pt3 &pos, const geo::Pt4 &col){ + + // 0 x0 y0 z0 r0 g0 b0 + // 6 x1 y1 z1 r1 g1 b1 + // 12 x2 y2 z2 r2 g2 b2 + // 18 x3 y3 z3 r3 g3 b3 + // 24 x4 y4 z4 r4 g4 b4 + // 30 x5 y5 z5 r5 g5 b5 + // 36 x6 y6 z6 r6 g6 b6 + // 42 x7 y7 z7 r7 g7 b7 + // 48 x8 y8 z8 x9 y9 z9 + // 54 x10 y10 z10 x11 y11 z11 + // 60 x12 y12 z12 z13 + + const int x = static_cast(pos.x())+4096; + const int y = static_cast(pos.y())+4096; + const int z = static_cast(pos.z()); + const std::uint8_t r = col.x(); + const std::uint8_t g = col.y(); + const std::uint8_t b = col.z(); + + using t = std::uint64_t; + return t{ + /** 0-0 */ (t(x & 0b1)) | + /** 1-0 */ (t(y & 0b1) << 1) | + /** 2-0 */ (t(z & 0b1) << 2) | + /** 3-0 */ (t(r & 0b1) << 3) | + /** 4-0 */ (t(g & 0b1) << 4) | + /** 5-0 */ (t(b & 0b1) << 5) | + /** 6-1 */ (t(x & 
0b10) << 5) | + /** 7-1 */ (t(y & 0b10) << 6) | + /** 8-1 */ (t(z & 0b10) << 7) | + /** 9-1 */ (t(r & 0b10) << 8) | + /** 10-1 */ (t(g & 0b10) << 9) | + /** 11-1 */ (t(b & 0b10) << 10) | + /** 12-2 */ (t(x & 0b100) << 10) | + /** 13-2 */ (t(y & 0b100) << 11) | + /** 14-2 */ (t(z & 0b100) << 12) | + /** 15-2 */ (t(r & 0b100) << 13) | + /** 16-2 */ (t(g & 0b100) << 14) | + /** 17-2 */ (t(b & 0b100) << 15) | + /** 18-3 */ (t(x & 0b1000) << 15) | + /** 19-3 */ (t(y & 0b1000) << 16) | + /** 20-3 */ (t(z & 0b1000) << 17) | + /** 21-3 */ (t(r & 0b1000) << 18) | + /** 22-3 */ (t(g & 0b1000) << 19) | + /** 23-3 */ (t(b & 0b1000) << 20) | + /** 24-4 */ (t(x & 0b10000) << 20) | + /** 25-4 */ (t(y & 0b10000) << 21) | + /** 26-4 */ (t(z & 0b10000) << 22) | + /** 27-4 */ (t(r & 0b10000) << 23) | + /** 28-4 */ (t(g & 0b10000) << 24) | + /** 29-4 */ (t(b & 0b10000) << 25) | + /** 30-5 */ (t(x & 0b100000) << 25) | + /** 31-5 */ (t(y & 0b100000) << 26) | + /** 32-5 */ (t(z & 0b100000) << 27) | + /** 33-5 */ (t(r & 0b100000) << 28) | + /** 34-5 */ (t(g & 0b100000) << 29) | + /** 35-5 */ (t(b & 0b100000) << 30) | + /** 36-6 */ (t(x & 0b1000000) << 30) | + /** 37-6 */ (t(y & 0b1000000) << 31) | + /** 38-6 */ (t(z & 0b1000000) << 32) | + /** 39-6 */ (t(r & 0b1000000) << 33) | + /** 40-6 */ (t(g & 0b1000000) << 34) | + /** 41-6 */ (t(b & 0b1000000) << 35) | + /** 42-7 */ (t(x & 0b10000000) << 35) | + /** 43-7 */ (t(y & 0b10000000) << 36) | + /** 44-7 */ (t(z & 0b10000000) << 37) | + /** 45-7 */ (t(r & 0b10000000) << 38) | + /** 46-7 */ (t(g & 0b10000000) << 39) | + /** 47-7 */ (t(b & 0b10000000) << 40) | + /** 48-8 */ (t(x & 0b100000000) << 40) | + /** 49-8 */ (t(y & 0b100000000) << 41) | + /** 50-8 */ (t(z & 0b100000000) << 42) | + /** 51-9 */ (t(x & 0b1000000000) << 42) | + /** 52-9 */ (t(y & 0b1000000000) << 43) | + /** 53-9 */ (t(z & 0b1000000000) << 44) | + /** 54-10 */ (t(x & 0b10000000000) << 44) | + /** 55-10 */ (t(y & 0b10000000000) << 45) | + /** 56-10 */ (t(z & 0b10000000000) 
<< 46) | + /** 57-11 */ (t(x & 0b100000000000) << 46) | + /** 58-11 */ (t(y & 0b100000000000) << 47) | + /** 59-11 */ (t(z & 0b100000000000) << 48) | + /** 60-12 */ (t(x & 0b1000000000000) << 48) | + /** 61-12 */ (t(y & 0b1000000000000) << 49) | + /** 62-12 */ (t(z & 0b1000000000000) << 50) | + /** 63-13 */ (t(z & 0b10000000000000) << 50) + }; +} + +void K4PackedVoxel::unpack64(uint64_t p, geo::Pt3 &pos, geo::Pt4 &col){ + + pos.x() = static_cast( + /* 0 */ ((p & 0b1)) | + /* 1 */ ((p & 0b1000000) >> 5) | + /* 2 */ ((p & 0b1000000000000) >> 10) | + /* 3 */ ((p & 0b1000000000000000000) >> 15) | + /* 4 */ ((p & 0b1000000000000000000000000) >> 20) | + /* 5 */ ((p & 0b1000000000000000000000000000000) >> 25) | + /* 6 */ ((p & 0b1000000000000000000000000000000000000) >> 30) | + /* 7 */ ((p & 0b1000000000000000000000000000000000000000000) >> 35) | + /* 8 */ ((p & 0b1000000000000000000000000000000000000000000000000) >> 40) | + /* 9 */ ((p & 0b1000000000000000000000000000000000000000000000000000) >> 42) | + /* 10 */ ((p & 0b1000000000000000000000000000000000000000000000000000000) >> 44) | + /* 11 */ ((p & 0b1000000000000000000000000000000000000000000000000000000000) >> 46) | + /* 12 */ ((p & 0b1000000000000000000000000000000000000000000000000000000000000) >> 48) - 4096); + + pos.y() = static_cast( + /* 0 */ ((p & 0b10) >> 1) | + /* 1 */ ((p & 0b10000000) >> 6) | + /* 2 */ ((p & 0b10000000000000) >> 11) | + /* 3 */ ((p & 0b10000000000000000000) >> 16) | + /* 4 */ ((p & 0b10000000000000000000000000) >> 21) | + /* 5 */ ((p & 0b10000000000000000000000000000000) >> 26) | + /* 6 */ ((p & 0b10000000000000000000000000000000000000) >> 31) | + /* 7 */ ((p & 0b10000000000000000000000000000000000000000000) >> 36) | + /* 8 */ ((p & 0b10000000000000000000000000000000000000000000000000) >> 41) | + /* 9 */ ((p & 0b10000000000000000000000000000000000000000000000000000) >> 43) | + /* 10 */ ((p & 0b10000000000000000000000000000000000000000000000000000000) >> 45) | + /* 11 */ ((p & 
0b10000000000000000000000000000000000000000000000000000000000) >> 47) | + /* 12 */ ((p & 0b10000000000000000000000000000000000000000000000000000000000000) >> 49) - 4096); + + pos.z() = static_cast( + /* 0 */ ((p & 0b100) >> 2) | + /* 1 */ ((p & 0b100000000) >> 7) | + /* 2 */ ((p & 0b100000000000000) >> 12) | + /* 3 */ ((p & 0b100000000000000000000) >> 17) | + /* 4 */ ((p & 0b100000000000000000000000000) >> 22) | + /* 5 */ ((p & 0b100000000000000000000000000000000) >> 27) | + /* 6 */ ((p & 0b100000000000000000000000000000000000000) >> 32) | + /* 7 */ ((p & 0b100000000000000000000000000000000000000000000) >> 37) | + /* 8 */ ((p & 0b100000000000000000000000000000000000000000000000000) >> 42) | + /* 9 */ ((p & 0b100000000000000000000000000000000000000000000000000000) >> 44) | + /* 10 */ ((p & 0b100000000000000000000000000000000000000000000000000000000) >> 46) | + /* 11 */ ((p & 0b100000000000000000000000000000000000000000000000000000000000) >> 48) | + /* 12 */ ((p & 0b100000000000000000000000000000000000000000000000000000000000000) >> 50) | + /* 13 */ ((p & 0b1000000000000000000000000000000000000000000000000000000000000000) >> 50)); + + col.x() = static_cast( + /* 0 */ ((p & 0b1000) >> 3) | + /* 1 */ ((p & 0b1000000000) >> 8) | + /* 2 */ ((p & 0b1000000000000000) >> 13) | + /* 3 */ ((p & 0b1000000000000000000000) >> 18) | + /* 4 */ ((p & 0b1000000000000000000000000000) >> 23) | + /* 5 */ ((p & 0b1000000000000000000000000000000000) >> 28) | + /* 6 */ ((p & 0b1000000000000000000000000000000000000000) >> 33) | + /* 7 */ ((p & 0b1000000000000000000000000000000000000000000000) >> 38)); + + col.y() = static_cast( + /* 0 */ ((p & 0b10000) >> 4) | + /* 1 */ ((p & 0b10000000000) >> 9) | + /* 2 */ ((p & 0b10000000000000000) >> 14) | + /* 3 */ ((p & 0b10000000000000000000000) >> 19) | + /* 4 */ ((p & 0b10000000000000000000000000000) >> 24) | + /* 5 */ ((p & 0b10000000000000000000000000000000000) >> 29) | + /* 6 */ ((p & 0b10000000000000000000000000000000000000000) >> 34) | + /* 7 
*/ ((p & 0b10000000000000000000000000000000000000000000000) >> 39)); + + col.z() = static_cast( + /* 0 */ ((p & 0b100000) >> 5) | + /* 1 */ ((p & 0b100000000000) >> 10) | + /* 2 */ ((p & 0b100000000000000000) >> 15) | + /* 3 */ ((p & 0b100000000000000000000000) >> 20) | + /* 4 */ ((p & 0b100000000000000000000000000000) >> 25) | + /* 5 */ ((p & 0b100000000000000000000000000000000000) >> 30) | + /* 6 */ ((p & 0b100000000000000000000000000000000000000000) >> 35) | + /* 7 */ ((p & 0b100000000000000000000000000000000000000000000000) >> 40)); +} + +Pt4 K4PackedVoxel::pack_xy(std::int16_t x, std::int16_t y){ + Pt4 c; + + c.x() = + /* 0 */((x & 0b1)) | + /* 1 */((x & 0b100) >> 1) | + /* 2 */((x & 0b10000) >> 2) | + /* 3 */((x & 0b1000000) >> 3) | + /* 4 */((x & 0b100000000) >> 4) | + /* 5 */((x & 0b10000000000) >> 5) | + /* 6 */((x & 0b1000000000000) >> 6) | + /* 7 */((x & 0b100000000000000) >> 7); + + c.y() = + /* 0 */((y & 0b1)) | + /* 1 */((y & 0b100) >> 1) | + /* 2 */((y & 0b10000) >> 2) | + /* 3 */((y & 0b1000000) >> 3) | + /* 4 */((y & 0b100000000) >> 4) | + /* 5 */((y & 0b10000000000) >> 5) | + /* 6 */((y & 0b1000000000000) >> 6) | + /* 7 */((y & 0b100000000000000) >> 7); + + c.z() = + /* 0 */((x & 0b10) >> 1) | + /* 1 */((x & 0b1000) >> 2) | + /* 2 */((x & 0b100000) >> 3) | + /* 3 */((x & 0b10000000) >> 4) | + /* 4 */((x & 0b1000000000) >> 5) | + /* 5 */((x & 0b100000000000) >> 6) | + /* 6 */((x & 0b10000000000000) >> 7) | + /* 7 */((x & 0b1000000000000000)>> 8); + + c.w() = + /* 0 */((y & 0b10) >> 1) | + /* 1 */((y & 0b1000) >> 2) | + /* 2 */((y & 0b100000) >> 3) | + /* 3 */((y & 0b10000000) >> 4) | + /* 4 */((y & 0b1000000000) >> 5) | + /* 5 */((y & 0b100000000000) >> 6) | + /* 6 */((y & 0b10000000000000) >> 7) | + /* 7 */((y & 0b1000000000000000)>> 8); + + return c; +} diff --git a/cpp-projects/base/camera/kinect4/k4_data.hpp b/cpp-projects/base/camera/kinect4/k4_data.hpp new file mode 100644 index 0000000..9d9c80e --- /dev/null +++ 
b/cpp-projects/base/camera/kinect4/k4_data.hpp @@ -0,0 +1,56 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + + +// local +#include "k4_types.hpp" +#include "camera/frame.hpp" + +namespace tool::camera{ + + struct K4VoxelData{ + std::int64_t idX : 13, idY : 13, idZ : 14, r : 8, g : 8, b : 8; + }; + + struct K4PackedVoxel{ + static std::tuple pack(const geo::Pt3 &pos, const geo::Pt4 &col) noexcept; + static void unpack(std::uint32_t p1, std::uint32_t p2, geo::Pt3 &pos, geo::Pt3 &col) noexcept; + static std::uint64_t pack64(const geo::Pt3 &pos, const geo::Pt4 &col); + static void unpack64(std::uint64_t p, geo::Pt3 &pos, geo::Pt4 &col); + static geo::Pt4 pack_xy(std::int16_t x, std::int16_t y); + }; +} + + + + + + + + + diff --git a/cpp-projects/base/camera/kinect4/k4_data_settings.cpp b/cpp-projects/base/camera/kinect4/k4_data_settings.cpp new file mode 100644 index 0000000..8372b14 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_data_settings.cpp @@ -0,0 +1,97 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. 
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "k4_data_settings.hpp" + +// local +#include "utility/io_data.hpp" + +using namespace tool::camera; + +auto K4DataSettings::default_init_for_grabber() -> K4DataSettings{ + K4DataSettings data; + data.generateRGBLocalFrame = true; + data.generateDepthLocalFrame = true; + data.generateInfraLocalFrame = true; + data.generateCloudLocal = true; + return data; +} + +auto K4DataSettings::default_init_for_manager() -> K4DataSettings{ + K4DataSettings data; + data.generateRGBLocalFrame = false; + data.generateDepthLocalFrame = false; + data.generateInfraLocalFrame = false; + data.generateCloudLocal = false; + return data; +} + +auto K4DataSettings::init_from_data(std::int8_t *data) -> void{ + + size_t offset = 0; + read(captureAudio, data, offset); + read(captureIMU, data, offset); + read(captureBodies, data, offset); + + read(generateRGBLocalFrame, data, offset); + read(generateDepthLocalFrame, data, offset); + read(generateInfraLocalFrame, data, offset); + read(generateCloudLocal, data, offset); + + read(sendColor, data, offset); + read(sendDepth, data, offset); + read(sendInfra, data, offset); + read(sendCloud, data, offset); + read(sendIMU, data, offset); + read(sendAudio, data, offset); + read(sendBodies, data, offset); + + read(btTemporalSmoothing, data, offset); +} + +auto K4DataSettings::convert_to_data(std::int8_t *data) const -> void{ + + size_t offset = 
0; + write(captureAudio, data, offset); + write(captureIMU, data, offset); + write(captureBodies, data, offset); + + write(generateRGBLocalFrame, data, offset); + write(generateDepthLocalFrame, data, offset); + write(generateInfraLocalFrame, data, offset); + write(generateCloudLocal, data, offset); + + write(sendColor, data, offset); + write(sendDepth, data, offset); + write(sendInfra, data, offset); + write(sendCloud, data, offset); + write(sendIMU, data, offset); + write(sendAudio, data, offset); + write(sendBodies, data, offset); + + write(btTemporalSmoothing, data, offset); +} + diff --git a/cpp-projects/base/camera/kinect4/k4_data_settings.hpp b/cpp-projects/base/camera/kinect4/k4_data_settings.hpp new file mode 100644 index 0000000..a506b15 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_data_settings.hpp @@ -0,0 +1,84 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "files/binary_settings.hpp" + +namespace tool::camera { + +struct K4DataSettings : files::SubBinarySettings{ + + // capture + bool captureAudio = true; + bool captureIMU = true; + bool captureBodies = false; + + // send + bool sendColor = true; + bool sendDepth = false; + bool sendInfra = false; + bool sendCloud = true; + bool sendIMU = true; + bool sendAudio = true; + bool sendBodies = false; + + // display locally on grabber + bool generateRGBLocalFrame = false; + bool generateDepthLocalFrame = false; + bool generateInfraLocalFrame = false; + bool generateCloudLocal = false; + + // body tracking + float btTemporalSmoothing = 0.f; + + static auto default_init_for_grabber() -> K4DataSettings; + static auto default_init_for_manager() -> K4DataSettings; + + // i/o + auto init_from_data(std::int8_t *data) -> void override; + auto convert_to_data(std::int8_t *data) const -> void override; + auto total_data_size() const noexcept-> size_t override{ + return + sizeof(captureAudio) + + sizeof(captureIMU) + + sizeof(captureBodies) + + sizeof(generateRGBLocalFrame) + + sizeof(generateDepthLocalFrame) + + sizeof(generateInfraLocalFrame) + + sizeof(generateCloudLocal) + + sizeof(sendColor) + + sizeof(sendDepth) + + sizeof(sendInfra) + + sizeof(sendCloud) + + sizeof(sendIMU) + + sizeof(sendAudio) + + sizeof(sendBodies)+ + sizeof(btTemporalSmoothing); + } +}; +} diff --git a/cpp-projects/base/camera/kinect4/k4_delay.cpp b/cpp-projects/base/camera/kinect4/k4_delay.cpp new file mode 100644 index 0000000..bb3eac2 --- /dev/null +++ 
b/cpp-projects/base/camera/kinect4/k4_delay.cpp @@ -0,0 +1,40 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#include "k4_delay.hpp" + +// local +#include "utility/io_data.hpp" + +using namespace tool::camera; + +auto K4Delay::init_from_data(int8_t *data) -> void{ + read(delayMs, data); +} + +auto K4Delay::convert_to_data(int8_t *data) const -> void{ + write(delayMs, data); +} diff --git a/cpp-projects/base/camera/kinect4/k4_delay.hpp b/cpp-projects/base/camera/kinect4/k4_delay.hpp new file mode 100644 index 0000000..1f52e6d --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_delay.hpp @@ -0,0 +1,43 @@ +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "k4_types.hpp" +#include "files/binary_settings.hpp" + +namespace tool::camera { + +struct K4Delay : files::BinarySettings{ + std::int64_t delayMs = 0; + auto init_from_data(std::int8_t *data) -> void override; + auto convert_to_data(std::int8_t *data) const -> void override; + auto total_data_size() const noexcept -> size_t override{return sizeof(delayMs);} + auto type() const noexcept -> std::int32_t override {return static_cast(SettingsType::Delay);}; + auto file_description() const noexcept -> std::string_view override {return settings_name(static_cast(type()));} +}; + +} diff --git a/cpp-projects/base/camera/kinect4/k4_device.cpp b/cpp-projects/base/camera/kinect4/k4_device.cpp new file mode 100644 index 0000000..e240db9 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_device.cpp @@ -0,0 +1,2320 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + + +#include "k4_device.hpp" + +// std +#include +#include +#include + +// kinect4 +#include +#include + +// libyuv +#include "thirdparty/libyuv/libyuv.h" +// nanobenchmark +//#include "thirdparty/nanobench/nanobench.h" + +// local +// # kinect4 +#include "k4a/k4astaticimageproperties.h" +#include "k4a/k4amicrophonelistener.h" +#include "k4a/k4aaudiomanager.h" +//#include "k4a/k4aaudiochanneldatagraph.h" +// # utility +#include "utility/time.hpp" +#include "utility/logger.hpp" +#include "utility/benchmark.hpp" +#include "utility/vector.hpp" +#include "utility/string_unordered_map.hpp" +//#include "utility/format.hpp" +// # geometry +#include "geometry/point4.hpp" +// # graphics +#include "graphics/color.hpp" +// # camera +#include "k4_frame_compressor.hpp" +#include "k4_frame_uncompressor.hpp" + +using namespace tool; +using namespace tool::geo; +using namespace tool::camera; + + +struct K4Device::Impl{ + + enum class Connectivity : std::int8_t{ + Connectivity_4, + Connectivity_8 + }; + + K4Device *kinect4 = nullptr; + + // device + uint32_t deviceCount = 0; + std::string serialNumber; + k4a::device device; + k4a::calibration calibration; + k4a::transformation transformation; + k4abt::tracker tracker; + k4a_device_configuration_t k4aConfig = K4A_DEVICE_CONFIG_INIT_DISABLE_ALL; + k4abt_tracker_configuration_t k4aBtConfig = K4ABT_TRACKER_CONFIG_DEFAULT; + + K4ConfigSettings config; + std::unique_ptr capture = nullptr; + + // images + // # timestamps + std::chrono::nanoseconds colorImageTS = std::chrono::nanoseconds{0}; + std::chrono::nanoseconds depthImageTS = std::chrono::nanoseconds{0}; + 
std::chrono::nanoseconds infraredImageTS = std::chrono::nanoseconds{0}; + // # capture + std::optional colorImage = std::nullopt; + std::optional depthImage = std::nullopt; + std::optional infraredImage = std::nullopt; + std::optional pointCloudImage = std::nullopt; + // # processing + std::optional convertedColorImage = std::nullopt; + std::optional depthSizedColorImage = std::nullopt; + + // audio + std::shared_ptr microphone = nullptr; + std::shared_ptr audioListener = nullptr; + size_t lastFrameCount = 0; + std::vector audioFrames; + + // imu + K4ImuSample imuSample; + + // bodies + std::chrono::nanoseconds bodiesTS = std::chrono::nanoseconds{0}; + size_t bodiesCount = 0; + std::vector bodies; + + // parameters + K4DataSettings data; + K4Filters filters; + + // infos + size_t idCapture = 0; + size_t colorWidth = 0; + size_t colorHeight = 0; + size_t colorSize = 0; + size_t depthWidth = 0; + size_t depthHeight = 0; + size_t depthSize = 0; + K4ColorResolution colorResolution; + K4ImageFormat imageFormat; + K4DepthMode depthMode; + + // profiling + s_umap> timestamps; + s_umap> localTimestamps; + float nbCapturePerSecond = 0.f; + + // state + bool camerasStarted = false; + std::atomic_bool readFramesFromCameras = false; + std::atomic_bool sendData = true; + + float temperature = 0.f; + size_t validDepthValues = 0; + size_t meanBiggestZoneId = 0; + + // arrays indices + std::vector indicesDepths1D; + std::vector indicesDepths1DNoBorders; + std::vector> indicesNeighbours4Depth1D; + std::vector> indicesNeighbours8Depth1D; + std::vector> indexDepthVertexCorrrespondance; + + std::vector> indicesDepthsSortedCorrespondanceNoBorders; + + std::vector> indicesDepths3D; + std::vector indicesColors1D; + + std::vector filteringMask; + std::vector depthMask; + std::vector zonesId; + + std::vector depthFiltering; + + // compression + K4FrameCompressor frameCompressor; + K4FrameUncompressor frameUncompressor; + + // thread/lockers + std::mutex parametersM; /**< mutex for reading 
parameters at beginning of a new frame in thread function */ + std::unique_ptr frameReaderT = nullptr; + + // delay buffer + std::int64_t millisecondsDelay = 0; + std::vector>> frames; + std::vector>> compressedFrames; + + // functions + auto read_frames(K4Mode mode) -> void; + // # init data + auto init_data(K4Mode mode) -> void; + // # read data + auto read_from_microphones() -> void; + auto read_from_imu() -> void; + // # get images + auto get_color_image() -> bool; + auto get_depth_image() -> bool; + auto get_infra_image(K4Mode mode) -> bool; + // # processing + auto convert_color_image(const K4Filters &f) -> void; + auto resize_color_to_fit_depth() -> void; + auto filter_depth_image(const K4Filters &f, K4Mode mode) -> void; + auto filter_color_image(const K4Filters &f) -> void; + auto filter_infrared_image(const K4Filters &f) -> void; + auto generate_cloud(const K4DataSettings &d, K4Mode mode) -> void; + auto compress_frame(const K4Filters &f, const K4DataSettings &d, K4Mode mode) -> std::unique_ptr; + auto create_local_frame(const K4DataSettings &d, K4Mode mode) -> void; + + // profiling + auto get_duration_between_ms(std::string_view from, std::string_view to) noexcept -> std::optional; + auto get_duration_between_micro_s(std::string_view from, std::string_view to) noexcept -> std::optional; + + + static auto generate_config(bool synchInConnected, bool synchOutConnected, const K4ConfigSettings &config) -> k4a_device_configuration_t; + static auto generate_config( + bool synchInConnected, + bool synchOutConnected, + K4ImageFormat colFormat, + K4ColorResolution colResolution, + K4DepthMode depthMode = K4DepthMode::NFOV_UNBINNED, + K4Framerate fps = K4Framerate::F30, + bool synchronizeColorAndDepth = true, + int delayBetweenColorAndDepthUsec = 0, + K4SynchronisationMode synchMode = K4SynchronisationMode::Standalone, + int subordinateDelayUsec = 0, + bool disableLED = false) -> k4a_device_configuration_t; + + static auto generate_bt_config(const K4ConfigSettings 
namespace{

    /**
     * Adapts the requested synchronisation mode and subordinate delay to the cables actually plugged in.
     *
     *  - [Subordinate] without an input cable and [Master] without an output cable fall back to [Standalone].
     *  - Once the effective mode is known, a non-zero subordinate delay is reset to 0
     *    for [Master] and [Standalone].
     * A warning is logged for every adjustment.
     *
     * @param synchMode            in: requested mode, out: effective mode
     * @param subordinateDelayUsec in: requested delay, out: effective delay
     */
    template<typename Delay>
    auto resolve_synchronisation(bool synchInConnected, bool synchOutConnected, K4SynchronisationMode &synchMode, Delay &subordinateDelayUsec) -> void{

        if(synchMode == K4SynchronisationMode::Subordinate){
            if(!synchInConnected){
                synchMode = K4SynchronisationMode::Standalone;
                Logger::warning("No input synchronisation cable found, switch from [Subordinate] to [Standalone] mode and set subordinate delay to [0].\n");
            }
        }else if(synchMode == K4SynchronisationMode::Master){
            if(!synchOutConnected){
                synchMode = K4SynchronisationMode::Standalone;
                Logger::warning("No output synchronisation cable found, switch from [Master] to [Standalone] mode.\n");
            }
        }

        // tested against the effective mode, so a fallback to [Standalone] also clears the delay
        if(synchMode == K4SynchronisationMode::Master && subordinateDelayUsec != 0){
            Logger::warning("Subordinate delay != 0 for mode [Master], subordinate delay is now set to [0].\n");
            subordinateDelayUsec = 0;
        }else if (synchMode == K4SynchronisationMode::Standalone && subordinateDelayUsec != 0){
            Logger::warning("Subordinate delay != 0 for mode [Standalone], subordinate delay is now set to [0].\n");
            subordinateDelayUsec = 0;
        }
    }
}

/**
 * Time elapsed between two named timestamps, in milliseconds.
 * @return std::nullopt when the parameters mutex cannot be taken immediately,
 *         or when one of the two timestamps is unknown or has no value
 */
auto K4Device::Impl::get_duration_between_ms(std::string_view from, std::string_view to) noexcept -> std::optional<std::chrono::milliseconds>{

    // never block the caller: give up if the mutex is currently held
    std::unique_lock lock(parametersM, std::try_to_lock);
    if(!lock.owns_lock()){
        return std::nullopt;
    }

    if(!timestamps.contains(from) || !timestamps.contains(to)){
        return std::nullopt;
    }
    if(!timestamps.at(from).has_value() || !timestamps.at(to).has_value()){
        return std::nullopt;
    }
    return std::chrono::duration_cast<std::chrono::milliseconds>(timestamps.at(to).value()-timestamps.at(from).value());
}

/**
 * Time elapsed between two named timestamps, in microseconds.
 * @return std::nullopt when the parameters mutex cannot be taken immediately,
 *         or when one of the two timestamps is unknown or has no value
 */
auto K4Device::Impl::get_duration_between_micro_s(std::string_view from, std::string_view to) noexcept -> std::optional<std::chrono::microseconds>{

    // never block the caller: give up if the mutex is currently held
    std::unique_lock lock(parametersM, std::try_to_lock);
    if(!lock.owns_lock()){
        return std::nullopt;
    }

    if(!timestamps.contains(from) || !timestamps.contains(to)){
        return std::nullopt;
    }
    if(!timestamps.at(from).has_value() || !timestamps.at(to).has_value()){
        return std::nullopt;
    }
    return std::chrono::duration_cast<std::chrono::microseconds>(timestamps.at(to).value()-timestamps.at(from).value());
}

/**
 * Builds the k4a device configuration described by the settings.
 *
 * @param synchInConnected  true if a cable is plugged in the sync-in jack
 * @param synchOutConnected true if a cable is plugged in the sync-out jack
 * @param config            settings to translate
 * @return configuration whose wired sync mode and subordinate delay match the available cables
 */
auto K4Device::Impl::generate_config(bool synchInConnected, bool synchOutConnected,const K4ConfigSettings &config) -> k4a_device_configuration_t{

    k4a_device_configuration_t ka4Config = K4A_DEVICE_CONFIG_INIT_DISABLE_ALL;
    ka4Config.color_format     = static_cast<k4a_image_format_t>(image_format(config.mode));
    ka4Config.color_resolution = static_cast<k4a_color_resolution_t>(color_resolution(config.mode));
    ka4Config.depth_mode       = static_cast<k4a_depth_mode_t>(depth_mode(config.mode));
    ka4Config.camera_fps       = static_cast<k4a_fps_t>(framerate(config.mode));

    // synchronized captures are never requested when the depth camera is off
    if(depth_mode(config.mode) == K4DepthMode::OFF){
        ka4Config.synchronized_images_only = false;
    }else{
        ka4Config.synchronized_images_only = config.synchronizeColorAndDepth;
    }
    ka4Config.depth_delay_off_color_usec  = config.delayBetweenColorAndDepthUsec;
    ka4Config.disable_streaming_indicator = config.disableLED;

    // work on copies: the settings stay untouched, only the generated configuration is adapted
    auto synchMode            = config.synchMode;
    auto subordinateDelayUsec = config.subordinateDelayUsec;
    resolve_synchronisation(synchInConnected, synchOutConnected, synchMode, subordinateDelayUsec);

    ka4Config.wired_sync_mode                   = static_cast<k4a_wired_sync_mode_t>(synchMode);
    ka4Config.subordinate_delay_off_master_usec = subordinateDelayUsec;

    return ka4Config;
}

/**
 * Builds the k4a device configuration from individual values and logs the resulting
 * color format, color resolution, depth mode and framerate.
 *
 * @param synchInConnected  true if a cable is plugged in the sync-in jack
 * @param synchOutConnected true if a cable is plugged in the sync-out jack
 * @return configuration whose wired sync mode and subordinate delay match the available cables
 */
auto K4Device::Impl::generate_config(
    bool synchInConnected,
    bool synchOutConnected,
    K4ImageFormat colFormat,
    K4ColorResolution colResolution,
    K4DepthMode depthMode,
    K4Framerate fps,
    bool synchronizeColorAndDepth,
    int delayBetweenColorAndDepthUsec,
    K4SynchronisationMode synchMode,
    int subordinateDelayUsec,
    bool disableLED) -> k4a_device_configuration_t{

    k4a_device_configuration_t config = K4A_DEVICE_CONFIG_INIT_DISABLE_ALL;
    config.color_format     = static_cast<k4a_image_format_t>(colFormat);
    config.color_resolution = static_cast<k4a_color_resolution_t>(colResolution);
    config.depth_mode       = static_cast<k4a_depth_mode_t>(depthMode);
    config.camera_fps       = static_cast<k4a_fps_t>(fps);

    // synchronized captures are never requested when the depth camera is off
    if(depthMode == K4DepthMode::OFF){
        config.synchronized_images_only = false;
    }else{
        config.synchronized_images_only = synchronizeColorAndDepth;
    }
    config.depth_delay_off_color_usec  = delayBetweenColorAndDepthUsec;
    config.disable_streaming_indicator = disableLED;

    resolve_synchronisation(synchInConnected, synchOutConnected, synchMode, subordinateDelayUsec);
    config.wired_sync_mode                   = static_cast<k4a_wired_sync_mode_t>(synchMode);
    config.subordinate_delay_off_master_usec = subordinateDelayUsec;

    Logger::message(std::format("config.color_format: {}\n",     static_cast<int>(config.color_format)));
    Logger::message(std::format("config.color_resolution: {}\n", static_cast<int>(config.color_resolution)));
    Logger::message(std::format("config.depth_mode: {}\n",       static_cast<int>(config.depth_mode)));
    Logger::message(std::format("config.camera_fps: {}\n",       static_cast<int>(config.camera_fps)));

    return config;
}
/** Builds the body tracking configuration described by the settings (no custom model path). */
auto K4Device::Impl::generate_bt_config(const K4ConfigSettings &config) -> k4abt_tracker_configuration_t{

    // start from the SDK defaults so that a field not listed below never holds an indeterminate value
    k4abt_tracker_configuration_t ka4BtConfig = K4ABT_TRACKER_CONFIG_DEFAULT;
    ka4BtConfig.gpu_device_id      = config.btGPUId;
    ka4BtConfig.processing_mode    = static_cast<k4abt_tracker_processing_mode_t>(config.btProcessingMode);
    ka4BtConfig.sensor_orientation = static_cast<k4abt_sensor_orientation_t>(config.btOrientation);
    ka4BtConfig.model_path         = nullptr;
    return ka4BtConfig;
}

/** Creates the implementation, initializes the audio manager and lists the available devices. */
K4Device::K4Device() : i(std::make_unique<Impl>()){

    i->kinect4 = this;

    // init audio manager
    const int audioInitStatus = k4a::K4AAudioManager::Instance().Initialize();
    if (audioInitStatus != SoundIoErrorNone){
        // formatted explicitly, like every other log call of this file
        Logger::error(std::format("[K4Device] Failed to initialize audio backend: {}\n", soundio_strerror(audioInitStatus)));
    }

    refresh_devices_list();
}

K4Device::~K4Device(){
    clean();
}

/**
 * Counts the installed cameras, then looks for the Azure Kinect microphone array:
 * when found, the microphone is started and a listener is created on it.
 * Any microphone held from a previous call is stopped and released first.
 */
auto K4Device::refresh_devices_list() -> void{

    // cameras
    i->deviceCount = k4a::device::get_installed_count();
    if(i->deviceCount == 0){
        Logger::error("[K4Device] No K4A devices found\n");
    }else{
        Logger::message(std::format("[K4Device] Devices found: {}\n", i->deviceCount));
    }

    // microphones
    if(i->microphone != nullptr){
        if(i->microphone->IsStarted()){
            i->microphone->Stop();
        }
        i->microphone    = nullptr;
        // a listener created on the released microphone must not outlive it,
        // otherwise it would be reported below as freshly created
        i->audioListener = nullptr;
    }

    auto &audioManager = k4a::K4AAudioManager::Instance();
    audioManager.RefreshDevices();
    const size_t nbDevices = audioManager.get_devices_count();
    Logger::message(std::format("[K4Device] Audio devices count: {}\n", nbDevices));

    for(size_t ii = 0; ii < nbDevices; ++ii){

        const std::string deviceName = audioManager.get_device_name(ii);
        Logger::message(std::format(" - {}\n", deviceName));
        if (deviceName.find("Azure Kinect Microphone Array") == std::string::npos) {
            continue;
        }
        Logger::message(std::format("Found Azure kinect microphones array.\n"));

        i->microphone = audioManager.get_microphone_for_device(deviceName);
        if(i->microphone == nullptr){
            Logger::error(std::format("[K4Device] Cannot retrieve microphone.\n"));
            i->audioListener = nullptr;
            return;
        }

        Logger::message("[K4Device] Start microphone. \n");
        if(i->microphone->Start() != SoundIoErrorNone){
            Logger::error(std::format("[K4Device] Cannot start microphone.\n"));
            return;
        }

        if(i->microphone->IsStarted()){
            Logger::message("[K4Device] Create listener. \n");
            i->audioListener = i->microphone->CreateListener();
            if(i->audioListener == nullptr){
                Logger::error(std::format("[K4Device] Cannot init audio listener.\n"));
                return;
            }
        }
    }

    if(i->audioListener != nullptr){
        Logger::message("[K4Device] Microphone listener created. \n");
    }
}

/**
 * Opens the camera with the given index and logs its serial number, firmware versions and sync jacks state.
 * @param deviceId index of the device, must be lower than nb_devices()
 * @return false if a device is already opened, if the index is invalid or if the SDK fails to open it
 */
auto K4Device::open(std::uint32_t deviceId) -> bool{

    if(is_opened()){
        return false;
    }

    i->serialNumber = "";

    if(deviceId >= i->deviceCount){
        Logger::error("[K4Device] Invalid device id\n");
        return false;
    }

    try {
        // the id is stored in the settings before the SDK call, even if opening then fails
        i->config.idDevice = deviceId;
        i->device = k4a::device::open(i->config.idDevice);
    } catch (const std::runtime_error &error) { // by const reference: no copy, no slicing of derived errors
        Logger::error(std::format("[K4Device] open error: {}\n", error.what()));
        return false;
    }

    const auto version = i->device.get_version();
    const bool debugFB = version.firmware_build == K4A_FIRMWARE_BUILD_DEBUG;

    // left empty for a signature value not listed here
    std::string fsStr;
    switch (version.firmware_signature) {
    case K4A_FIRMWARE_SIGNATURE_MSFT:
        fsStr = "Microsoft signed";
        break;
    case K4A_FIRMWARE_SIGNATURE_TEST:
        fsStr = "Test signed";
        break;
    case K4A_FIRMWARE_SIGNATURE_UNSIGNED:
        fsStr = "Unsigned";
        break;
    }

    i->serialNumber = i->device.get_serialnum();

    Logger::message("[K4Device] Device opened:\n");
    Logger::message(std::format(" Serialnum: {}\n", i->serialNumber));
    Logger::message(" Version:\n");
    Logger::message(std::format(" Firmware build: {}\n", (debugFB ? "[debug]" : "[release]")));
    Logger::message(std::format(" Firmware signature: {}\n", fsStr));
    Logger::message(std::format(" Color camera firmware version {}.{}.{}\n", version.rgb.major, version.rgb.minor, version.rgb.iteration));
    Logger::message(std::format(" Depth camera firmware version {}.{}.{}\n", version.depth.major, version.depth.minor, version.depth.iteration));
    Logger::message(std::format(" Audio device firmware version {}.{}.{}\n", version.audio.major, version.audio.minor, version.audio.iteration));
    Logger::message(std::format(" Depth device firmware version {}.{}.{}\n", version.depth_sensor.major, version.depth_sensor.minor, version.depth_sensor.iteration));
    Logger::message(" Synch:\n");
    Logger::message(std::format(" IN connected {}\n", i->device.is_sync_in_connected()));
    Logger::message(std::format(" OUT connected {}\n", i->device.is_sync_out_connected()));

    return true;
}

/** Number of cameras counted by the last refresh_devices_list call. */
auto K4Device::nb_devices() const noexcept -> uint32_t{
    return i->deviceCount;
}

/** Serial number of the opened device, empty if none has been opened successfully. */
auto K4Device::device_name() const -> std::string{
    return i->serialNumber;
}

auto K4Device::is_opened() const noexcept -> bool{
    return i->device.is_valid();
}

auto K4Device::cameras_started() const noexcept -> bool{
    return i->camerasStarted;
}

auto K4Device::get_nb_capture_per_second() const noexcept -> float {
    return i->nbCapturePerSecond;
}

auto K4Device::mode() const noexcept -> K4Mode{
    return i->config.mode;
}

/** Milliseconds between the "before_capture" and "after_capture" timestamps, -1 if unavailable. */
auto K4Device::get_capture_duration_ms() noexcept -> int64_t{
    const auto duration = i->get_duration_between_ms("before_capture"sv, "after_capture"sv);
    if(!duration.has_value()){
        return -1;
    }
    return duration.value().count();
}

/** Milliseconds between the "after_capture" and "after_processing" timestamps, -1 if unavailable. */
auto K4Device::get_processing_duration_ms() noexcept -> int64_t{
    const auto duration = i->get_duration_between_ms("after_capture"sv, "after_processing"sv);
    if(!duration.has_value()){
        return -1;
    }
    return duration.value().count();
}
duration = i->get_duration_between_ms("after_processing"sv, "after_capture"sv); duration.has_value()){ + return duration.value().count(); + } + return -1; +} + +auto K4Device::get_duration_between_ms(std::string_view from, std::string_view to) noexcept -> int64_t{ + if(auto duration = i->get_duration_between_ms(from, to); duration.has_value()){ + return duration.value().count(); + } + return -1; +} + +auto K4Device::get_duration_between_micro_s(std::string_view from, std::string_view to) noexcept -> int64_t{ + if(auto duration = i->get_duration_between_micro_s(from, to); duration.has_value()){ + return duration.value().count(); + } + return -1; +} + + + +auto K4Device::is_LED_disabled() const noexcept -> bool{ + return i->config.disableLED; +} + +auto K4Device::synch_mode() const noexcept -> K4SynchronisationMode{ + return i->config.synchMode; +} + +auto K4Device::subordinate_delay_usec() const noexcept -> int{ + return i->config.subordinateDelayUsec; +} + +auto K4Device::color_and_depth_synchronized() const noexcept -> bool{ + return i->config.synchronizeColorAndDepth; +} + +auto K4Device::delay_between_color_and_depth_usec() const noexcept -> int{ + return i->config.delayBetweenColorAndDepthUsec; +} + +auto K4Device::close() -> void{ + + if(i->microphone){ + if(i->microphone->IsStarted()){ + i->microphone->Stop(); + } + if(i->audioListener){ + i->audioListener = nullptr; + } + } + + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + + if(i->readFramesFromCameras){ + Logger::error("[K4Device] Reading must be stopped before closing the device.\n"); + return; + } + i->device.close(); + + std::this_thread::sleep_for(std::chrono::milliseconds(50)); +} + + +auto K4Device::clean() -> void{ + if(is_opened()){ + stop_cameras(); + close(); + } +} + +auto K4Device::device_id() const -> std::uint32_t{ + return i->config.idDevice; +} + + +auto K4Device::set_data_settings(const K4DataSettings &dataS) -> void { + i->parametersM.lock(); + i->data = dataS; +// 
i->tracker.set_temporal_smoothing(dataS.btTemporalSmoothing); + i->parametersM.unlock(); +} + +auto K4Device::set_filters(const K4Filters &filters) -> void{ + i->parametersM.lock(); + i->filters = filters; + i->parametersM.unlock(); +} + +void K4Device::set_color_settings(const K4ColorSettings &colorS) { + + k4a_color_control_mode_t mode; + std::int32_t currentValue, newValue; + k4a_color_control_command_t type; + + try{ + + type = K4A_COLOR_CONTROL_EXPOSURE_TIME_ABSOLUTE; + i->device.get_color_control(type, &mode, ¤tValue); + + switch(static_cast(colorS.exposureTimeAbsolute)){ + case K4ExposureTimesMicroS::t500: newValue = 500; break; + case K4ExposureTimesMicroS::t1250: newValue = 1250; break; + case K4ExposureTimesMicroS::t2500: newValue = 2500; break; + case K4ExposureTimesMicroS::t8330: newValue = 8330; break; + case K4ExposureTimesMicroS::t16670: newValue = 16670; break; + case K4ExposureTimesMicroS::t33330: + if(colorS.powerlineFrequency == K4PowerlineFrequency::F60){ + newValue = 33330; + }else{ + newValue = 30000; + }break; + } + + if((currentValue != newValue) || (mode != (colorS.autoExposureTime ? K4A_COLOR_CONTROL_MODE_AUTO : K4A_COLOR_CONTROL_MODE_MANUAL))){ + i->device.set_color_control( + type, + colorS.autoExposureTime ? K4A_COLOR_CONTROL_MODE_AUTO : K4A_COLOR_CONTROL_MODE_MANUAL, + newValue + ); + } + + type = K4A_COLOR_CONTROL_WHITEBALANCE; + i->device.get_color_control(type, &mode, ¤tValue); + newValue = colorS.whiteBalance - (colorS.whiteBalance % 10); + + if((currentValue != newValue) || (mode != (colorS.autoWhiteBalance ? K4A_COLOR_CONTROL_MODE_AUTO : K4A_COLOR_CONTROL_MODE_MANUAL))){ + i->device.set_color_control( + type, + colorS.autoWhiteBalance ? 
K4A_COLOR_CONTROL_MODE_AUTO : K4A_COLOR_CONTROL_MODE_MANUAL, + newValue + ); + } + + type = K4A_COLOR_CONTROL_BRIGHTNESS; + i->device.get_color_control(type, &mode, ¤tValue); + newValue = static_cast(colorS.brightness); + if(currentValue != newValue){ + i->device.set_color_control(type, K4A_COLOR_CONTROL_MODE_MANUAL, newValue); + } + + type = K4A_COLOR_CONTROL_CONTRAST; + i->device.get_color_control(type, &mode, ¤tValue); + newValue = static_cast(colorS.contrast); + if(currentValue != newValue){ + i->device.set_color_control(type, K4A_COLOR_CONTROL_MODE_MANUAL, newValue); + } + + type = K4A_COLOR_CONTROL_SHARPNESS; + i->device.get_color_control(type, &mode, ¤tValue); + newValue = static_cast(colorS.sharpness); + if(currentValue != newValue){ + i->device.set_color_control(type, K4A_COLOR_CONTROL_MODE_MANUAL, newValue); + } + + type = K4A_COLOR_CONTROL_SATURATION; + i->device.get_color_control(type, &mode, ¤tValue); + newValue = static_cast(colorS.saturation); + if(currentValue != newValue){ + i->device.set_color_control(type, K4A_COLOR_CONTROL_MODE_MANUAL, newValue); + } + + type = K4A_COLOR_CONTROL_BACKLIGHT_COMPENSATION; + i->device.get_color_control(type, &mode, ¤tValue); + newValue = colorS.backlightCompensation ? 
1 : 0; + if(currentValue != newValue){ + i->device.set_color_control(type, K4A_COLOR_CONTROL_MODE_MANUAL, newValue); + } + + type = K4A_COLOR_CONTROL_GAIN; + i->device.get_color_control(type, &mode, ¤tValue); + newValue = static_cast(colorS.gain); + if(currentValue != newValue){ + i->device.set_color_control(type, K4A_COLOR_CONTROL_MODE_MANUAL, newValue); + } + + type = K4A_COLOR_CONTROL_POWERLINE_FREQUENCY; + i->device.get_color_control(type, &mode, ¤tValue); + newValue = static_cast(colorS.powerlineFrequency); + if(currentValue != newValue){ + i->device.set_color_control(type, K4A_COLOR_CONTROL_MODE_MANUAL, newValue); + } + + } catch (std::runtime_error error) { + Logger::error(std::format("[K4Device] Set color settings error: {} T:{} CV:{} NV:{}\n", error.what(), static_cast(type), currentValue, newValue)); + } +} + +auto K4Device::send_data_state(bool state) -> void { + i->sendData = state; +} + +auto K4Device::set_delay(K4Delay delay) -> void{ + i->millisecondsDelay = delay.delayMs; +} + +auto K4Device::start_cameras(const K4ConfigSettings &configS) -> bool{ + + if(cameras_started() || !is_opened()){ + return false; + } + + i->config = configS; + i->k4aConfig = i->generate_config(i->device.is_sync_in_connected(), i->device.is_sync_out_connected(), i->config); + i->k4aBtConfig = i->generate_bt_config(i->config); + + try { + + Logger::message("[K4Device] start cameras\n"); + Logger::message("[Config]\n"); + Logger::message(std::format(" color format: {}\n", static_cast(i->k4aConfig.color_format))); + Logger::message(std::format(" depth mode: {}\n", static_cast(i->k4aConfig.depth_mode))); + i->device.start_cameras(&i->k4aConfig); + + Logger::message("[K4Device] Retrieve calibration\n"); + i->calibration = i->device.get_calibration(i->k4aConfig.depth_mode, i->k4aConfig.color_resolution); + + const auto &c = i->calibration; + Logger::message("[Calibration]\n"); + Logger::message(std::format(" color resolution: {}\n", static_cast(c.color_resolution))); + 
Logger::message(" color camera:\n"); + Logger::message(std::format(" width: {}\n", c.color_camera_calibration.resolution_width)); + Logger::message(std::format(" height: {}\n", c.color_camera_calibration.resolution_height)); + Logger::message(std::format(" metric radius: {}\n", c.color_camera_calibration.metric_radius)); + Logger::message(" depth mode:\n"); + Logger::message(std::format(" width: {}\n", c.depth_camera_calibration.resolution_width)); + Logger::message(std::format(" height: {}\n", c.depth_camera_calibration.resolution_height)); + + Logger::message("[K4Device] start imu\n"); + i->device.start_imu(); + + if(depth_mode(configS.mode) != K4DepthMode::OFF){ + Logger::message("[K4Device] start body tracker\n"); + i->tracker = k4abt::tracker::create(i->calibration, i->k4aBtConfig); + } + + } catch (k4a::error error) { + Logger::error("[K4Device] start_cameras error: {}\n", error.what()); + i->k4aConfig = K4A_DEVICE_CONFIG_INIT_DISABLE_ALL; + return false; + } catch (std::runtime_error error) { + Logger::error("[K4Device] start_cameras error: {}\n", error.what()); + i->k4aConfig = K4A_DEVICE_CONFIG_INIT_DISABLE_ALL; + return false; + } + + Logger::message("[K4Device] Color control\n"); + k4a_color_control_mode_t mode; + std::int32_t value; + i->device.get_color_control(K4A_COLOR_CONTROL_EXPOSURE_TIME_ABSOLUTE, &mode, &value); + Logger::message(std::format(" Exposure time: [{}] [{}]\n", value, mode == K4A_COLOR_CONTROL_MODE_AUTO ? "auto" : "manual")); + i->device.get_color_control(K4A_COLOR_CONTROL_BRIGHTNESS, &mode, &value); + Logger::message(std::format(" Brightness: [{}] [{}]\n", value, mode == K4A_COLOR_CONTROL_MODE_AUTO ? "auto" : "manual")); + i->device.get_color_control(K4A_COLOR_CONTROL_CONTRAST, &mode, &value); + Logger::message(std::format(" Contrast: [{}] [{}]\n", value, mode == K4A_COLOR_CONTROL_MODE_AUTO ? 
"auto" : "manual")); + i->device.get_color_control(K4A_COLOR_CONTROL_SHARPNESS, &mode, &value); + Logger::message(std::format(" Sharpness: [{}] [{}]\n", value, mode == K4A_COLOR_CONTROL_MODE_AUTO ? "auto" : "manual")); + i->device.get_color_control(K4A_COLOR_CONTROL_SATURATION, &mode, &value); + Logger::message(std::format(" Saturation: [{}] [{}]\n", value, mode == K4A_COLOR_CONTROL_MODE_AUTO ? "auto" : "manual")); + i->device.get_color_control(K4A_COLOR_CONTROL_WHITEBALANCE, &mode, &value); + Logger::message(std::format(" White balance: [{}] [{}]\n", value, mode == K4A_COLOR_CONTROL_MODE_AUTO ? "auto" : "manual")); + i->device.get_color_control(K4A_COLOR_CONTROL_BACKLIGHT_COMPENSATION, &mode, &value); + Logger::message(std::format(" Backlight compensation: [{}] [{}]\n", value, mode == K4A_COLOR_CONTROL_MODE_AUTO ? "auto" : "manual")); + i->device.get_color_control(K4A_COLOR_CONTROL_GAIN, &mode, &value); + Logger::message(std::format(" Gain: [{}] [{}]\n", value, mode == K4A_COLOR_CONTROL_MODE_AUTO ? "auto" : "manual")); + i->device.get_color_control(K4A_COLOR_CONTROL_POWERLINE_FREQUENCY, &mode, &value); + Logger::message(std::format(" Powerline frequency: [{}] [{}]\n", value, mode == K4A_COLOR_CONTROL_MODE_AUTO ? 
"auto" : "manual")); + + Logger::message("[K4Device] Camera started\n"); + i->camerasStarted = true; + + // start thread + i->frameReaderT = std::make_unique(&K4Device::Impl::read_frames, i.get(), i->config.mode); + + return true; +} + +auto K4Device::stop_cameras() -> void{ + + i->readFramesFromCameras = false; + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + if(i->frameReaderT != nullptr){ + if(i->frameReaderT->joinable()){ + i->frameReaderT->join(); + } + i->frameReaderT = nullptr; + } + + if(i->camerasStarted){ + if(depth_mode(i->config.mode) != K4DepthMode::OFF){ + Logger::message("[K4Device] Shutdown body tracker\n"); + i->tracker.shutdown(); + } + Logger::message("[K4Device] Stop IMU\n"); + i->device.stop_imu(); + Logger::message("[K4Device] Stop cameras\n"); + i->device.stop_cameras(); + i->camerasStarted = false; + } + + std::this_thread::sleep_for(std::chrono::milliseconds(100)); +} + +auto K4Device::Impl::read_frames(K4Mode mode) -> void{ + + // check device + if(!device.is_valid() || readFramesFromCameras){ + Logger::error("[K4Device::Impl::read_frames] Cannot start reading frames.\n"); + return; + } + + // initialization + init_data(mode); + + // start loop + readFramesFromCameras = true; + + auto fps = framerate(mode); + int32_t timeoutMs = 0; + switch (fps) { + case K4Framerate::F30: + timeoutMs = 40; + break; + case K4Framerate::F15: + timeoutMs = 70; + break; + case K4Framerate::F5: + timeoutMs = 250; + break; + } + + std::vector capturesTimes; + timestamps.clear(); + localTimestamps.clear(); + + while(readFramesFromCameras){ + + // copy parameters + parametersM.lock(); + const auto f = filters; + const auto d = data; + for(auto &ts : localTimestamps){ + timestamps[ts.first] = ts.second; + ts.second = std::nullopt; + } + parametersM.unlock(); + + // read data from device + try { + + // update before capture timestamp + localTimestamps["before_capture"sv] = Time::nanoseconds_since_epoch(); + + 
Bench::start("[K4Device::Impl::read_frames] Device get_capture"); + bool success = false; + try{ + success = device.get_capture(capture.get(), std::chrono::milliseconds(timeoutMs)); + localTimestamps["after_capture"sv] = Time::nanoseconds_since_epoch(); + Bench::stop(); + }catch(const std::runtime_error &e){ + Logger::error(std::format("[K4Device::Impl::read_frames] Get capture runtime error: {}\n", e.what())); + Bench::stop(); + readFramesFromCameras = false; + break; + } + if(!success){ + Bench::stop(); + } + + if(d.captureAudio){ + read_from_microphones(); + } + + if(d.captureIMU){ + read_from_imu(); + } + + if(!success){ +// Logger::error(std::format("to[{}] ",timeoutMs)); + continue; + } + + if(d.captureBodies && (depth_mode(config.mode) != K4DepthMode::OFF)){ + if(tracker.enqueue_capture(*capture.get(), std::chrono::milliseconds(1))){ + + if(k4abt::frame bodyFrame = tracker.pop_result(std::chrono::milliseconds(1)); bodyFrame != nullptr){ + bodiesCount = bodyFrame.get_num_bodies(); + if(bodies.size() < bodiesCount){ + bodies.resize(bodiesCount); + } + for(size_t ii = 0; ii < bodiesCount; ++ii){ + bodies[ii].update(bodyFrame.get_body(ii)); + } + bodiesTS = bodyFrame.get_system_timestamp(); + } + +// k4a::image body_index_map = body_frame.get_body_index_map(); +// if (body_index_map != nullptr){ +//// print_body_index_map_middle_line(body_index_map); +// }else{ +// Logger::error("Error: Failed to generate bodyindex map!\n"); +// } + } + } + + // compute capture framerate + auto aftertCatpureTs = localTimestamps["after_capture"sv].value(); + capturesTimes.push_back(aftertCatpureTs); + bool foundT = false; + size_t idT = 0; + for(size_t ii = 0; ii < capturesTimes.size(); ++ii){ + auto diff = std::chrono::duration_cast(aftertCatpureTs - capturesTimes[ii]); + if(std::chrono::duration_cast(diff).count() > 5000){ + foundT = true; + idT = ii; + }else{ + break; + } + } + if(foundT){ + tool::erase_range(capturesTimes, 0, idT + 1); + } + nbCapturePerSecond = 
capturesTimes.size()/5.f; + + } catch (std::exception &error) { + Logger::error(std::format("[K4Device::Impl::read_frames] Get capture error: {}\n", error.what())); + readFramesFromCameras = false; + break; + } + + // get images + if(!get_color_image()){ + continue; + } + if(!get_depth_image()){ + continue; + } + if(!get_infra_image(mode)){ + continue; + } + localTimestamps["after_get_images"sv] = Time::nanoseconds_since_epoch(); + +// Logger::message(std::format("{} {} {} {}\n", +// std::chrono::duration_cast(colorImageTS), +// std::chrono::duration_cast(depthImageTS), +// std::chrono::duration_cast(infraredImageTS), +// std::chrono::duration_cast(localTimestamps["after_capture"sv].value()) +// )); + + // process + convert_color_image(f); + localTimestamps["after_color_convert"sv] = Time::nanoseconds_since_epoch(); + + resize_color_to_fit_depth(); + localTimestamps["after_color_resize"sv] = Time::nanoseconds_since_epoch(); + + filter_depth_image(f, mode); + localTimestamps["after_depth_filter"sv] = Time::nanoseconds_since_epoch(); + + filter_color_image(f); + localTimestamps["after_color_filter"sv] = Time::nanoseconds_since_epoch(); + + filter_infrared_image(f); + localTimestamps["after_infrared_filter"sv] = Time::nanoseconds_since_epoch(); + + generate_cloud(d, mode); + localTimestamps["after_cloud_generation"sv] = Time::nanoseconds_since_epoch(); + localTimestamps["after_processing"sv] = Time::nanoseconds_since_epoch(); + + if(!readFramesFromCameras){ + break; + } + + // send frames + if(sendData){ + if(auto compressedFrame = compress_frame(f,d,mode); compressedFrame != nullptr){ + + localTimestamps["after_compressing"sv] = Time::nanoseconds_since_epoch(); + + // test + compressedFrame->afterCaptureTS = localTimestamps["after_compressing"sv].value().count() - compressedFrame->afterCaptureTS; + using namespace std::chrono; +// std::cout << duration_cast(nanoseconds(compressedFrame->afterCaptureTS)) << " "; + + // store frame + 
compressedFrames.push_back(std::make_tuple(localTimestamps["after_capture"sv].value(), std::move(compressedFrame))); + + // check delay + bool foundFrame = false; + size_t idFrame = 0; + for(size_t ii = 0; ii < compressedFrames.size(); ++ii){ + auto diff = duration_cast(localTimestamps["after_capture"sv].value() - std::get<0>(compressedFrames[ii])); + if(diff.count() >= millisecondsDelay){ + foundFrame = true; + idFrame = ii; + }else{ + break; + } + } + + // send frame + if(foundFrame){ + kinect4->new_compressed_frame_signal(std::move(std::get<1>(compressedFrames[idFrame]))); + tool::erase_range(compressedFrames, 0, idFrame + 1); + } + localTimestamps["after_compressed_frame_sending"sv] = Time::nanoseconds_since_epoch(); + } + } + + if(d.generateRGBLocalFrame || d.generateDepthLocalFrame || d.generateInfraLocalFrame || d.generateCloudLocal){ + create_local_frame(d, mode); + localTimestamps["after_local_frame_sending"sv] = Time::nanoseconds_since_epoch(); + } + + idCapture++; + } +} + + +auto K4Device::Impl::filter_depth_image(const K4Filters &f, K4Mode mode) -> void{ + + if(!depthImage.has_value()){ + return; + } + + Bench::start("[K4Device] Filter depth"); + + // retrieve buffers + auto depthBuffer = reinterpret_cast(depthImage->get_buffer()); + ColorRGBA8 *colorBuffer = colorImage.has_value() ? reinterpret_cast(colorImage.value().get_buffer()) : nullptr; + uint16_t *infraredBuffer = infraredImage.has_value() ? reinterpret_cast(infraredImage.value().get_buffer()) : nullptr; + + static_cast(infraredBuffer); + + const auto dRange = range(mode)*1000.f; + auto minD = f.minDepthValue < dRange.x() ? static_cast(dRange.x()) : f.minDepthValue; + auto maxD = f.maxDepthValue > dRange.y() ? 
static_cast(dRange.y()) : f.maxDepthValue; + + auto hsvDiffColor = Convert::to_hsv(f.filterColor); + + // reset depth mask + std::fill(depthMask.begin(), depthMask.end(), 1); + +// for(size_t id = 0; id < indicesDepths1DNoBorders.size(); ++id){ +// indicesDepthsSortedCorrespondanceNoBorders[id] = {indicesDepths1DNoBorders[id], depthBuffer[indicesDepths1DNoBorders[id]]}; +// } +// auto sortDepth = [](const std::tuple &lhs, const std::tuple &rhs){ +// return std::get<1>(lhs) < std::get<1>(rhs); +// }; +// std::sort(std::begin(indicesDepthsSortedCorrespondanceNoBorders), std::end(indicesDepthsSortedCorrespondanceNoBorders), sortDepth); + +// auto plane1 = f.plane1; +// plane1.normal = normalize(plane1.normal); + + // v' = q * v * conjugate(q) + // t = 2 * cross(q.xyz, v) + // v' = v + q.w * t + cross(q.xyz, t) + +// auto pl1Tr = geo::transform(geo::Pt3f{1.f,1.f,1.f}, f.p1Rot, f.p1Pos); +// auto pl1Dir = (Vec4f(0,1,0,1)*pl1Tr).xyz(); + auto pl1Dir = normalize(f.p1Rot);//pl1Tr.col(0).xyz();//*-1.f; + +// auto dir = Vec3f{0,1,0}; +// auto q = Quaternion::from_euler(f.p1Rot); +// auto t = 2.f * cross(Vec3f{q.x,q.y,q.z}, dir); +// auto pl1Dir = dir + q.w * t + cross(Vec3f{q.x,q.y,q.z}, t); +// auto pl1Dir = q * Vec3f(1,0,0); +// auto pl1Dir = normalize(f.p1Rot); + + // depth/width/height/mask/color/infra filtering + for_each(std::execution::par_unseq, std::begin(indicesDepths3D), std::end(indicesDepths3D), [&](const Pt3 &dIndex){ + + size_t id = dIndex.x(); + size_t ii = dIndex.y(); + size_t jj = dIndex.z(); + + const auto ¤tDepth = depthBuffer[id]; + + // check validity + if(currentDepth == k4_invalid_depth_value){ + depthMask[id] = 0; + return; + } + + // depth filtering + if((ii < f.minWidth) || (ii > f.maxWidth) || // width + (jj < f.minHeight) || (jj > f.maxHeight) || // height + (currentDepth < minD) || (currentDepth > maxD) ){// depth + depthMask[id] = 0; + return; + } + + // plane filtering + if(f.p1FMode != K4Filters::PlaneFilteringMode::None){ + geo::Pt3 
pt{0.001f * ii,0.001f * jj, 0.001f * currentDepth}; + + if(dot(pt - f.p1Pos, pl1Dir) < 0){ + if(f.p1FMode == K4Filters::PlaneFilteringMode::Above){ + depthMask[id] = 0; + return; + } + }else{ + if(f.p1FMode == K4Filters::PlaneFilteringMode::Below){ + depthMask[id] = 0; + return; + } + } + } + +// if(plane_equation(pt, f.plane1) < 0){ +// depthMask[id] = 0; +// return; +// } + + // mask +// if(!f.depthMask.at(id)){ +// depthMask[id] = 0; +// return; +// } + + // color filtering + if(colorImage.has_value() && f.filterDepthWithColor){ + + auto hsv = Convert::to_hsv(colorBuffer[id]); + if((std::abs(hsv.h()- hsvDiffColor.h()) > f.maxDiffColor.x()) || + (std::abs(hsv.s()- hsvDiffColor.s()) > f.maxDiffColor.y()) || + (std::abs(hsv.v()- hsvDiffColor.v()) > f.maxDiffColor.z())){ + depthMask[id] = 0; + return; + } + } + + // infrared filtering + // ... + }); + +// for(size_t ii = 0; ii < 100; ++ii){ +// auto id = std::get<0>(indicesDepthsSortedCorrespondance[ii]); +// depthMask[id] = 0; +// std::cout << std::get<1>(indicesDepthsSortedCorrespondance[ii]) << " "; +//// colorBuffer[id].rgba() = {255,0,0,255}; +//// std::cout << ii << " " << id << " | "; +// } + + if(f.doLocalDiffFiltering){ + maximum_local_depth_difference(f.maxLocalDiff, Connectivity::Connectivity_4); + } + + // minimum neighbours filtering + if(f.doMinNeighboursFiltering){ + mininum_neighbours(f.minNeighboursLoops, f.nbMinNeighbours, Connectivity::Connectivity_4); + } + + // erosion + if(f.doErosion){ + erode(f.erosionLoops, Connectivity::Connectivity_8); + } + + // remove edges + if(f.keepOnlyBiggestCluster){// && colorImage.has_value()){ + +// std::fill(filteringMask.begin(),filteringMask.end(), 0); + +// for_each(std::execution::seq, std::begin(indicesDepthsSortedCorrespondanceNoBorders), std::end(indicesDepthsSortedCorrespondanceNoBorders), [&](const auto &t){ + +// auto id = indicesDepths1DNoBorders[std::get<0>(t)]; +// if(depthMask[id] == 0){ +// return; +// } + +// size_t count = 0; +// float currDepth 
= depthBuffer[id]; + +// for(auto cId : indicesNeighbours8Depth1D[id]){ + +// if(depthMask[cId] == 0){ +// continue; +// } + +// if(currDepth > depthBuffer[cId]){ +// continue; +// } + +// if(filteringMask[cId] == 0){ +// ++count; +// continue; +// } + +// if(std::abs(currDepth - depthBuffer[cId]) > f.maxLocalDiff){ +// ++count; +// } +// } + +// filteringMask[id] = count > f.nbMinNeighbours ? 1 : 0; +// }); + +// for(size_t ii = 0; ii < filteringMask.size(); ++ii){ +// if(filteringMask[ii] == 1){ +// depthMask[ii] = 0; +// } +// } + + + +// // detect edge +// // si même couleur et depth > 0 +// for(size_t numLoop = 0; numLoop < f.minNeighboursLoops; ++numLoop){ + +// std::fill(filteringMask.begin(),filteringMask.end(), 0); + +// for_each(std::execution::par_unseq, std::begin(indicesDepths1DNoBorders), std::end(indicesDepths1DNoBorders), [&](size_t id){ + +// if(depthMask[id] == 0){ +// return; +// } + +// size_t count = 0; +// float currDepth = depthBuffer[id]; + +// for(auto cId : indicesNeighbours8Depth1D[id]){ + +// if(depthMask[cId] == 0){ +// continue; +// } + +//// if(filteringMask[cId] == 0){ +//// ++count; +//// continue; +//// } + +// if(std::abs(currDepth - depthBuffer[cId]) > f.maxLocalDiff){ +// ++count; +// } +// } + +// filteringMask[id] = count > f.nbMinNeighbours ? 
1 : 0; +// }); + +// for(size_t ii = 0; ii < filteringMask.size(); ++ii){ +// if(filteringMask[ii] == 1){ +// depthMask[ii] = 0; +// } +// } +// } + } + + // keep only biggest cluster + if(f.keepOnlyBiggestCluster){ + keep_only_biggest_cluster(); + }else{ + meanBiggestZoneId = 0; + } + + // count valid depth values + validDepthValues = 0; + for_each(std::execution::unseq, std::begin(indicesDepths1D), std::end(indicesDepths1D), [&](size_t id){ + if(depthMask[id] == 0){ + depthBuffer[id] = k4_invalid_depth_value; +// colorBuffer[id].rgba() = {255,0,0,255}; + indexDepthVertexCorrrespondance[id] = {id, -1}; + }else{ + indexDepthVertexCorrrespondance[id] = {id, validDepthValues}; + validDepthValues++; + } + }); + Bench::stop(); + + + return; + + + +// // smooth filtering +// if(f.keepOnlyBiggestCluster){ + +// std::fill(depthFiltering.begin(), depthFiltering.end(), k4_invalid_depth_value); + +// float k = 1.f/9.f; + +// for_each(std::execution::par_unseq, std::begin(indicesDepths1DNoBorders), std::end(indicesDepths1DNoBorders), [&](size_t id){ + +// if(depthMask[id] == 0){ +// return; +// } + +// float total = 0.f; +// for(auto cId : neighbours8Depth1D[id]){ +// if(depthMask[cId] == 1){ +// total += depthBuffer[cId]*k; +// }else{ +// depthFiltering[id] = depthBuffer[id]; +// return; +// } +// } +// depthFiltering[id] = static_cast(total + depthBuffer[id]*k); +// }); + +// std::copy(depthFiltering.begin(), depthFiltering.end(), depthBuffer); +// } + +} + +auto K4Device::Impl::keep_only_biggest_cluster() -> void{ + + // empty zones + std::fill(zonesId.begin(), zonesId.end(), 0); + + size_t currentZoneId = 1; + int biggestZone = -1; + size_t sizeBiggestZone = 0; + for_each(std::execution::unseq, std::begin(indicesDepths1D), std::end(indicesDepths1D), [&](size_t pt){ + + if(zonesId[pt] != 0){ + return; + } + + if(depthMask[pt] == 0){ + return; + } + + size_t zoneId = currentZoneId; + std::queue ids; + ids.push(pt); + + // count new zone + size_t count = 0; + 
while(ids.size() > 0){ + size_t id = ids.front(); + ids.pop(); + + if(zonesId[id] != 0){ + continue; + } + + if(depthMask[id] == 0){ + continue; + } + + zonesId[id] = zoneId; + count++; + + if(id >= 1){ + ids.push(id-1); + + if(id >= depthWidth-1){ + + ids.push(id-depthWidth+1); + + if(id >= depthWidth){ + + ids.push(id-depthWidth); + + if(id >= depthWidth + 1){ + ids.push(id-depthWidth-1); + } + } + } + } + + if(id < depthSize-1){ + + ids.push(id+1); + + if(id < depthSize -depthWidth+1){ + ids.push(id+depthWidth-1); + + if(id < depthSize -depthWidth){ + ids.push(id+depthWidth); + + if(id < depthSize -depthWidth-1){ + ids.push(id+depthWidth+1); + } + } + } + } + } + + if(count != 0){ + if(count > sizeBiggestZone){ + sizeBiggestZone = count; + biggestZone = static_cast(currentZoneId); + } + currentZoneId++; + } + }); + + if(biggestZone != -1){ + Pt2 meanBiggestZone = {0,0}; + size_t count = 0; + for_each(std::execution::unseq, std::begin(indicesDepths3D), std::end(indicesDepths3D), [&](const auto &pt){ + if(zonesId[pt.x()] != biggestZone){ + depthMask[pt.x()] = 0; + } + + if(depthMask[pt.x()] == 1){ + meanBiggestZone.x() += pt.y(); + meanBiggestZone.y() += pt.z(); + ++count; + } + }); + + meanBiggestZone /= count; + meanBiggestZoneId = meanBiggestZone.y() * depthWidth + meanBiggestZone.x(); + } +} + +auto K4Device::Impl::mininum_neighbours(uint8_t nbLoops, uint8_t nbMinNeighbours, Connectivity connectivity) -> void{ + + for(std::uint8_t numLoop = 0; numLoop < nbLoops; ++numLoop){ + + // reset filtering mask + std::fill(filteringMask.begin(),filteringMask.end(), 0); + + for_each(std::execution::par_unseq, std::begin(indicesDepths1DNoBorders), std::end(indicesDepths1DNoBorders), [&](size_t id){ + + if(depthMask[id] == 0){ + return; + } + + std::uint8_t count = 0; + if(connectivity == Connectivity::Connectivity_4){ + for(auto cId : indicesNeighbours4Depth1D[id]){ + + if(depthMask[cId] == 1){ + ++count; + } + } + filteringMask[id] = count == 4 ? 
1 : 0; + }else{ + for(auto cId : indicesNeighbours8Depth1D[id]){ + + if(depthMask[cId] == 1){ + ++count; + } + } + filteringMask[id] = count == 8 ? 1 : 0; + } + + filteringMask[id] = (count < nbMinNeighbours) ? 1 : 0; + }); + + for(size_t ii = 0; ii < filteringMask.size(); ++ii){ + if(filteringMask[ii] == 1){ + depthMask[ii] = 0; + } + } + } +} + +auto K4Device::Impl::maximum_local_depth_difference(float max, Connectivity connectivity) -> void{ + + auto depthBuffer = reinterpret_cast(depthImage->get_buffer()); + + std::fill(filteringMask.begin(),filteringMask.end(), 0); + + for_each(std::execution::par_unseq, std::begin(indicesDepths1DNoBorders), std::end(indicesDepths1DNoBorders), [&](size_t id){ + + if(depthMask[id] == 0){ + return; + } + + float meanDiff = 0; + float currDepth = depthBuffer[id]; + size_t count = 0; + if(connectivity == Connectivity::Connectivity_4){ + for(auto cId : indicesNeighbours4Depth1D[id]){ + if(depthMask[cId] == 1){ + meanDiff += abs(depthBuffer[cId]-currDepth); + ++count; + } + } + }else{ + for(auto cId : indicesNeighbours8Depth1D[id]){ + if(depthMask[cId] == 1){ + meanDiff += abs(depthBuffer[cId]-currDepth); + ++count; + } + } + } + filteringMask[id] = (count == 0) ? 0 : ((1.*meanDiff/count < max) ? 1 : 0); + + }); + + for(size_t ii = 0; ii < filteringMask.size(); ++ii){ + if(filteringMask[ii] == 0){ + depthMask[ii] = 0; + } + } +} + + +auto K4Device::Impl::erode(uint8_t nbLoops, Connectivity connectivity) -> void{ + + for(size_t numLoop = 0; numLoop < nbLoops; ++numLoop){ + + std::fill(filteringMask.begin(),filteringMask.end(), 0); + + for_each(std::execution::par_unseq, std::begin(indicesDepths1DNoBorders), std::end(indicesDepths1DNoBorders), [&](size_t id){ + + if(depthMask[id] == 0){ + return; + } + + uint8_t count = 0; + if(connectivity == Connectivity::Connectivity_4){ + for(auto cId : indicesNeighbours4Depth1D[id]){ + + if(depthMask[cId] == 1){ + ++count; + } + } + filteringMask[id] = count == 4 ? 
1 : 0; + }else{ + for(auto cId : indicesNeighbours8Depth1D[id]){ + + if(depthMask[cId] == 1){ + ++count; + } + } + filteringMask[id] = count == 8 ? 1 : 0; + } + }); + + for(size_t ii = 0; ii < filteringMask.size(); ++ii){ + if(filteringMask[ii] == 0){ + depthMask[ii] = 0; + } + } + } +} + +auto K4Device::Impl::filter_color_image(const K4Filters &f) -> void{ + + if(!colorImage.has_value()){ + return; + } + + if(!depthImage.has_value() && !infraredImage.has_value()){ + return; + } + + Bench::start("[K4Device] Filter color"); + + // retrieve buffers + geo::Pt4* colorBuffer = reinterpret_cast*>(colorImage->get_buffer()); + int16_t* depthBuffer = depthImage.has_value() ? reinterpret_cast(depthImage.value().get_buffer()) : nullptr; + uint16_t* infraredBuffer = infraredImage.has_value() ? reinterpret_cast(infraredImage.value().get_buffer()) : nullptr; + + static_cast(infraredBuffer); + + for_each(std::execution::par_unseq, std::begin(indicesDepths1D), std::end(indicesDepths1D), [&](size_t id){ + if(f.invalidateColorFromDepth){ + if(depthBuffer[id] == k4_invalid_depth_value){ + colorBuffer[id] = k4_invalid_color_value; + }else{ + if(f.keepOnlyBiggestCluster){ + colorBuffer[meanBiggestZoneId] = {255,0,0,255}; + } + } + } + }); + + Bench::stop(); +} + +auto K4Device::Impl::filter_infrared_image(const K4Filters &f) -> void{ + + if(!infraredImage.has_value()){ + return; + } + + if(!colorImage.has_value() && !depthImage.has_value()){ + return; + } + + Bench::start("[K4Device] Filter infra"); + + // retrieve buffers + uint16_t* infraredBuffer = reinterpret_cast(infraredImage->get_buffer()); + geo::Pt4* colorBuffer = colorImage.has_value() ? reinterpret_cast*>(colorImage.value().get_buffer()) : nullptr; + int16_t* depthBuffer = depthImage.has_value() ? 
reinterpret_cast(depthImage.value().get_buffer()) : nullptr; + + static_cast(colorBuffer); + + for_each(std::execution::par_unseq, std::begin(indicesDepths1D), std::end(indicesDepths1D), [&](size_t id){ + if(f.invalidateInfraFromDepth){ + if(depthBuffer[id] == k4_invalid_depth_value){ + infraredBuffer[id] = k4_invalid_infra_value; + } + } + }); + + Bench::stop(); +} + +auto K4Device::Impl::generate_cloud(const K4DataSettings &d, K4Mode mode) -> void{ + + if(has_cloud(mode) && pointCloudImage.has_value() && (d.sendCloud || d.generateCloudLocal)){ + Bench::start("[K4Device] Transformation depth_image_to_point_cloud"); + transformation.depth_image_to_point_cloud(depthImage.value(), K4A_CALIBRATION_TYPE_DEPTH, &pointCloudImage.value()); + Bench::stop(); + } +} + +auto K4Device::Impl::compress_frame(const K4Filters &f, const K4DataSettings &d, K4Mode mode) -> std::unique_ptr{ + + tool::Bench::start("[K4Device::compress_frame] Generate compressed frame"); + + frameCompressor.set_settings(d); + auto compressedFrame = frameCompressor.compress( + mode, + colorImage, f.jpegCompressionRate, + depthImage, validDepthValues, + infraredImage, + pointCloudImage, + calibration, + d.captureAudio ? reinterpret_cast(audioFrames.data()) : nullptr, lastFrameCount, + d.captureIMU ? 
&imuSample : nullptr + ); + + if(compressedFrame != nullptr){ + + compressedFrame->idCapture = static_cast(idCapture); + +// if(depthImageTS.count() != 0){ +// std::cout << "d-"; +// compressedFrame->afterCaptureTS = (depthImageTS + Time::offsetNano).count(); +// }else if(infraredImageTS.count() != 0){ +// std::cout << "c-"; +// compressedFrame->afterCaptureTS = (infraredImageTS + Time::offsetNano).count(); +// }else if(colorImageTS.count() != 0){ +// std::cout << "b-"; +// compressedFrame->afterCaptureTS = (colorImageTS + Time::offsetNano).count(); +// }else{ +// std::cout << "a-"; + compressedFrame->afterCaptureTS = localTimestamps["after_capture"sv]->count(); +// } + } + + tool::Bench::stop(); + + return compressedFrame; +} + +auto K4Device::Impl::create_local_frame(const K4DataSettings &d, K4Mode mode) -> void{ + + // write frame + tool::Bench::start("[K4Device::create_local_frame] Write display data frame"); + + auto dFrame = std::make_unique(); + dFrame->idCapture = static_cast(idCapture); + +// if(depthImageTS.count() != 0){ +// dFrame->afterCaptureTS = depthImageTS.count(); +// }else if(infraredImageTS.count() != 0){ +// dFrame->afterCaptureTS = infraredImageTS.count(); +// }else if(colorImageTS.count() != 0){ +// dFrame->afterCaptureTS = colorImageTS.count(); +// }else{ + dFrame->afterCaptureTS = localTimestamps["after_capture"sv]->count(); +// } + + dFrame->mode = mode; + + // init depth frame + static constexpr std::array depthGradient ={ + Pt3f{0.f,0.f,1.f}, + {0.f,1.f,1.f}, + {0.f,1.f,0.f}, + {1.f,1.f,0.f}, + {1.f,0.f,0.f}, + }; + + // color frame + if(d.generateRGBLocalFrame && colorImage.has_value()){ + + tool::Bench::start("[K4Device::create_local_frame] color"); + + size_t width, height; + if(depthImage.has_value()){ + width = depthImage->get_width_pixels(); + height = depthImage->get_height_pixels(); + }else{ + width = colorImage->get_width_pixels(); + height = colorImage->get_height_pixels(); + } + dFrame->colorWidth = width; + 
dFrame->colorHeight = height; + dFrame->imageColorData.resize(width*height); + + auto colorBuffer = reinterpret_cast*>(colorImage->get_buffer()); + std::vector *ids; + if(depthImage.has_value()){ + ids = &indicesDepths1D; + }else{ + ids = &indicesColors1D; + } + + for_each(std::execution::par_unseq, std::begin(*ids), std::end(*ids), [&](size_t id){ + dFrame->imageColorData[id] = { + colorBuffer[id].z(), + colorBuffer[id].y(), + colorBuffer[id].x(), + 255 + }; + }); + + tool::Bench::stop(); + } + + // depth frame + if(d.generateDepthLocalFrame && depthImage.has_value()){ + + tool::Bench::start("[K4Device::create_local_frame] depth"); + + dFrame->depthWidth = depthImage->get_width_pixels(); + dFrame->depthHeight = depthImage->get_height_pixels(); + dFrame->imageDepthData.resize(dFrame->depthWidth * dFrame->depthHeight); + + auto depthBuffer = reinterpret_cast(depthImage->get_buffer()); + const auto dRange = range(mode)*1000.f; + const auto diff = dRange.y() - dRange.x(); + + for_each(std::execution::par_unseq, std::begin(indicesDepths1D), std::end(indicesDepths1D), [&](size_t id){ + + if(depthBuffer[id] == k4_invalid_depth_value){ + dFrame->imageDepthData[id] = geo::Pt3{}; + return; + } + + float vF = (static_cast(depthBuffer[id]) - dRange.x())/diff; + float intPart; + float decPart = std::modf((vF*(depthGradient.size()-1)), &intPart); + size_t idG = static_cast(intPart); + + auto col = depthGradient[idG]*(1.f-decPart) + depthGradient[idG+1]*decPart; + dFrame->imageDepthData[id] = { + static_cast(255*col.x()), + static_cast(255*col.y()), + static_cast(255*col.z()) + }; + }); + + tool::Bench::stop(); + } + + // infrared frame + if(d.generateInfraLocalFrame && infraredImage.has_value()){ + + tool::Bench::start("[K4Device::create_local_frame] display_infrared"); + + dFrame->infraWidth = infraredImage->get_width_pixels();; + dFrame->infraHeight = infraredImage->get_height_pixels();; + dFrame->imageInfraData.resize(dFrame->infraWidth * dFrame->infraHeight); + + auto 
infraBuffer = reinterpret_cast(infraredImage->get_buffer()); + + const float max = 2000; + for_each(std::execution::par_unseq, std::begin(indicesDepths1D), std::end(indicesDepths1D), [&](size_t id){ + + float vF = static_cast(infraBuffer[id]); + if(vF > max){ + vF = max; + } + vF/=max; + dFrame->imageInfraData[id] = { + static_cast(255*vF), + static_cast(255*vF), + static_cast(255*vF) + }; + }); + + tool::Bench::stop(); + } + + // cloud + if(d.generateCloudLocal && pointCloudImage.has_value() && colorImage.has_value() && depthImage.has_value()){ + + tool::Bench::start("[K4Device::create_local_frame] cloud"); + + dFrame->cloud.vertices.resize(validDepthValues); + dFrame->cloud.colors.resize(validDepthValues); + dFrame->cloud.normals.resize(validDepthValues); + + auto cloudBuffer = reinterpret_cast*>(pointCloudImage->get_buffer()); + auto colorBuffer = reinterpret_cast*>(colorImage->get_buffer()); + auto depthBuffer = reinterpret_cast(depthImage->get_buffer()); + + for_each(std::execution::par_unseq, std::begin(indexDepthVertexCorrrespondance), std::end(indexDepthVertexCorrrespondance), [&](auto idC){ + + auto idD = std::get<0>(idC); + if(depthBuffer[idD] == k4_invalid_depth_value){ + return; + } + + auto idV = std::get<1>(idC); + dFrame->cloud.vertices[idV]= geo::Pt3f{ + static_cast(-cloudBuffer[idD].x()), + static_cast(-cloudBuffer[idD].y()), + static_cast( cloudBuffer[idD].z()) + }*0.001f; + dFrame->cloud.colors[idV] = geo::Pt3f{ + static_cast(colorBuffer[idD].z()), + static_cast(colorBuffer[idD].y()), + static_cast(colorBuffer[idD].x()) + }/255.f; + + // A B C + // D I E + // F G H + const auto &idN = indicesNeighbours8Depth1D[idD]; + const auto &idDVC = indexDepthVertexCorrrespondance; + const auto &v = dFrame->cloud.vertices; + Vec3f normal{}; + + const auto &vId = v[idV]; + if(idN[0] != -1 && std::get<1>(idDVC[idN[0]]) != -1){ + if(idN[3] != -1 && std::get<1>(idDVC[idN[3]]) != -1){ // vId x vIA + normal += cross(v[std::get<1>(idDVC[idN[3]])] - vId, 
v[std::get<1>(idDVC[idN[0]])] - vId); + } + if(idN[1] != -1 && std::get<1>(idDVC[idN[1]]) != -1){ // vIA x vIB + normal += cross(v[std::get<1>(idDVC[idN[0]])] - vId, v[std::get<1>(idDVC[idN[1]])] - vId); + } + } + if(idN[2] != -1 && std::get<1>(idDVC[idN[2]]) != -1){ + if(idN[1] != -1 && std::get<1>(idDVC[idN[1]]) != -1){ // vIB x vIC + normal += cross(v[std::get<1>(idDVC[idN[1]])] - vId, v[std::get<1>(idDVC[idN[2]])] - vId); + } + if(idN[4] != -1 && std::get<1>(idDVC[idN[4]]) != -1){ // vIC x vIE + normal += cross(v[std::get<1>(idDVC[idN[2]])] - vId, v[std::get<1>(idDVC[idN[4]])] - vId); + } + } + if(idN[7] != -1 && std::get<1>(idDVC[idN[7]]) != -1){ + if(idN[4] != -1 && std::get<1>(idDVC[idN[4]]) != -1){ // vIE x vIH + normal += cross(v[std::get<1>(idDVC[idN[4]])] - vId, v[std::get<1>(idDVC[idN[7]])] - vId); + } + if(idN[6] != -1 && std::get<1>(idDVC[idN[6]]) != -1){ // vIH x vIG + normal += cross(v[std::get<1>(idDVC[idN[7]])] - vId, v[std::get<1>(idDVC[idN[6]])] - vId); + } + } + if(idN[5] != -1 && std::get<1>(idDVC[idN[5]]) != -1){ + if(idN[6] != -1 && std::get<1>(idDVC[idN[6]]) != -1){ // vIG x vIF + normal += cross(v[std::get<1>(idDVC[idN[5]])] - vId, v[std::get<1>(idDVC[idN[6]])] - vId); + } + if(idN[3] != -1 && std::get<1>(idDVC[idN[3]]) != -1){ // vIF x vID + normal += cross(v[std::get<1>(idDVC[idN[6]])] - vId, v[std::get<1>(idDVC[idN[3]])] - vId); + } + } + dFrame->cloud.normals[idV] = normalize(normal); + }); + + tool::Bench::stop(); + } + + // imu sample + if(d.captureIMU){ + dFrame->imuSample = imuSample; + }else{ + dFrame->imuSample = std::nullopt; + } + + tool::Bench::stop(); + + // send audio + if(d.captureAudio && lastFrameCount != 0){ + + tool::Bench::start("[K4Device::create_local_frame] audio"); + + // copy audio frames + dFrame->audioFrames.resize(lastFrameCount); + auto audioFrom = reinterpret_cast(audioFrames.data()); + std::copy(audioFrom, audioFrom + 7*lastFrameCount, reinterpret_cast(dFrame->audioFrames.data())); + + tool::Bench::stop(); + 
} + + // send bodies + if(d.captureBodies){ + dFrame->bodies.resize(bodiesCount); + std::copy(bodies.begin(), bodies.begin() + bodiesCount, dFrame->bodies.begin()); + } + + // store frame + frames.push_back(std::make_tuple(localTimestamps["after_capture"sv].value(), std::move(dFrame))); + + // check delay + auto checkDelayTS = Time::nanoseconds_since_epoch(); + bool foundFrame = false; + size_t idFrame = 0; + for(size_t ii = 0; ii < frames.size(); ++ii){ + auto diff = std::chrono::duration_cast(checkDelayTS - std::get<0>(frames[ii])); + if(std::chrono::duration_cast(diff).count() > millisecondsDelay){ + foundFrame = true; + idFrame = ii; + }else{ + break; + } + } + + // send frame + if(foundFrame){ + kinect4->new_frame_signal(std::shared_ptr(std::move(std::get<1>(frames[idFrame])))); + tool::erase_range(frames, 0, idFrame + 1); + } +} + + +auto K4Device::Impl::init_data(K4Mode mode) -> void{ + + // init capture + capture = std::make_unique(); + + // init transform + transformation = k4a::transformation(calibration); + + // reset images + // # timestamps + colorImageTS = std::chrono::nanoseconds{0}; + depthImageTS = std::chrono::nanoseconds{0}; + infraredImageTS = std::chrono::nanoseconds{0}; + // # capture + colorImage = std::nullopt; + depthImage = std::nullopt; + infraredImage = std::nullopt; + pointCloudImage = std::nullopt; + // # processing + convertedColorImage = std::nullopt; + depthSizedColorImage = std::nullopt; + + // reset sizes + idCapture = 0; + colorWidth = 0; + colorHeight = 0; + colorSize = 0; + depthWidth = 0; + depthHeight = 0; + depthSize = 0; + colorResolution = color_resolution(mode); + imageFormat = image_format(mode); + depthMode = depth_mode(mode); + + if(colorResolution != K4ColorResolution::OFF){ + + // retrieve colors dimensions + const auto colorDims = k4a::GetColorDimensions(static_cast(colorResolution)); + colorWidth = std::get<0>(colorDims); + colorHeight = std::get<1>(colorDims); + colorSize = colorWidth*colorHeight; + + 
if(imageFormat == K4ImageFormat::YUY2 || imageFormat == K4ImageFormat::NV12 || imageFormat == K4ImageFormat::MJPEG){ + convertedColorImage = k4a::image::create(K4A_IMAGE_FORMAT_COLOR_BGRA32, + static_cast(colorWidth), + static_cast(colorHeight), + static_cast(colorWidth * 4 * sizeof(uint8_t)) + ); + } + + // set color indices + indicesColors1D.resize(colorSize); + std::iota(std::begin(indicesColors1D), std::end(indicesColors1D), 0); + } + + + if(depthMode != K4DepthMode::OFF){ + + // retrieve depth dimensions + auto depthRes = depth_resolution(mode); + depthWidth = depthRes.x(); + depthHeight = depthRes.y(); + depthSize = depthWidth*depthHeight; + + // init resized color image + if(colorResolution != K4ColorResolution::OFF){ + depthSizedColorImage = k4a::image::create(K4A_IMAGE_FORMAT_COLOR_BGRA32, + static_cast(depthWidth), + static_cast(depthHeight), + static_cast(depthWidth * 4 * sizeof(uint8_t)) + ); + } + + // set depth indices + depthMask.resize(depthSize); + filteringMask.resize(depthSize); + depthFiltering.resize(depthSize); + zonesId.resize(depthSize); + indicesDepths1D.resize(depthSize); + indexDepthVertexCorrrespondance.resize(depthSize); + + + indicesDepthsSortedCorrespondanceNoBorders.resize((depthWidth-2)*(depthHeight-2)); + + std::iota(std::begin(indicesDepths1D), std::end(indicesDepths1D), 0); + indicesDepths3D.resize(depthSize); + + indicesDepths1DNoBorders.clear(); + indicesDepths1DNoBorders.reserve((depthWidth-2)*(depthHeight-2)); + + indicesNeighbours4Depth1D.clear(); + indicesNeighbours4Depth1D.reserve(depthSize); + + indicesNeighbours8Depth1D.clear(); + indicesNeighbours8Depth1D.reserve(depthSize); + + size_t id = 0; + for(size_t ii = 0; ii < depthHeight; ++ii){ + for(size_t jj = 0; jj < depthWidth; ++jj){ + + indicesDepths3D[id] = {id,jj,ii}; + + // A B C + // D I E + // F G H + std::int32_t idA = -1; + std::int32_t idD = -1; + std::int32_t idF = -1; + std::int32_t idC = -1; + std::int32_t idE = -1; + std::int32_t idH = -1; + std::int32_t idB 
= -1; + std::int32_t idG = -1; + + bool notOnLeft = jj > 0; + bool notOnRight = jj < depthWidth - 1; + bool notOnTop = ii > 0; + bool notOnBottom = ii < depthHeight-1; + + if(notOnLeft){ + idD = id - 1; + if(notOnTop){ + idA = id - depthWidth-1; + } + if(notOnBottom){ + idF = id + depthWidth-1; + } + } + if(notOnRight){ + idE = id + 1; + if(notOnTop){ + idC = id - depthWidth + 1; + } + if(notOnBottom){ + idH = id + depthWidth + 1; + } + } + if(notOnTop){ + idB = id - depthWidth; + } + if(notOnBottom){ + idG = id + depthWidth; + } + + indicesNeighbours4Depth1D.push_back({idB,idD,idE,idG}); + indicesNeighbours8Depth1D.push_back({idA,idB,idC,idD,idE,idF,idG,idH}); + + if(notOnLeft && notOnRight && notOnTop && notOnBottom){ + indicesDepths1DNoBorders.push_back(id); + } + + ++id; + } + } + + // init cloud image + if(has_cloud(mode)){ + pointCloudImage = k4a::image::create(K4A_IMAGE_FORMAT_CUSTOM, + static_cast(depthWidth), + static_cast(depthHeight), + static_cast(depthWidth * 3 * sizeof(int16_t)) + ); + } + } +} + + +auto K4Device::Impl::read_from_microphones() -> void{ + + lastFrameCount = 0; + if(audioListener != nullptr){ + + // process audio frame + audioListener->ProcessFrames([&](k4a::K4AMicrophoneFrame *frame, const size_t frameCount) { + + // store last count + lastFrameCount = frameCount; + if(lastFrameCount == 0){ + return lastFrameCount; + } + + // resize audio buffer + if(audioFrames.size() < lastFrameCount){ + audioFrames.resize(lastFrameCount); + } + + // copy data + std::copy(frame, frame + lastFrameCount, audioFrames.begin()); + + return lastFrameCount; + }); + + if (audioListener->GetStatus() != SoundIoErrorNone){ + Logger::error(std::format("[K4Device] Error while recording {}\n", soundio_strerror(audioListener->GetStatus()))); + }else if (audioListener->Overflowed()){ + Logger::warning(std::format("[K4Device] Warning: sound overflow detected!\n")); + audioListener->ClearOverflowed(); + } + } +} + +auto K4Device::Impl::read_from_imu() -> void{ + 
k4a_imu_sample_t sample; + if(device.get_imu_sample(&sample, std::chrono::milliseconds(1))){ + imuSample.temperature = sample.temperature; + const auto &dAcc = sample.acc_sample.xyz; + imuSample.acc ={dAcc.x,dAcc.y,dAcc.z}; + imuSample.accTsMs = sample.acc_timestamp_usec; + const auto &dGyr = sample.gyro_sample.xyz; + imuSample.gyr = {dGyr.x,dGyr.y,dGyr.z}; + imuSample.gyrTsMs = sample.gyro_timestamp_usec; + kinect4->new_imu_sample_signal(imuSample); + } +} + +auto K4Device::Impl::get_color_image() -> bool{ + if(colorResolution != K4ColorResolution::OFF){ + + Bench::start("[K4Device] Capture get_color_image"); + colorImage = capture->get_color_image(); + Bench::stop(); + + if (!colorImage->is_valid()){ + Logger::error("[K4Device] Failed to get color image from capture\n"); + return false; + } + + colorImageTS = colorImage->get_system_timestamp(); + } + return true; +} + +auto K4Device::Impl::get_depth_image() -> bool{ + + if(depthMode != K4DepthMode::OFF){ + + Bench::start("[K4Device] Capture get_depth_image"); + depthImage = capture->get_depth_image(); + Bench::stop(); + + if (!depthImage->is_valid()){ + Logger::error("[K4Device] Failed to get depth image from capture\n"); + return false; + } + + depthImageTS = depthImage->get_system_timestamp(); + } + + return true; +} + +auto K4Device::Impl::get_infra_image(K4Mode mode) -> bool{ + + if(has_infrared(mode)){ + + Bench::start("[K4Device] Capture get_ir_image"); + infraredImage = capture->get_ir_image(); + Bench::stop(); + + if (!infraredImage->is_valid()){ + Logger::error("[K4Device] Failed to get infrared image from capture\n"); + return false; + } + + infraredImageTS = infraredImage->get_system_timestamp(); + } + + return true; +} + + +auto K4Device::Impl::convert_color_image(const K4Filters &f) -> void{ + + if(colorResolution == K4ColorResolution::OFF){ + return; + } + + if(imageFormat == K4ImageFormat::NV12){ + + // libyuv refers to pixel order in system-endian order but OpenGL refers to + // pixel order in 
big-endian order, which is why we create the OpenGL texture + // as "RGBA" but then use the "ABGR" libyuv function here. + + const int luminanceStride = colorImage->get_width_pixels(); + const int hueSatStride = colorImage->get_width_pixels(); + const uint8_t *hueSatStart = colorImage->get_buffer() + luminanceStride * colorImage->get_height_pixels(); + + Bench::start("[K4Device] NV12 convert"); + int result = libyuv::NV12ToARGB( + colorImage->get_buffer(), // src_y + luminanceStride, // src_stride_y + hueSatStart, // src_vu + hueSatStride, // src_stride_vu + convertedColorImage->get_buffer(), // dst_argb + colorImage->get_width_pixels() * static_cast(sizeof(geo::Pt4)), // dst_stride_argb + colorImage->get_width_pixels(), // width + colorImage->get_height_pixels() // height + ); + Bench::stop(); + + if (result != 0){ + Logger::error("[K4Device::Impl::convert_color_image] libyuv::NV12ToARGB error.\n"); + return; + } + + colorImage = convertedColorImage; + + } else if(imageFormat == K4ImageFormat::YUY2 ){ // //YUY2 is a 4:2:2 format, so there are 4 bytes per 'chunk' of data, and each 'chunk' represents 2 pixels. 
+ + Bench::start("[K4Device] YUY2 convert"); + const int stride = colorImage->get_width_pixels() * 4 / 2; + int result = libyuv::YUY2ToARGB( + colorImage->get_buffer(), // src_yuy2, + stride, // src_stride_yuy2, + convertedColorImage->get_buffer(), // dst_argb, + colorImage->get_width_pixels() * static_cast(sizeof(geo::Pt4)), // dst_stride_argb + colorImage->get_width_pixels(), // width, + colorImage->get_height_pixels() // height + ); + Bench::stop(); + + if (result != 0){ + Logger::error("[K4Device::Impl::convert_color_image] libyuv::YUY2ToARGB error.\n"); + return; + } + + colorImage = convertedColorImage; + + }else if(imageFormat == K4ImageFormat::MJPEG){ + + frameUncompressor.uncompress_jpeg_8_bits_data( + colorImage->get_width_pixels(), + colorImage->get_height_pixels(), + K4FrameUncompressor::ColorFormat::BGRA, + colorImage->get_size(), + colorImage->get_buffer(), + convertedColorImage->get_buffer() + ); + + colorImage = convertedColorImage; + + }else if(imageFormat == K4ImageFormat::BGRA32){ + // nothing to do + } +} + +auto K4Device::Impl::resize_color_to_fit_depth() -> void{ + + if(colorImage.has_value() && depthImage.has_value()){ + + Bench::start("[K4Device] Transformation color_image_to_depth_camera"); + transformation.color_image_to_depth_camera( + depthImage.value(), + colorImage.value(), + &depthSizedColorImage.value()); + + colorImage = depthSizedColorImage; + Bench::stop(); + } +} + + + diff --git a/cpp-projects/base/camera/kinect4/k4_device.hpp b/cpp-projects/base/camera/kinect4/k4_device.hpp new file mode 100644 index 0000000..cf956ba --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_device.hpp @@ -0,0 +1,96 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** 
+** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "thirdparty/sigslot/signal.hpp" +#include "k4_frame.hpp" +#include "k4_compressed_frame.hpp" +#include "k4_color_settings.hpp" +#include "k4_filters.hpp" +#include "k4_data_settings.hpp" +#include "k4_config_settings.hpp" +#include "k4_delay.hpp" + +namespace tool::camera { + +class K4Device { + +public: + + K4Device(); + ~K4Device(); + + auto open(std::uint32_t deviceId) -> bool; + auto refresh_devices_list() -> void; + auto close() -> void; + auto clean() -> void; + + // getters + auto nb_devices() const noexcept -> std::uint32_t; + auto device_name() const -> std::string; + auto device_id() const -> std::uint32_t; + auto is_opened() const noexcept -> bool; + auto cameras_started() const noexcept -> bool; + auto get_nb_capture_per_second() const noexcept -> float; + auto get_capture_duration_ms() noexcept -> std::int64_t; + auto get_processing_duration_ms() noexcept -> 
std::int64_t; + auto get_compressing_duration_ms() noexcept -> std::int64_t; + auto get_duration_between_ms(std::string_view from, std::string_view to) noexcept -> std::int64_t; + auto get_duration_between_micro_s(std::string_view from, std::string_view to) noexcept -> std::int64_t; + + // # config + auto mode() const noexcept -> K4Mode; + auto is_LED_disabled() const noexcept -> bool; + auto synch_mode() const noexcept -> K4SynchronisationMode; + auto subordinate_delay_usec() const noexcept -> int; + auto color_and_depth_synchronized() const noexcept -> bool; + auto delay_between_color_and_depth_usec() const noexcept -> int; + + // cameras + auto start_cameras(const K4ConfigSettings &configS) -> bool; + auto stop_cameras() -> void; + + // settings + auto set_data_settings(const K4DataSettings &dataS) -> void; + auto set_filters(const K4Filters &filters) -> void; + auto set_color_settings(const K4ColorSettings &colorS) -> void; + auto send_data_state(bool state) -> void; + auto set_delay(K4Delay delay) -> void; + + // signals + sigslot::signal> new_frame_signal; + sigslot::signal> new_compressed_frame_signal; + sigslot::signal new_imu_sample_signal; + +private: + + struct Impl; + std::unique_ptr i; +}; +} + diff --git a/cpp-projects/base/camera/kinect4/k4_device_manager.cpp b/cpp-projects/base/camera/kinect4/k4_device_manager.cpp new file mode 100644 index 0000000..f367f94 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_device_manager.cpp @@ -0,0 +1,192 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or 
sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "k4_device_manager.hpp" + +// local +// # utility +#include "utility/format.hpp" +//#include "utility/thread.hpp" +// # camera +#include "camera/kinect4/k4_device.hpp" + +using namespace tool::camera; +using namespace std::chrono; + +struct K4DeviceManager::Impl{ + + // device + std::unique_ptr kinect = nullptr; + +// // frames +// SpinLock compressedFrameL; +// std::shared_ptr lastCompressedFrame = nullptr; +}; + +K4DeviceManager::K4DeviceManager(): i(std::make_unique()){ +} + +K4DeviceManager::~K4DeviceManager(){ + clean(); +} + +auto K4DeviceManager::initialize() -> void { + + i->kinect = std::make_unique(); + + init_connections(); + + for(size_t ii = 0; ii < i->kinect->nb_devices(); ++ii){ + auto t = std::format("Cam {}", ii); + update_device_name_signal(static_cast(ii), t); + } +} + +auto K4DeviceManager::clean() -> void { + i->kinect->clean(); +} + +auto K4DeviceManager::update_delay(K4Delay delayMs) -> void{ + i->kinect->set_delay(delayMs); +} + +auto K4DeviceManager::update_filters(const K4Filters &filters) -> void { + i->kinect->set_filters(filters); +} + +auto 
K4DeviceManager::update_color_settings(const K4ColorSettings &colorS) -> void { + i->kinect->set_color_settings(colorS); +} + +auto K4DeviceManager::get_capture_duration_ms() -> int64_t{ + return i->kinect->get_capture_duration_ms(); +} +auto K4DeviceManager::get_nb_capture_per_second() -> float{ + return i->kinect->get_nb_capture_per_second(); +} + +auto K4DeviceManager::get_processing_duration_ms() -> int64_t{ + return i->kinect->get_processing_duration_ms(); +} + +auto K4DeviceManager::get_compressing_duration_ms() -> int64_t{ + return i->kinect->get_compressing_duration_ms(); +} + +auto K4DeviceManager::get_duration_between_ms(std::string_view from, std::string_view to) noexcept -> int64_t{ + return i->kinect->get_duration_between_ms(from, to); +} + +auto K4DeviceManager::get_duration_between_micro_s(std::string_view from, std::string_view to) noexcept -> int64_t{ + return i->kinect->get_duration_between_micro_s(from, to); +} + +auto K4DeviceManager::update_device_list() -> void{ + + i->kinect->refresh_devices_list(); + + if(i->kinect->nb_devices() > 0){ + for(size_t ii = 0; ii < i->kinect->nb_devices(); ++ii){ + update_device_name_signal(static_cast(ii), std::format("Id:{} Num:...", ii)); + } + }else{ + update_device_name_signal(-1, ""); + } +} + +auto K4DeviceManager::update_settings(const K4DeviceSettings &settings) -> void{ + + i->kinect->set_data_settings(settings.dataS); + + bool stopDevice = + ((i->kinect->is_opened() ? 
(i->kinect->device_id() != settings.configS.idDevice) : false) || !settings.actionsS.startDevice); + + bool stopCamera = + stopDevice || + !settings.actionsS.openCamera || + (settings.configS.mode != i->kinect->mode()) || + (settings.configS.disableLED != i->kinect->is_LED_disabled()) || + (settings.configS.synchronizeColorAndDepth != i->kinect->color_and_depth_synchronized()) || + (settings.configS.delayBetweenColorAndDepthUsec != i->kinect->delay_between_color_and_depth_usec()) || + (settings.configS.subordinateDelayUsec != i->kinect->subordinate_delay_usec()) || + (settings.configS.synchMode != i->kinect->synch_mode()); + + // stop / close + if(i->kinect->is_opened()){ + if(stopCamera){ + i->kinect->stop_cameras(); + } + if(stopDevice){ + i->kinect->close(); + } + } + + if(stopDevice || stopCamera){ + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + + // open device + if(settings.actionsS.startDevice){ + if(i->kinect->open(settings.configS.idDevice)){ + update_device_name_signal(settings.configS.idDevice, fmt("Id:{} Num:{}", settings.configS.idDevice, i->kinect->device_name())); + } + } + + // start camera + if(settings.actionsS.openCamera){ + i->kinect->start_cameras(settings.configS); + } +} + +auto K4DeviceManager::init_connections() -> void { + + i->kinect->new_frame_signal.connect([&](std::shared_ptr frame){ + new_frame_signal(frame); + }); + + i->kinect->new_compressed_frame_signal.connect([&](std::shared_ptr frame){ + new_compressed_frame_signal(frame); +// std::lock_guard l(i->compressedFrameL); +// i->lastCompressedFrame = frame; + }); + + i->kinect->new_imu_sample_signal.connect([&](K4ImuSample sample){ + new_imu_sample_signal(sample); + }); +} + +//auto K4DeviceManager::get_last_compressed_frame() -> std::shared_ptr{ + +// if(!i->compressedFrameL.try_lock()){ +// return nullptr; +// } + +// std::shared_ptr frame = nullptr; +// std::swap(frame, i->lastCompressedFrame); +// i->compressedFrameL.unlock(); + +// return frame; +//} diff 
--git a/cpp-projects/base/camera/kinect4/k4_device_manager.hpp b/cpp-projects/base/camera/kinect4/k4_device_manager.hpp new file mode 100644 index 0000000..0d3afaa --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_device_manager.hpp @@ -0,0 +1,80 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "thirdparty/sigslot/signal.hpp" +#include "camera/kinect4/k4_frame.hpp" +#include "camera/kinect4/k4_compressed_frame.hpp" +#include "camera/kinect4/k4_device_settings.hpp" +#include "camera/kinect4/k4_filters.hpp" +#include "camera/kinect4/k4_delay.hpp" +#include "camera/kinect4/k4_color_settings.hpp" + +namespace tool::camera { + +template +using SSS = sigslot::signal; + /* Front end for a Kinect4 device. Declares lifecycle calls (initialize / clean), update_* setters for delay, device list, device settings, filters and color settings, duration and capture-rate getters, and signals carrying new frames, compressed frames and IMU samples. All state is kept in the private Impl reached through `i`. update_device_name_signal is a static inline member, i.e. one signal shared by every manager instance. */+class K4DeviceManager{ +public: + + K4DeviceManager(); + ~K4DeviceManager(); + + auto initialize() -> void; + auto clean() -> void; + + auto update_delay(K4Delay delayMs) -> void; + auto update_device_list() -> void; + auto update_settings(const K4DeviceSettings &settings) -> void; + auto update_filters(const K4Filters &filters) -> void; + auto update_color_settings(const K4ColorSettings &colorS) -> void; + + auto get_capture_duration_ms() -> std::int64_t; + auto get_processing_duration_ms() -> std::int64_t; + auto get_compressing_duration_ms() -> std::int64_t; + auto get_duration_between_ms(std::string_view from, std::string_view to) noexcept -> std::int64_t; + auto get_duration_between_micro_s(std::string_view from, std::string_view to) noexcept -> std::int64_t; + + auto get_nb_capture_per_second() -> float; + +// auto get_last_compressed_frame() -> std::shared_ptr; + + sigslot::signal> new_frame_signal; + sigslot::signal> new_compressed_frame_signal; + sigslot::signal new_imu_sample_signal; + static inline SSS update_device_name_signal; + +private: + /* Forwards the device signals to the public signals above. */ + auto init_connections() -> void; + + struct Impl; + std::unique_ptr i; +}; +} diff --git a/cpp-projects/base/camera/kinect4/k4_device_settings.cpp b/cpp-projects/base/camera/kinect4/k4_device_settings.cpp new file mode 100644 index 0000000..caf57e5 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_device_settings.cpp @@ -0,0 +1,67 @@ + + 
+/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#include "k4_device_settings.hpp" + +// base +#include "utility/io_data.hpp" + +using namespace tool::camera; + + /* Returns settings whose configS, dataS and actionsS are each produced by the matching default_init_for_grabber() factory. */+auto K4DeviceSettings::default_init_for_grabber() -> K4DeviceSettings{ + K4DeviceSettings device; + device.configS = K4ConfigSettings::default_init_for_grabber(); + device.dataS = K4DataSettings::default_init_for_grabber(); + device.actionsS = K4ActionsSettings::default_init_for_grabber(); + return device; +} + /* Same as default_init_for_grabber(), using the default_init_for_manager() factory of each part. */+auto K4DeviceSettings::default_init_for_manager() -> K4DeviceSettings{ + K4DeviceSettings device; + device.configS = K4ConfigSettings::default_init_for_manager(); + device.dataS = K4DataSettings::default_init_for_manager(); + device.actionsS = K4ActionsSettings::default_init_for_manager(); + return device; +} + /* Deserializes configS, dataS and actionsS, in that order, from one contiguous buffer; the pointer is advanced by each part's total_data_size() before the next part is read. No buffer length is passed in, so the caller presumably guarantees at least total_data_size() readable bytes - TODO confirm. */+auto K4DeviceSettings::init_from_data(std::int8_t *data) -> void { + configS.init_from_data(data); + data += configS.total_data_size(); + dataS.init_from_data(data); + data += dataS.total_data_size(); + actionsS.init_from_data(data); +} + /* Mirror of init_from_data: writes configS, dataS and actionsS in the same order and at the same offsets. */+auto K4DeviceSettings::convert_to_data(std::int8_t *data) const -> void{ + configS.convert_to_data(data); + data += configS.total_data_size(); + dataS.convert_to_data(data); + data += dataS.total_data_size(); + actionsS.convert_to_data(data); +} + diff --git a/cpp-projects/base/camera/kinect4/k4_device_settings.hpp b/cpp-projects/base/camera/kinect4/k4_device_settings.hpp new file mode 100644 index 0000000..f8fba70 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_device_settings.hpp @@ -0,0 +1,65 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without 
limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "k4_config_settings.hpp" +#include "k4_data_settings.hpp" +#include "k4_actions_settings.hpp" + +namespace tool::camera { + + /* Groups the config, data and actions settings of a Kinect4 device and exposes them through the files::BinaryFileSettings interface as one binary blob whose size is the sum of the three parts' total_data_size(). NOTE(review): save_array_to_file / init_array_from_file are declared here but not defined in the k4_device_settings.cpp added by this patch - presumably defined elsewhere, verify. */struct K4DeviceSettings : files::BinaryFileSettings{ + + K4ConfigSettings configS; + K4DataSettings dataS; + K4ActionsSettings actionsS; + + K4DeviceSettings() = default; + K4DeviceSettings(std::int8_t *data){K4DeviceSettings::init_from_data(data);} + ~K4DeviceSettings(){} + + static auto default_init_for_grabber() -> K4DeviceSettings; + static auto default_init_for_manager() -> K4DeviceSettings; + + // i/o + auto init_from_data(std::int8_t *data) -> void override; + auto convert_to_data(std::int8_t *data) const -> void override; + auto total_data_size() const noexcept -> size_t override{ + return + configS.total_data_size() + + dataS.total_data_size() + + actionsS.total_data_size(); + } + auto type() const noexcept -> std::int32_t override {return static_cast(SettingsType::Device);}; + auto file_description() const 
noexcept -> std::string_view override {return settings_name(static_cast(type()));} + + static auto save_array_to_file(const std::vector &devicesA, const std::string &filePath) -> bool; + static auto init_array_from_file(std::vector &devicesA, const std::string &filePath) -> bool; + }; + +} diff --git a/cpp-projects/base/camera/kinect4/k4_display_settings.cpp b/cpp-projects/base/camera/kinect4/k4_display_settings.cpp new file mode 100644 index 0000000..4aeaf36 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_display_settings.cpp @@ -0,0 +1,58 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#include "k4_display_settings.hpp" + +// local +//#include "utility/io_data.hpp" + +using namespace tool::camera; + + /* Display-settings factories. All four functions below currently return a default-constructed object, so the values come from the member initializers in the header; the grabber / manager specific values are still marked TODO. */+auto K4SceneDisplaySettings::default_init_for_grabber() -> K4SceneDisplaySettings{ + K4SceneDisplaySettings display; + // TODO + return display; +} + +auto K4SceneDisplaySettings::default_init_for_manager() -> K4SceneDisplaySettings{ + K4SceneDisplaySettings display; + // TODO + return display; +} + +auto K4CloudDisplaySettings::default_init_for_grabber() -> K4CloudDisplaySettings{ + K4CloudDisplaySettings display; + // TODO + return display; +} + +auto K4CloudDisplaySettings::default_init_for_manager() -> K4CloudDisplaySettings{ + K4CloudDisplaySettings display; + // TODO + return display; +} + diff --git a/cpp-projects/base/camera/kinect4/k4_display_settings.hpp b/cpp-projects/base/camera/kinect4/k4_display_settings.hpp new file mode 100644 index 0000000..4f66d2d --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_display_settings.hpp @@ -0,0 +1,60 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. 
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "geometry/point4.hpp" +//#include "files/binary_settings.hpp" + +namespace tool::camera { + /* Scene-wide display options: a four-component background color and drawOnlyCloudId, which defaults to -1 (presumably meaning "no restriction" - TODO confirm). Plain struct, not a BinaryFileSettings. */+struct K4SceneDisplaySettings{ + geo::Pt4f backgroundColor = {0.83f, 0.84f, 0.81f, 1.f}; + + // local + int drawOnlyCloudId = -1; + + // i/o + static auto default_init_for_grabber() -> K4SceneDisplaySettings; + static auto default_init_for_manager() -> K4SceneDisplaySettings; +}; + /* Per-cloud display parameters with their default values; how each one is consumed by the renderer is not visible in this header. */+struct K4CloudDisplaySettings{ + bool cloudVisible = true; + bool forceCloudColor = false; + geo::Pt4f cloudColor = {1.f,0.f,0.f, 1.f}; + float factorUnicolor = 0.65f; + bool useVoxels = false; + float sizePoints = 5.f; + float sizeVoxels = 0.002f; + + // i/o + static auto default_init_for_grabber() -> K4CloudDisplaySettings; + static auto default_init_for_manager() -> K4CloudDisplaySettings; +}; + +} diff --git a/cpp-projects/base/camera/kinect4/k4_filters.cpp b/cpp-projects/base/camera/kinect4/k4_filters.cpp new file mode 100644 index 0000000..cf94a45 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_filters.cpp @@ -0,0 +1,106 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the 
"Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "k4_filters.hpp" + +// local +#include "utility/io_data.hpp" + +using namespace tool::camera; + /* Placeholder: returns a default-constructed K4Filters; calibration-specific values are still TODO. */+auto K4Filters::default_init_for_calibration() -> K4Filters{ + K4Filters filters; + // TODO + return filters; +} + /* Reads every serialized field from data through read(), which is handed a running offset (presumably advanced by each call) and m_inputFileSize. The order of the calls defines the binary layout and has to stay identical to the write() sequence in convert_to_data. */+auto K4Filters::init_from_data(std::int8_t *data) -> void{ + + size_t offset = 0; + read(minWidth, data, offset, m_inputFileSize); + read(maxWidth, data, offset, m_inputFileSize); + read(minHeight, data, offset, m_inputFileSize); + read(maxHeight, data, offset, m_inputFileSize); + read(minDepthValue, data, offset, m_inputFileSize); + read(maxDepthValue, data, offset, m_inputFileSize); + read(yFactor, data, offset, m_inputFileSize); + read(uFactor, data, offset, m_inputFileSize); + read(vFactor, data, offset, m_inputFileSize); + read(filterDepthWithColor, data, offset, m_inputFileSize); + read(filterColor, data, offset, m_inputFileSize); + read(maxDiffColor, data, offset, 
m_inputFileSize); + read(jpegCompressionRate, data, offset, m_inputFileSize); + read(doLocalDiffFiltering, data, offset, m_inputFileSize); + read(maxLocalDiff, data, offset, m_inputFileSize); + read(doMinNeighboursFiltering, data, offset, m_inputFileSize); + read(nbMinNeighbours, data, offset, m_inputFileSize); + read(minNeighboursLoops, data, offset, m_inputFileSize); + read(doErosion, data, offset, m_inputFileSize); + read(erosionLoops, data, offset, m_inputFileSize); + read(keepOnlyBiggestCluster, data, offset, m_inputFileSize); + read(invalidateColorFromDepth, data, offset, m_inputFileSize); + read(invalidateInfraFromDepth, data, offset, m_inputFileSize); + read(p1FMode, data, offset, m_inputFileSize); + read(p1Pos, data, offset, m_inputFileSize); + read(p1Rot, data, offset, m_inputFileSize); + read(p2FMode, data, offset, m_inputFileSize); + read(p2Pos, data, offset, m_inputFileSize); + read(p2Rot, data, offset, m_inputFileSize); +} + /* Writes the fields with write() in exactly the order init_from_data reads them; depthMask and idPencil are not written. */+auto K4Filters::convert_to_data(std::int8_t *data) const -> void{ + + size_t offset = 0; + write(minWidth, data, offset); + write(maxWidth, data, offset); + write(minHeight, data, offset); + write(maxHeight, data, offset); + write(minDepthValue, data, offset); + write(maxDepthValue, data, offset); + write(yFactor, data, offset); + write(uFactor, data, offset); + write(vFactor, data, offset); + write(filterDepthWithColor, data, offset); + write(filterColor, data, offset); + write(maxDiffColor, data, offset); + write(jpegCompressionRate, data, offset); + write(doLocalDiffFiltering, data, offset); + write(maxLocalDiff, data, offset); + write(doMinNeighboursFiltering, data, offset); + write(nbMinNeighbours, data, offset); + write(minNeighboursLoops, data, offset); + write(doErosion, data, offset); + write(erosionLoops, data, offset); + write(keepOnlyBiggestCluster, data, offset); + write(invalidateColorFromDepth, data, offset); + write(invalidateInfraFromDepth, data, offset); + write(p1FMode, data, offset); + write(p1Pos, data, 
offset); + write(p1Rot, data, offset); + write(p2FMode, data, offset); + write(p2Pos, data, offset); + write(p2Rot, data, offset); +} diff --git a/cpp-projects/base/camera/kinect4/k4_filters.hpp b/cpp-projects/base/camera/kinect4/k4_filters.hpp new file mode 100644 index 0000000..6979fae --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_filters.hpp @@ -0,0 +1,125 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "files/binary_settings.hpp" +#include "graphics/color.hpp" +#include "utility/bit_mask.hpp" +#include "k4_types.hpp" +#include "geometry/plane3.hpp" + +namespace tool::camera { + /* Filtering parameters for Kinect4 captures, serializable through the files::BinaryFileSettings interface. depthMask and idPencil are not part of the read/write sequence in k4_filters.cpp, so they are not persisted by init_from_data / convert_to_data. */+struct K4Filters : files::BinaryFileSettings{ + + // # width / height + unsigned int minWidth = 0; + unsigned int maxWidth = depth_resolution(k4DefaultMode).x(); + unsigned int minHeight = 0; + unsigned int maxHeight = depth_resolution(k4DefaultMode).y(); + + // color + float yFactor = 1.f; // deprecated + float uFactor = 1.f; // deprecated + float vFactor = 1.f; // deprecated + + ColorRGB32 filterColor = ColorRGB32{0.f,0.5f,0.08f}; + geo::Pt3f maxDiffColor = geo::Pt3f{20.f,0.5f,0.5f}; + + // # depth + std::int16_t minDepthValue = static_cast(range(k4DefaultMode).x()*1000.f); + std::int16_t maxDepthValue = static_cast(range(k4DefaultMode).y()*1000.f); + + // compression + unsigned char jpegCompressionRate = 80; + + // # neighbours + float maxLocalDiff = 10.f; + unsigned char nbMinNeighbours = 1; + unsigned char minNeighboursLoops = 1; + + // # erosion + unsigned char erosionLoops = 1; + + // flags + bool filterDepthWithColor = false; + bool doLocalDiffFiltering = true; + bool doMinNeighboursFiltering = false; + bool doErosion = false; + bool keepOnlyBiggestCluster = false; + bool invalidateColorFromDepth = false; + bool invalidateInfraFromDepth = false; + + enum class PlaneFilteringMode : std::int8_t{ + None, + Above, + Below + }; + + PlaneFilteringMode p1FMode = PlaneFilteringMode::None; + geo::Pt3f p1Pos; + geo::Pt3f p1Rot; + + PlaneFilteringMode p2FMode = PlaneFilteringMode::None; + geo::Pt3f p2Pos; + geo::Pt3f p2Rot; + + // masks + static constexpr auto maskMaxRes = camera::depth_resolution(camera::K4Mode::Cloud_1024x1024); + static constexpr auto maskMaxSize = maskMaxRes.x()*maskMaxRes.y(); + BitMask depthMask = BitMask(true); + + // local + int idPencil 
= 0; + + K4Filters() = default; + K4Filters(std::int8_t *data){K4Filters::init_from_data(data);} + static auto default_init_for_calibration() -> K4Filters; + + // i/o + auto init_from_data(std::int8_t *data) -> void override; + auto convert_to_data(std::int8_t *data) const -> void override; + + /* Byte size of the serialized form. NOTE(review): init_from_data / convert_to_data in k4_filters.cpp serialize four floats (yFactor, uFactor, vFactor, maxLocalDiff) while this sum counts five, i.e. 4 bytes more than the fields read or written - confirm whether the extra float is intentional before changing it, since it affects the on-disk size. The sizeof(geo::Pt3f)*2 term presumably stands for filterColor (ColorRGB32) and maxDiffColor - verify that both types have the same size. */auto total_data_size() const noexcept -> size_t override{ + return + sizeof(unsigned int)*4+ + sizeof(float)*5+ + sizeof(geo::Pt3f)*2+ + sizeof(std::int16_t)*2+ + sizeof(unsigned char)*4 + + sizeof(bool)*7 + + // planes test + sizeof(geo::Pt3f)*4 + + sizeof(std::int8_t) * 2 + ; + } + auto type() const noexcept -> std::int32_t override {return static_cast(SettingsType::Filters);}; + auto file_description() const noexcept -> std::string_view override {return settings_name(static_cast(type()));} +}; + +} diff --git a/cpp-projects/base/camera/kinect4/k4_frame.cpp b/cpp-projects/base/camera/kinect4/k4_frame.cpp new file mode 100644 index 0000000..894899f --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_frame.cpp @@ -0,0 +1,30 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. 
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "k4_frame.hpp" + +using namespace tool::camera; diff --git a/cpp-projects/base/camera/kinect4/k4_frame.hpp b/cpp-projects/base/camera/kinect4/k4_frame.hpp new file mode 100644 index 0000000..16e39ba --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_frame.hpp @@ -0,0 +1,71 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "camera/frame.hpp" +#include "geometry/cloud.hpp" +#include "k4_types.hpp" + +namespace tool::camera{ + /* Uncompressed Kinect4 frame, extending Frame with per-stream dimensions and buffers (color, depth, infrared), a colored cloud, an optional IMU sample, audio frames and bodies. NOTE(review): mode has no default member initializer, unlike the size fields which default to 0. */+struct K4Frame : Frame{ + + // info + K4Mode mode; + + // color + size_t colorWidth = 0; + size_t colorHeight = 0; + std::vector> imageColorData; + + // depth image + size_t depthWidth = 0; + size_t depthHeight = 0; + std::vector depthData; + std::vector> imageDepthData; + + // infra + size_t infraWidth = 0; + size_t infraHeight = 0; + std::vector infraData; + std::vector> imageInfraData; + + // cloud + geo::ColoredCloudData cloud; + + // imu + std::optional imuSample; + + // audio + std::vector> audioFrames; + + // bodies + std::vector bodies; +}; + +} diff --git a/cpp-projects/base/camera/kinect4/k4_frame_compressor.cpp b/cpp-projects/base/camera/kinect4/k4_frame_compressor.cpp new file mode 100644 index 0000000..0c80b63 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_frame_compressor.cpp @@ -0,0 +1,470 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: 
** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "k4_frame_compressor.hpp" + +// std +#include + +// turbojpg +#include + +// turbopfor +#include "TurboPFor/vp4.h" + +// local +// # utility +#include "utility/logger.hpp" + +using namespace tool; +using namespace tool::geo; +using namespace tool::camera; + /* Private state of the compressor: the TurboJPEG handle and its reusable output buffer (with the size currently allocated for it), scratch vectors kept between compress() calls, and the add* flags copied from K4DataSettings by set_settings(). */+struct K4FrameCompressor::Impl{ + tjhandle jpegCompressor = nullptr; + unsigned char *tjCompressedImage = nullptr; + size_t currentSizeAllocatedToJPegCompressor = 0; + + std::vector indicesValid1D; + std::vector processedCloudData; + std::vector processedColorData; + std::vector processedAudioData; + + bool addColor = false; + bool addDepth = false; + bool addInfra = false; + bool addCloud = false; + bool addIMU = false; + bool addAudio = false; + bool addBodies = false; +}; + /* Creates the Impl and the TurboJPEG compressor handle. NOTE(review): the value returned by tjInitCompress() is not checked - confirm how an initialisation failure should be handled. */+K4FrameCompressor::K4FrameCompressor() : i(std::make_unique()){ + i->jpegCompressor = tjInitCompress(); +} /* Releases the JPEG output buffer, if one was allocated, then destroys the compressor handle. */+K4FrameCompressor::~K4FrameCompressor(){ + if(i->tjCompressedImage != nullptr){ + tjFree(i->tjCompressedImage); + } + tjDestroy(i->jpegCompressor); +} + /* Copies the send* flags of ds into the add* flags that gate each stream in compress(). */+auto K4FrameCompressor::set_settings(const K4DataSettings &ds) -> void{ + i->addColor = ds.sendColor; + i->addDepth = ds.sendDepth; + i->addInfra = ds.sendInfra; + i->addCloud = ds.sendCloud; + i->addIMU = ds.sendIMU; + 
i->addAudio = ds.sendAudio; + i->addBodies = ds.sendBodies; +} + /* JPEG-encodes a width x height image with dim bytes per pixel (4 selects TJPF_BGRA, 3 selects TJPF_BGR) using TJSAMP_444 and copies the result into encodedData. Returns false when dim is neither 3 nor 4 or when tjCompress2 reports an error; encodedData is only modified on success. The scratch buffer i->tjCompressedImage is reused between calls and re-allocated only when width*height*dim exceeds the size allocated so far. NOTE(review): TJFLAG_NOREALLOC is combined with a buffer of width*height*dim bytes; the TurboJPEG documentation asks for a worst-case buffer sized with tjBufSize() in that mode - confirm the sizing, in particular for dim == 3. NOTE(review): the failure log is tagged compress_color_image (not this function's name) and says "code" while printing the string from tjGetErrorStr2. */+auto K4FrameCompressor::compress_jpeg_8_bits_data(size_t width, size_t height, size_t dim, std::uint8_t *data, std::vector &encodedData, int jpegQuality) -> bool{ + + if(dim < 3 || dim > 4){ + Logger::error(std::format("Invalid dimension {}.\n", dim)); + return false; + } + + auto jpegSize = width*height*dim;//cFrame->colorHeight*cFrame->colorWidth*4; + if(i->tjCompressedImage == nullptr){ + i->tjCompressedImage = tjAlloc(static_cast(i->currentSizeAllocatedToJPegCompressor = jpegSize)); + }else if(i->currentSizeAllocatedToJPegCompressor < jpegSize){ + tjFree(i->tjCompressedImage); + i->tjCompressedImage = tjAlloc(static_cast(i->currentSizeAllocatedToJPegCompressor = jpegSize)); + } + + long unsigned int jpegColorSize = 0; + int ret = tjCompress2( + i->jpegCompressor, + data, + static_cast(width), + 0, + static_cast(height), + dim == 4 ? TJPF_BGRA : TJPF_BGR, + &i->tjCompressedImage, &jpegColorSize, TJSAMP_444, jpegQuality, TJFLAG_NOREALLOC | TJFLAG_FASTDCT + ); + + if(ret == -1){ + Logger::error(std::format("[K4FrameCompressor:compress_color_image] tjCompress2 error with code: {}\n", tjGetErrorStr2(i->jpegCompressor))); + return false; + } + + encodedData.resize(jpegColorSize); + std::copy(i->tjCompressedImage, i->tjCompressedImage + jpegColorSize, encodedData.begin()); + + return true; +} + /* Encodes size 16-bit values with p4nzenc128v16 into encodedData, then resizes encodedData to the value returned by the encoder (encodedBytesNb). size has to be a multiple of 128; otherwise an error is logged and the function returns without touching encodedData, and no status is reported to the caller. NOTE(review): the output buffer is pre-sized with resize(size*2); whether that leaves room for the encoder's worst case depends on encodedData's element type, which is not visible here - check against the output bound documented by TurboPFor. */+auto K4FrameCompressor::compress_lossless_16_bits_128padded_data(size_t size, std::uint16_t *data, std::vector &encodedData) -> void{ + + if(size % 128 != 0){ + Logger::error("[K4FrameCompressor:compress_lossless_16_bits_128padded_data] Data is not 128 padded.\n"); + return; + } + + // resize encoding buffer + encodedData.resize(size*2); + + // encode + size_t encodedBytesNb = p4nzenc128v16( + data, + size, + encodedData.data() + ); + encodedData.resize(encodedBytesNb); +} + + /* Builds a compressed frame from raw capture buffers. Each stream (color, depth, infrared, cloud, audio, IMU) is encoded or copied only when its input is present and the matching add* flag set by set_settings() is true; the calibration is copied whenever it is provided. */+auto K4FrameCompressor::compress( + K4Mode mode, + std::optional colorImage, int jpegQuality, + std::optional depthImage, size_t 
validDepthValues, + std::optional infraImage, + std::optional cloud, std::optional calibration, + float *audioData, size_t audioSize, K4ImuSample *imuSample) -> std::unique_ptr{ /* The bool returned by compress_jpeg_8_bits_data is ignored below, and compress_lossless_16_bits_128padded_data only logs when width*height is not a multiple of 128, so a failed stream presumably just leaves the corresponding encoded buffer of the new frame empty - confirm. */+ + // create compressed frame + auto cFrame = std::make_unique(); + cFrame->mode = mode; + + // color + if(colorImage.has_value() && i->addColor){ + cFrame->colorWidth = colorImage->get_width_pixels(); + cFrame->colorHeight = colorImage->get_height_pixels(); + compress_jpeg_8_bits_data(cFrame->colorWidth, cFrame->colorHeight, 4, colorImage.value().get_buffer(), cFrame->encodedColorData, jpegQuality); + } + // depth + if(depthImage.has_value() && i->addDepth){ + cFrame->depthWidth = depthImage->get_width_pixels(); + cFrame->depthHeight = depthImage->get_height_pixels(); + compress_lossless_16_bits_128padded_data(cFrame->depthWidth*cFrame->depthHeight,reinterpret_cast(depthImage.value().get_buffer()), cFrame->encodedDepthData); + } + + // infrared + if(infraImage.has_value() && i->addInfra){ + cFrame->infraWidth = infraImage->get_width_pixels(); + cFrame->infraHeight = infraImage->get_height_pixels(); + compress_lossless_16_bits_128padded_data(cFrame->infraWidth*cFrame->infraHeight, reinterpret_cast(infraImage.value().get_buffer()), cFrame->encodedInfraData); + } + + // cloud + cFrame->validVerticesCount = validDepthValues; + if(colorImage.has_value() && depthImage.has_value() && cloud.has_value() && i->addCloud){ + + /* colorData and cloudData are both indexed with depth-pixel ids below, which assumes the color and cloud buffers have the depth image's resolution - TODO confirm. */ auto colorData = reinterpret_cast*>(colorImage.value().get_buffer()); + auto depthData = reinterpret_cast(depthImage.value().get_buffer()); + auto cloudData = reinterpret_cast*>(cloud.value().get_buffer()); + + // fill indices array + const auto idV = cFrame->validVerticesCount; + const size_t depthSize = depthImage.value().get_width_pixels()*depthImage.value().get_height_pixels(); + if(i->indicesValid1D.size() < depthSize){ + i->indicesValid1D.resize(depthSize); + std::iota(std::begin(i->indicesValid1D), std::end(i->indicesValid1D), 0); + } + + // fill valid id + std::vector 
validId; + validId.reserve(idV); + /* NOTE(review): the lambda appends to the shared validId vector while running under std::execution::unseq, where invocations may be interleaved - confirm this is safe and that the resulting order is the intended one. */for_each(std::execution::unseq, std::begin(i->indicesValid1D), std::begin(i->indicesValid1D) + depthSize, [&](size_t id){ + if(depthData[id] != k4_invalid_depth_value){ + validId.push_back(id); + } + }); + + // resize cloud data + // # vertices + size_t cloudVerticesBufferSize = idV*3; + size_t rest = cloudVerticesBufferSize % 128; + size_t paddeCloudVerticesBufferPaddedSize = rest == 0 ? cloudVerticesBufferSize : (cloudVerticesBufferSize + 128 - rest); + if(i->processedCloudData.size() < paddeCloudVerticesBufferPaddedSize){ + i->processedCloudData.resize(paddeCloudVerticesBufferPaddedSize); + } + std::fill(i->processedCloudData.begin(), i->processedCloudData.end(), 0); + // # colors + // color frame is always 128 padded + cFrame->cloudColorWidth = colorImage.value().get_width_pixels(); + cFrame->cloudColorHeight = colorImage.value().get_height_pixels(); + const size_t colorBufferSize = cFrame->cloudColorWidth*cFrame->cloudColorHeight*3; + if(i->processedColorData.size() < colorBufferSize){ + i->processedColorData.resize(colorBufferSize); + } + std::fill(i->processedColorData.begin(), i->processedColorData.end(), 0); + + // fill data + /* Vertices are stored as three planes (all x, then all y, then all z) with 4096 added to x and y; colors are stored interleaved. NOTE(review): the loop reads validId[id] for every id < idV (= validDepthValues), so it assumes validDepthValues equals the number of depth pixels different from k4_invalid_depth_value; a larger value would read validId out of range - confirm the caller guarantees this. */ for_each(std::execution::par_unseq, std::begin(i->indicesValid1D), std::begin(i->indicesValid1D) + idV, [&](size_t id){ + + i->processedCloudData[id] = static_cast(static_cast(cloudData[validId[id]].x())+4096); + i->processedCloudData[idV + id] = static_cast(static_cast(cloudData[validId[id]].y())+4096); + i->processedCloudData[2*idV + id] = static_cast(cloudData[validId[id]].z()); + + i->processedColorData[id*3+0] = colorData[validId[id]].x(); + i->processedColorData[id*3+1] = colorData[validId[id]].y(); + i->processedColorData[id*3+2] = colorData[validId[id]].z(); + }); + + // compress cloud vertices data + compress_lossless_16_bits_128padded_data(paddeCloudVerticesBufferPaddedSize, i->processedCloudData.data(), cFrame->encodedCloudVerticesData); + + // compress cloud color data + 
compress_jpeg_8_bits_data(cFrame->cloudColorWidth, cFrame->cloudColorHeight, 3, i->processedColorData.data(), cFrame->encodedCloudColorData, jpegQuality); + } + if(calibration.has_value()){ + cFrame->calibration = calibration.value(); + } + + // audio + /* Copies 7*audioSize floats into audioFrames after resizing it to audioSize elements, i.e. assumes each audio frame element holds exactly 7 floats - TODO confirm against the audioFrames element type. */ if(audioData != nullptr && audioSize > 0 && i->addAudio){ + cFrame->audioFrames.resize(audioSize); + std::copy(audioData, audioData + 7*audioSize, reinterpret_cast(cFrame->audioFrames.data())); + } + + // imu + if(imuSample != nullptr && i->addIMU){ + cFrame->imuSample = *imuSample; + } + + // bodies + if(i->addBodies){ + // ... + } + + return cFrame; +} + + +auto K4FrameCompressor::compress(K4Frame &frame, int jpegQuality) -> std::unique_ptr{ + + // create compressed frame + auto cFrame = std::make_unique(); + cFrame->mode = frame.mode; + cFrame->idCapture = frame.idCapture; + cFrame->afterCaptureTS = frame.afterCaptureTS; + + // color + if(!frame.imageColorData.empty()){ + cFrame->colorWidth = frame.colorWidth; + cFrame->colorHeight = frame.colorHeight; + compress_jpeg_8_bits_data(cFrame->colorWidth, cFrame->colorHeight, 3, frame.imageColorData.front().array.data(), cFrame->encodedColorData, jpegQuality); + } + + // depth + if(!frame.depthData.empty()){ + cFrame->depthWidth = frame.depthWidth; + cFrame->depthHeight = frame.depthHeight; + compress_lossless_16_bits_128padded_data(cFrame->depthWidth*cFrame->depthHeight, frame.depthData.data(), cFrame->encodedDepthData); + } + + // infra + if(!frame.infraData.empty()){ + cFrame->infraWidth = frame.infraWidth; + cFrame->infraHeight = frame.infraHeight; + compress_lossless_16_bits_128padded_data(cFrame->infraWidth*cFrame->infraHeight, frame.infraData.data(), cFrame->encodedInfraData); + } + + // cloud + if(!frame.cloud.empty()){ + + const auto idV = frame.cloud.size(); + cFrame->validVerticesCount = idV; + + // fill indices array + if(i->indicesValid1D.size() < idV){ + i->indicesValid1D.resize(idV); + std::iota(std::begin(i->indicesValid1D), std::end(i->indicesValid1D), 
0); + } + + // resize cloud data + // # vertices + size_t cloudVerticesBufferSize = idV*3; + size_t rest = cloudVerticesBufferSize % 128; + size_t paddeCloudVerticesBufferPaddedSize = rest == 0 ? cloudVerticesBufferSize : (cloudVerticesBufferSize + 128 - rest); + if(i->processedCloudData.size() < paddeCloudVerticesBufferPaddedSize){ + i->processedCloudData.resize(paddeCloudVerticesBufferPaddedSize); + } + std::fill(i->processedCloudData.begin(), i->processedCloudData.begin() + paddeCloudVerticesBufferPaddedSize, 0); + // # colors + // we use the same padding than for the vertices + if(i->processedColorData.size() < paddeCloudVerticesBufferPaddedSize){ + i->processedColorData.resize(paddeCloudVerticesBufferPaddedSize); + } + std::fill(i->processedColorData.begin(), i->processedColorData.begin() + paddeCloudVerticesBufferPaddedSize, 0); + + if(frame.mode != K4Mode::Merged){ + + // process cloud buffer + for_each(std::execution::par_unseq, std::begin(i->indicesValid1D), std::begin(i->indicesValid1D) + idV, [&](size_t id){ + + i->processedCloudData[id] = static_cast((-frame.cloud.vertices[id].x()*1000.f) +4096.f); + i->processedCloudData[idV + id] = static_cast((-frame.cloud.vertices[id].y()*1000.f) +4096.f); + i->processedCloudData[2*idV + id] = static_cast(frame.cloud.vertices[id].z()*1000.f); + + i->processedColorData[id*3+0] = static_cast(frame.cloud.colors[id].z()*255.f); + i->processedColorData[id*3+1] = static_cast(frame.cloud.colors[id].y()*255.f); + i->processedColorData[id*3+2] = static_cast(frame.cloud.colors[id].x()*255.f); + }); + + }else{ + + // process cloud buffer + for_each(std::execution::par_unseq, std::begin(i->indicesValid1D), std::begin(i->indicesValid1D) + idV, [&](size_t id){ + + i->processedCloudData[id] = static_cast(frame.cloud.vertices[id].x()*1000.f + 32768.f); + i->processedCloudData[idV + id] = static_cast(frame.cloud.vertices[id].y()*1000.f + 32768.f); + i->processedCloudData[2*idV + id] = static_cast(frame.cloud.vertices[id].z()*1000.f 
+ 32768.f); + + i->processedColorData[id*3+0] = static_cast(frame.cloud.colors[id].z()*255.f); + i->processedColorData[id*3+1] = static_cast(frame.cloud.colors[id].y()*255.f); + i->processedColorData[id*3+2] = static_cast(frame.cloud.colors[id].x()*255.f); + }); + } + + + // compress processed cloud data + compress_lossless_16_bits_128padded_data( + paddeCloudVerticesBufferPaddedSize, + i->processedCloudData.data(), + cFrame->encodedCloudVerticesData + ); + + // compress processed color data + size_t cloudColorSize = paddeCloudVerticesBufferPaddedSize/3; + cFrame->cloudColorWidth = cloudColorSize/128; + cFrame->cloudColorHeight = 128; + compress_jpeg_8_bits_data(cFrame->cloudColorWidth, cFrame->cloudColorHeight , 3, i->processedColorData.data(), cFrame->encodedCloudColorData, jpegQuality); + } + + // imu + if(frame.imuSample.has_value()){ + cFrame->imuSample = frame.imuSample; + } + + // audio + cFrame->audioFrames = frame.audioFrames; + + // bodies + // ... + + return cFrame; +} + +#include + +auto K4FrameCompressor::test() -> void{ + + std::cout << "ENCODING:\n"; + + size_t inputSize = 128 + 35; + std::vector input; + input.resize(inputSize); + std::iota(input.begin(), input.end(), 12000); + + + for(const auto &v : input){ + std::cout << v << " "; + } + std::cout << "\n"; + + + std::cout << "input size " << input.size() << "\n"; + + size_t rest = input.size() % 128; + size_t paddedSize = rest == 0 ? 
input.size() : input.size() + 128-rest; + input.resize(paddedSize); + + std::cout << "padded size " << input.size() << "\n"; + + + std::vector encoded; + encoded.resize(paddedSize*2); + + std::cout << "base encoded size " << encoded.size() << "\n"; + + size_t encodedBytesNb = p4nzenc128v16( + reinterpret_cast(input.data()), + paddedSize, + encoded.data() + ); + + encoded.resize(encodedBytesNb); + std::cout << "encoded size " << encoded.size() << "\n"; + + std::cout << "DECODING:\n"; + + std::vector decoded; + decoded.resize(paddedSize); + + size_t decodedBytesNb = p4nzdec128v16( + encoded.data(), + paddedSize, + decoded.data() + ); + decoded.resize(inputSize); + + std::cout << "decoded bytes " << decodedBytesNb << "\n"; + + for(const auto &v : decoded){ + std::cout << v << " "; + } + std::cout << "\n"; + +} + + + +// audio compressed +// ... + +// if(audioSize > 0){ + +// resize processed data +// const size_t paddedAudioSizeDiff = 128-((audioSize)%128); +// const size_t processsedAudioDataSize = idV*3 + paddedDiff; +// if(i->processedCloudData.size() < processsedCloudDataSize){ +// i->processedCloudData.resize(processsedCloudDataSize); +// } +// std::fill(i->processedCloudData.begin(), i->processedCloudData.end(), 0); + +// cFrame->encodedAudioData; + + // process audio buffer + // ... + + // compress audio buffer + // ... 
+ + + // -32768 + // 32767 + + // -1.000 +1.0000 + // * 32767 + // -32767 +32767 + + // encodedBytesNb = p +// } diff --git a/cpp-projects/base/camera/kinect4/k4_frame_compressor.hpp b/cpp-projects/base/camera/kinect4/k4_frame_compressor.hpp new file mode 100644 index 0000000..c82be59 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_frame_compressor.hpp @@ -0,0 +1,79 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// kinect +#include "k4a/k4a.hpp" + +// local +#include "k4_compressed_frame.hpp" +#include "k4_frame.hpp" +#include "k4_data_settings.hpp" + +namespace tool::camera{ + +struct K4FrameCompressor{ + + K4FrameCompressor(); + ~K4FrameCompressor(); + + + auto set_settings(const K4DataSettings &ds) -> void; + + auto compress( + K4Mode mode, + std::optional colorImage, int jpegQuality, + std::optional depthImage, size_t validDepthValues, + std::optional infraImage, + std::optional cloud, + std::optional calibration, + float *audioData, size_t audioSize, + K4ImuSample *imuSample + ) -> std::unique_ptr; + + auto compress(K4Frame &frame, int jpegQuality) -> std::unique_ptr; + + auto test() -> void; + +private: + + auto compress_jpeg_8_bits_data(size_t width, size_t height, size_t dim, std::uint8_t *data, std::vector &encodedData, int jpegQuality) -> bool; + auto compress_lossless_16_bits_128padded_data(size_t size, std::uint16_t *data, std::vector &encodedData) -> void; + + struct Impl; + std::unique_ptr i; +}; +} + + + + + + + + + diff --git a/cpp-projects/base/camera/kinect4/k4_frame_processing.hpp b/cpp-projects/base/camera/kinect4/k4_frame_processing.hpp new file mode 100644 index 0000000..edc3489 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_frame_processing.hpp @@ -0,0 +1,64 @@ + +/******************************************************************************* +** Toolbox-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** 
Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "k4_frame_uncompressor.hpp" + +namespace tool::camera{ + +struct K4FramesProcessing{ + + K4FramesProcessing(); + ~K4FramesProcessing(){ + clean(); + } + + auto start() -> void; + auto stop() -> void; + auto clean() -> void; + + auto new_compressed_frame(std::shared_ptr frame) -> void; + auto new_frame(std::shared_ptr frame) -> void; + auto get_frame() -> std::shared_ptr; + auto invalid_frame() -> void; + +private: + + auto process() -> void; + + std::atomic_bool m_isProcessing = false; + std::unique_ptr m_thread = nullptr; + std::unique_ptr m_locker = nullptr; + + std::shared_ptr m_lastCF = nullptr; + std::shared_ptr m_lastF = nullptr; + std::shared_ptr m_frame = nullptr; + + std::unique_ptr m_frameUncompressor; +}; +} diff --git a/cpp-projects/base/camera/kinect4/k4_frame_uncompressor.cpp b/cpp-projects/base/camera/kinect4/k4_frame_uncompressor.cpp new file mode 100644 index 0000000..a03cdf4 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_frame_uncompressor.cpp @@ -0,0 +1,1131 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** 
Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#include "k4_frame_uncompressor.hpp" + +// std +#include + +// kinect +#include "k4a/k4a.hpp" + +// turbojpg +#include + +// turbopfor +#include "TurboPFor/vp4.h" + +// local +// # utility +#include "utility/logger.hpp" +#include "utility/types.hpp" + +using namespace tool; +using namespace tool::geo; +using namespace tool::camera; + +struct K4FrameUncompressor::Impl{ + + Impl(){ + jpegUncompressor = tjInitDecompress(); + + const size_t defaultWidth = 640; + const size_t defaultHeight = 576; + const size_t defaultResolution = defaultWidth*defaultHeight; + indicesDepths1D.reserve(defaultResolution); + indicesValid1D.reserve(defaultResolution); + decodedColorData.reserve(defaultResolution); + decodedDepthData.reserve(defaultResolution); + decodedVerticesData.reserve(defaultResolution); + + // generate k4a image for storing depth values + depthImage = k4a::image::create( + k4a_image_format_t::K4A_IMAGE_FORMAT_DEPTH16, + to_int(defaultWidth), + to_int(defaultHeight), + static_cast(defaultWidth * 1 * sizeof(uint16_t)) + ); + + // generate k4a image for storing cloud values + pointCloudImage = k4a::image::create(K4A_IMAGE_FORMAT_CUSTOM, + to_int(defaultWidth), + to_int(defaultHeight), + static_cast(defaultWidth * 3 * sizeof(int16_t)) + ); + } + + ~Impl(){ + tjDestroy(jpegUncompressor); + } + + tjhandle jpegUncompressor = nullptr; + std::vector indicesDepths1D; + + std::vector indicesValid1D; + std::vector> decodedColorData; + std::vector decodedDepthData; + std::vector decodedVerticesData; + + k4a::image depthImage; + k4a::image pointCloudImage; + std::tuple> modeTr; + + static constexpr std::array depthGradient ={ + Pt3f{0.f,0.f,1.f}, + {0.f,1.f,1.f}, + {0.f,1.f,0.f}, + {1.f,1.f,0.f}, + {1.f,0.f,0.f}, + }; + + // utility + auto cloud_image_data() -> geo::Pt3*; + auto update_id_array(size_t idV) -> void; + +}; + +auto K4FrameUncompressor::Impl::cloud_image_data() -> Pt3*{ + 
if(pointCloudImage.is_valid()){ + return reinterpret_cast*>(pointCloudImage.get_buffer()); + } + return nullptr; +} +auto K4FrameUncompressor::Impl::update_id_array(size_t idV) -> void{ + if(indicesValid1D.size() < idV){ + indicesValid1D.resize(idV); + std::iota(std::begin(indicesValid1D), std::end(indicesValid1D), 0); + } +} + +K4FrameUncompressor::K4FrameUncompressor() : i(std::make_unique()){} + +K4FrameUncompressor::~K4FrameUncompressor(){} + + +auto K4FrameUncompressor::uncompress_jpeg_8_bits_data(size_t width, size_t height, ColorFormat format, size_t jpegSize, std::uint8_t *jpegData, std::uint8_t *data) -> bool{ + + // uncompress + const int decompressStatus = tjDecompress2( + i->jpegUncompressor, + jpegData, + static_cast(jpegSize), + data, + static_cast(width), + 0, // pitch + static_cast(height), + format, + TJFLAG_FASTDCT + ); + if(decompressStatus == -1){ + Logger::error("[K4FrameUncompressor:uncompress_jpeg_8_bits_data] Error uncompress color.\n"); + return false; + } + return true; +} + +auto K4FrameUncompressor::uncompress_jpeg_8_bits_data(size_t width, size_t height, ColorFormat format, std::vector &encodedData, std::vector &data) -> bool{ + + size_t dim = 4; + if(format == ColorFormat::BGR || format == ColorFormat::RGB){ + dim = 3; + } + + // resize uncompressed buffer + const size_t colorSize = width * height * dim; + if(data.size() != colorSize){ + data.resize(colorSize); + } + + // uncompress + const int decompressStatus = tjDecompress2( + i->jpegUncompressor, + encodedData.data(), + static_cast(encodedData.size()), + reinterpret_cast(data.data()), + static_cast(width), + 0, // pitch + static_cast(height), + format, + TJFLAG_FASTDCT + ); + if(decompressStatus == -1){ + Logger::error("[FrameUncompressor] Error uncompress color.\n"); + return false; + } + + return true; +} + +auto K4FrameUncompressor::uncompress_jpeg_8_bits_data(size_t width, size_t height, ColorFormat format, std::vector &jpegData, std::vector> &uncompressedColor) -> bool{ + + // 
resize uncompressed buffer + const size_t colorSize = width * height; + if(uncompressedColor.size() != colorSize){ + uncompressedColor.resize(colorSize); + } + return uncompress_jpeg_8_bits_data(width, height, format, jpegData.size(), jpegData.data(), reinterpret_cast(uncompressedColor.data())); +} + +auto K4FrameUncompressor::uncompress_jpeg_8_bits_data(size_t width, size_t height, ColorFormat format, std::vector &jpegData, std::vector> &uncompressedColor) -> bool{ + + // resize uncompressed buffer + const size_t colorSize = width * height; + if(uncompressedColor.size() != colorSize){ + uncompressedColor.resize(colorSize); + } + return uncompress_jpeg_8_bits_data(width, height, format, jpegData.size(), jpegData.data(), reinterpret_cast(uncompressedColor.data())); +} + +auto K4FrameUncompressor::uncompress_lossless_16_bits_128padded_data(size_t uncompressedSize, std::vector &encodedData, std::vector &data) -> bool{ + + size_t rest = uncompressedSize % 128; + size_t paddedUncompressedSize = rest == 0 ? 
uncompressedSize : uncompressedSize + 128-rest; + + if(data.size() < paddedUncompressedSize){ + data.resize(paddedUncompressedSize); + } + + size_t decodedBytesNb = p4nzdec128v16( + encodedData.data(), + paddedUncompressedSize, + data.data()); + if(decodedBytesNb == 0){ + Logger::error("[K4FrameUncompressor::uncompress_lossless_16_bits_128padded_data] Error decoding data.\n"); + return false; + } + + data.resize(uncompressedSize); + + return true; +} + +auto K4FrameUncompressor::convert_to_depth_image(K4Mode mode, size_t depthWidth, size_t depthHeight, const std::vector &uncompressedDepth, std::vector > &imageDepth) -> void{ + + // resize image buffer + size_t imageDepthSize = depthWidth * depthHeight*3; + if(imageDepth.size() != imageDepthSize){ + imageDepth.resize(imageDepthSize); + } + + const auto dRange = range(mode)*1000.f; + const auto diff = dRange(1) - dRange(0); + + // convert data + for(size_t ii = 0; ii < uncompressedDepth.size(); ++ii){ + + if(uncompressedDepth[ii] == k4_invalid_depth_value){ + imageDepth[ii] = {0,0,0}; + continue;; + } + + float vF = (static_cast(uncompressedDepth[ii]) - dRange(0))/diff; + float intPart; + float decPart = std::modf((vF*(i->depthGradient.size()-1)), &intPart); + size_t idG = static_cast(intPart); + + auto col = i->depthGradient[idG]*(1.f-decPart) + i->depthGradient[idG+1]*decPart; + imageDepth[ii] = { + static_cast(255*col.x()), + static_cast(255*col.y()), + static_cast(255*col.z()) + }; + } +} + +auto K4FrameUncompressor::convert_to_infra_image(size_t infraWidth, size_t infraHeight, const std::vector &uncompressedInfra, std::vector> &imageInfra) -> void{ + + // resize image buffer + size_t imageInfraSize = infraWidth * infraHeight; + if(imageInfra.size() != imageInfraSize){ + imageInfra.resize(imageInfraSize); + } + + // convert data + const float max = 2000; + for(size_t ii = 0; ii < uncompressedInfra.size(); ++ii){ + + float vF = static_cast(uncompressedInfra[ii]); + if(vF > max){ + vF = max; + } + vF/=max; + + 
imageInfra[ii] = { + static_cast(255*vF), + static_cast(255*vF), + static_cast(255*vF) + }; + } +} + +auto K4FrameUncompressor::generate_cloud(K4Mode mode, size_t dephtWidth, size_t depthHeight, k4a_calibration_t &calibration, const std::vector &uncompressedDepth) -> void{ + + + // reset k4a transformation if necessary + if(!std::get<1>(i->modeTr).has_value() || (mode != std::get<0>(i->modeTr))){ + std::get<0>(i->modeTr) = mode; + std::get<1>(i->modeTr) = k4a_transformation_create(&calibration); + } + + // reset k4a images if necessary + bool resetK4AImages = false; + if(i->depthImage.is_valid()){ + if(i->depthImage.get_width_pixels() != to_int(dephtWidth) || + i->depthImage.get_height_pixels() != to_int(depthHeight) ){ + resetK4AImages = true; + } + }else{ + resetK4AImages = true; + } + if(resetK4AImages){ + + // generate k4a image for storing depth values + i->depthImage = k4a::image::create( + k4a_image_format_t::K4A_IMAGE_FORMAT_DEPTH16, + to_int(dephtWidth), + to_int(depthHeight), + static_cast(dephtWidth * 1 * sizeof(uint16_t)) + ); + + // generate k4a image for storing cloud values + i->pointCloudImage = k4a::image::create(K4A_IMAGE_FORMAT_CUSTOM, + to_int(dephtWidth), + to_int(depthHeight), + static_cast(dephtWidth * 3 * sizeof(int16_t)) + ); + } + + // copy depth values + std::copy(uncompressedDepth.begin(), uncompressedDepth.end(), reinterpret_cast(i->depthImage.get_buffer())); + + // generate point cloud from depth image + k4a_transformation_depth_image_to_point_cloud( + std::get<1>(i->modeTr).value(), + i->depthImage.handle(), + K4A_CALIBRATION_TYPE_DEPTH, + i->pointCloudImage.handle() + ); +} + + +auto K4FrameUncompressor::convert_to_cloud( + K4Mode mode, + size_t validVerticesCount, + ColoredCloudData &cloud, + std::vector> &uncompressedColor, + std::vector &uncompressedVertices + ) -> void{ + + const auto vvc = validVerticesCount; + i->update_id_array(vvc); + + // resize cloud if necessary + cloud.resize(vvc); + + if(mode != K4Mode::Merged){ + + 
for_each(std::execution::par_unseq, std::begin(i->indicesValid1D), std::begin(i->indicesValid1D) + vvc, [&](size_t id){ + + cloud.vertices[id] = Pt3f{ + -(static_cast(uncompressedVertices[ id]) - 4096), + -(static_cast(uncompressedVertices[vvc + id]) - 4096), + static_cast(uncompressedVertices [2*vvc + id]) + }*0.001f; + + cloud.colors[id] = Pt3f{ + static_cast(uncompressedColor[id].x()), + static_cast(uncompressedColor[id].y()), + static_cast(uncompressedColor[id].z()) + }/255.f; + }); + }else{ + + for_each(std::execution::par_unseq, std::begin(i->indicesValid1D), std::begin(i->indicesValid1D) + vvc, [&](size_t id){ + + cloud.vertices[id] = Pt3f{ + static_cast(uncompressedVertices[ id]) -32768.f, + static_cast(uncompressedVertices[vvc + id]) -32768.f, + static_cast(uncompressedVertices [2*vvc + id]) -32768.f, + }*0.001f; + + cloud.colors[id] = Pt3f{ + static_cast(uncompressedColor[id].x()), + static_cast(uncompressedColor[id].y()), + static_cast(uncompressedColor[id].z()) + }/255.f; + }); + } +} + +auto K4FrameUncompressor::convert_to_cloud( + size_t validVerticesCount, + ColoredCloudData &cloud, + std::vector> &uncompressedColor, + std::vector &uncompressedDepth, + geo::Pt3 *cloudBuffer + ) -> void{ + + const auto vvc = validVerticesCount; + + // resize cloud if necessary + cloud.resize(vvc); + + // resize depth indices + if(i->indicesDepths1D.size() != uncompressedDepth.size()){ + i->indicesDepths1D.resize(uncompressedDepth.size()); + std::iota(std::begin(i->indicesDepths1D), std::end(i->indicesDepths1D), 0); + } + + // update cloud values + size_t idV = 0; + for_each(std::execution::unseq, std::begin(i->indicesDepths1D), std::end(i->indicesDepths1D), [&](size_t id){ + + if(uncompressedDepth[id] == k4_invalid_depth_value){ + return; + } + + cloud.vertices[idV]= Pt3f{ + static_cast(-cloudBuffer[id].x()), + static_cast(-cloudBuffer[id].y()), + static_cast( cloudBuffer[id].z()) + }*0.001f; + cloud.colors[idV] = Pt3f{ + static_cast(uncompressedColor[id].x()), + 
static_cast(uncompressedColor[id].y()), + static_cast(uncompressedColor[id].z()) + }/255.f; + + ++idV; + }); +} + +auto K4FrameUncompressor::convert_to_cloud( + K4VertexMeshData* vertices, + std::vector>& uncompressedColor, + std::vector& uncompressedDepth, + geo::Pt3 *cloudBuffer) -> void{ + + // resize depth indices + if(i->indicesDepths1D.size() != uncompressedDepth.size()){ + i->indicesDepths1D.resize(uncompressedDepth.size()); + std::iota(std::begin(i->indicesDepths1D), std::end(i->indicesDepths1D), 0); + } + + // update cloud values + size_t idV = 0; + for_each(std::execution::unseq, std::begin(i->indicesDepths1D), std::end(i->indicesDepths1D), [&](size_t id){ + + if(uncompressedDepth[id] == k4_invalid_depth_value){ + return; + } + vertices[idV].pos = Pt3f{ + static_cast(-cloudBuffer[id].x()), + static_cast(-cloudBuffer[id].y()), + static_cast( cloudBuffer[id].z()) + }*0.001f; + vertices[idV].col = Pt4{ + uncompressedColor[id].x(), + uncompressedColor[id].y(), + uncompressedColor[id].z(), + 255 + }; + + ++idV; + }); +} + +auto K4FrameUncompressor::convert_to_cloud( + K4Mode mode, size_t validVerticesCount, Pt3f *vertices, Pt3f *colors, + std::vector> &uncompressedColor, std::vector &uncompressedVertices) -> void{ + + const auto idV = validVerticesCount; + i->update_id_array(idV); + + if(mode != K4Mode::Merged){ + + for_each(std::execution::par_unseq, std::begin(i->indicesValid1D), std::begin(i->indicesValid1D) + idV, [&](size_t id){ + + vertices[id]= Pt3f{ + -(static_cast(uncompressedVertices[ id])-4096.f), + -(static_cast(uncompressedVertices[idV + id])-4096.f), + static_cast(uncompressedVertices[2*idV + id]) + }*0.001f; + + colors[id] = Pt3f{ + static_cast(uncompressedColor[id].x()), + static_cast(uncompressedColor[id].y()), + static_cast(uncompressedColor[id].z()) + }/255.f; + }); + }else{ + + for_each(std::execution::par_unseq, std::begin(i->indicesValid1D), std::begin(i->indicesValid1D) + idV, [&](size_t id){ + + vertices[id]= Pt3f{ + 
static_cast(uncompressedVertices[ id])-32768.f, + static_cast(uncompressedVertices[idV + id])-32768.f, + static_cast(uncompressedVertices[2*idV + id])-32768.f + }*0.001f; + + colors[id] = Pt3f{ + static_cast(uncompressedColor[id].x()), + static_cast(uncompressedColor[id].y()), + static_cast(uncompressedColor[id].z()) + }/255.f; + }); + } +} + +auto K4FrameUncompressor::convert_to_cloud(K4Mode mode, size_t validVerticesCount, Pt3f *vertices, Pt4f *colors, std::vector> &uncompressedColor, std::vector &uncompressedVertices) -> void{ + + const auto idV = validVerticesCount; + i->update_id_array(idV); + + if(mode != K4Mode::Merged){ + + for_each(std::execution::par_unseq, std::begin(i->indicesValid1D), std::begin(i->indicesValid1D) + idV, [&](size_t id){ + + vertices[id]= Pt3f{ + -(static_cast(uncompressedVertices[ id])-4096.f), + -(static_cast(uncompressedVertices[idV + id])-4096.f), + static_cast(uncompressedVertices[2*idV + id]) + }*0.001f; + + colors[id] = Pt4f{ + static_cast(uncompressedColor[id].x()), + static_cast(uncompressedColor[id].y()), + static_cast(uncompressedColor[id].z()), + 255.f + }/255.f; + }); + }else{ + + for_each(std::execution::par_unseq, std::begin(i->indicesValid1D), std::begin(i->indicesValid1D) + idV, [&](size_t id){ + + vertices[id]= Pt3f{ + static_cast(uncompressedVertices[ id])-32768.f, + static_cast(uncompressedVertices[idV + id])-32768.f, + static_cast(uncompressedVertices[2*idV + id])-32768.f + }*0.001f; + + colors[id] = Pt4f{ + static_cast(uncompressedColor[id].x()), + static_cast(uncompressedColor[id].y()), + static_cast(uncompressedColor[id].z()), + 255.f + }/255.f; + }); + } +} + +auto K4FrameUncompressor::convert_to_cloud(K4Mode mode, size_t validVerticesCount, Pt3f *vertices, Pt3 *colors, std::vector> &uncompressedColor, std::vector &uncompressedVertices) -> void{ + const auto idV = validVerticesCount; + i->update_id_array(idV); + + if(mode != K4Mode::Merged){ + + for_each(std::execution::par_unseq, std::begin(i->indicesValid1D), 
std::begin(i->indicesValid1D) + idV, [&](size_t id){ + + vertices[id]= Pt3f{ + -(static_cast(uncompressedVertices[ id])-4096.f), + -(static_cast(uncompressedVertices[idV + id])-4096.f), + static_cast(uncompressedVertices[2*idV + id]) + }*0.001f; + + colors[id] = { + uncompressedColor[id].x(), + uncompressedColor[id].y(), + uncompressedColor[id].z(), + }; + }); + }else{ + + for_each(std::execution::par_unseq, std::begin(i->indicesValid1D), std::begin(i->indicesValid1D) + idV, [&](size_t id){ + + vertices[id]= Pt3f{ + static_cast(uncompressedVertices[ id])-32768.f, + static_cast(uncompressedVertices[idV + id])-32768.f, + static_cast(uncompressedVertices[2*idV + id])-32768.f + }*0.001f; + + colors[id] = { + uncompressedColor[id].x(), + uncompressedColor[id].y(), + uncompressedColor[id].z(), + }; + }); + } +} + +auto K4FrameUncompressor::convert_to_cloud( + K4Mode mode, + size_t validVerticesCount, + K4VertexMeshData *vertices, + std::vector> &uncompressedColor, + std::vector &uncompressedVertices) -> void{ + + const auto idV = validVerticesCount; + i->update_id_array(idV); + + if(mode != K4Mode::Merged){ + + for_each(std::execution::par_unseq, std::begin(i->indicesValid1D), std::begin(i->indicesValid1D) + idV, [&](size_t id){ + + vertices[id].pos = Pt3f{ + -(static_cast(uncompressedVertices[ id])-4096.f), + -(static_cast(uncompressedVertices[idV + id])-4096.f), + static_cast(uncompressedVertices[2*idV + id]) + }*0.001f; + + vertices[id].col = { + uncompressedColor[id].x(), + uncompressedColor[id].y(), + uncompressedColor[id].z(), + 255 + }; + }); + + }else{ + + for_each(std::execution::par_unseq, std::begin(i->indicesValid1D), std::begin(i->indicesValid1D) + idV, [&](size_t id){ + + vertices[id].pos = Pt3f{ + static_cast(uncompressedVertices[ id])-32768.f, + static_cast(uncompressedVertices[idV + id])-32768.f, + static_cast(uncompressedVertices[2*idV + id])-32768.f + }*0.001f; + + vertices[id].col = { + uncompressedColor[id].x(), + uncompressedColor[id].y(), + 
uncompressedColor[id].z(), + 255 + }; + }); + } +} + +auto K4FrameUncompressor::convert_to_cloud(K4Mode mode, size_t validVerticesCount, Pt3f *vertices, Pt4 *colors, std::vector> &uncompressedColor, std::vector &uncompressedVertices) -> void{ + + const size_t idV = validVerticesCount; + i->update_id_array(idV); + + if(mode != K4Mode::Merged){ + + for_each(std::execution::par_unseq, std::begin(i->indicesValid1D), std::begin(i->indicesValid1D) + idV, [&](size_t id){ + + vertices[id]= Pt3f{ + -(static_cast(uncompressedVertices[ id])-4096), + -(static_cast(uncompressedVertices[idV + id])-4096), + static_cast(uncompressedVertices[2*idV + id]) + }*0.001f; + + colors[id] = { + uncompressedColor[id].x(), + uncompressedColor[id].y(), + uncompressedColor[id].z(), + 255 + }; + }); + + }else{ + + for_each(std::execution::par_unseq, std::begin(i->indicesValid1D), std::begin(i->indicesValid1D) + idV, [&](size_t id){ + + vertices[id]= Pt3f{ + static_cast(uncompressedVertices[ id])-32768.f, + static_cast(uncompressedVertices[idV + id])-32768.f, + static_cast(uncompressedVertices[2*idV + id])-32768.f + }*0.001f; + + colors[id] = { + uncompressedColor[id].x(), + uncompressedColor[id].y(), + uncompressedColor[id].z(), + 255 + }; + }); + } +} + +auto K4FrameUncompressor::convert_to_cloud( + Pt3f *vertices, Pt3f *colors, + std::vector > &uncompressedColor, std::vector &uncompressedDepth, geo::Pt3 *cloudBuffer) -> void{ + + // resize depth indices + if(i->indicesDepths1D.size() != uncompressedDepth.size()){ + i->indicesDepths1D.resize(uncompressedDepth.size()); + std::iota(std::begin(i->indicesDepths1D), std::end(i->indicesDepths1D), 0); + } + + // update cloud values + size_t idV = 0; + for_each(std::execution::unseq, std::begin(i->indicesDepths1D), std::end(i->indicesDepths1D), [&](size_t id){ + + if(uncompressedDepth[id] == k4_invalid_depth_value){ + return; + } + vertices[idV] = Pt3f{ + static_cast(-cloudBuffer[id].x()), + static_cast(-cloudBuffer[id].y()), + static_cast( 
cloudBuffer[id].z()) + }*0.001f; + colors[idV] = Pt3f{ + 1.f*uncompressedColor[id].x(), + 1.f*uncompressedColor[id].y(), + 1.f*uncompressedColor[id].z() + }/255.f; + + ++idV; + }); +} + +auto K4FrameUncompressor::convert_to_cloud( + Pt3f *vertices, Pt4f *colors, + std::vector > &uncompressedColor, std::vector &uncompressedDepth, geo::Pt3 *cloudBuffer) -> void{ + + // resize depth indices + if(i->indicesDepths1D.size() != uncompressedDepth.size()){ + i->indicesDepths1D.resize(uncompressedDepth.size()); + std::iota(std::begin(i->indicesDepths1D), std::end(i->indicesDepths1D), 0); + } + + // update cloud values + size_t idV = 0; + for_each(std::execution::unseq, std::begin(i->indicesDepths1D), std::end(i->indicesDepths1D), [&](size_t id){ + + if(uncompressedDepth[id] == k4_invalid_depth_value){ + return; + } + vertices[idV] = Pt3f{ + static_cast(-cloudBuffer[id].x()), + static_cast(-cloudBuffer[id].y()), + static_cast( cloudBuffer[id].z()) + }*0.001f; + colors[idV] = Pt4f{ + 1.f*uncompressedColor[id].x(), + 1.f*uncompressedColor[id].y(), + 1.f*uncompressedColor[id].z(), + 255.f + }/255.f; + + ++idV; + }); +} + +auto K4FrameUncompressor::convert_to_cloud( + Pt3f *vertices, Pt3 *colors, + std::vector > &uncompressedColor, std::vector &uncompressedDepth, geo::Pt3 *cloudBuffer) -> void{ + + // resize depth indices + if(i->indicesDepths1D.size() != uncompressedDepth.size()){ + i->indicesDepths1D.resize(uncompressedDepth.size()); + std::iota(std::begin(i->indicesDepths1D), std::end(i->indicesDepths1D), 0); + } + + // update cloud values + size_t idV = 0; + for_each(std::execution::unseq, std::begin(i->indicesDepths1D), std::end(i->indicesDepths1D), [&](size_t id){ + + if(uncompressedDepth[id] == k4_invalid_depth_value){ + return; + } + vertices[idV] = Pt3f{ + static_cast(-cloudBuffer[id].x()), + static_cast(-cloudBuffer[id].y()), + static_cast( cloudBuffer[id].z()) + }*0.001f; + colors[idV] = Pt3{ + uncompressedColor[id].x(), + uncompressedColor[id].y(), + 
uncompressedColor[id].z() + }; + + ++idV; + }); + +} + +auto K4FrameUncompressor::convert_to_cloud( + + Pt3f *vertices, Pt4 *colors, + std::vector > &uncompressedColor, std::vector &uncompressedDepth, geo::Pt3 *cloudBuffer) -> void{ + + // resize depth indices + if(i->indicesDepths1D.size() != uncompressedDepth.size()){ + i->indicesDepths1D.resize(uncompressedDepth.size()); + std::iota(std::begin(i->indicesDepths1D), std::end(i->indicesDepths1D), 0); + } + + // update cloud values + size_t idV = 0; + for_each(std::execution::unseq, std::begin(i->indicesDepths1D), std::end(i->indicesDepths1D), [&](size_t id){ + + if(uncompressedDepth[id] == k4_invalid_depth_value){ + return; + } + vertices[idV] = Pt3f{ + static_cast(-cloudBuffer[id].x()), + static_cast(-cloudBuffer[id].y()), + static_cast( cloudBuffer[id].z()) + }*0.001f; + colors[idV] = Pt4{ + uncompressedColor[id].x(), + uncompressedColor[id].y(), + uncompressedColor[id].z(), + 255 + }; + + ++idV; + }); +} + +auto K4FrameUncompressor::uncompress(K4CompressedFrame *cFrame, K4Frame &frame) -> bool{ + + // info + frame.idCapture = cFrame->idCapture; + frame.afterCaptureTS = cFrame->afterCaptureTS; + frame.mode = cFrame->mode; + + // reset sizes + frame.colorWidth = 0; + frame.colorHeight = 0; + frame.depthWidth = 0; + frame.depthHeight = 0; + frame.infraWidth = 0; + frame.infraHeight = 0; + + // color + if(!cFrame->encodedColorData.empty()){ + + if(!uncompress_jpeg_8_bits_data(cFrame->colorWidth, cFrame->colorHeight, ColorFormat::RGBA, cFrame->encodedColorData, frame.imageColorData)){ + return false; + } + frame.colorWidth = cFrame->colorWidth; + frame.colorHeight = cFrame->colorHeight; + } + + // depth + if(!cFrame->encodedDepthData.empty()){ + + if(!uncompress_lossless_16_bits_128padded_data(cFrame->depthWidth*cFrame->depthHeight, cFrame->encodedDepthData, frame.depthData)){ + return false; + } + + convert_to_depth_image(cFrame->mode, cFrame->depthWidth, cFrame->depthHeight, frame.depthData, frame.imageDepthData); 
+ frame.depthWidth = cFrame->depthWidth; + frame.depthHeight = cFrame->depthHeight; + } + + // infra + if(!cFrame->encodedInfraData.empty()){ + + if(!uncompress_lossless_16_bits_128padded_data(cFrame->infraWidth*cFrame->infraHeight, cFrame->encodedInfraData, frame.infraData)){ + return false; + } + + convert_to_infra_image(cFrame->infraWidth, cFrame->infraHeight, frame.infraData, frame.imageInfraData); + frame.infraWidth = cFrame->infraWidth; + frame.infraHeight = cFrame->infraHeight; + } + + // cloud + if(cFrame->validVerticesCount > 0){ + + if(!cFrame->encodedCloudVerticesData.empty() && !cFrame->encodedCloudColorData.empty()){ + + if(!uncompress_lossless_16_bits_128padded_data(cFrame->validVerticesCount*3, cFrame->encodedCloudVerticesData, i->decodedVerticesData)){ + return false; + } + + if(!uncompress_jpeg_8_bits_data(cFrame->cloudColorWidth, cFrame->cloudColorHeight, ColorFormat::RGB, cFrame->encodedCloudColorData, i->decodedColorData)){ + return false; + } + + convert_to_cloud(cFrame->mode, cFrame->validVerticesCount, frame.cloud, i->decodedColorData, i->decodedVerticesData); + + }else if(cFrame->calibration.has_value() && !frame.imageColorData.empty() && !frame.depthData.empty()){ + + generate_cloud(cFrame->mode, cFrame->depthWidth, cFrame->depthHeight, cFrame->calibration.value(), frame.depthData); + + convert_to_cloud(cFrame->validVerticesCount, frame.cloud, frame.imageColorData, frame.depthData, i->cloud_image_data()); + } + } + + // imu + if(cFrame->imuSample.has_value()){ + frame.imuSample = cFrame->imuSample; + }else{ + frame.imuSample = std::nullopt; + } + + // audio + if(cFrame->audioFrames.size() > 0){ + frame.audioFrames = cFrame->audioFrames; + } + + // bodies + // ... 
+ + return true; +} + +auto K4FrameUncompressor::uncompress(K4CompressedFrame *cFrame, Pt3f *vertices, Pt3f *colors) -> bool{ + + // cloud + if(cFrame->validVerticesCount > 0){ + + if(!cFrame->encodedCloudVerticesData.empty() && !cFrame->encodedCloudColorData.empty()){ + + // decode processed colors + if(!uncompress_jpeg_8_bits_data(cFrame->colorWidth, cFrame->colorHeight, ColorFormat::RGB, cFrame->encodedCloudColorData, i->decodedColorData)){ + return false; + } + + // decode vertices + if(!uncompress_lossless_16_bits_128padded_data(cFrame->validVerticesCount, cFrame->encodedCloudVerticesData, i->decodedVerticesData)){ + return false; + } + + // convert + convert_to_cloud(cFrame->mode, cFrame->validVerticesCount, vertices, colors, i->decodedColorData, i->decodedVerticesData); + + return true; + + }else if(cFrame->calibration.has_value() && !cFrame->encodedColorData.empty() && !cFrame->encodedDepthData.empty()){ + + // decode colors + if(!uncompress_jpeg_8_bits_data(cFrame->colorWidth, cFrame->colorHeight, ColorFormat::RGB, cFrame->encodedColorData, i->decodedColorData)){ + return false; + } + + // decode depth + if(!uncompress_lossless_16_bits_128padded_data(cFrame->depthWidth*cFrame->depthHeight, cFrame->encodedDepthData, i->decodedDepthData)){ + return false; + } + + // generate cloud + generate_cloud(cFrame->mode, cFrame->depthWidth, cFrame->depthHeight, cFrame->calibration.value(), i->decodedDepthData); + + // convert + convert_to_cloud(vertices, colors, i->decodedColorData, i->decodedDepthData, i->cloud_image_data()); + + return true; + } + } + return false; +} + +auto K4FrameUncompressor::uncompress(K4CompressedFrame *cFrame, Pt3f *vertices, Pt4f *colors) -> bool{ + + // cloud + if(cFrame->validVerticesCount > 0){ + + if(!cFrame->encodedCloudVerticesData.empty() && !cFrame->encodedCloudColorData.empty()){ + + // decode processed colors + if(!uncompress_jpeg_8_bits_data(cFrame->colorWidth, cFrame->colorHeight, ColorFormat::RGB, cFrame->encodedCloudColorData, 
i->decodedColorData)){ + return false; + } + + // decode vertices + if(!uncompress_lossless_16_bits_128padded_data(cFrame->validVerticesCount, cFrame->encodedCloudVerticesData, i->decodedVerticesData)){ + return false; + } + + // convert + convert_to_cloud(cFrame->mode, cFrame->validVerticesCount, vertices, colors, i->decodedColorData, i->decodedVerticesData); + + return true; + + }else if(cFrame->calibration.has_value() && !cFrame->encodedColorData.empty() && !cFrame->encodedDepthData.empty()){ + + // decode colors + if(!uncompress_jpeg_8_bits_data(cFrame->colorWidth, cFrame->colorHeight, ColorFormat::RGB, cFrame->encodedColorData, i->decodedColorData)){ + return false; + } + + // decode depth + if(!uncompress_lossless_16_bits_128padded_data(cFrame->depthWidth*cFrame->depthHeight, cFrame->encodedDepthData, i->decodedDepthData)){ + return false; + } + + // generate cloud + generate_cloud(cFrame->mode, cFrame->depthWidth, cFrame->depthHeight, cFrame->calibration.value(), i->decodedDepthData); + + // convert + convert_to_cloud(vertices, colors, i->decodedColorData, i->decodedDepthData, i->cloud_image_data()); + + return true; + } + } + return false; +} + +auto K4FrameUncompressor::uncompress(K4CompressedFrame *cFrame, Pt3f *vertices, Pt3 *colors) -> bool{ + + // cloud + if(cFrame->validVerticesCount > 0){ + + if(!cFrame->encodedCloudVerticesData.empty() && !cFrame->encodedCloudColorData.empty()){ + + // decode processed colors + if(!uncompress_jpeg_8_bits_data(cFrame->colorWidth, cFrame->colorHeight, ColorFormat::RGB, cFrame->encodedCloudColorData, i->decodedColorData)){ + return false; + } + + // decode vertices + if(!uncompress_lossless_16_bits_128padded_data(cFrame->validVerticesCount, cFrame->encodedCloudVerticesData, i->decodedVerticesData)){ + return false; + } + + // convert + convert_to_cloud(cFrame->mode, cFrame->validVerticesCount, vertices, colors, i->decodedColorData, i->decodedVerticesData); + + return true; + + }else if(cFrame->calibration.has_value() 
&& !cFrame->encodedColorData.empty() && !cFrame->encodedDepthData.empty()){ + + // decode colors + if(!uncompress_jpeg_8_bits_data(cFrame->colorWidth, cFrame->colorHeight, ColorFormat::RGB, cFrame->encodedColorData, i->decodedColorData)){ + return false; + } + + // decode depth + if(!uncompress_lossless_16_bits_128padded_data(cFrame->depthWidth*cFrame->depthHeight, cFrame->encodedDepthData, i->decodedDepthData)){ + return false; + } + + // generate cloud + generate_cloud(cFrame->mode, cFrame->depthWidth, cFrame->depthHeight, cFrame->calibration.value(), i->decodedDepthData); + + // convert + convert_to_cloud(vertices, colors, i->decodedColorData, i->decodedDepthData, i->cloud_image_data()); + + return true; + } + } + return false; +} + +auto K4FrameUncompressor::uncompress(K4CompressedFrame *cFrame, Pt3f *vertices, Pt4 *colors) -> bool{ + + // cloud + if(cFrame->validVerticesCount > 0){ + + if(!cFrame->encodedCloudVerticesData.empty() && !cFrame->encodedCloudColorData.empty()){ + + // decode processed colors + if(!uncompress_jpeg_8_bits_data(cFrame->colorWidth, cFrame->colorHeight, ColorFormat::RGB, cFrame->encodedCloudColorData, i->decodedColorData)){ + return false; + } + + // decode vertices + if(!uncompress_lossless_16_bits_128padded_data(cFrame->validVerticesCount, cFrame->encodedCloudVerticesData, i->decodedVerticesData)){ + return false; + } + + // convert + convert_to_cloud(cFrame->mode, cFrame->validVerticesCount, vertices, colors, i->decodedColorData, i->decodedVerticesData); + + return true; + + }else if(cFrame->calibration.has_value() && !cFrame->encodedColorData.empty() && !cFrame->encodedDepthData.empty()){ + + // decode colors + if(!uncompress_jpeg_8_bits_data(cFrame->colorWidth, cFrame->colorHeight, ColorFormat::RGB, cFrame->encodedColorData, i->decodedColorData)){ + return false; + } + + // decode depth + if(!uncompress_lossless_16_bits_128padded_data(cFrame->depthWidth*cFrame->depthHeight, cFrame->encodedDepthData, i->decodedDepthData)){ + return 
false; + } + + // generate cloud + generate_cloud(cFrame->mode, cFrame->depthWidth, cFrame->depthHeight, cFrame->calibration.value(), i->decodedDepthData); + + // convert + convert_to_cloud(vertices, colors, i->decodedColorData, i->decodedDepthData, i->cloud_image_data()); + + return true; + } + } + return false; +} + + +auto K4FrameUncompressor::uncompress(K4CompressedFrame *cFrame, K4VertexMeshData *vertices) -> bool{ + + + // cloud + if(cFrame->validVerticesCount > 0){ + + if(!cFrame->encodedCloudVerticesData.empty() && !cFrame->encodedCloudColorData.empty()){ + + // decode vertices + if(!uncompress_lossless_16_bits_128padded_data(cFrame->validVerticesCount*3, cFrame->encodedCloudVerticesData, i->decodedVerticesData)){ + return false; + } + + // decode processed colors + if(!uncompress_jpeg_8_bits_data(cFrame->cloudColorWidth, cFrame->cloudColorHeight, ColorFormat::RGB, cFrame->encodedCloudColorData, i->decodedColorData)){ + return false; + } + + // convert + convert_to_cloud(cFrame->mode, cFrame->validVerticesCount, vertices, i->decodedColorData, i->decodedVerticesData); + + return true; + + }else if(cFrame->calibration.has_value() && !cFrame->encodedColorData.empty() && !cFrame->encodedDepthData.empty()){ + + // decode colors + if(!uncompress_jpeg_8_bits_data(cFrame->colorWidth, cFrame->colorHeight, ColorFormat::RGB, cFrame->encodedColorData, i->decodedColorData)){ + return false; + } + + // decode depth + if(!uncompress_lossless_16_bits_128padded_data(cFrame->depthWidth*cFrame->depthHeight, cFrame->encodedDepthData, i->decodedDepthData)){ + return false; + } + + // generate cloud + generate_cloud(cFrame->mode, cFrame->depthWidth, cFrame->depthHeight, cFrame->calibration.value(), i->decodedDepthData); + + // convert + convert_to_cloud(vertices, i->decodedColorData, i->decodedDepthData, i->cloud_image_data()); + + return true; + } + } + return false; +} + + + + diff --git a/cpp-projects/base/camera/kinect4/k4_frame_uncompressor.hpp 
b/cpp-projects/base/camera/kinect4/k4_frame_uncompressor.hpp new file mode 100644 index 0000000..b0f2363 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_frame_uncompressor.hpp @@ -0,0 +1,106 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "k4_frame.hpp" +#include "k4_compressed_frame.hpp" + +namespace tool::camera{ + + +struct K4FrameUncompressor{ + + enum ColorFormat : std::int8_t { + RGB = 0,RGBA = 7,BGR = 1,BGRA = 8 + }; + + K4FrameUncompressor(); + ~K4FrameUncompressor(); + + // uncompress data + auto uncompress_jpeg_8_bits_data(size_t width, size_t height, ColorFormat format, size_t jpegSize, std::uint8_t *jpegData, std::uint8_t *data) -> bool; // data not resized + auto uncompress_jpeg_8_bits_data(size_t width, size_t height, ColorFormat format, std::vector &jpegData, std::vector &data) -> bool; + auto uncompress_jpeg_8_bits_data(size_t width, size_t height, ColorFormat format, std::vector &jpegData, std::vector> &data) -> bool; + auto uncompress_jpeg_8_bits_data(size_t width, size_t height, ColorFormat format, std::vector &jpegData, std::vector> &data) -> bool; + + auto uncompress_lossless_16_bits_128padded_data(size_t validVerticesCount, std::vector &encoded, std::vector &decodedVertices) -> bool; + + // convert decoded data + auto convert_to_depth_image(K4Mode mode, size_t depthWidth, size_t depthHeight, const std::vector &uncompressedDepth, std::vector> &imageDepth) -> void; + auto convert_to_infra_image(size_t infraWidth, size_t infraHeight, const std::vector &uncompressedInfra, std::vector> &imageInfra) -> void; + auto generate_cloud(K4Mode mode, size_t dephtWidth, size_t depthHeight, k4a_calibration_t &calibration, const std::vector &uncompressedDepth) -> void; + + + // convert to cloud + // # colored cloud data + // ## from vertices + auto convert_to_cloud(K4Mode mode, size_t validVerticesCount, geo::ColoredCloudData &cloudData, std::vector> &uncompressedColor, std::vector &uncompressedVertices) -> void; + // ## from cloud buffer + auto convert_to_cloud(size_t validVerticesCount, geo::ColoredCloudData &cloudData, std::vector> &uncompressedColor, std::vector 
&uncompressedDepth, geo::Pt3 *cloudBuffer) -> void; + // # vertices,colors pointers + // ## from vertices + auto convert_to_cloud(K4Mode mode, size_t validVerticesCount, geo::Pt3f *vertices, geo::Pt3f *colors, std::vector> &uncompressedColor, std::vector &uncompressedVertices) -> void; + auto convert_to_cloud(K4Mode mode, size_t validVerticesCount, geo::Pt3f *vertices, geo::Pt4f *colors, std::vector> &uncompressedColor, std::vector &uncompressedVertices) -> void; + auto convert_to_cloud(K4Mode mode, size_t validVerticesCount, geo::Pt3f *vertices, geo::Pt3 *colors, std::vector> &uncompressedColor, std::vector &uncompressedVertices) -> void; + auto convert_to_cloud(K4Mode mode, size_t validVerticesCount, geo::Pt3f *vertices, geo::Pt4 *colors, std::vector> &uncompressedColor, std::vector &uncompressedVertices) -> void; + // ## from cloud bgeo::uffer + auto convert_to_cloud(geo::Pt3f *vertices, geo::Pt3f *colors, std::vector> &uncompressedColor, std::vector &uncompressedDepth, geo::Pt3 *cloudBuffer) -> void; + auto convert_to_cloud(geo::Pt3f *vertices, geo::Pt4f *colors, std::vector> &uncompressedColor, std::vector &uncompressedDepth, geo::Pt3 *cloudBuffer) -> void; + auto convert_to_cloud(geo::Pt3f *vertices, geo::Pt3 *colors, std::vector> &uncompressedColor, std::vector &uncompressedDepth, geo::Pt3 *cloudBuffer) -> void; + auto convert_to_cloud(geo::Pt3f *vertices, geo::Pt4 *colors, std::vector> &uncompressedColor, std::vector &uncompressedDepth, geo::Pt3 *cloudBuffer) -> void; + // # K4 vertex mesh data + // ## from vertices + auto convert_to_cloud(K4Mode mode, size_t validVerticesCount, K4VertexMeshData *vertices, std::vector> &uncompressedColor, std::vector &uncompressedVertices) -> void; + // ## from cloud buffer + auto convert_to_cloud(K4VertexMeshData *vertices, std::vector> &uncompressedColor, std::vector &uncompressedDepth, geo::Pt3 *cloudBuffer) -> void; + + // uncompress to frame + auto uncompress(K4CompressedFrame *cFrame, K4Frame &frame) -> bool; + auto 
uncompress(K4CompressedFrame *cFrame, geo::Pt3f *vertices, geo::Pt3f *colors) -> bool; + auto uncompress(K4CompressedFrame *cFrame, geo::Pt3f *vertices, geo::Pt4f *colors) -> bool; + auto uncompress(K4CompressedFrame *cFrame, geo::Pt3f *vertices, geo::Pt3 *colors) -> bool; + auto uncompress(K4CompressedFrame *cFrame, geo::Pt3f *vertices, geo::Pt4 *colors) -> bool; + auto uncompress(K4CompressedFrame *cFrame, K4VertexMeshData *vertices) -> bool; + + + +private: + + struct Impl; + std::unique_ptr i; +}; +} + + + + + + + + + diff --git a/cpp-projects/base/camera/kinect4/k4_grabber_data_processing.cpp b/cpp-projects/base/camera/kinect4/k4_grabber_data_processing.cpp new file mode 100644 index 0000000..35ffdef --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_grabber_data_processing.cpp @@ -0,0 +1,139 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "k4_grabber_data_processing.hpp" + +using namespace tool::camera; + +K4GrabberDataProcessing::K4GrabberDataProcessing(){ + m_locker = std::make_unique(); + m_locker = std::make_unique(); + m_frameUncompressor = std::make_unique(); +} + +K4GrabberDataProcessing::~K4GrabberDataProcessing(){ + clean(); +} + +auto K4GrabberDataProcessing::start() -> void { + + if(m_isProcessing){ + return; + } + m_thread = std::make_unique(&K4GrabberDataProcessing::process, this); +} + +auto K4GrabberDataProcessing::stop() -> void { + m_isProcessing = false; +} + +auto K4GrabberDataProcessing::clean() -> void { + + m_isProcessing = false; + + if(m_thread != nullptr){ + if(m_thread->joinable()){ + m_thread->join(); + } + m_thread = nullptr; + } +} + +auto K4GrabberDataProcessing::new_compressed_frame(std::shared_ptr frame) -> void { + std::lock_guard guard(*m_locker); + m_lastCF = frame; +} + +auto K4GrabberDataProcessing::new_frame(std::shared_ptr frame) -> void { + std::lock_guard guard(*m_locker); + m_lastF = frame; +} + +auto K4GrabberDataProcessing::get_frame() -> std::shared_ptr { + std::lock_guard guard(*m_locker); + return m_frame; +} + +auto K4GrabberDataProcessing::get_compressed_frame() -> std::shared_ptr { + std::lock_guard guard(*m_locker); + return m_cFrame; +} + +auto K4GrabberDataProcessing::invalid_frame() -> void { + std::lock_guard guard(*m_locker); + m_frame = nullptr; +} + +auto K4GrabberDataProcessing::invalid_compressed_frame() -> void { + std::lock_guard guard(*m_locker); + m_cFrame = nullptr; +} + + +auto K4GrabberDataProcessing::process() -> void { + + m_isProcessing = true; + 
+ while(m_isProcessing){ + + std::shared_ptr frameToBeUncompresed = nullptr; + { + std::lock_guard guard(*m_locker); + + // check for new compressed frame + if(m_lastCF){ + // store last + m_cFrame = m_lastCF; + // invalid it + m_lastCF = nullptr; + // ask for uncompression + frameToBeUncompresed = m_cFrame; + } + // check for new frame + if(m_lastF){ + // store last + m_frame = m_lastF; + // invalid it + m_lastF = nullptr; + } + } + + // uncompress + { + if(frameToBeUncompresed != nullptr){ + auto uncompressedFrame = std::make_shared(); + if(m_frameUncompressor->uncompress(frameToBeUncompresed.get(), *uncompressedFrame)){ + std::lock_guard guard(*m_locker); + m_frame = uncompressedFrame; + } + } + } + + // sleep + using namespace std::chrono_literals; + std::this_thread::sleep_for(1ms); + } +} diff --git a/cpp-projects/base/camera/kinect4/k4_grabber_data_processing.hpp b/cpp-projects/base/camera/kinect4/k4_grabber_data_processing.hpp new file mode 100644 index 0000000..637cce1 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_grabber_data_processing.hpp @@ -0,0 +1,66 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. 
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "k4_frame_uncompressor.hpp" + +namespace tool::camera{ + +struct K4GrabberDataProcessing{ + + K4GrabberDataProcessing(); + ~K4GrabberDataProcessing(); + + auto start() -> void; + auto stop() -> void; + auto clean() -> void; + + auto new_compressed_frame(std::shared_ptr frame) -> void; + auto new_frame(std::shared_ptr frame) -> void; + auto get_frame() -> std::shared_ptr; + auto get_compressed_frame() -> std::shared_ptr; + auto invalid_frame() -> void; + auto invalid_compressed_frame() -> void; + +private: + + auto process() -> void; + + std::atomic_bool m_isProcessing = false; + std::unique_ptr m_thread = nullptr; + std::unique_ptr m_locker = nullptr; + + std::shared_ptr m_lastCF = nullptr; + std::shared_ptr m_lastF = nullptr; + + std::shared_ptr m_cFrame = nullptr; + std::shared_ptr m_frame = nullptr; + + std::unique_ptr m_frameUncompressor; +}; +} diff --git a/cpp-projects/base/camera/kinect4/k4_model.cpp b/cpp-projects/base/camera/kinect4/k4_model.cpp new file mode 100644 index 0000000..abd0e97 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_model.cpp @@ -0,0 +1,57 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** 
+** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#include "k4_model.hpp" + +// local +#include "utility/string.hpp" + +using namespace tool::camera; +

// Fill the transformation matrix from a text made of float values separated by spaces
// and line breaks (the layout written by convert_to_text).
// Values are stored in reading order, parsing stops at the first empty line or once
// 16 values have been read; when fewer values are available the remaining coefficients
// keep their previous value.
// std::stof is used for the conversion, its exceptions (std::invalid_argument,
// std::out_of_range) are not caught here.
auto K4Model::init_from_text(const std::string &text) -> void {

    // number of coefficients read at most
    constexpr size_t nbValues = 16;

    size_t id = 0;
    for(const auto &line : String::split(text, '\n')){
        if(line.length() == 0){
            return;
        }
        for(const auto &value : String::split(line, ' ')){

            // repeated or trailing spaces may produce empty tokens (assuming String::split keeps them,
            // as the empty line check above suggests), std::stof would throw on these
            if(value.length() == 0){
                continue;
            }

            transformation.array[id++] = std::stof(value);
            if(id == nbValues){
                return;
            }
        }
    }
}

// Write the 16 coefficients of the transformation matrix, 4 values per line separated by spaces.
auto K4Model::convert_to_text() const -> std::string {
    const auto &t = transformation;
    return std::format("{} {} {} {}\n{} {} {} {}\n{} {} {} {}\n{} {} {} {}\n",
        t.at(0),t.at(1),t.at(2),t.at(3),
        t.at(4),t.at(5),t.at(6),t.at(7),
        t.at(8),t.at(9),t.at(10),t.at(11),
        t.at(12),t.at(13),t.at(14),t.at(15)
    );
}
diff --git a/cpp-projects/base/camera/kinect4/k4_model.hpp b/cpp-projects/base/camera/kinect4/k4_model.hpp new file mode 100644 index 0000000..43068fd --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_model.hpp @@ -0,0 +1,54 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software.
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "k4_types.hpp" +#include "geometry/matrix4.hpp" +#include "files/text_settings.hpp" + +namespace tool::camera { +struct K4Model : files::TextSettings{ + + geo::Mat4f transformation = geo::Mat4f::identity(); + + // local + geo::Vec3f rotation = {}; + geo::Vec3f translation = {}; + geo::Vec3f scaling = {1.f,1.f,1.f}; + + auto compute_full_transformation() const -> geo::Mat4f{ + return geo::transform(scaling, rotation, translation) * transformation; + } + + // i/o + auto init_from_text(const std::string &text) -> void override; + auto convert_to_text() const -> std::string override; + auto type() const noexcept -> std::int32_t override {return static_cast(SettingsType::Model);}; + auto file_description() const noexcept -> std::string_view override {return settings_name(static_cast(type()));} +}; +} diff --git a/cpp-projects/base/camera/kinect4/k4_player.cpp b/cpp-projects/base/camera/kinect4/k4_player.cpp new file mode 100644 index 0000000..0ba8872 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_player.cpp @@ -0,0 +1,311 @@ + + +#include "k4_player.hpp" + +// local +#include "utility/logger.hpp" + +using namespace tool::camera; + +auto K4Player::set_video(const K4VolumetricVideo &video) -> void{ + + m_videoResource = video; + + m_currentCompressedFrames.clear(); + 
m_currentCompressedFrames.resize(m_videoResource.nb_cameras()); + std::fill(std::begin(m_currentCompressedFrames), std::end(m_currentCompressedFrames), nullptr); + + m_currentFrames.clear(); + m_currentFrames.resize(m_videoResource.nb_cameras()); + std::fill(std::begin(m_currentFrames), std::end(m_currentFrames), nullptr); + update_states(); + + std::vector models; + for(size_t ii = 0; ii < m_videoResource.nb_cameras(); ++ii){ + K4Model model; + model.transformation = m_videoResource.get_transform(ii).conv(); + models.push_back(model); + } + initialize_signal(std::move(models)); +} + +#include +auto K4Player::display_infos() -> void{ + + using namespace std::chrono; + + + for(size_t idC = 0; idC < video()->nb_cameras(); ++idC){ + + std::cout << "FROM idc " << idC << "\n"; + auto firstC = video()->get_camera_data(idC)->first_frame_capture_timestamp().value(); + for(size_t idC2 = 0; idC2 < video()->nb_cameras(); ++idC2){ + auto camD = video()->get_camera_data(idC2); + auto diff = nanoseconds(camD->first_frame_capture_timestamp().value() - firstC); + std::cout << " Camera " << idC2 << " mdiff: " << duration_cast(diff) << " "<< camD->first_frame_capture_timestamp().value() << " " << camD->last_frame_capture_timestamp().value() << "\n"; + } + } + +// for(size_t idC = 0; idC < video()->nb_cameras(); ++idC){ +// auto camD = video()->get_camera_data(idC); +// auto first = camD->first_frame_capture_timestamp().value(); +// std::cout << "CAMERA " << idC << "\n\n"; +// for(size_t idF = 0; idF < camD->nb_frames(); ++idF){ +// auto ts = camD->get_compressed_frame(idF).lock()->afterCaptureTS; +// auto diffN = nanoseconds(ts-first); +// auto diffN2 = nanoseconds(ts - firstC); +// std::cout << duration_cast(diffN) << " " << duration_cast(diffN2) << " | "; +// } +// std::cout << "\n"; +// } +} + + +auto K4Player::start_playing() -> void{ + if(m_sw.is_started()){ + return; + } + m_sw.start(); + + update_states(); +} + +auto K4Player::stop_playing() -> void{ + if(!m_sw.is_started()){ 
+ return; + } + m_sw.stop(); + + update_states(); +} + +auto K4Player::current_time_ms() const noexcept -> double{ + return m_sw.ellapsed_milli_s(); +} + +auto K4Player::is_looping() const noexcept -> bool { + return m_settings.doLoop; +} + +auto K4Player::restart() -> void{ + set_current_time(0.0); +} + +auto K4Player::set_current_time(double timeMs) -> void{ + + + if(timeMs > video()->duration_ms()){ + timeMs = video()->duration_ms(); + } + + m_states.currentTime = timeMs; + m_sw.set_current_time(m_states.currentTime); + + update_states(); +} + +auto K4Player::update_time() -> void{ + + auto cTime = current_time_ms(); + if(cTime > video()->duration_ms()){ + if(is_looping()){ + set_current_time(0.0); + }else{ + stop_playing(); + } + } + + for(size_t idC = 0; idC < m_videoResource.nb_cameras(); ++idC){ + + if(m_videoResource.nb_frames(idC) == 0){ + continue; + } + + if(auto idF = m_videoResource.closest_frame_id_from_time(idC, cTime); idF.has_value()){ + m_currentCompressedFrames[idC] = m_videoResource.get_compressed_frame(idC, idF.value()).lock(); + update_states(); + }else{ + Logger::error(idF.error()); + } + } +} + +auto K4Player::remove_until_current_frame() -> void{ + + for(size_t ii = 0; ii < m_videoResource.nb_cameras(); ++ii){ + m_videoResource.remove_compressed_frames_until(ii, current_frame_id(ii)); + } + + set_current_time(0.0); +} + +auto K4Player::remove_after_current_frame() -> void{ + + for(size_t ii = 0; ii < m_videoResource.nb_cameras(); ++ii){ + m_videoResource.remove_compressed_frames_after(ii, current_frame_id(ii)); + } + + update_states(); +} + +auto K4Player::merge() -> void{ +// Logger::error("Not implemented.\n"); + merge_cameras(0.005f, {-20.f,-20.f,-20.f}, {+20.f,+20.f,+20.f}); +} + +auto K4Player::merge_cameras(float voxelSize, tool::geo::Pt3f minBound, tool::geo::Pt3f maxBound) -> void{ + + if(m_videoResource.nb_cameras() < 1){ + return; + } + + m_videoResource.merge_all_cameras(voxelSize, minBound, maxBound); + +// 
m_videoResource.keep_only_one_camera(2); + + m_currentCompressedFrames = {nullptr}; + m_currentFrames = {nullptr}; + update_states(); + + std::vector models(1); + models.front().transformation = m_videoResource.get_transform(0).conv(); + initialize_signal(std::move(models)); +} + +auto K4Player::update_settings(const K4PlayerSettings &playerS) noexcept -> void { + m_settings = playerS; +} + +auto K4Player::uncompress_frame(size_t idCamera, K4Frame &frame) -> bool{ + + if(idCamera >= m_currentCompressedFrames.size()){ + // ... + return false; + } + + if(m_currentCompressedFrames[idCamera] == nullptr){ + // ... + return false; + } + + if(m_currentCompressedFrames[idCamera]->idCapture == frame.idCapture){ + return false; + } + + return m_videoResource.uncompress_frame(idCamera, m_currentCompressedFrames[idCamera].get(), frame); +} + +auto K4Player::current_frame_id(size_t idCamera) const -> size_t{ + size_t idF = 0; + for(auto &frame : m_videoResource.get_camera_data(idCamera)->frames){ + if(frame->idCapture == current_frame_id_capture(idCamera)){ + return idF; + } + ++idF; + } + return 0; +} + +auto K4Player::current_frame_id_capture(size_t idCamera) const -> size_t{ + if(m_currentCompressedFrames[idCamera] != nullptr){ + return m_currentCompressedFrames[idCamera]->idCapture; + } + return 0; +} + +auto K4Player::is_playing() const -> bool{ + return m_sw.is_started(); +} + +auto K4Player::update_frames() -> void{ + + for(size_t idC = 0; idC < video()->nb_cameras(); ++idC){ + + if(video()->nb_frames(idC) == 0){ + continue; + } + + if(m_currentFrames[idC] == nullptr){ + m_currentFrames[idC] = std::make_shared(); + } + + if(uncompress_frame(idC, *m_currentFrames[idC])){ + new_frame_signal(idC, m_currentFrames[idC]); + } + } +} + +auto K4Player::video() -> K4VolumetricVideo* { + return &m_videoResource; +} + +auto K4Player::load_from_file(std::string_view path) -> bool{ + + auto res = m_videoResource.load_from_file(path); + + if(res){ + m_currentCompressedFrames.clear(); 
+ m_currentCompressedFrames.resize(m_videoResource.nb_cameras()); + std::fill(std::begin(m_currentCompressedFrames), std::end(m_currentCompressedFrames), nullptr); + + m_currentFrames.clear(); + m_currentFrames.resize(m_videoResource.nb_cameras()); + std::fill(std::begin(m_currentFrames), std::end(m_currentFrames), nullptr); + + update_states(); + + std::vector models; + for(size_t ii = 0; ii < m_videoResource.nb_cameras(); ++ii){ + K4Model model; + model.transformation = m_videoResource.get_transform(ii).conv(); + models.push_back(model); + } + initialize_signal(std::move(models)); + } + + return res; +} + +auto K4Player::save_to_file(std::string_view path) -> bool{ return m_videoResource.save_to_file(path);} + +auto K4Player::merge_before(K4VolumetricVideo &other) -> void{ + + if(other.nb_cameras() != m_videoResource.nb_cameras()){ + Logger::error("Incompatible number of camreras.\n"); + return; + } + + for(size_t ii = 0; ii < m_videoResource.nb_cameras(); ++ii){ + if(m_videoResource.get_camera_data(ii)->transform != other.get_camera_data(ii)->transform){ + Logger::warning("The video to be merged has different transforms.\n"); + break; + } + } + + // ... 
+ update_states(); +} + +auto K4Player::save_cloud_to_file(std::string_view path) -> bool{ + Logger::error("Not implemented.\n"); + return true; +} + +auto K4Player::update_states() -> void{ + + if(m_states.nbFrames.size() != m_videoResource.nb_cameras()){ + m_states.nbFrames.resize(m_videoResource.nb_cameras()); + m_states.currentFrames.resize(m_videoResource.nb_cameras()); + } + + for(size_t ii = 0; ii < m_videoResource.nb_cameras(); ++ii){ + m_states.nbFrames[ii] = m_videoResource.nb_frames(ii); + m_states.currentFrames[ii] = current_frame_id_capture(ii); + } + + m_states.isPlaying = m_sw.is_started(); + m_states.duration = video()->duration_ms(); + m_states.currentTime = current_time_ms(); + states_updated_signal(m_states); +} + diff --git a/cpp-projects/base/camera/kinect4/k4_player.hpp b/cpp-projects/base/camera/kinect4/k4_player.hpp new file mode 100644 index 0000000..8794686 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_player.hpp @@ -0,0 +1,96 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. 
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "thirdparty/sigslot/signal.hpp" +#include "utility/stop_watch.hpp" +#include "camera/kinect4/k4_volumetric_video.hpp" +#include "camera/kinect4/k4_player_settings.hpp" +#include "camera/kinect4/k4_model.hpp" + +namespace tool::camera { + +class K4Player{ +public: + + // update + auto update_time() -> void; + auto update_frames() ->void; + + // video + auto video() -> K4VolumetricVideo*; + auto set_video(const K4VolumetricVideo &video) -> void; + auto display_infos() -> void;; + + // frames + auto uncompress_frame(size_t idCamera, K4Frame &frame) -> bool; + auto current_frame_id(size_t idCamera) const -> size_t; + auto current_frame_id_capture(size_t idCamera) const -> size_t; + + // navigation + auto is_playing() const -> bool; + auto start_playing() -> void; + auto stop_playing() -> void; + auto restart() -> void; + auto set_current_time(double timeMs) -> void; + auto current_time_ms() const noexcept -> double; + auto is_looping() const noexcept -> bool; + + // edit + auto remove_until_current_frame() -> void; + auto remove_after_current_frame() -> void; + auto merge() -> void; + auto merge_cameras(float voxelSize, tool::geo::Pt3f minBound, tool::geo::Pt3f maxBound) -> void; + + // settings + auto update_settings(const K4PlayerSettings &playerS) noexcept -> void; + + // i/o + auto load_from_file(std::string_view path) -> bool; + 
auto save_to_file(std::string_view path) -> bool; + auto merge_before(K4VolumetricVideo &other) -> void; + auto save_cloud_to_file(std::string_view path) -> bool; + + // signals + sigslot::signal> initialize_signal; + sigslot::signal states_updated_signal; + sigslot::signal> new_frame_signal; + +private: + + auto update_states() -> void; + + K4PlayerStates m_states; + K4PlayerSettings m_settings; + + StopWatch m_sw; + std::vector> m_currentCompressedFrames; + std::vector> m_currentFrames; + K4VolumetricVideo m_videoResource; +}; +} diff --git a/cpp-projects/base/camera/kinect4/k4_player_settings.hpp b/cpp-projects/base/camera/kinect4/k4_player_settings.hpp new file mode 100644 index 0000000..0b4f3b4 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_player_settings.hpp @@ -0,0 +1,70 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// std +#include + +// local +//#include "files/binary_settings.hpp" + +namespace tool::camera { + +struct K4PlayerStates{ + + bool isPlaying = false; + std::vector nbFrames; + std::vector currentFrames; + double currentTime = 0.0; + double duration = 0.0; + + // actions + bool play = false; + bool pause = false; + bool restart = false; + bool moveTime = false; + bool removeUntil = false; + bool removeAfter = false; + bool merge = false; + bool info = false; + + auto reset_actions() -> void{ + play = false; + pause = false; + restart = false; + moveTime = false; + removeUntil = false; + removeAfter = false; + merge = false; + info = false; + } +}; + +struct K4PlayerSettings{ + bool doLoop = true; +}; +} diff --git a/cpp-projects/base/camera/kinect4/k4_recorder.cpp b/cpp-projects/base/camera/kinect4/k4_recorder.cpp new file mode 100644 index 0000000..2b84159 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_recorder.cpp @@ -0,0 +1,224 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following 
conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "k4_recorder.hpp" + +using namespace tool::camera; +using namespace std::chrono; + +auto K4Recorder::initialize(size_t nbGrabbers) -> void{ + + m_videoResource.initialize(nbGrabbers); + m_states.nbFramesRecorded.resize(nbGrabbers); + m_states.currentFrames.resize(nbGrabbers); + + m_currentFrames.resize(nbGrabbers); + m_currentCompressedFrames.resize(nbGrabbers); + + states_updated_signal(m_states); +} + +auto K4Recorder::uncompress_frame(size_t idCamera, K4Frame &frame) -> bool{ + + if(idCamera >= m_currentFrames.size()){ + // ... + return false; + } + + if(m_currentCompressedFrames[idCamera] == nullptr){ + // ... 
+ return false; + } + + if(m_currentCompressedFrames[idCamera]->idCapture == frame.idCapture){ + return false; + } + + return m_videoResource.uncompress_frame(idCamera, m_currentCompressedFrames[idCamera].get(), frame); +} + +auto K4Recorder::add_compressed_frame_to_default_camera(std::shared_ptr frame) -> void{ + add_compressed_frame(0, std::move(frame)); +} + +#include +#include + +auto K4Recorder::add_compressed_frame(size_t idCamera, std::shared_ptr frame) -> void{ + + if(!is_recording() || idCamera >= m_videoResource.nb_cameras()){ + return; + } + + if(m_videoResource.total_nb_frames() == 0){ + // update start timestamp +// m_videoResource.set_video_start_timestamp(m_sw.fist_start_timestamp_ns()); + } + + using namespace std::chrono; + + auto ff = video()->first_frame_capture_timestamp(); + if(ff.has_value()){ + auto diff = duration_cast(nanoseconds(frame->afterCaptureTS-ff.value())); + std::cout << idCamera << " " << video()->get_camera_data(idCamera)->nb_frames() << " " << diff << "\n"; + } + + + + if((m_videoResource.nb_frames(idCamera) < m_settings.cameraMaxFramesToRecord) && (m_sw.ellapsed_milli_s() < m_settings.maxDurationS*1000.0)){ + + // add frame to video + m_videoResource.add_compressed_frame(idCamera, std::move(frame)); + + // update end timestamp +// m_videoResource.set_video_end_timestamp(m_sw.fist_start_timestamp_ns() + m_sw.ellapsed_nano_s()); + + // update video duration + m_states.duration = m_videoResource.duration_ms(); + if(idCamera == 1){ +// auto c = video()->get_camera_data(idCamera); +// std::cout << frame->afterCaptureTS << " " << c->first_frame_capture_timestamp().value() << " " << video()->get_camera_data(idCamera)->first_frame_capture_timestamp().value() << " "<< idCamera << " " << m_states.duration << "\n"; + } + + ++m_states.nbFramesRecorded[idCamera]; + states_updated_signal(m_states); + } +} + +auto K4Recorder::set_time(double timeMs) -> void{ + + if(timeMs > video()->duration_ms()){ + timeMs = video()->duration_ms(); + } + 
m_states.currentTime = timeMs; + + for(size_t idC = 0; idC < video()->nb_cameras(); ++idC){ + if(auto idF = video()->closest_frame_id_from_time(idC, m_states.currentTime); idF.has_value()){ + m_currentCompressedFrames[idC] = m_videoResource.get_compressed_frame(idC, idF.value()).lock(); + m_states.currentFrames[idC] = idF.value(); + } + } + + states_updated_signal(m_states); +} + +auto K4Recorder::update_frames() -> void{ + + for(size_t idC = 0; idC < video()->nb_cameras(); ++idC){ + + if(video()->nb_frames(idC) == 0){ + continue; + } + + if(m_currentFrames[idC] == nullptr){ + m_currentFrames[idC] = std::make_shared(); + } + + if(uncompress_frame(idC, *m_currentFrames[idC])){ + new_frame_signal(idC, m_currentFrames[idC]); + } + } +} + +auto K4Recorder::video() -> K4VolumetricVideo* { + return &m_videoResource; +} + +auto K4Recorder::is_recording() const noexcept -> bool { + return m_states.isRecording; +} + +auto K4Recorder::start_recording() -> void { + m_states.isRecording = true; + states_updated_signal(m_states); + + m_sw.start(); +} + +auto K4Recorder::stop_recording() -> void { + m_states.isRecording = false; + states_updated_signal(m_states); + + m_sw.stop(); +} + +auto K4Recorder::reset_recording() -> void { + + // clean video + m_videoResource.clean_all_cameras_compressed_frames(); + + // reset stop watch + m_sw.reset(); + + // reset frames + std::fill(std::begin(m_currentCompressedFrames), std::end(m_currentCompressedFrames), nullptr); + std::fill(std::begin(m_currentFrames), std::end(m_currentFrames), nullptr); + + // reset timestamps +// m_videoResource.set_video_start_timestamp(std::chrono::nanoseconds{0}); +// m_videoResource.set_video_end_timestamp(std::chrono::nanoseconds{0}); + + // reset states + m_states.currentTime = 0.0; + m_states.duration = 0.0; + std::fill(std::begin(m_states.nbFramesRecorded), std::end(m_states.nbFramesRecorded), 0); + std::fill(std::begin(m_states.currentFrames), std::end(m_states.currentFrames), 0); + 
states_updated_signal(m_states); +} + +auto K4Recorder::update_settings(K4RecorderSettings recordingsS) noexcept -> void{ + m_settings = recordingsS; +} + +auto K4Recorder::update_model(size_t id, const K4Model &model) -> void{ + auto tr = model.compute_full_transformation(); + video()->set_transform(id, tr.conv()); +} + + +// AudioFile af; +// std::vector> buffer; +// fullVideoManager.audio_samples_all_channels(0, buffer); +// for(size_t ii = 0; ii < buffer.size(); ++ii){ +// Logger::message(std::format("channel {} with size {}\n", ii, buffer[ii].size())); +// } +// auto startT = fullVideoResource.start_time(0); +// auto endT = fullVideoResource.end_time(0); +// double time = (endT-startT)*0.000000001; +// double samplingRate = buffer[0].size() / time; +// af.setSampleRate(48000);//static_cast(samplingRate)); + +// Logger::message(std::format("time {} {} {}\n", +// time, buffer[0].size(), samplingRate)); +// af.setAudioBuffer(buffer); +// Logger::message(std::format("{} {} {} {}\n", +// af.getLengthInSeconds(), af.getBitDepth(), af.getNumChannels(), af.getNumSamplesPerChannel())); + +// std::string audioFilePath = filePathName; +// str::replace_all2(audioFilePath, ".kvid", ".wav"); +// if(!af.save (audioFilePath, AudioFileFormat::Wave)){ +// Logger::error(std::format("Cannot save audio file {}.\n", audioFilePath)); +// } diff --git a/cpp-projects/base/camera/kinect4/k4_recorder.hpp b/cpp-projects/base/camera/kinect4/k4_recorder.hpp new file mode 100644 index 0000000..366aaef --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_recorder.hpp @@ -0,0 +1,83 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation 
** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "thirdparty/sigslot/signal.hpp" +#include "utility/stop_watch.hpp" +#include "k4_recorder_settings.hpp" +#include "k4_model.hpp" +#include "camera/kinect4/k4_volumetric_video.hpp" + +namespace tool::camera { + +class K4Recorder{ +public: + + auto initialize(size_t nbGrabbers) -> void; + + // update + auto update_frames() -> void; + + // video + auto video() -> K4VolumetricVideo*; + + // frames + auto uncompress_frame(size_t idCamera, K4Frame &frame) -> bool; + auto add_compressed_frame_to_default_camera(std::shared_ptr frame) -> void; + auto add_compressed_frame(size_t idCamera, std::shared_ptr frame) -> void; + + // recordings + auto is_recording() const noexcept -> bool; + auto start_recording() -> void; + auto stop_recording() -> void; + auto reset_recording() -> void; + auto set_time(double timeMs) -> void; + + // settings + auto update_settings(K4RecorderSettings recordingsS) noexcept -> void; + auto update_model(size_t id, const camera::K4Model 
&model) -> void; + + // i/o + auto save_to_file(std::string_view path) -> bool{return m_videoResource.save_to_file(path);} + + // signals + sigslot::signal states_updated_signal; + sigslot::signal> new_frame_signal; + +private: + + std::vector> m_currentCompressedFrames; + std::vector> m_currentFrames; + + K4RecorderStates m_states; + K4RecorderSettings m_settings; + + StopWatch m_sw; + K4VolumetricVideo m_videoResource; +}; +} diff --git a/cpp-projects/base/camera/kinect4/k4_recorder_settings.hpp b/cpp-projects/base/camera/kinect4/k4_recorder_settings.hpp new file mode 100644 index 0000000..fc0d43d --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_recorder_settings.hpp @@ -0,0 +1,73 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// std +#include + +// local +//#include "files/binary_settings.hpp" + +namespace tool::camera { + +struct K4RecorderStates{ + + bool isRecording = false; + std::vector nbFramesRecorded; + std::vector currentFrames; + double currentTime = 0.0; + double duration = 0.0; + + // actions + bool startRecording = false; + bool stopRecording = false; + bool resetRecording = false; + bool moveTime = false; + + auto reset_actions() -> void{ + startRecording = false; + stopRecording = false; + resetRecording = false; + moveTime = false; + } +}; + +struct K4RecorderSettings{ + int cameraMaxFramesToRecord = 10000; + double maxDurationS = 500.; + // output + std::vector camerasToRecord; + bool recordAllData = true; + bool recordColor = true; + bool recordDepth = true; + bool recordInfra = true; + bool recordCloud = true; + bool recordIMU = true; + bool recordAudio = true; + bool recordBodies = true; +}; +} diff --git a/cpp-projects/base/camera/kinect4/k4_server_data.cpp b/cpp-projects/base/camera/kinect4/k4_server_data.cpp new file mode 100644 index 0000000..8b16249 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_server_data.cpp @@ -0,0 +1,120 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, 
copy, modify, merge, publish, distribute, sublicense, **
+** and/or sell copies of the Software, and to permit persons to whom the **
+** Software is furnished to do so, subject to the following conditions: **
+** **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software. **
+** **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER **
+** DEALINGS IN THE SOFTWARE. **
+** **
+********************************************************************************/
+
+#include "k4_server_data.hpp"
+
+// std
+#include <format>
+
+// local
+#include "utility/logger.hpp"
+
+using namespace tool::camera;
+
+// Stops and releases every processing unit.
+K4ServerData::~K4ServerData(){
+    clean();
+}
+
+// Creates one processing unit per grabber, then starts them all.
+// Units left over from a previous call are stopped and cleaned first.
+// NOTE(review): the type given to std::make_unique was missing in the reviewed copy,
+// K4GrabberDataProcessing is assumed from the included header name - confirm
+auto K4ServerData::initialize(size_t nbGrabbers) -> void {
+
+    // no-op on the first call, avoids replacing running units on the next ones
+    clean();
+
+    m_grabbersDataProcessing.resize(nbGrabbers);
+    for(auto &grabber : m_grabbersDataProcessing){
+        grabber = std::make_unique<K4GrabberDataProcessing>();
+    }
+
+    for(auto &grabber : m_grabbersDataProcessing){
+        grabber->start();
+    }
+}
+
+// Stops every processing unit, then cleans them and empties the container.
+auto K4ServerData::clean() -> void{
+
+    for(auto &grabber : m_grabbersDataProcessing){
+        grabber->stop();
+    }
+    for(auto &grabber : m_grabbersDataProcessing){
+        grabber->clean();
+    }
+    m_grabbersDataProcessing.clear();
+}
+
+// NOTE(review): the pointee types of the frame pointers below were missing in the reviewed copy
+// (template arguments stripped), K4CompressedFrame / K4Frame are assumed - confirm against k4_server_data.hpp
+
+// Forwards a new compressed frame to the processing unit of grabber `idC`,
+// an error is logged and the frame dropped when the id is invalid.
+auto K4ServerData::new_compressed_frame(size_t idC, std::shared_ptr<K4CompressedFrame> frame) -> void {
+
+    if(idC >= m_grabbersDataProcessing.size()){
+        Logger::error(std::format("[K4ServerData::new_compressed_frame] Invalid frame id {}, only {} grabbers.\n", idC, m_grabbersDataProcessing.size()));
+        return;
+    }
+
+    m_grabbersDataProcessing[idC]->new_compressed_frame(std::move(frame));
+}
+
+
+// Forwards a new frame to the processing unit of grabber `idC`,
+// an error is logged and the frame dropped when the id is invalid.
+auto K4ServerData::new_frame(size_t idC, std::shared_ptr<K4Frame> frame) -> void {
+
+    if(idC >= m_grabbersDataProcessing.size()){
+        Logger::error(std::format("[K4ServerData::new_frame] Invalid frame id {}, only {} grabbers.\n", idC, m_grabbersDataProcessing.size()));
+        return;
+    }
+    m_grabbersDataProcessing[idC]->new_frame(std::move(frame));
+}
+
+// Returns the frame held by the processing unit of grabber `idC`, nullptr (with an error logged) when the id is invalid.
+auto K4ServerData::get_frame(size_t idC) -> std::shared_ptr<K4Frame>{
+
+    if(idC >= m_grabbersDataProcessing.size()){
+        Logger::error(std::format("[K4ServerData::get_frame] Invalid frame id {}, only {} grabbers.\n", idC, m_grabbersDataProcessing.size()));
+        return nullptr;
+    }
+    return m_grabbersDataProcessing[idC]->get_frame();
+}
+
+
+// Returns the compressed frame held by the processing unit of grabber `idC`, nullptr (with an error logged) when the id is invalid.
+auto K4ServerData::get_compressed_frame(size_t idC) -> std::shared_ptr<K4CompressedFrame>{
+
+    if(idC >= m_grabbersDataProcessing.size()){
+        Logger::error(std::format("[K4ServerData::get_compressed_frame] Invalid frame id {}, only {} grabbers.\n", idC, m_grabbersDataProcessing.size()));
+        return nullptr;
+    }
+    return m_grabbersDataProcessing[idC]->get_compressed_frame();
+}
+
+
+// Invalidates the frame held by the processing unit of grabber `idC`.
+auto K4ServerData::invalid_last_frame(size_t idC) -> void {
+
+    if(idC >= m_grabbersDataProcessing.size()){
+        Logger::error(std::format("[K4ServerData::invalid_last_frame] Invalid frame id {}, only {} grabbers.\n", idC, m_grabbersDataProcessing.size()));
+        return;
+    }
+    m_grabbersDataProcessing[idC]->invalid_frame();
+}
+
+// Invalidates the compressed frame held by the processing unit of grabber `idC`.
+auto K4ServerData::invalid_last_compressed_frame(size_t idC) -> void {
+
+    if(idC >= m_grabbersDataProcessing.size()){
+        Logger::error(std::format("[K4ServerData::invalid_last_compressed_frame] Invalid frame id {}, only {} grabbers.\n", idC, m_grabbersDataProcessing.size()));
+        return;
+    }
+    m_grabbersDataProcessing[idC]->invalid_compressed_frame();
+}
diff --git a/cpp-projects/base/camera/kinect4/k4_server_data.hpp b/cpp-projects/base/camera/kinect4/k4_server_data.hpp
new file mode 100644
index 0000000..43f4391
--- /dev/null
+++
b/cpp-projects/base/camera/kinect4/k4_server_data.hpp @@ -0,0 +1,64 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// std +#include + +// local +#include "k4_grabber_data_processing.hpp" +#include "k4_volumetric_video.hpp" + +namespace tool::camera{ + +class K4ServerData{ + /* Holds one data-processing entry per grabber (m_grabbersDataProcessing) and forwards the per-grabber calls to the entry at index idC. */ +public: + + ~K4ServerData(); + + auto initialize(size_t nbGrabbers) -> void; + auto clean() -> void; + /* Hand a new compressed or uncompressed frame over to grabber idC. */ + auto new_compressed_frame(size_t idC, std::shared_ptr frame) -> void; + auto new_frame(size_t idC, std::shared_ptr frame) -> void; + /* nb_grabbers() is the number of entries; get_frame() and get_compressed_frame() return nullptr when idC is out of range. */ + size_t nb_grabbers() const noexcept {return m_grabbersDataProcessing.size();} + auto get_frame(size_t idC) -> std::shared_ptr; + auto get_compressed_frame(size_t idC) -> std::shared_ptr; + /* Forward to invalid_frame() and invalid_compressed_frame() of grabber idC. */ + auto invalid_last_frame(size_t idC) -> void; + auto invalid_last_compressed_frame(size_t idC) -> void; + +private: + + std::mutex m_recordLocker; + std::atomic_bool m_record = false; + std::vector> m_grabbersDataProcessing; + camera::K4VolumetricVideo m_videoResource; +}; +} diff --git a/cpp-projects/base/camera/kinect4/k4_types.hpp b/cpp-projects/base/camera/kinect4/k4_types.hpp new file mode 100644 index 0000000..083c99e --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_types.hpp @@ -0,0 +1,498 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the 
Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// std +#include + +// kinect4 +#include +#include + +// local +#include "geometry/point2.hpp" +#include "geometry/point4.hpp" +#include "geometry/quaternion.hpp" +#include "utility/tuple_array.hpp" + +namespace tool::camera { + + using namespace std::literals::string_view_literals; + + enum class K4PowerlineFrequency : std::int8_t{ + F50=1, F60=2 + }; + + enum class K4ColorControl : std::int8_t { + Exposure_time_absolute = K4A_COLOR_CONTROL_EXPOSURE_TIME_ABSOLUTE, // Manual or Auto, + // Exposure time is measured in microseconds. + // The Azure Kinect supports a limited number of fixed exposure settings. + // When setting this, expect the exposure (in microseconds) to be rounded up to the nearest setting. Exceptions are + // 1) The last value in the table is the upper limit, so a value larger than this will be overridden to the largest entry in the table. + // 2) The exposure time cannot be larger than the equivalent FPS. So expect 100ms exposure time to be reduced to 30ms or 33.33ms when the camera is + // started. The most recent copy of the table 'device_exposure_mapping' is in https://github.com/microsoft/Azure-Kinect-Sensor-SDK/blob/develop/src/color/color_priv.h + // Default: 33330 + Exposure_priority = K4A_COLOR_CONTROL_AUTO_EXPOSURE_PRIORITY, // Only Manual DEPRECATED + // Value of 0 means framerate priority. 
Value of 1 means exposure priority. + // Using exposure priority may impact the framerate of both the color and depth cameras. + Brightness = K4A_COLOR_CONTROL_BRIGHTNESS, // Only manual + // The valid range is 0 to 255. The default value is 128. + Contrast = K4A_COLOR_CONTROL_CONTRAST, // Only manual + // 0 - 10 : 5 + Saturation = K4A_COLOR_CONTROL_SATURATION, // Only manual + // 0 - 63 : 32 + Sharpness = K4A_COLOR_CONTROL_SHARPNESS, // Only manual + // 0 - 4 : 2 + White_balance = K4A_COLOR_CONTROL_WHITEBALANCE, // Manual or Auto + // The unit is degrees Kelvin. + // The setting must be set to a value evenly divisible by 10 degrees. + // 2500 - 12500 : 4500 + Backlight_compensation = K4A_COLOR_CONTROL_BACKLIGHT_COMPENSATION, // Only manual + // Value of 0 means backlight compensation is disabled. Value of 1 means backlight compensation is enabled. + // Default: 0 + Gain = K4A_COLOR_CONTROL_GAIN, // Only Manual + // 0 - 255 : 128 + Powerline_frequency = K4A_COLOR_CONTROL_POWERLINE_FREQUENCY, // Only manual + // Value of 1 sets the powerline compensation to 50 Hz. Value of 2 sets the powerline compensation to 60 Hz. + // Default: 2 + }; + + enum class K4ImageFormat : std::int8_t { + MJPEG = K4A_IMAGE_FORMAT_COLOR_MJPG, // The buffer for each image is encoded as a JPEG and can be decoded by a JPEG decoder. + NV12 = K4A_IMAGE_FORMAT_COLOR_NV12, // NV12 images separate the luminance and chroma data such that all the luminance is at the + // beginning of the buffer, and the chroma lines follow immediately after. + YUY2 = K4A_IMAGE_FORMAT_COLOR_YUY2, // YUY2 stores chroma and luminance data in interleaved pixels. + BGRA32 = K4A_IMAGE_FORMAT_COLOR_BGRA32, // Each pixel of BGRA32 data is four bytes. The first three bytes represent Blue, Green, + // and Red data. The fourth byte is the alpha channel and is unused in the Azure Kinect APIs. + IR16 = K4A_IMAGE_FORMAT_IR16, // Each pixel of IR16 data is two bytes of little endian unsigned depth data. 
+ // The value of the data represents brightness. + DEPTH16 = K4A_IMAGE_FORMAT_DEPTH16, // Each pixel of DEPTH16 data is two bytes of little endian unsigned depth data. + // The unit of the data is in millimeters from the origin of the camera. + CUSTOM = K4A_IMAGE_FORMAT_CUSTOM, // Used in conjunction with user created images or images packing non-standard data. + CUSTOM8 = K4A_IMAGE_FORMAT_CUSTOM8, // Each pixel of CUSTOM8 is a single channel one byte of unsigned data. + CUSTOM16 = K4A_IMAGE_FORMAT_CUSTOM16 // Each pixel of CUSTOM16 is a single channel two bytes of little endian unsigned data. + }; + + enum class K4ColorResolution : std::int8_t { + OFF = K4A_COLOR_RESOLUTION_OFF, // Color camera will be turned off with this setting + R720P = K4A_COLOR_RESOLUTION_720P, // 1280 * 720 16:9 + R1080P = K4A_COLOR_RESOLUTION_1080P, // 1920 * 1080 16:9 + R1440P = K4A_COLOR_RESOLUTION_1440P, // 2560 * 1440 16:9 + R1536P = K4A_COLOR_RESOLUTION_1536P, // 2048 * 1536 4:3 + R2160P = K4A_COLOR_RESOLUTION_2160P, // 3840 * 2160 16:9 + R3072P = K4A_COLOR_RESOLUTION_3072P // 4096 * 3072 4:3 + }; + + enum class K4DepthMode : std::int8_t { + OFF = K4A_DEPTH_MODE_OFF , // Depth sensor will be turned off with this setting. + NFOV_2X2BINNED = K4A_DEPTH_MODE_NFOV_2X2BINNED, // Depth captured at 320x288. Passive IR is also captured at 320x288. + NFOV_UNBINNED = K4A_DEPTH_MODE_NFOV_UNBINNED, // Depth captured at 640x576. Passive IR is also captured at 640x576. + WFOV_2X2BINNED = K4A_DEPTH_MODE_WFOV_2X2BINNED, // Depth captured at 512x512. Passive IR is also captured at 512x512. + WFOV_UNBINNED = K4A_DEPTH_MODE_WFOV_UNBINNED, // Depth captured at 1024x1024. Passive IR is also captured at 1024x1024. + PASSIVE_IR = K4A_DEPTH_MODE_PASSIVE_IR, // Passive IR only, captured at 1024x1024. 
+ }; + + enum class K4Framerate : std::int8_t{ + F5 = K4A_FRAMES_PER_SECOND_5, // 5 FPS + F15 = K4A_FRAMES_PER_SECOND_15, // 15 FPS + F30 = K4A_FRAMES_PER_SECOND_30, // 30 FPS + }; + + enum class K4SynchronisationMode : std::int8_t{ + Standalone = K4A_WIRED_SYNC_MODE_STANDALONE, // Neither 'Sync In' or 'Sync Out' connections are used. + Master = K4A_WIRED_SYNC_MODE_MASTER, // The 'Sync Out' jack is enabled and synchronization data it driven out the connected wire. + // While in master mode the color camera must be enabled as part of the multi device sync signalling logic. + // Even if the color image is not needed, the color camera must be running. + Subordinate = K4A_WIRED_SYNC_MODE_SUBORDINATE // The 'Sync In' jack is used for synchronization and 'Sync Out' is driven for the + // next device in the chain. 'Sync Out' is a mirror of 'Sync In' for this mode. + }; + // https://docs.microsoft.com/fr-FR/azure/Kinect-dk/multi-camera-sync + + enum class K4Mode : std::int8_t { + // clouds + Cloud_320x288, + Cloud_640x576_MJPEG, + Cloud_640x576_YUY2, + Cloud_640x576_NV12, + Cloud_640x576_BGRA32, + Cloud_512x512, + Cloud_1024x1024, + // frames + Full_frame_320x288, + Full_frame_640x576, + Full_frame_512x512, + Full_frame_1024x1024, + // colors + Only_color_1280x720, + Only_color_1920x1080, + Only_color_2560x1440, + Only_color_2048x1536, + Only_color_3840x2160, + Only_color_4096x3072, + // custom + Merged, + SizeEnum + }; + + enum class K4ExposureTimesMicroS : std::int8_t{ + t500, + t1250, + t2500, + t8330, + t16670, + t33330 + }; + + + using M = K4Mode; + using IF = K4ImageFormat; + using CR = K4ColorResolution; + using DM = K4DepthMode; + using FPS = K4Framerate; + using CE = bool; + using ME = bool; + using IE = bool; + using Range = geo::Pt2f; + using Resolution = geo::Pt2; + using DRes = Resolution; + using CRes = Resolution; + + using TMode = std::tuple< + K4Mode, IF, CR, DM, FPS, Range, DRes, CE, ME, IE>; + static constexpr TupleArray k4Modes = {{ + // cloud + TMode + 
{M::Cloud_320x288, IF::NV12, CR::R720P, DM::NFOV_2X2BINNED, FPS::F30, {0.5f,5.46f}, {320,288}, true, false, true}, + {M::Cloud_640x576_MJPEG, IF::MJPEG, CR::R720P, DM::NFOV_UNBINNED, FPS::F30, {0.5f,3.86f}, {640,576}, true, false, true}, + {M::Cloud_640x576_YUY2, IF::YUY2, CR::R720P, DM::NFOV_UNBINNED, FPS::F30, {0.5f,3.86f}, {640,576}, true, false, true}, + {M::Cloud_640x576_NV12, IF::NV12, CR::R720P, DM::NFOV_UNBINNED, FPS::F30, {0.5f,3.86f}, {640,576}, true, false, true}, + {M::Cloud_640x576_BGRA32, IF::BGRA32, CR::R720P, DM::NFOV_UNBINNED, FPS::F30, {0.5f,3.86f}, {640,576}, true, false, true}, + {M::Cloud_512x512, IF::NV12, CR::R720P, DM::WFOV_2X2BINNED, FPS::F30, {0.25f,2.88f}, {512,512}, true, false, true}, + {M::Cloud_1024x1024, IF::NV12, CR::R720P, DM::WFOV_UNBINNED, FPS::F15, {0.25f,2.21f}, {1024,1024},true, false, true}, + // frame + {M::Full_frame_320x288, IF::MJPEG, CR::R720P, DM::NFOV_2X2BINNED, FPS::F30, {0.5f,5.46f}, {320,288}, false, false, true}, + {M::Full_frame_640x576, IF::MJPEG, CR::R720P, DM::NFOV_UNBINNED, FPS::F30, {0.5f,3.86f}, {640,576}, false, false, true}, + {M::Full_frame_512x512, IF::MJPEG, CR::R720P, DM::WFOV_2X2BINNED, FPS::F30, {0.25f,2.88f}, {512,512}, false, false, true}, + {M::Full_frame_1024x1024, IF::MJPEG, CR::R720P, DM::WFOV_UNBINNED, FPS::F15, {0.25f,2.21f}, {1024,1024},false, false, true}, + // only color + {M::Only_color_1280x720, IF::BGRA32, CR::R720P, DM::OFF, FPS::F30, {0,0}, {0,0}, false, false, false}, + {M::Only_color_1920x1080, IF::BGRA32, CR::R1080P, DM::OFF, FPS::F30, {0,0}, {0,0}, false, false, false}, + {M::Only_color_2560x1440, IF::BGRA32, CR::R1440P, DM::OFF, FPS::F30, {0,0}, {0,0}, false, false, false}, + {M::Only_color_2048x1536, IF::BGRA32, CR::R1536P, DM::OFF, FPS::F30, {0,0}, {0,0}, false, false, false}, + {M::Only_color_3840x2160, IF::BGRA32, CR::R2160P, DM::OFF, FPS::F30, {0,0}, {0,0}, false, false, false}, + {M::Only_color_4096x3072, IF::BGRA32, CR::R3072P, DM::OFF, FPS::F15, {0,0}, {0,0}, false, 
false, false}, + }}; + +// 3840x2160 16:9 MJPEG 0, 5, 15, 30 90°x59° +// 2560x1440 16:9 MJPEG 0, 5, 15, 30 90°x59° +// 1920x1080 16:9 MJPEG 0, 5, 15, 30 90°x59° +// 1280x720 16:9 MJPEG/YUY2/NV12 0, 5, 15, 30 90°x59° +// 4096x3072 4:3 MJPEG 0, 5, 15 90°x74.3° +// 2048x1536 4:3 MJPEG 0, 5, 15, 30 90°x74.3° +// The RGB camera is USB Video class-compatible and can be used without the Sensor SDK. The RGB +// camera color space: BT.601 full range [0..255]. The MJPEG chroma sub-sampling is 4:2:2. +// Note +// The Sensor SDK can provide color images in the BGRA pixel format. This is not a native mode +// supported by the device and causes additional CPU load when used. The host CPU is used to +// convert from MJPEG images received from the device. + + + [[maybe_unused]] static constexpr auto image_format(K4Mode m) -> K4ImageFormat{ + return k4Modes.at<0,1>(m); + } + [[maybe_unused]] static constexpr auto color_resolution(K4Mode m) -> K4ColorResolution{ + return k4Modes.at<0,2>(m); + } + [[maybe_unused]] static constexpr auto depth_mode(K4Mode m) -> K4DepthMode { + return k4Modes.at<0,3>(m); + } + [[maybe_unused]] static constexpr auto framerate(K4Mode m) -> K4Framerate{ + return k4Modes.at<0,4>(m); + } + [[maybe_unused]] static constexpr auto range(K4Mode m) -> Range{ + return k4Modes.at<0,5>(m); + } + [[maybe_unused]] static constexpr auto depth_resolution(K4Mode m) -> Resolution{ + return k4Modes.at<0,6>(m); + } + [[maybe_unused]] static constexpr auto has_depth(K4Mode m) -> bool{ + return depth_mode(m) != DM::OFF; + } + [[maybe_unused]] static constexpr auto has_cloud(K4Mode m) -> bool{ + return k4Modes.at<0,7>(m); + } + [[maybe_unused]] static constexpr auto has_mesh(K4Mode m) -> bool{ + return k4Modes.at<0,8>(m); + } + [[maybe_unused]] static constexpr auto has_infrared(K4Mode m) -> bool{ + return k4Modes.at<0,9>(m); + } + + + [[maybe_unused]] static constexpr std::int16_t k4_invalid_depth_value = 0; + [[maybe_unused]] static constexpr std::int16_t 
k4_invalid_infra_value = 0; + [[maybe_unused]] static constexpr geo::Pt4 k4_invalid_color_value = {0,0,0,0}; + [[maybe_unused]] static constexpr K4Mode k4DefaultMode = K4Mode::Cloud_640x576_MJPEG; + + struct K4ImuSample{ + float temperature; /**< Temperature reading of this sample (Celsius). */ + geo::Pt3f acc; /**< Accelerometer sample in meters per second squared. */ + std::int64_t accTsMs; /**< Timestamp of the accelerometer in microseconds. */ + geo::Pt3f gyr; /**< Gyro sample in radians per second. */ + std::int64_t gyrTsMs; /**< Timestamp of the gyroscope in microseconds */ + }; + + + enum class SettingsType : std::int32_t{ + Network, Filters, Color, Device, Model,Delay, + SizeEnum + }; + + using TSettings = std::tuple< SettingsType, std::string_view>; + static constexpr TupleArray settings = {{ + // cloud + TSettings + {SettingsType::Network, "network settings"}, + {SettingsType::Filters, "filters settings"}, + {SettingsType::Color, "color settings"}, + {SettingsType::Device, "device settings"}, + {SettingsType::Model, "calibration"}, + {SettingsType::Delay, "delay"}, + }}; + + [[maybe_unused]] static constexpr auto settings_name(SettingsType s) -> std::string_view{ + return settings.at<0,1>(s); + } + + + struct K4VertexMeshData{ + geo::Pt3f pos; + geo::Pt4 col; + }; + + enum class K4BTProcessingMode : std::int8_t{ + GPU = K4ABT_TRACKER_PROCESSING_MODE_GPU, /**< SDK will use the most appropriate GPU mode for the operating system to run the tracker */ + /**< Currently this is ONNX DirectML EP for Windows and ONNX Cuda EP for Linux. 
ONNX TensorRT EP is experimental */ + CPU = K4ABT_TRACKER_PROCESSING_MODE_CPU, /**< SDK will use CPU only mode to run the tracker */ + GPU_CUDA = K4ABT_TRACKER_PROCESSING_MODE_GPU_CUDA, /**< SDK will use ONNX Cuda EP to run the tracker */ + GPU_TENSORRT = K4ABT_TRACKER_PROCESSING_MODE_GPU_TENSORRT, /**< SDK will use ONNX TensorRT EP to run the tracker */ + GPU_DIRECTML = K4ABT_TRACKER_PROCESSING_MODE_GPU_DIRECTML /**< SDK will use ONNX DirectML EP to run the tracker (Windows only) */ + }; + + enum class K4BTSensorOrientation : std::int8_t{ + default_orientation = K4ABT_SENSOR_ORIENTATION_DEFAULT, /**< Mount the sensor at its default orientation */ + clockwise_90 = K4ABT_SENSOR_ORIENTATION_CLOCKWISE90, /**< Clockwisely rotate the sensor 90 degree */ + counter_clockwise_90 = K4ABT_SENSOR_ORIENTATION_COUNTERCLOCKWISE90, /**< Counter-clockwisely rotate the sensor 90 degrees */ + flip_180 = K4ABT_SENSOR_ORIENTATION_FLIP180, /**< Mount the sensor upside-down */ + }; + + enum class K4JointType : std::int8_t{ + pelvis = K4ABT_JOINT_PELVIS, + spine_navel = K4ABT_JOINT_SPINE_NAVEL, + spine_chest = K4ABT_JOINT_SPINE_CHEST, + neck = K4ABT_JOINT_NECK, + clavicle_left = K4ABT_JOINT_CLAVICLE_LEFT, + shoulder_left = K4ABT_JOINT_SHOULDER_LEFT, + elbow_left = K4ABT_JOINT_ELBOW_LEFT, + wrist_left = K4ABT_JOINT_WRIST_LEFT, + hand_left = K4ABT_JOINT_HAND_LEFT, + handtip_left = K4ABT_JOINT_HANDTIP_LEFT, + thumb_left = K4ABT_JOINT_THUMB_LEFT, + clavicle_right = K4ABT_JOINT_CLAVICLE_RIGHT, + shoulder_right = K4ABT_JOINT_SHOULDER_RIGHT, + elbow_right = K4ABT_JOINT_ELBOW_RIGHT, + wrist_right = K4ABT_JOINT_WRIST_RIGHT, + hand_right = K4ABT_JOINT_HAND_RIGHT, + handtip_right = K4ABT_JOINT_HANDTIP_RIGHT, + thumb_right = K4ABT_JOINT_THUMB_RIGHT, + hip_left = K4ABT_JOINT_HIP_LEFT, + knee_left = K4ABT_JOINT_KNEE_LEFT, + ankle_left = K4ABT_JOINT_ANKLE_LEFT, + foot_left = K4ABT_JOINT_FOOT_LEFT, + hip_right = K4ABT_JOINT_HIP_RIGHT, + knee_right = K4ABT_JOINT_KNEE_RIGHT, + ankle_right = 
K4ABT_JOINT_ANKLE_RIGHT, + foot_right = K4ABT_JOINT_FOOT_RIGHT, + head = K4ABT_JOINT_HEAD, + nose = K4ABT_JOINT_NOSE, + eye_left = K4ABT_JOINT_EYE_LEFT, + ear_left = K4ABT_JOINT_EAR_LEFT, + eye_right = K4ABT_JOINT_EYE_RIGHT, + ear_right = K4ABT_JOINT_EAR_RIGHT, + SizeEnum + }; + + [[maybe_unused]] static constexpr size_t k4JointsCount = static_cast(K4JointType::SizeEnum); + + using TJoint = std::tuple; + static constexpr TupleArray joints ={{ + TJoint + {K4JointType::pelvis, "pelvis"sv}, + {K4JointType::spine_navel, "spine_navel"sv}, + {K4JointType::spine_chest, "spine_chest"sv}, + {K4JointType::neck, "neck"sv}, + {K4JointType::clavicle_left, "clavicle_left"sv}, + {K4JointType::shoulder_left, "shoulder_left"sv}, + {K4JointType::elbow_left, "elbow_left"sv}, + {K4JointType::wrist_left, "wrist_left"sv}, + {K4JointType::hand_left, "hand_left"sv}, + {K4JointType::handtip_left, "handtip_left"sv}, + {K4JointType::thumb_left, "thumb_left"sv}, + {K4JointType::clavicle_right, "clavicle_right"sv}, + {K4JointType::shoulder_right, "shoulder_right"sv}, + {K4JointType::elbow_right, "elbow_right"sv}, + {K4JointType::wrist_right, "wrist_right"sv}, + {K4JointType::hand_right, "hand_right"sv}, + {K4JointType::handtip_right, "handtip_right"sv}, + {K4JointType::thumb_right, "thumb_right"sv}, + {K4JointType::hip_left, "hip_left"sv}, + {K4JointType::knee_left, "knee_left"sv}, + {K4JointType::ankle_left, "ankle_left"sv}, + {K4JointType::foot_left, "foot_left"sv}, + {K4JointType::hip_right, "hip_right"sv}, + {K4JointType::knee_right, "knee_right"sv}, + {K4JointType::ankle_right, "ankle_right"sv}, + {K4JointType::foot_right, "foot_right"sv}, + {K4JointType::head, "head"sv}, + {K4JointType::nose, "nose"sv}, + {K4JointType::eye_left, "eye_left"sv}, + {K4JointType::ear_left, "ear_left"sv}, + {K4JointType::eye_right, "eye_right"sv}, + {K4JointType::ear_right, "ear_right"sv} + }}; + + [[maybe_unused]] static constexpr std::string_view get_joint_name(K4JointType t) { + return joints.at<0,1>(t); + 
} + + enum class K4JointConfidenceLevel : std::int8_t{ + None = K4ABT_JOINT_CONFIDENCE_NONE, /**< The joint is out of range (too far from depth camera) */ + Low = K4ABT_JOINT_CONFIDENCE_LOW, /**< The joint is not observed (likely due to occlusion), predicted joint pose */ + Medium = K4ABT_JOINT_CONFIDENCE_MEDIUM, /**< Medium confidence in joint pose. Current SDK will only provide joints up to this confidence level */ + Hight = K4ABT_JOINT_CONFIDENCE_HIGH, /**< High confidence in joint pose. Placeholder for future SDK */ + }; + + struct K4Joint{ + geo::Pt3f position = {}; + geo::Quaternion orientation = {}; + K4JointConfidenceLevel confidence = K4JointConfidenceLevel::None; + /* True when the confidence level is Medium or Hight. */ + constexpr auto good_confidence() const noexcept{ + return confidence == K4JointConfidenceLevel::Hight || confidence == K4JointConfidenceLevel::Medium; + } + }; + + // Bone list (pairs of connected joints) based on the body tracking joint hierarchy: 31 bones linking the 32 joints, both hips being attached to the pelvis + [[maybe_unused]] static constexpr std::array, 31> bonesList = { + std::make_pair(K4JointType::spine_chest, K4JointType::spine_navel), + std::make_pair(K4JointType::spine_navel, K4JointType::pelvis), + std::make_pair(K4JointType::spine_chest, K4JointType::neck), + std::make_pair(K4JointType::neck, K4JointType::head), + std::make_pair(K4JointType::head, K4JointType::nose), + std::make_pair(K4JointType::spine_chest, K4JointType::clavicle_left), + std::make_pair(K4JointType::clavicle_left, K4JointType::shoulder_left), + std::make_pair(K4JointType::shoulder_left, K4JointType::elbow_left), + std::make_pair(K4JointType::elbow_left, K4JointType::wrist_left), + std::make_pair(K4JointType::wrist_left, K4JointType::hand_left), + std::make_pair(K4JointType::hand_left, K4JointType::handtip_left), + std::make_pair(K4JointType::wrist_left, K4JointType::thumb_left), + std::make_pair(K4JointType::pelvis, K4JointType::hip_left), + std::make_pair(K4JointType::hip_left, K4JointType::knee_left), + std::make_pair(K4JointType::knee_left, K4JointType::ankle_left), + 
std::make_pair(K4JointType::ankle_left, K4JointType::foot_left), + std::make_pair(K4JointType::nose, K4JointType::eye_left), + std::make_pair(K4JointType::eye_left, K4JointType::ear_left), + std::make_pair(K4JointType::spine_chest, K4JointType::clavicle_right), + std::make_pair(K4JointType::clavicle_right, K4JointType::shoulder_right), + std::make_pair(K4JointType::shoulder_right, K4JointType::elbow_right), + std::make_pair(K4JointType::elbow_right, K4JointType::wrist_right), + std::make_pair(K4JointType::wrist_right, K4JointType::hand_right), + std::make_pair(K4JointType::hand_right, K4JointType::handtip_right), + std::make_pair(K4JointType::wrist_right, K4JointType::thumb_right), + std::make_pair(K4JointType::pelvis, K4JointType::hip_right), + std::make_pair(K4JointType::hip_right, K4JointType::knee_right), + std::make_pair(K4JointType::knee_right, K4JointType::ankle_right), + std::make_pair(K4JointType::ankle_right, K4JointType::foot_right), + std::make_pair(K4JointType::nose, K4JointType::eye_right), + std::make_pair(K4JointType::eye_right, K4JointType::ear_right) + }; + + struct K4Skeleton{ + std::array(K4JointType::SizeEnum)> joints; + constexpr auto joint(K4JointType type) const noexcept -> K4Joint { return joints[static_cast(type)];} + constexpr auto pelvis() const noexcept -> K4Joint { return joints[static_cast(K4JointType::pelvis)];} + constexpr auto spine_navel() const noexcept -> K4Joint { return joints[static_cast(K4JointType::spine_navel)];} + constexpr auto spine_chest() const noexcept -> K4Joint { return joints[static_cast(K4JointType::spine_chest)];} + constexpr auto neck() const noexcept -> K4Joint { return joints[static_cast(K4JointType::neck)];} + constexpr auto clavicle_left() const noexcept -> K4Joint { return joints[static_cast(K4JointType::clavicle_left)];} + constexpr auto shoulder_left() const noexcept -> K4Joint { return joints[static_cast(K4JointType::shoulder_left)];} + constexpr auto elbow_left() const noexcept -> K4Joint { return 
joints[static_cast(K4JointType::elbow_left)];} + constexpr auto wrist_left() const noexcept -> K4Joint { return joints[static_cast(K4JointType::wrist_left)];} + constexpr auto hand_left() const noexcept -> K4Joint { return joints[static_cast(K4JointType::hand_left)];} + constexpr auto handtip_left() const noexcept -> K4Joint { return joints[static_cast(K4JointType::handtip_left)];} + constexpr auto thumb_left() const noexcept -> K4Joint { return joints[static_cast(K4JointType::thumb_left)];} + constexpr auto clavicle_right() const noexcept -> K4Joint{ return joints[static_cast(K4JointType::clavicle_right)];} + constexpr auto shoulder_right() const noexcept -> K4Joint{ return joints[static_cast(K4JointType::shoulder_right)];} + constexpr auto elbow_right() const noexcept -> K4Joint { return joints[static_cast(K4JointType::elbow_right)];} + constexpr auto wrist_right() const noexcept -> K4Joint { return joints[static_cast(K4JointType::wrist_right)];} + constexpr auto hand_right() const noexcept -> K4Joint { return joints[static_cast(K4JointType::hand_right)];} + constexpr auto handtip_right() const noexcept -> K4Joint { return joints[static_cast(K4JointType::handtip_right)];} + constexpr auto thumb_right() const noexcept -> K4Joint { return joints[static_cast(K4JointType::thumb_right)];} + constexpr auto hip_left() const noexcept -> K4Joint { return joints[static_cast(K4JointType::hip_left)];} + constexpr auto knee_left() const noexcept -> K4Joint { return joints[static_cast(K4JointType::knee_left)];} + constexpr auto ankle_left() const noexcept -> K4Joint { return joints[static_cast(K4JointType::ankle_left)];} + constexpr auto foot_left() const noexcept -> K4Joint { return joints[static_cast(K4JointType::foot_left)];} + constexpr auto hip_right() const noexcept -> K4Joint { return joints[static_cast(K4JointType::hip_right)];} + constexpr auto knee_right() const noexcept -> K4Joint { return joints[static_cast(K4JointType::knee_right)];} + constexpr auto ankle_right() 
const noexcept -> K4Joint { return joints[static_cast(K4JointType::ankle_right)];} + constexpr auto foot_right() const noexcept -> K4Joint { return joints[static_cast(K4JointType::foot_right)];} + constexpr auto head() const noexcept -> K4Joint { return joints[static_cast(K4JointType::head)];} + constexpr auto nose() const noexcept -> K4Joint { return joints[static_cast(K4JointType::nose)];} + constexpr auto eye_left() const noexcept -> K4Joint { return joints[static_cast(K4JointType::eye_left)];} + constexpr auto ear_left() const noexcept -> K4Joint { return joints[static_cast(K4JointType::ear_left)];} + constexpr auto eye_right() const noexcept -> K4Joint { return joints[static_cast(K4JointType::eye_right)];} + constexpr auto ear_right() const noexcept -> K4Joint { return joints[static_cast(K4JointType::ear_right)];} + }; + + struct K4Body{ + std::int8_t id = 0; + K4Skeleton skeleton; + + auto update(const k4abt_body_t &body) -> void{ + id = static_cast(body.id); + for(const auto &jointD : joints.data){ + const auto &kaKoint = body.skeleton.joints[static_cast(std::get<0>(jointD))]; + auto &joint = skeleton.joints[static_cast(std::get<0>(jointD))]; + joint.confidence = static_cast(kaKoint.confidence_level); + const auto &p = kaKoint.position; + joint.position = {-p.v[0],-p.v[1],p.v[2]}; + const auto &o = kaKoint.orientation; + joint.orientation = {o.wxyz.x,o.wxyz.y,o.wxyz.z,o.wxyz.w}; + } + } + }; +} diff --git a/cpp-projects/base/camera/kinect4/k4_volumetric_video.cpp b/cpp-projects/base/camera/kinect4/k4_volumetric_video.cpp new file mode 100644 index 0000000..5bd05ea --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_volumetric_video.cpp @@ -0,0 +1,794 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated 
documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + + +#include "k4_volumetric_video.hpp" + +// std +#include +#include + +// local +#include "utility/logger.hpp" +#include "utility/io_fstream.hpp" +#include "geometry/voxel_grid.hpp" +#include "camera/kinect4/k4_frame_compressor.hpp" + +using namespace tool::camera; +/* Number of compressed frames stored for this camera. */ +auto K4CameraData::nb_frames() const noexcept -> size_t{ + return frames.size(); +} +/* Non-owning pointer to the first stored frame; logs an error and returns nullptr when no frame is stored. */ +auto K4CameraData::first_frame_ptr() const -> K4CompressedFrame*{ + if(!frames.empty()){ + return frames.front().get(); + } + Logger::error("[K4CameraData::first_frame] No frame available.\n"); + return nullptr; +} +/* Non-owning pointer to the last stored frame; logs an error and returns nullptr when no frame is stored. */ +auto K4CameraData::last_frame_ptr() const -> K4CompressedFrame*{ + if(!frames.empty()){ + return frames.back().get(); + } + Logger::error("[K4CameraData::last_frame] No frame available.\n"); + return nullptr; +} +/* Non-owning pointer to frame idFrame; logs an error and returns nullptr when idFrame is out of range. */ +auto K4CameraData::get_frame_ptr(size_t idFrame) const -> K4CompressedFrame*{ + if(idFrame < nb_frames()){ + return frames[idFrame].get(); + } + Logger::error(std::format("[K4CameraData::get_frame] Invalid frame id [{}], current number of frames [{}].\n", idFrame, nb_frames())); + return nullptr; +} +/* afterCaptureTS of the first stored frame, or an unexpected message when no frame is stored. */ +auto K4CameraData::first_frame_capture_timestamp() const noexcept -> std::expected{ + if(!frames.empty()){ + return frames.front()->afterCaptureTS; + } + return std::unexpected("No frames available"sv); +} +/* afterCaptureTS of the last stored frame, or an unexpected message when no frame is stored. */ +auto K4CameraData::last_frame_capture_timestamp() const noexcept -> std::expected { + if(!frames.empty()){ + return frames.back()->afterCaptureTS; + } + return std::unexpected("No frames available"sv); +} +/* validVerticesCount of frame idFrame, or 0 when get_frame_ptr() finds no such frame (it logs the error). */ +auto K4CameraData::valid_vertices_count(size_t idFrame) const noexcept -> size_t{ + if(auto frame = get_frame_ptr(idFrame)){ + return frame->validVerticesCount; + } + return 0; +} + +/* Weak reference to frame idFrame; an out-of-range idFrame is logged and an empty value is returned. */ +auto K4CameraData::get_compressed_frame(size_t idFrame) const noexcept -> std::weak_ptr{ + if(idFrame < nb_frames()){ + return frames[idFrame]; + } + 
Logger::error(std::format("[K4CameraData::get_compressed_frame] Invalid frame id [{}], current number of frames [{}].\n", idFrame, nb_frames())); + return {}; +} +/* Appends frame unless its idCapture equals the idCapture of the last stored frame. */ +auto K4CameraData::add_compressed_frame(std::shared_ptr frame) -> void{ + if(!frames.empty()){ + if(frame->idCapture == frames.back()->idCapture){ + return; + } + } + frames.push_back(std::move(frame)); +} + +/* Erases the frames in [0, idFrame); does nothing when idFrame is out of range. */ +auto K4CameraData::remove_frames_until(size_t idFrame) -> void{ + if(idFrame < nb_frames()){ + frames.erase(frames.begin(), frames.begin() + idFrame); + } +} +/* Erases the frames in [idFrame, end), frame idFrame included; does nothing when idFrame is out of range. */ +auto K4CameraData::remove_frames_after(size_t idFrame) -> void{ + if(idFrame < nb_frames()){ + frames.erase(frames.begin()+idFrame, frames.end()); + } +} +/* Removes every stored frame. */ +auto K4CameraData::clean() noexcept -> void{ + frames.clear(); +} +/* Copies the per-camera frames of other; uncompressors are not copied, the missing ones are created so that every camera has one. */ +K4VolumetricVideo &K4VolumetricVideo::operator=(const K4VolumetricVideo &other){ + +// m_videoStartTS = other.m_videoStartTS; +// m_videoEndTS = other.m_videoEndTS; + m_framesPerCamera = other.m_framesPerCamera; + /* Only create the uncompressors that are missing, so that repeated assignments do not pile up extra ones. */ + for(size_t ii = m_uncompressors.size(); ii < other.nb_cameras(); ++ii){ + m_uncompressors.push_back(std::make_unique()); + } + + return *this; +} +/* Resizes the per-camera storage to nbCameras and creates the uncompressors that are still missing (calling it again does not duplicate them). */ +auto K4VolumetricVideo::initialize(size_t nbCameras) -> void{ + m_framesPerCamera.resize(nbCameras); + for(size_t ii = m_uncompressors.size(); ii < nbCameras; ++ii){ + m_uncompressors.push_back(std::make_unique()); + } +} +/* Calls clean_all_cameras_compressed_frames(), then initialize() with the number of cameras of video. */ +auto K4VolumetricVideo::initialize_from(const K4VolumetricVideo &video) -> void{ + clean_all_cameras_compressed_frames(); + initialize(video.nb_cameras()); +} +/* Number of cameras, i.e. the size of m_framesPerCamera. */ +auto K4VolumetricVideo::nb_cameras() const noexcept -> size_t{ + return m_framesPerCamera.size(); +} +/* Sum of the frame counts of every camera. */ +auto K4VolumetricVideo::count_frames_from_all_cameras() const noexcept -> size_t{ + size_t count = 0; + for(const auto &cFrames : m_framesPerCamera){ + count += cFrames.nb_frames(); + } + return count; +} +/* Uncompresses frame idFrame of camera idCamera into frame; returns false when the camera data or the frame cannot be found. */ +auto K4VolumetricVideo::uncompress_frame(size_t idCamera, size_t idFrame, K4Frame &frame) -> bool{ + if(const auto camD = get_camera_data(idCamera)){ + if(auto cFrame = camD->get_frame_ptr(idFrame)){ + return 
uncompress_frame(idCamera, cFrame, frame); + } + } + return false; +} + +auto K4VolumetricVideo::uncompress_frame(size_t idCamera, K4CompressedFrame *cFrame, K4Frame &frame) -> bool{ + if(auto unc = uncompressor(idCamera); unc != nullptr){ + return unc->uncompress(cFrame, frame); + } + return false; +} + +auto K4VolumetricVideo::first_frame_capture_timestamp() const noexcept -> std::expected{ + auto start = std::numeric_limits::max(); + bool found = false; + for(const auto &cFrame : m_framesPerCamera){ + if(auto camStart = cFrame.first_frame_capture_timestamp(); camStart.has_value()){ + if(start > camStart.value()){ + start = camStart.value(); + found = true; + } + } + } + if(found){ + return start; + } + return std::unexpected("No frame available from any camera."sv); +} + +auto K4VolumetricVideo::last_frame_capture_timestamp() const noexcept -> std::expected{ + std::int64_t end = -1; + bool found = false; + for(const auto &cFrame : m_framesPerCamera){ + if(auto camEnd = cFrame.last_frame_capture_timestamp(); camEnd.has_value()){ + if(end < camEnd.value()){ + end = camEnd.value(); + found = true; + } + } + } + if(found){ + return end; + } + return std::unexpected("No frame available from any camera."sv); +} + +//auto K4VolumetricVideo::set_video_start_timestamp(std::chrono::nanoseconds videoStartTS) noexcept -> void{ +// m_videoStartTS = videoStartTS; +//} + +//auto K4VolumetricVideo::set_video_end_timestamp(std::chrono::nanoseconds videoEndTS) noexcept -> void{ +// m_videoEndTS = videoEndTS; +//} + +auto K4VolumetricVideo::duration_ms() const noexcept -> double { + using namespace std::chrono; + auto lfc = last_frame_capture_timestamp(); + auto ffc = first_frame_capture_timestamp(); + if(lfc.has_value() && ffc.has_value()){ + return duration_cast(nanoseconds(lfc.value()-ffc.value())).count()*0.001; + } + return 0.0; +} + +auto K4VolumetricVideo::get_timestamp_diff_time_ms(std::int64_t t1, std::int64_t t2) noexcept -> double{ + using namespace std::chrono; + 
return duration_cast(nanoseconds(std::max(t1,t2)) - nanoseconds(std::min(t1,t2))).count() / 1000.0; +} + + +auto K4VolumetricVideo::closest_frame_id_from_time(size_t idCamera, double timeMs) const noexcept -> std::expected{ + + if(idCamera >= nb_cameras()){ + return std::unexpected("Camera id invalid."sv); + } + + if(nb_frames(idCamera) == 0){ + return std::unexpected("No frame available."sv); + } + + using namespace std::chrono; + + auto ffTs = nanoseconds(first_frame_capture_timestamp().value()); + size_t idFrame = 0; + double prevDiff = std::numeric_limits::max(); +// if(idCamera == 1) +// std::cout << "ffTs " << idCamera << " " << timeMs << " " << ffTs.count() << "\n"; + + size_t idF = 0; + for(const auto& frame : get_camera_data(idCamera)->frames){ +// std::cout << idCamera << " " << idF << " " << frame->afterCaptureTS - ffTs.count() << "\n"; +// idF++; + auto timeFrameMs = get_timestamp_diff_time_ms(ffTs.count(), frame->afterCaptureTS); + + auto diffMs = std::abs(timeFrameMs-timeMs); +// if(idCamera == 1) +// std::cout << frame->afterCaptureTS << " " << timeFrameMs << " " << diffMs << " |"; + + if(diffMs > prevDiff){ + return idFrame; + } + prevDiff = diffMs; + ++idFrame; + } + + return idFrame-1; +} + +auto K4VolumetricVideo::nb_frames(size_t idCamera) const noexcept -> size_t{ + if(idCamera < nb_cameras()){ + return get_camera_data(idCamera)->nb_frames(); + } + return 0; +} + +auto K4VolumetricVideo::total_nb_frames() const noexcept -> size_t{ + size_t totalNbFrames = 0; + for(const auto &cameraFrames : m_framesPerCamera){ + totalNbFrames += cameraFrames.nb_frames(); + } + return totalNbFrames; +} + +auto K4VolumetricVideo::min_nb_frames() const noexcept -> size_t { + + if(m_framesPerCamera.empty()){ + return 0; + } + + size_t minFrames = m_framesPerCamera.front().nb_frames(); + for(auto &frames : m_framesPerCamera){ + if(minFrames > frames.nb_frames()){ + minFrames = frames.nb_frames(); + } + } + return minFrames; +} + +auto 
K4VolumetricVideo::get_camera_data(size_t idCamera) const noexcept -> const K4CameraData*{ + if(idCamera < nb_cameras()){ + return &m_framesPerCamera[idCamera]; + } + Logger::error(std::format("[K4VolumetricVideo::get_camera_data] Invalid camera id: [{}], number of cameras available: [{}]\n", idCamera, nb_cameras())); + return nullptr; +} + +auto K4VolumetricVideo::get_compressed_frame(size_t idCamera, size_t idFrame) -> std::weak_ptr{ + if(const auto camD = get_camera_data(idCamera)){ + return camD->get_compressed_frame(idFrame); + } + return {}; +} + +auto K4VolumetricVideo::remove_compressed_frames_until(size_t idCamera, size_t idFrame) -> void{ + if(idCamera < nb_cameras()){ + m_framesPerCamera[idCamera].remove_frames_until(idFrame); + } +} + +auto K4VolumetricVideo::remove_compressed_frames_after(size_t idCamera, size_t idFrame) -> void{ + if(idCamera < nb_cameras()){ + m_framesPerCamera[idCamera].remove_frames_after(idFrame); + } +} + +auto K4VolumetricVideo::keep_only_one_camera(size_t idCamera) -> void{ + m_uncompressors.resize(1); + auto cameraFrames = std::move(m_framesPerCamera[idCamera]); + m_framesPerCamera = {std::move(cameraFrames)}; +} + +auto K4VolumetricVideo::clean_all_cameras_compressed_frames() noexcept -> void{ + for(auto &frames : m_framesPerCamera){ + frames.clean(); + } +} + +auto K4VolumetricVideo::clean_camera_compressed_frames(size_t idCamera) noexcept -> void{ + if(idCamera < nb_cameras()){ + m_framesPerCamera[idCamera].clean(); + } +} + +auto K4VolumetricVideo::get_transform(size_t idCamera) const -> tool::geo::Mat4d{ + if(idCamera < nb_cameras()){ + return m_framesPerCamera[idCamera].transform; + } + Logger::error("[K4VolumetricVideo::get_transform] Invalid camera id: [{}], number of cameras available: [{}]\n", idCamera, nb_cameras()); + return geo::Mat4d::identity(); +} + +auto K4VolumetricVideo::set_transform(size_t idCamera, geo::Mat4d tr) -> void{ + if(idCamera < nb_cameras()){ + m_framesPerCamera[idCamera].transform = tr; + 
return; + } + Logger::error("[K4VolumetricVideo::set_transform]Invalid camera id: [{}], number of cameras available: [{}]\n", idCamera, nb_cameras()); +} + +auto K4VolumetricVideo::add_compressed_frame(size_t idCamera, std::shared_ptr frame) -> void{ + if(idCamera >= nb_cameras()){ + m_framesPerCamera.resize(idCamera+1); + } + + m_framesPerCamera[idCamera].add_compressed_frame(std::move(frame)); +} + +auto K4VolumetricVideo::save_to_file(std::string_view path) -> bool{ + + if(path.length() == 0){ + Logger::error("[K4VolumetricVideo::save_to_file] Empty path.\n"); + return false; + } + + if(count_frames_from_all_cameras() == 0){ + Logger::error("[K4VolumetricVideo::save_to_file] No available frames to save.\n"); + return false; + } + + std::ofstream file; + file.open(path.data(), std::ios_base::binary); + if(!file.is_open()){ + Logger::error(std::format("[K4VolumetricVideo::save_to_file] Cannot save compressed frames to {}.\n", path)); + return false; + } + file.exceptions(std::ofstream::badbit | std::ofstream::failbit); + + // write file + bool success = false; + try{ + write_file(file); + success = true; + }catch(const std::exception &e){ + Logger::error(std::format("[K4VolumetricVideo::save_to_file] Error happend during writing file [{}].\n", e.what())); + } + file.close(); + + return success; +} + +auto K4VolumetricVideo::load_from_file(std::string_view path) -> bool{ + + if(path.length() == 0){ + Logger::error("[K4VolumetricVideo::load_from_file] Empty path.\n"); + return false; + } + + // open file + std::ifstream file(path.data(), std::ios_base::binary); + if(!file.is_open()){ + Logger::error(std::format("[K4VolumetricVideo::load_from_file] Cannot open compressed frames file: [{}].\n", path)); + return false; + } + file.exceptions ( std::ifstream::eofbit | std::ifstream::failbit | std::ifstream::badbit); + + // clean data + clean_all_cameras_compressed_frames(); + + // read file + bool success = false; + try{ + success = read_file(file); + }catch(const 
std::exception &e){ + Logger::error(std::format("[K4VolumetricVideo::load_from_file] Error happend during reading file [{}].\n", e.what())); + return false; + } + + return success; +} + +auto K4VolumetricVideo::uncompressor(size_t idCamera) noexcept -> K4FrameUncompressor*{ + if(idCamera < m_uncompressors.size()){ + return m_uncompressors[idCamera].get(); + } + return nullptr; +} + +#include + +auto K4VolumetricVideo::merge_all_cameras(float voxelSize, tool::geo::Pt3f minBound, tool::geo::Pt3f maxBound) -> void{ + + if(m_framesPerCamera.empty()){ + return; + } + + if(m_framesPerCamera.front().frames.empty()){ + return; + } + + auto c0FirstFrameTS = m_framesPerCamera.front().frames.front()->afterCaptureTS; + + K4FrameCompressor compressor; + for(size_t idF = 0; idF < nb_frames(0); ++idF){ + + auto c0Frame = m_framesPerCamera.front().frames[idF].get(); + auto c0Time = c0Frame->afterCaptureTS; + auto c0TimeMs= std::chrono::duration_cast(std::chrono::nanoseconds(c0Time - c0FirstFrameTS)); + + K4Frame final; + if(!uncompress_frame(0, idF, final)){ + continue; + } + + final.mode = K4Mode::Merged; + final.idCapture = c0Frame->idCapture; + final.afterCaptureTS = c0Frame->afterCaptureTS; + final.colorWidth = 0; + final.colorHeight = 0; + final.depthWidth = 0; + final.depthHeight = 0; + final.infraWidth = 0; + final.infraHeight = 0; + final.imageColorData.clear(); + final.depthData.clear(); + final.imageDepthData.clear(); + final.infraData.clear(); + final.imageInfraData.clear(); + + geo::VoxelGrid grid(voxelSize, minBound, maxBound); + grid.add_cloud(final.cloud, m_framesPerCamera.front().transform.conv()); + + for(size_t jj = 1; jj < nb_cameras(); ++jj){ + + size_t idF = 0; + if(auto id = closest_frame_id_from_time(jj, c0TimeMs.count()); id.has_value()){ + idF = id.value(); + }else{ + Logger::error(id.error()); + continue; + } + + K4Frame current; + std::cout << "uncompress_frame " << c0TimeMs.count() << " " << jj << " " << idF << "\n"; + if(!uncompress_frame(jj, idF, 
current)){ + continue; + } + + grid.add_cloud(current.cloud, m_framesPerCamera[jj].transform.conv()); + } + grid.compute_grid(); + grid.convert_to_cloud(final.cloud); + + // compress frame + m_framesPerCamera.front().frames[idF] = compressor.compress(final, 90); + } + + m_framesPerCamera.front().transform = geo::Mat4d::identity(); + + m_framesPerCamera.resize(1); + m_uncompressors.resize(1); +} + + +auto K4VolumetricVideo::merge_cameras_clouds(size_t idFrame, float sizeVoxel, geo::Pt3f minBound, geo::Pt3f maxBound) -> tool::geo::ColoredCloudData{ + + if(idFrame >= min_nb_frames()){ + // ... + return {}; + } + + geo::VoxelGrid grid(sizeVoxel, minBound, maxBound); + + K4Frame frame; + for(size_t ii = 0; ii < nb_cameras(); ++ii){ + uncompress_frame(ii, idFrame, frame); + grid.add_cloud(frame.cloud, get_transform(ii).conv()); + } + grid.compute_grid(); + + geo::ColoredCloudData cloud; + grid.convert_to_cloud(cloud); + + return cloud; +} + +auto K4VolumetricVideo::merge_cameras_clouds(size_t idFrame, float sizeVoxel, geo::Pt3f minBound, geo::Pt3f maxBound, K4Frame &frame) -> void{ + + frame = K4Frame(); + if(idFrame >= min_nb_frames()){ + // ... 
+ return; + } + + geo::VoxelGrid grid(sizeVoxel, minBound, maxBound); + for(size_t ii = 0; ii < nb_cameras(); ++ii){ + K4Frame uFrame; + uncompress_frame(ii, idFrame, uFrame); + grid.add_cloud(uFrame.cloud, get_transform(ii).conv()); + + if(ii == 0){ + frame.mode = K4Mode::Merged; + frame.idCapture = uFrame.idCapture; + frame.afterCaptureTS = uFrame.afterCaptureTS; + frame.imuSample = uFrame.imuSample; + frame.audioFrames = uFrame.audioFrames; + frame.bodies = uFrame.bodies; + } + } + grid.compute_grid(); + frame.cloud = merge_cameras_clouds(idFrame, sizeVoxel, minBound, maxBound); +} + + +auto K4VolumetricVideo::total_audio_frames_size(size_t idCamera) const -> size_t{ + if(idCamera < m_framesPerCamera.size()){ + size_t total = 0; + for(const auto &frame : m_framesPerCamera[idCamera].frames){ + for(const auto &audioChannel : frame->audioFrames){ + total += audioChannel.size(); + } + } + return total; + } + Logger::error("[K4VolumetricVideo::total_audio_frames_size] Invalid camera id.\n"); + return 0; +} + +auto K4VolumetricVideo::get_audio_samples_all_channels(size_t idCamera, std::vector > &audioBuffer) -> void{ + + auto camData = &m_framesPerCamera[idCamera]; + + size_t samplesCount = 0; + for(const auto &frame : camData->frames){ + if(frame != nullptr){ + samplesCount += frame->audioFrames.size(); + } + } + + if(samplesCount == 0){ + Logger::error(""); + return; + } + + audioBuffer.resize(7); + for(auto &channelAudioBuffer : audioBuffer){ + channelAudioBuffer.reserve(samplesCount); + } + + for(const auto &frame : camData->frames){ + if(frame != nullptr){ + for(size_t idChannel = 0; idChannel < 7; ++idChannel){ + for(const auto &channelsData : frame->audioFrames){ + audioBuffer[idChannel].push_back(channelsData[idChannel]); + } + } + } + } +} + +auto K4VolumetricVideo::get_audio_samples_all_channels(size_t idCamera, std::vector &audioBuffer) -> void{ + + auto camData = &m_framesPerCamera[idCamera]; + + size_t samplesCount = 0; + for(const auto &frame : 
camData->frames){ + samplesCount += frame->audioFrames.size(); + } + + if(samplesCount == 0){ + Logger::error(""); + return; + } + + audioBuffer.resize(samplesCount*7); + + size_t id = 0; + for(const auto &frame : camData->frames){ + for(const auto &channelsData : frame->audioFrames){ + for(size_t idChannel = 0; idChannel < 7; ++idChannel){ + audioBuffer[id++] = channelsData[idChannel]; + } + } + } +} + + +auto K4VolumetricVideo::read_file(std::ifstream &file) -> bool{ + + // read mode + std::int8_t videoType; + read(videoType, file); + + std::cout << "VIDEO TYPE" << (int)videoType << "\n"; + if(videoType == 0){ + read_legacy_full_video_file(file); + return true; + }else if(videoType == 1){ + read_legacy_cloud_video_file(file); + return true; + }else if(videoType != 2){ + Logger::error("[K4VolumetricVideo::read_file] Invalid video type.\n"); + return false; + } + + // read nb of cameras + std::int8_t nbCameras; + read(nbCameras, file); + initialize(nbCameras); + + // read infos per camera + std::int32_t nbFrames; + for(auto &cameraData : m_framesPerCamera){ + + // read nb frames + read(nbFrames, file); + + // create frames + cameraData.frames.reserve(nbFrames); + for(size_t ii = 0; ii < static_cast(nbFrames); ++ii){ + cameraData.frames.push_back(std::make_shared()); + } + + // calibration matrix + read_array(cameraData.transform.array.data(), file, 16); + } + + // read frames + for(auto &cameraData : m_framesPerCamera){ + for(auto &frame : cameraData.frames){ + double timeMsNotUsed; + read(timeMsNotUsed, file); // read time ms + frame->init_from_file_stream(file); + } + } + + return true; +} + +auto K4VolumetricVideo::write_file(std::ofstream &file) -> void{ + + // write video type + std::int8_t videoType = 2; + write(videoType, file); // std::int8_t + + // write nb of cameras + write(static_cast(m_framesPerCamera.size()), file); // std::int8_t + + // write infos per camera + for(auto &cameraData : m_framesPerCamera){ + // nb frames + 
write(static_cast(cameraData.frames.size()), file); // std::int32_t * cameras count + // calibration matrix + write_array(cameraData.transform.array.data(), file, 16); // double * 16 + } + + // writes frames + for(const auto &cameraData : m_framesPerCamera){ + for(const auto &frame : cameraData.frames){ + double timeMsNotUsed = 0.0; + write(timeMsNotUsed, file); + frame->write_to_file_stream(file); + } + } +} + +auto K4VolumetricVideo::read_legacy_cloud_video_file(std::ifstream &file) -> void{ + + // read nb of cameras + std::int8_t nbCameras; + read(nbCameras, file); + initialize(nbCameras); + + // read infos per camera + std::int32_t nbFrames; + for(auto &cameraData : m_framesPerCamera){ + + // read nb frames + read(nbFrames, file); + + // create frames + cameraData.frames.reserve(nbFrames); + for(size_t ii = 0; ii < static_cast(nbFrames); ++ii){ + cameraData.frames.push_back(std::make_shared()); + } + + // calibration matrix + read_array(cameraData.transform.array.data(), file, 16); + } + + // read frames + for(auto &cameraData : m_framesPerCamera){ + for(auto &frame : cameraData.frames){ + // read frame + frame->init_legacy_cloud_frame_file_stream(file); + } + } +} + +auto K4VolumetricVideo::read_legacy_full_video_file(std::ifstream &file) -> void{ + + // read nb of cameras + std::int8_t nbCameras; + read(nbCameras, file); + initialize(nbCameras); + + // read infos per camera + std::int32_t nbFrames; + for(auto &cameraData : m_framesPerCamera){ + + // read nb frames + read(nbFrames, file); + + // create frames + cameraData.frames.reserve(nbFrames); + for(size_t ii = 0; ii < static_cast(nbFrames); ++ii){ + cameraData.frames.push_back(std::make_shared()); + } + + // calibration matrix + read_array(cameraData.transform.array.data(), file, 16); + } + + // read frames + for(auto &cameraData : m_framesPerCamera){ + for(auto &frame : cameraData.frames){ + + std::int32_t idFrame; + std::int64_t timestamp; + // # read frame info + read(idFrame, file); + read(timestamp, 
file); + frame->afterCaptureTS = timestamp; + + // read frame + frame->init_legacy_full_frame_file_stream(file); + } + } +} diff --git a/cpp-projects/base/camera/kinect4/k4_volumetric_video.hpp b/cpp-projects/base/camera/kinect4/k4_volumetric_video.hpp new file mode 100644 index 0000000..a9ca5c7 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4_volumetric_video.hpp @@ -0,0 +1,159 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// std +#include + +// local +#include "geometry/matrix4.hpp" +#include "k4_frame_uncompressor.hpp" + + +namespace tool::camera{ + +class K4VolumetricVideo; + +struct K4CameraData{ + + K4CameraData() = default; + // Deep copy assignment: replaces the transform and the stored frames with copies of the ones of other. + K4CameraData& operator=(const K4CameraData& other){ + if(this == &other){ + return *this; // pushing into frames while iterating over the same vector would invalidate the iteration + } + this->transform = other.transform; + this->frames.clear(); // the copies used to be appended to the frames already stored + this->frames.reserve(other.frames.size()); + for(const auto &frame : other.frames){ + auto cf = std::make_shared(); + *cf = *frame; + this->frames.push_back(std::move(cf)); + } + return *this; + } + + geo::Mat4d transform = geo::Mat4d::identity(); + std::vector> frames; + + // getters + auto nb_frames() const noexcept -> size_t; + auto valid_vertices_count(size_t idFrame) const noexcept -> size_t; + auto first_frame_capture_timestamp() const noexcept -> std::expected; + auto last_frame_capture_timestamp() const noexcept -> std::expected; + + // modifiers + auto add_compressed_frame(std::shared_ptr frame) -> void; + auto get_compressed_frame(size_t idFrame) const noexcept -> std::weak_ptr; + auto remove_frames_until(size_t idFrame) -> void; + auto remove_frames_after(size_t idFrame) -> void; + auto clean() noexcept -> void; + + friend K4VolumetricVideo; + +private: + + auto first_frame_ptr() const -> K4CompressedFrame*; + auto last_frame_ptr() const -> K4CompressedFrame*; + auto get_frame_ptr(size_t idFrame) const -> K4CompressedFrame*; +}; + +class K4VolumetricVideo{ + +public: + + K4VolumetricVideo() = default; + K4VolumetricVideo& operator=(const K4VolumetricVideo& other); + + auto initialize(size_t nbCameras) -> void; + auto initialize_from(const K4VolumetricVideo &video) -> void; + + // cameras + auto nb_cameras() const noexcept -> size_t; + auto get_camera_data(size_t idCamera) const noexcept -> const K4CameraData*; + + // times + auto first_frame_capture_timestamp() const noexcept -> std::expected; + auto last_frame_capture_timestamp() const noexcept 
-> std::expected; +// auto set_video_start_timestamp(std::chrono::nanoseconds videoStartTS) noexcept -> void; +// auto set_video_end_timestamp(std::chrono::nanoseconds videoEndTS) noexcept -> void; + auto duration_ms() const noexcept -> double; + static auto get_timestamp_diff_time_ms(std::int64_t t1, std::int64_t t2) noexcept -> double; + + // frames + auto nb_frames(size_t idCamera) const noexcept -> size_t; + auto total_nb_frames() const noexcept -> size_t; + auto min_nb_frames() const noexcept -> size_t; + auto closest_frame_id_from_time(size_t idCamera, double timeMs) const noexcept -> std::expected; + // # add + auto add_compressed_frame(size_t idCamera, std::shared_ptr frame) -> void; + // # get + auto get_compressed_frame(size_t idCamera, size_t idFrame) -> std::weak_ptr; + auto count_frames_from_all_cameras() const noexcept -> size_t; + // # uncompress + auto uncompressor(size_t idCamera) noexcept -> K4FrameUncompressor*; + auto uncompress_frame(size_t idCamera, size_t idFrame, K4Frame &frame) -> bool; + auto uncompress_frame(size_t idCamera, K4CompressedFrame *cFrame, K4Frame &frame) -> bool; + // # remove + auto remove_compressed_frames_until(size_t idCamera, size_t idFrame) -> void; + auto remove_compressed_frames_after(size_t idCamera, size_t idFrame) -> void; + auto keep_only_one_camera(size_t idCamera) -> void; + // # clean + auto clean_all_cameras_compressed_frames() noexcept -> void; + auto clean_camera_compressed_frames(size_t idCamera) noexcept -> void; + // # merge + auto merge_cameras_clouds(size_t idFrame, float sizeVoxel, geo::Pt3f minBound, geo::Pt3f maxBound, camera::K4Frame &frame) -> void; + auto merge_cameras_clouds(size_t idFrame, float sizeVoxel, geo::Pt3f minBound, geo::Pt3f maxBound) -> tool::geo::ColoredCloudData; + auto merge_all_cameras(float voxelSize, tool::geo::Pt3f minBound, tool::geo::Pt3f maxBound) -> void; + + // audio + auto total_audio_frames_size(size_t idCamera) const -> size_t; + auto 
get_audio_samples_all_channels(size_t idCamera, std::vector> &audioBuffer) -> void; + auto get_audio_samples_all_channels(size_t idCamera, std::vector &audioBuffer) -> void; + + // calibration + auto get_transform(size_t idCamera) const -> geo::Mat4d; + auto set_transform(size_t idCamera, geo::Mat4d tr) -> void; + + // i/o + auto save_to_file(std::string_view path) -> bool; + auto load_from_file(std::string_view path) -> bool; + +protected: + + std::vector> m_uncompressors; + std::vector m_framesPerCamera; +// std::chrono::nanoseconds m_videoStartTS{}; +// std::chrono::nanoseconds m_videoEndTS{}; + +private: + + // i/o files + auto read_file(std::ifstream &file) -> bool; + auto write_file(std::ofstream &file) -> void; + // # legacy + auto read_legacy_cloud_video_file(std::ifstream &file) -> void; + auto read_legacy_full_video_file(std::ifstream &file) -> void; +}; + +} diff --git a/cpp-projects/base/camera/kinect4/k4a/k4a_body_tracking_helpers.hpp b/cpp-projects/base/camera/kinect4/k4a/k4a_body_tracking_helpers.hpp new file mode 100644 index 0000000..98af9d8 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4a/k4a_body_tracking_helpers.hpp @@ -0,0 +1,119 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +// std +#include +#include +#include + +// kabt +#include + +//// Define the bone list based on the documentation +//static constexpr std::array, 31> g_boneList = +//{ +// std::make_pair(K4ABT_JOINT_SPINE_CHEST, K4ABT_JOINT_SPINE_NAVEL), +// std::make_pair(K4ABT_JOINT_SPINE_NAVEL, K4ABT_JOINT_PELVIS), +// std::make_pair(K4ABT_JOINT_SPINE_CHEST, K4ABT_JOINT_NECK), +// std::make_pair(K4ABT_JOINT_NECK, K4ABT_JOINT_HEAD), +// std::make_pair(K4ABT_JOINT_HEAD, K4ABT_JOINT_NOSE), + +// std::make_pair(K4ABT_JOINT_SPINE_CHEST, K4ABT_JOINT_CLAVICLE_LEFT), +// std::make_pair(K4ABT_JOINT_CLAVICLE_LEFT, K4ABT_JOINT_SHOULDER_LEFT), +// std::make_pair(K4ABT_JOINT_SHOULDER_LEFT, K4ABT_JOINT_ELBOW_LEFT), +// std::make_pair(K4ABT_JOINT_ELBOW_LEFT, K4ABT_JOINT_WRIST_LEFT), +// std::make_pair(K4ABT_JOINT_WRIST_LEFT, K4ABT_JOINT_HAND_LEFT), +// std::make_pair(K4ABT_JOINT_HAND_LEFT, K4ABT_JOINT_HANDTIP_LEFT), +// std::make_pair(K4ABT_JOINT_WRIST_LEFT, K4ABT_JOINT_THUMB_LEFT), +// std::make_pair(K4ABT_JOINT_PELVIS, K4ABT_JOINT_HIP_LEFT), +// std::make_pair(K4ABT_JOINT_HIP_LEFT, K4ABT_JOINT_KNEE_LEFT), +// std::make_pair(K4ABT_JOINT_KNEE_LEFT, K4ABT_JOINT_ANKLE_LEFT), +// std::make_pair(K4ABT_JOINT_ANKLE_LEFT, K4ABT_JOINT_FOOT_LEFT), +// std::make_pair(K4ABT_JOINT_NOSE, K4ABT_JOINT_EYE_LEFT), +// std::make_pair(K4ABT_JOINT_EYE_LEFT, K4ABT_JOINT_EAR_LEFT), + +// std::make_pair(K4ABT_JOINT_SPINE_CHEST, K4ABT_JOINT_CLAVICLE_RIGHT), +// std::make_pair(K4ABT_JOINT_CLAVICLE_RIGHT, K4ABT_JOINT_SHOULDER_RIGHT), +// std::make_pair(K4ABT_JOINT_SHOULDER_RIGHT, K4ABT_JOINT_ELBOW_RIGHT), +// std::make_pair(K4ABT_JOINT_ELBOW_RIGHT, K4ABT_JOINT_WRIST_RIGHT), +// std::make_pair(K4ABT_JOINT_WRIST_RIGHT, K4ABT_JOINT_HAND_RIGHT), +// std::make_pair(K4ABT_JOINT_HAND_RIGHT, K4ABT_JOINT_HANDTIP_RIGHT), +// std::make_pair(K4ABT_JOINT_WRIST_RIGHT, K4ABT_JOINT_THUMB_RIGHT), +// std::make_pair(K4ABT_JOINT_PELVIS, K4ABT_JOINT_HIP_RIGHT), +// std::make_pair(K4ABT_JOINT_HIP_RIGHT, 
K4ABT_JOINT_KNEE_RIGHT), +// std::make_pair(K4ABT_JOINT_KNEE_RIGHT, K4ABT_JOINT_ANKLE_RIGHT), +// std::make_pair(K4ABT_JOINT_ANKLE_RIGHT, K4ABT_JOINT_FOOT_RIGHT), +// std::make_pair(K4ABT_JOINT_NOSE, K4ABT_JOINT_EYE_RIGHT), +// std::make_pair(K4ABT_JOINT_EYE_RIGHT, K4ABT_JOINT_EAR_RIGHT) +//}; + +//// Define the joint string names +//static inline std::unordered_map g_jointNames = +//{ +// std::make_pair(K4ABT_JOINT_PELVIS, "PELVIS"), +// std::make_pair(K4ABT_JOINT_SPINE_NAVEL, "SPINE_NAVEL"), +// std::make_pair(K4ABT_JOINT_SPINE_CHEST, "SPINE_CHEST"), +// std::make_pair(K4ABT_JOINT_NECK, "NECK"), +// std::make_pair(K4ABT_JOINT_CLAVICLE_LEFT, "CLAVICLE_LEFT"), +// std::make_pair(K4ABT_JOINT_SHOULDER_LEFT, "SHOULDER_LEFT"), +// std::make_pair(K4ABT_JOINT_ELBOW_LEFT, "ELBOW_LEFT"), +// std::make_pair(K4ABT_JOINT_WRIST_LEFT, "WRIST_LEFT"), +// std::make_pair(K4ABT_JOINT_HAND_LEFT, "HAND_LEFT"), +// std::make_pair(K4ABT_JOINT_HANDTIP_LEFT, "HANDTIP_LEFT"), +// std::make_pair(K4ABT_JOINT_THUMB_LEFT, "THUMB_LEFT"), +// std::make_pair(K4ABT_JOINT_CLAVICLE_RIGHT,"CLAVICLE_RIGHT"), +// std::make_pair(K4ABT_JOINT_SHOULDER_RIGHT,"SHOULDER_RIGHT"), +// std::make_pair(K4ABT_JOINT_ELBOW_RIGHT, "ELBOW_RIGHT"), +// std::make_pair(K4ABT_JOINT_WRIST_RIGHT, "WRIST_RIGHT"), +// std::make_pair(K4ABT_JOINT_HAND_RIGHT, "HAND_RIGHT"), +// std::make_pair(K4ABT_JOINT_HANDTIP_RIGHT, "HANDTIP_RIGHT"), +// std::make_pair(K4ABT_JOINT_THUMB_RIGHT, "THUMB_RIGHT"), +// std::make_pair(K4ABT_JOINT_HIP_LEFT, "HIP_LEFT"), +// std::make_pair(K4ABT_JOINT_KNEE_LEFT, "KNEE_LEFT"), +// std::make_pair(K4ABT_JOINT_ANKLE_LEFT, "ANKLE_LEFT"), +// std::make_pair(K4ABT_JOINT_FOOT_LEFT, "FOOT_LEFT"), +// std::make_pair(K4ABT_JOINT_HIP_RIGHT, "HIP_RIGHT"), +// std::make_pair(K4ABT_JOINT_KNEE_RIGHT, "KNEE_RIGHT"), +// std::make_pair(K4ABT_JOINT_ANKLE_RIGHT, "ANKLE_RIGHT"), +// std::make_pair(K4ABT_JOINT_FOOT_RIGHT, "FOOT_RIGHT"), +// std::make_pair(K4ABT_JOINT_HEAD, "HEAD"), +// std::make_pair(K4ABT_JOINT_NOSE, 
"NOSE"), +// std::make_pair(K4ABT_JOINT_EYE_LEFT, "EYE_LEFT"), +// std::make_pair(K4ABT_JOINT_EAR_LEFT, "EAR_LEFT"), +// std::make_pair(K4ABT_JOINT_EYE_RIGHT, "EYE_RIGHT"), +// std::make_pair(K4ABT_JOINT_EAR_RIGHT, "EAR_RIGHT") +//}; + +//struct Color +//{ +// float r = 1.f; +// float g = 1.f; +// float b = 1.f; +// float a = 1.f; +//}; + +//static constexpr std::array g_bodyColors = +//{ +//Color{0.00f, 1.00f, 1.00f, 1.00f}, +// {1.00f, 0.65f, 0.00f, 1.00f}, +// {0.00f, 0.50f, 0.50f, 1.00f}, +// {0.85f, 0.44f, 0.84f, 1.00f}, +// {0.00f, 1.00f, 0.50f, 1.00f}, +// {0.53f, 0.81f, 0.98f, 1.00f}, +// {1.00f, 0.39f, 0.28f, 1.00f}, +// {0.13f, 0.70f, 0.67f, 1.00f}, +// {0.87f, 0.63f, 0.87f, 1.00f}, +// {0.00f, 0.98f, 0.60f, 1.00f}, +// {0.00f, 1.00f, 1.00f, 1.00f}, +// {1.00f, 0.65f, 0.00f, 1.00f}, +// {0.00f, 0.50f, 0.50f, 1.00f}, +// {0.85f, 0.44f, 0.84f, 1.00f}, +// {0.00f, 1.00f, 0.50f, 1.00f}, +// {0.53f, 0.81f, 0.98f, 1.00f}, +// {1.00f, 0.39f, 0.28f, 1.00f}, +// {0.13f, 0.70f, 0.67f, 1.00f}, +// {0.87f, 0.63f, 0.87f, 1.00f}, +// {0.00f, 0.98f, 0.60f, 1.00f} +//}; diff --git a/cpp-projects/base/camera/kinect4/k4a/k4aaudiochanneldatagraph.cpp b/cpp-projects/base/camera/kinect4/k4a/k4aaudiochanneldatagraph.cpp new file mode 100644 index 0000000..7b180f4 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4a/k4aaudiochanneldatagraph.cpp @@ -0,0 +1,191 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +// Associated header +// +#include "k4aaudiochanneldatagraph.h" + +// System headers +// +#include + + +using namespace k4a; + +//namespace +//{ +//ImVec2 operator+(const ImVec2 &lhs, const ImVec2 &rhs) +//{ +// return { lhs.x + rhs.x, lhs.y + rhs.y }; +//} + +//ImVec2 operator-(const ImVec2 &lhs, const ImVec2 &rhs) +//{ +// return { lhs.x - rhs.x, lhs.y - rhs.y }; +//} +//} // namespace + +K4AAudioChannelDataGraph::K4AAudioChannelDataGraph(const char *name) : m_name(name) {} + +void K4AAudioChannelDataGraph::add_sample(const float sample){ + + // We're computing the root-mean-square for visualization + // + if (sample > 0){ + m_positiveDataAccumulator.add_sample(sample); + }else if (sample < 0){ + m_negativeDataAccumulator.add_sample(sample); + }else{ + m_positiveDataAccumulator.add_sample(sample); + m_negativeDataAccumulator.add_sample(sample); + } + + const size_t totalSamples = m_positiveDataAccumulator.get_sample_count() + m_negativeDataAccumulator.get_sample_count(); + if (totalSamples >= AudioSamplesPerGraphSample){ + // Update graph data + // + m_graphData[m_nextGraphPointIndex] = DataPoint( + m_positiveDataAccumulator.get_abs_max(), + m_positiveDataAccumulator.get_rms(), + -m_negativeDataAccumulator.get_rms(), + -m_negativeDataAccumulator.get_abs_max() + ); + + // Advance graph point + // + m_nextGraphPointIndex = (m_nextGraphPointIndex + 1) % AudioChannelGraphSampleCount; + + // Reset accumulators + // + m_positiveDataAccumulator.reset(); + m_negativeDataAccumulator.reset(); + } +} + +void K4AAudioChannelDataGraph::SignedAudioDataAccumulator::add_sample(const float sample){ + m_sampleCount++; + m_rmsAccumulator += sample * sample; + m_absMax = std::max(std::fabs(sample), m_absMax); +} + +void K4AAudioChannelDataGraph::SignedAudioDataAccumulator::reset(){ + m_sampleCount = 0; + m_rmsAccumulator = 0; + m_absMax = 0; +} + +float K4AAudioChannelDataGraph::SignedAudioDataAccumulator::get_abs_max() const{ + return m_absMax; +} + +float 
K4AAudioChannelDataGraph::SignedAudioDataAccumulator::get_rms() const{ + if (m_sampleCount == 0){ + return 0; + } + + return std::sqrt(m_rmsAccumulator / m_sampleCount); +} + +size_t K4AAudioChannelDataGraph::SignedAudioDataAccumulator::get_sample_count() const{ + return m_sampleCount; +} + + +//void K4AAudioChannelDataGraph::Show(ImVec2 graphSize, const float scale) +//{ + +// const float scaleMin = -scale; +// const float scaleMax = scale; + +// ImGuiWindow *window = ImGui::GetCurrentWindow(); +// if (window->SkipItems) +// { +// return; +// } + +// const ImGuiStyle &style = ImGui::GetCurrentContext()->Style; + +// if (graphSize.x == 0.0f) +// graphSize.x = ImGui::CalcItemWidth(); +// if (graphSize.y == 0.0f) +// graphSize.y = style.FramePadding.y * 2; + +// const ImRect frameBoundingBox(window->DC.CursorPos, window->DC.CursorPos + ImVec2(graphSize.x, graphSize.y)); +// const ImRect innerBoundingBox(frameBoundingBox.Min + style.FramePadding, frameBoundingBox.Max - style.FramePadding); +// const ImRect totalBoundingBox(frameBoundingBox.Min, frameBoundingBox.Max); +// ImGui::ItemSize(totalBoundingBox, style.FramePadding.y); + +// if (!ImGui::ItemAdd(totalBoundingBox, 0, &frameBoundingBox)) +// { +// return; +// } + +// ImGui::RenderFrame(frameBoundingBox.Min, +// frameBoundingBox.Max, +// ImGui::GetColorU32(ImGuiCol_FrameBg), +// true, +// style.FrameRounding); + +// const auto numValues = static_cast(m_graphData.size()); +// if (numValues > 0) +// { +// const int sampleCount = ImMin(static_cast(graphSize.x), numValues); + +// const float timeStep = 1.0f / static_cast(sampleCount); +// const float scaleRatio = 1.0f / (scaleMax - scaleMin); + +// const auto startOffset = static_cast(m_nextGraphPointIndex); + +// const ImU32 colorMinMax = ImGui::GetColorU32({ 0.2f, 0.2f, 0.8f, 1.0f }); +// const ImU32 colorRms = ImGui::GetColorU32({ 0.4f, 0.4f, 0.85f, 1.0f }); + +// float t0 = 0.0f; +// for (int n = 0; n < sampleCount; n++) +// { +// const float t1 = t0 + timeStep; + 
+// auto drawDataPoint = [&](const float min, const float max, const ImU32 color) { +// const auto normalizeValue = [scaleMin, scaleRatio](const float value) { +// return 1.0f - ImSaturate((value - scaleMin) * scaleRatio); +// }; + +// // Points in relative coordinates of graph area +// // +// const ImVec2 relativeUpperLeft = ImVec2(t0, normalizeValue(max)); +// const ImVec2 relativeLowerRight = ImVec2(t1, normalizeValue(min)); + +// // Points in absolute/window coordinates +// // +// const ImVec2 absoluteUpperLeft = ImLerp(innerBoundingBox.Min, innerBoundingBox.Max, relativeUpperLeft); +// ImVec2 absoluteLowerRight = ImLerp(innerBoundingBox.Min, innerBoundingBox.Max, relativeLowerRight); + +// // Make sure floating point error in normalization doesn't cause us +// // to make an extra-wide bar +// // +// if (absoluteLowerRight.x >= absoluteUpperLeft.x + 2.0f) +// absoluteLowerRight.x -= 1.0f; + +// window->DrawList->AddRectFilled(absoluteUpperLeft, absoluteLowerRight, color); +// }; + +// const auto offset = static_cast((std::lround(t0 * numValues + 0.5f) + startOffset + 1) % numValues); +// DataPoint ¤tDataPoint = m_graphData[offset]; +// drawDataPoint(currentDataPoint.Min, currentDataPoint.Max, colorMinMax); + +// // RMS graph will always be smaller, so draw it on top of the min/max graph +// // +// drawDataPoint(currentDataPoint.NegativeRms, currentDataPoint.PositiveRms, colorRms); + +// t0 = t1; +// } +// } + +// // Show channel label overlay +// // +// ImGui::RenderTextClipped(ImVec2(frameBoundingBox.Min.x, frameBoundingBox.Min.y + style.FramePadding.y), +// frameBoundingBox.Max, +// m_name.c_str(), +// nullptr, +// nullptr, +// ImVec2(0.5f, 0.0f)); +//} diff --git a/cpp-projects/base/camera/kinect4/k4a/k4aaudiochanneldatagraph.h b/cpp-projects/base/camera/kinect4/k4a/k4aaudiochanneldatagraph.h new file mode 100644 index 0000000..32307dd --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4a/k4aaudiochanneldatagraph.h @@ -0,0 +1,81 @@ +// Copyright (c) 
Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#ifndef K4AAUDIOCHANNELDATAGRAPH_H +#define K4AAUDIOCHANNELDATAGRAPH_H + +// System headers +// +#include + +// Project headers +// +#include "k4amicrophonelistener.h" + +namespace k4a{ +class K4AAudioChannelDataGraph{ + +public: + + explicit K4AAudioChannelDataGraph(const char *name); + void add_sample(float sample); + + + +private: + + // We need to keep track of the min and max separately to produce graphs + // + class SignedAudioDataAccumulator{ + + public: + void add_sample(float sample); + void reset(); + float get_abs_max() const; + + // Gets the root-mean-square of the samples that have been given to the accumulator, + // which is intended to be an estimation of the loudness of the sound + // + float get_rms() const; + size_t get_sample_count() const; + + private: + size_t m_sampleCount = 0; + float m_rmsAccumulator = 0; + float m_absMax = 0; + }; + + struct DataPoint + { + float Max; + float PositiveRms; + float NegativeRms; + float Min; + + DataPoint(const float max, const float positiveRms, const float negativeRms, const float min) : + Max(max), + PositiveRms(positiveRms), + NegativeRms(negativeRms), + Min(min) + {} + DataPoint() : Max(0), PositiveRms(0), NegativeRms(0), Min(0) {} + }; + + + + + // We're targeting 60FPS, so we want to do our sample math approximately + // often enough that we trigger an update to the graph every frame. 
+ // + static constexpr size_t AudioChannelGraphSampleCount = 120; + static constexpr size_t AudioSamplesPerGraphSample = K4AMicrophoneSampleRate / 60; + std::array m_graphData = {}; + size_t m_nextGraphPointIndex = 0; + SignedAudioDataAccumulator m_positiveDataAccumulator; + SignedAudioDataAccumulator m_negativeDataAccumulator; + std::string m_name = "Unknown channel"; + std::string m_zoomSliderLabel; +}; +} // namespace k4a + +#endif diff --git a/cpp-projects/base/camera/kinect4/k4a/k4aaudiomanager.cpp b/cpp-projects/base/camera/kinect4/k4a/k4aaudiomanager.cpp new file mode 100644 index 0000000..f3d23a7 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4a/k4aaudiomanager.cpp @@ -0,0 +1,84 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// Associated header +// +#include "k4aaudiomanager.h" + +// System headers +// +#include + +// Library headers +// +#include + +// Project headers +// +#include "k4adevicecorrelator.h" + +using namespace k4a; + +K4AAudioManager &K4AAudioManager::Instance(){ + static K4AAudioManager instance; + return instance; +} + +int K4AAudioManager::Initialize(SoundIoBackend backend){ + return InitializeImpl([backend](SoundIo *soundIo) { return soundio_connect_backend(soundIo, backend); }); +} + +int K4AAudioManager::Initialize(){ + return InitializeImpl(soundio_connect); +} + +int K4AAudioManager::InitializeImpl(const std::function &initFn){ + // soundio_create() yields null when allocation fails; never hand that to initFn. + m_io.reset(soundio_create()); + if (!m_io){ return SoundIoErrorNoMem; } + const int status = initFn(m_io.get()); + if (status != SoundIoErrorNone){ + return status; + } + + return RefreshDevices(); +} + +int K4AAudioManager::RefreshDevices(){ + + if (!m_io){ + return SoundIoErrorInvalid; + } + + soundio_flush_events(m_io.get()); + + m_devicesNames.clear(); + m_inputDevices.clear(); + + const int inputCount = soundio_input_device_count(m_io.get()); + for (int i = 0; i < inputCount; i++){ + std::shared_ptr device(soundio_get_input_device(m_io.get(), i), SoundIoDeviceDeleter()); 
+ if (device){ + // Each device is listed twice - a 'raw' device and a not-'raw' device. + // We only want the non-raw ones. + // + if (device->is_raw){ + continue; + } + + m_devicesNames.push_back(device->name); + m_inputDevices[device->name] = std::move(device); + } + } + + return SoundIoErrorNone; +} + +std::shared_ptr K4AAudioManager::get_microphone_for_device(const std::string &deviceName){ + const auto soundIoDevice = m_inputDevices.find(deviceName); + if (soundIoDevice == m_inputDevices.end()){ + return nullptr; + } + return std::shared_ptr(new K4AMicrophone(soundIoDevice->second)); +} + diff --git a/cpp-projects/base/camera/kinect4/k4a/k4aaudiomanager.h b/cpp-projects/base/camera/kinect4/k4a/k4aaudiomanager.h new file mode 100644 index 0000000..e5a74af --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4a/k4aaudiomanager.h @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#ifndef K4AAUDIOMANAGER_H +#define K4AAUDIOMANAGER_H + +// System headers +// +#include +#include +#include + +// Library headers +// +#include "k4asoundio_util.h" + +// Project headers +// +#include "k4amicrophone.h" + +namespace k4a +{ +class K4AAudioManager +{ +public: + static K4AAudioManager &Instance(); + + int Initialize(SoundIoBackend backend); + int Initialize(); + + int RefreshDevices(); + + size_t get_devices_count() const{ + return m_devicesNames.size(); + } + + std::string get_device_name(size_t id) const{ + if(id < m_devicesNames.size()){ + return m_devicesNames[id]; + } + return ""; + } + + std::shared_ptr get_microphone_for_device(const std::string &deviceName); +// std::shared_ptr GetMicrophoneForDevice(const std::string &deviceSerialNumber); + + ~K4AAudioManager() = default; + K4AAudioManager(const K4AAudioManager &) = delete; + K4AAudioManager(const K4AAudioManager &&) = delete; + K4AAudioManager &operator=(const K4AAudioManager &) = delete; + K4AAudioManager &operator=(const K4AAudioManager &&) = 
delete; + +private: + K4AAudioManager() = default; + + int InitializeImpl(const std::function &initFn); + + SoundIoUniquePtr m_io; + + std::vector m_devicesNames; + std::map> m_inputDevices; +}; +} // namespace k4aviewer + +#endif diff --git a/cpp-projects/base/camera/kinect4/k4a/k4aaudiowindow.cpp b/cpp-projects/base/camera/kinect4/k4a/k4aaudiowindow.cpp new file mode 100644 index 0000000..16642be --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4a/k4aaudiowindow.cpp @@ -0,0 +1,136 @@ +//// Copyright (c) Microsoft Corporation. All rights reserved. +//// Licensed under the MIT License. + +//// Associated header +//// +//#include "k4aaudiowindow.h" + +//// System headers +//// +//#include + +//// Library headers +//// +//#include "k4aimgui_all.h" + +//// Project headers +//// +//#include "k4aimguiextensions.h" +//#include "k4amicrophonelistener.h" +//#include "k4aviewererrormanager.h" +//#include "k4awindowsizehelpers.h" + +//using namespace k4aviewer; + +//namespace +//{ +//constexpr float MinHeight = 140.f; +//} + +//K4AAudioWindow::K4AAudioWindow(std::string &&title, std::shared_ptr listener) : +// m_title(std::move(title)), +// m_listener(std::move(listener)), +// m_channelData{ { +// K4AAudioChannelDataGraph("Channel 0"), +// K4AAudioChannelDataGraph("Channel 1"), +// K4AAudioChannelDataGraph("Channel 2"), +// K4AAudioChannelDataGraph("Channel 3"), +// K4AAudioChannelDataGraph("Channel 4"), +// K4AAudioChannelDataGraph("Channel 5"), +// K4AAudioChannelDataGraph("Channel 6"), +// } } +//{ +//} + +//void K4AAudioWindow::Show(K4AWindowPlacementInfo placementInfo) +//{ +// ProcessNewData(); + +// if (m_listener && m_listener->GetStatus() != SoundIoErrorNone) +// { +// std::stringstream errorBuilder; +// errorBuilder << "Microphone failed: " << soundio_strerror(m_listener->GetStatus()) << "!"; +// K4AViewerErrorManager::Instance().SetErrorStatus(errorBuilder.str()); +// m_listener.reset(); +// } + +// if (!m_listener) +// { +// ImGui::Text("Microphone 
failed!"); +// return; +// } + +// ImGuiStyle &style = ImGui::GetStyle(); +// ImVec2 sliderSize; + +// sliderSize.x = GetStandardVerticalSliderWidth(); +// sliderSize.y = placementInfo.Size.y; +// sliderSize.y -= GetTitleBarHeight(); +// sliderSize.y -= 2 * style.WindowPadding.y; +// sliderSize.y -= 2 * style.ItemSpacing.y; + +// sliderSize.y = std::max(MinHeight, sliderSize.y); + +// ImVec2 graphSize; +// graphSize.x = placementInfo.Size.x; +// graphSize.x -= sliderSize.x; +// graphSize.x -= 2 * style.WindowPadding.x; +// graphSize.x -= 2 * style.ItemSpacing.x; + +// graphSize.y = sliderSize.y; +// graphSize.y -= style.ItemSpacing.y * (m_channelData.size() - 1); +// graphSize.y /= m_channelData.size(); + +// // We use negative numbers for the scale so the slider goes up for more sensitivity, +// // which is a bit more intuitive +// // +// ImGuiExtensions::K4AVSliderFloat("##MicrophoneScale", sliderSize, &m_microphoneScale, -1.0f, -0.1f, "Scale"); + +// ImGui::SameLine(); + +// ImGui::BeginGroup(); +// for (auto &channelData : m_channelData) +// { +// channelData.Show(graphSize, -m_microphoneScale); +// } +// ImGui::EndGroup(); +//} + +//const char *K4AAudioWindow::GetTitle() const +//{ +// return m_title.c_str(); +//} + +//void K4AAudioWindow::ProcessNewData() +//{ +// if (!m_listener) +// { +// return; +// } + +// m_listener->ProcessFrames([this](K4AMicrophoneFrame *frame, const size_t frameCount) { +// for (size_t frameId = 0; frameId < frameCount; frameId++) +// { +// for (size_t channelId = 0; channelId < K4AMicrophoneFrame::ChannelCount; channelId++) +// { +// m_channelData[channelId].AddSample(frame[frameId].Channel[channelId]); +// } +// } + +// return frameCount; +// }); + +// if (m_listener->GetStatus() != SoundIoErrorNone) +// { +// std::stringstream errorBuilder; +// errorBuilder << "Error while recording " << soundio_strerror(m_listener->GetStatus()) << "!"; + +// K4AViewerErrorManager::Instance().SetErrorStatus(errorBuilder.str()); +// 
m_listener.reset(); +// } +// else if (m_listener->Overflowed()) +// { +// K4AViewerErrorManager::Instance().SetErrorStatus("Warning: sound overflow detected!"); +// m_listener->ClearOverflowed(); +// } +//} diff --git a/cpp-projects/base/camera/kinect4/k4a/k4aaudiowindow.h b/cpp-projects/base/camera/kinect4/k4a/k4aaudiowindow.h new file mode 100644 index 0000000..df82129 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4a/k4aaudiowindow.h @@ -0,0 +1,44 @@ +//// Copyright (c) Microsoft Corporation. All rights reserved. +//// Licensed under the MIT License. + +//#ifndef K4AAUDIOWINDOW_H +//#define K4AAUDIOWINDOW_H + +//// System headers +//// +//#include +//#include + +//// Library headers +//// + +//// Project headers +//// +//#include "ik4avisualizationwindow.h" +//#include "k4aaudiochanneldatagraph.h" +//#include "k4amicrophonelistener.h" + +//namespace k4aviewer +//{ +//class K4AAudioWindow : public IK4AVisualizationWindow +//{ +//public: +// explicit K4AAudioWindow(std::string &&title, std::shared_ptr listener); + +// void Show(K4AWindowPlacementInfo placementInfo) override; +// const char *GetTitle() const override; + +//private: +// void ProcessNewData(); + +// std::string m_title; + +// std::shared_ptr m_listener; + +// std::array m_channelData; + +// float m_microphoneScale = -0.5f; +//}; +//} // namespace k4aviewer + +//#endif diff --git a/cpp-projects/base/camera/kinect4/k4a/k4adevicecorrelator.h b/cpp-projects/base/camera/kinect4/k4a/k4adevicecorrelator.h new file mode 100644 index 0000000..156238d --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4a/k4adevicecorrelator.h @@ -0,0 +1,33 @@ +//// Copyright (c) Microsoft Corporation. All rights reserved. +//// Licensed under the MIT License. 
+ +//#ifndef K4ADEVICECORRELATOR_H +//#define K4ADEVICECORRELATOR_H + +//// System headers +//// +//#include +//#include + +//// Library headers +//// + +//// Project headers +//// +//#include "k4asoundio_util.h" + +//namespace k4aviewer +//{ +//// Populates result with a map from libsoundio backend ID to USB container ID. +//// Implementation of this function is platform-specific. +//// Returns true if successful, false otherwise. +//// +//class K4ADeviceCorrelator +//{ +//public: +// static bool GetSoundIoBackendIdToSerialNumberMapping(SoundIo *soundIo, std::map *result); + +// K4ADeviceCorrelator() = delete; +//}; +//} // namespace k4aviewer +//#endif // K4ADEVICECORRELATOR_H diff --git a/cpp-projects/base/camera/kinect4/k4a/k4amicrophone.cpp b/cpp-projects/base/camera/kinect4/k4a/k4amicrophone.cpp new file mode 100644 index 0000000..6b16fb4 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4a/k4amicrophone.cpp @@ -0,0 +1,297 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// Associated header +// +#include "k4amicrophone.h" + +// System headers +// +#include + +// Library headers +// + +// Project headers +// +#include "k4amicrophonelistener.h" +//#include "k4aviewererrormanager.h" + +using namespace k4a; + +K4AMicrophone::K4AMicrophone(std::shared_ptr device) : m_device(std::move(device)) {} + +void K4AMicrophone::SetFailed(const int errorCode) +{ + // We can't stop the stream because this function can be called from the reader thread, + // and you're not allowed to call soundio_destroy_instream from the reader thread. + // Instead, we set the failed state and destroy the instream the next time someone tries + // to use it. K4AMicrophoneListeners know to check this and drop their references to the + // K4AMicrophone if the K4AMicrophone has failed. 
+ // + m_statusCode = errorCode; + m_started = false; + + // The next time all our listeners try to pull frames and realize that the mic is dead, + // we'll lose the last reference to the microphone and it'll get deleted. We can't actually + // stop the sound stream on the callback thread, though (libsoundio doesn't allow the callback + // thread to call soundio_instream_destroy), so we can't stop the callbacks from happening. + // To work around this, we have to null out the pointer that libsoundio has so our callback can + // know that the stream is dead and we don't segfault. + // + m_inStream->userdata = nullptr; +} + +int K4AMicrophone::Start() +{ + m_inStream.reset(soundio_instream_create(m_device.get())); + if (!m_inStream) + { + return SoundIoErrorNoMem; + } + + m_inStream->format = SoundIoFormatFloat32LE; + m_inStream->sample_rate = K4AMicrophoneSampleRate; + + m_inStream->layout = *soundio_channel_layout_get_builtin(SoundIoChannelLayoutId7Point0); + m_inStream->software_latency = 0.2; + + m_inStream->userdata = this; + m_inStream->read_callback = K4AMicrophone::ReadCallback; + m_inStream->overflow_callback = K4AMicrophone::OverflowCallback; + m_inStream->error_callback = K4AMicrophone::ErrorCallback; + + int result = soundio_instream_open(m_inStream.get()); + if (result != SoundIoErrorNone) + { + m_inStream.reset(); + return result; + } + + result = soundio_instream_start(m_inStream.get()); + if (result != SoundIoErrorNone) + { + return result; + } + + m_started = true; + + return result; +} + +void K4AMicrophone::Stop() +{ + m_started = false; + m_inStream.reset(); + + std::lock_guard lock(m_listenersMutex); + m_listeners.clear(); +} + +std::shared_ptr K4AMicrophone::CreateListener() +{ + if (!m_inStream) + { + return nullptr; + } + + constexpr int bufferPaddingRatio = 3; + const auto bufferSize = static_cast(bufferPaddingRatio * m_inStream->software_latency * + m_inStream->sample_rate * m_inStream->bytes_per_frame); + + auto result = 
std::shared_ptr(new K4AMicrophoneListener(shared_from_this(), bufferSize)); + if (!result->m_buffer) + { + // OOM + // + result.reset(); + return nullptr; + } + + std::weak_ptr newListenerWeakPtr = result; + + std::lock_guard lock(m_listenersMutex); + m_listeners.emplace_back(std::move(newListenerWeakPtr)); + + return result; +} + +void K4AMicrophone::ReadCallback(SoundIoInStream *inStream, const int frameCountMin, const int frameCountMax) +{ + if (!inStream->userdata) + { + return; + } + + auto instance = reinterpret_cast(inStream->userdata); + + int maxFramesToWrite = 0; + + // Grab references to all the listeners and figure out how many frames we're going to read + // + struct ListenerInfo + { + std::shared_ptr Listener = nullptr; + int FramesToWrite = 0; + int FramesWritten = 0; + char *WritePtr = nullptr; + }; + + std::vector listenerInfo; + + // We don't need the lock on the listeners for the whole function, so create a scope that just + // lasts as long as we need the lock + { + std::lock_guard lock(instance->m_listenersMutex); + listenerInfo.reserve(instance->m_listeners.size()); + + bool expiredReferencesFound = false; + + for (std::weak_ptr &wpListener : instance->m_listeners) + { + std::shared_ptr spListener = wpListener.lock(); + if (spListener) + { + const int bufferFreeBytes = soundio_ring_buffer_free_count(spListener->m_buffer.get()); + const int bufferFreeFrames = bufferFreeBytes / instance->m_inStream->bytes_per_frame; + const int totalFramesToWrite = std::min(bufferFreeFrames, frameCountMax); + + ListenerInfo newListener; + newListener.Listener = std::move(spListener); + newListener.FramesToWrite = totalFramesToWrite; + newListener.FramesWritten = 0; + newListener.WritePtr = soundio_ring_buffer_write_ptr(newListener.Listener->m_buffer.get()); + listenerInfo.emplace_back(std::move(newListener)); + + maxFramesToWrite = std::max(totalFramesToWrite, maxFramesToWrite); + } + else + { + expiredReferencesFound = true; + } + } + + // Clean up listeners 
that have been destroyed + // + if (expiredReferencesFound) + { + const auto isExpired = [](const std::weak_ptr &wp) { return wp.expired(); }; + + instance->m_listeners.erase(std::remove_if(instance->m_listeners.begin(), + instance->m_listeners.end(), + isExpired), + instance->m_listeners.end()); + } + } + + if (frameCountMin > maxFramesToWrite) + { + // Everyone is out of buffer space, which means something has gone badly wrong. + // + ErrorCallback(inStream, SoundIoErrorStreaming); + return; + } + + // Actually read audio data + // + int remainingFramesToWrite = maxFramesToWrite; + int maxFramesWritten = 0; + while (true) + { + int readFrameCount = remainingFramesToWrite; + SoundIoChannelArea *areas; + + int err = soundio_instream_begin_read(instance->m_inStream.get(), &areas, &readFrameCount); + if (err != SoundIoErrorNone) + { + ErrorCallback(inStream, err); + return; + } + + if (readFrameCount == 0) + { + break; + } + + // Distribute audio data to each listener + // + for (ListenerInfo &listener : listenerInfo) + { + const int framesToWriteForListener = std::min(readFrameCount, listener.FramesToWrite); + + if (framesToWriteForListener < readFrameCount) + { + // This listener has run out of space in its buffer and is going to lose data. + // + listener.Listener->m_overflowed = true; + } + + if (areas == nullptr) + { + // There is a hole in the buffer; we need to fill it with silence. + // This can happen if the microphone is muted by the OS. 
+ // + memset(listener.WritePtr, + 0, + static_cast(readFrameCount * instance->m_inStream->bytes_per_frame)); + } + else + { + for (int frame = 0; frame < readFrameCount; ++frame) + { + for (int channel = 0; channel < instance->m_inStream->layout.channel_count; channel++) + { + memcpy(listener.WritePtr, + areas[channel].ptr, + static_cast(instance->m_inStream->bytes_per_sample)); + areas[channel].ptr += areas[channel].step; + listener.WritePtr += instance->m_inStream->bytes_per_sample; + } + } + } + + listener.FramesToWrite -= framesToWriteForListener; + listener.FramesWritten += framesToWriteForListener; + + maxFramesWritten = std::max(listener.FramesWritten, maxFramesWritten); + } + + err = soundio_instream_end_read(instance->m_inStream.get()); + if (err != SoundIoErrorNone) + { + ErrorCallback(inStream, err); + return; + } + + remainingFramesToWrite -= readFrameCount; + if (remainingFramesToWrite <= 0) + { + break; + } + } + + for (ListenerInfo &listener : listenerInfo) + { + const int bytesWritten = listener.FramesWritten * instance->m_inStream->bytes_per_frame; + soundio_ring_buffer_advance_write_ptr(listener.Listener->m_buffer.get(), bytesWritten); + + // This listener fell behind and lost some data; notify it that this happened + // + if (listener.FramesWritten < maxFramesWritten) + { + listener.Listener->m_overflowed = true; + } + } +} + +void K4AMicrophone::ErrorCallback(SoundIoInStream *inStream, const int errorCode) +{ + auto instance = reinterpret_cast(inStream->userdata); + if (!instance){ return; } // SetFailed() already nulled userdata: the stream is dead, nothing left to flag + instance->SetFailed(errorCode); +} + +void K4AMicrophone::OverflowCallback(SoundIoInStream *inStream) +{ + ErrorCallback(inStream, SoundIoErrorStreaming); +} diff --git a/cpp-projects/base/camera/kinect4/k4a/k4amicrophone.h b/cpp-projects/base/camera/kinect4/k4a/k4amicrophone.h new file mode 100644 index 0000000..c56e50a --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4a/k4amicrophone.h @@ -0,0 +1,82 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +#ifndef K4AMICROPHONE_H +#define K4AMICROPHONE_H + +// System headers +// +#include +#include +#include + +// Library headers +// +#include "k4asoundio_util.h" + +// Project headers +// + +namespace k4a +{ +class K4AMicrophoneListener; + +class K4AMicrophone : public std::enable_shared_from_this +{ +public: + // Returns a libsoundio exit code + // + int Start(); + + void Stop(); + + // SoundIO status code + // + int GetStatusCode() const + { + return m_statusCode; + } + + void ClearStatusCode() + { + m_statusCode = SoundIoErrorNone; + } + + bool IsStarted() const + { + return m_started; + } + + std::shared_ptr CreateListener(); + + K4AMicrophone(const K4AMicrophone &) = delete; + K4AMicrophone(const K4AMicrophone &&) = delete; + K4AMicrophone &operator=(const K4AMicrophone &) = delete; + K4AMicrophone &operator=(const K4AMicrophone &&) = delete; + + ~K4AMicrophone() = default; + +private: + friend class K4AAudioManager; + explicit K4AMicrophone(std::shared_ptr device); + + void SetFailed(int errorCode); + + // These are callbacks that we give to libsoundio. + // inStream->userdata is a void* that will point to a K4AMicrophone instance. 
+ // + static void ReadCallback(SoundIoInStream *inStream, int frameCountMin, int frameCountMax); + static void ErrorCallback(SoundIoInStream *inStream, int errorCode); + static void OverflowCallback(SoundIoInStream *inStream); + + std::mutex m_listenersMutex; + std::vector> m_listeners; + + SoundIoInStreamUniquePtr m_inStream = nullptr; + std::shared_ptr m_device = nullptr; + bool m_started = false; + int m_statusCode = SoundIoErrorNone; +}; +} // namespace k4aviewer + +#endif diff --git a/cpp-projects/base/camera/kinect4/k4a/k4amicrophonelistener.cpp b/cpp-projects/base/camera/kinect4/k4a/k4amicrophonelistener.cpp new file mode 100644 index 0000000..998a315 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4a/k4amicrophonelistener.cpp @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// Associated header +// +#include "k4amicrophonelistener.h" + +// System headers +// +#include + +// Library headers +// + +// Project headers +// + +using namespace k4a; + +size_t K4AMicrophoneListener::ProcessFrames(const std::function &processor) +{ + if (!m_buffer) + { + return 0; + } + + if (m_backingDevice->GetStatusCode() != SoundIoErrorNone) + { + // When our backing device fails, that's unrecoverable. To get working again, + // we need to recreate the microphone listener. Clear out everything. 
+ // + m_statusCode = m_backingDevice->GetStatusCode(); + m_buffer.reset(); + m_backingDevice.reset(); + return 0; + } + + const auto readableBytes = static_cast(soundio_ring_buffer_fill_count(m_buffer.get())); + const size_t readableFrames = readableBytes / sizeof(K4AMicrophoneFrame); + + if (readableFrames == 0) + { + return 0; + } + + char *readPoint = soundio_ring_buffer_read_ptr(m_buffer.get()); + auto *frameReadPoint = reinterpret_cast(readPoint); + + const size_t readFrames = processor(frameReadPoint, readableFrames); + const size_t readBytes = readFrames * sizeof(K4AMicrophoneFrame); + + soundio_ring_buffer_advance_read_ptr(m_buffer.get(), static_cast(readBytes)); + + return readFrames; +} + +K4AMicrophoneListener::K4AMicrophoneListener(std::shared_ptr backingDevice, const size_t bufferSize) : + m_backingDevice(std::move(backingDevice)) +{ + m_buffer.reset(soundio_ring_buffer_create(nullptr, static_cast(bufferSize))); + if (m_buffer) + { + memset(soundio_ring_buffer_write_ptr(m_buffer.get()), 0, bufferSize); + } +} diff --git a/cpp-projects/base/camera/kinect4/k4a/k4amicrophonelistener.h b/cpp-projects/base/camera/kinect4/k4a/k4amicrophonelistener.h new file mode 100644 index 0000000..b496609 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4a/k4amicrophonelistener.h @@ -0,0 +1,71 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#ifndef K4AMICROPHONELISTENER_H +#define K4AMICROPHONELISTENER_H + +// System headers +// +#include +#include + +// Library headers +// +#include "k4asoundio_util.h" + +// Project headers +// +#include "k4amicrophone.h" + +namespace k4a{ + +constexpr size_t K4AMicrophoneSampleRate = 48000; +struct K4AMicrophoneFrame{ + static constexpr size_t ChannelCount = 7; + float Channel[ChannelCount]; +}; + +class K4AMicrophoneListener +{ +public: + // processor takes a pointer to array of frames and the number of frames available to read. 
It returns + // the number of frames that it processed (i.e. wants removed from the buffer). It must return a number + // that is <= the number of frames it received. + // + size_t ProcessFrames(const std::function &processor); + + int GetStatus() const + { + return m_statusCode; + } + + bool Overflowed() const + { + return m_overflowed; + } + + void ClearOverflowed() + { + m_overflowed = false; + } + + ~K4AMicrophoneListener() = default; + + K4AMicrophoneListener(const K4AMicrophoneListener &) = delete; + K4AMicrophoneListener(const K4AMicrophoneListener &&) = delete; + K4AMicrophoneListener &operator=(const K4AMicrophoneListener &) = delete; + K4AMicrophoneListener &operator=(const K4AMicrophoneListener &&) = delete; + +private: + friend class K4AMicrophone; + + K4AMicrophoneListener(std::shared_ptr backingDevice, size_t bufferSize); + + SoundIoRingBufferUniquePtr m_buffer; + std::shared_ptr m_backingDevice; + int m_statusCode = SoundIoErrorNone; + bool m_overflowed = false; +}; +} // namespace k4aviewer + +#endif diff --git a/cpp-projects/base/camera/kinect4/k4a/k4asoundio_util.h b/cpp-projects/base/camera/kinect4/k4a/k4asoundio_util.h new file mode 100644 index 0000000..dc0aba1 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4a/k4asoundio_util.h @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#ifndef K4ASOUNDIO_UTIL_H +#define K4ASOUNDIO_UTIL_H + +// System headers +// +#include + +// Library headers +// + +// On Windows, this ends up including Windows.h, which by default #defines +// min/max, which overrides std::min/std::max. +// This disables that behavior. 
+// +#define NOMINMAX +#include + +// Project headers +// + +namespace k4a +{ +// Deleter functors for smart pointer types +// +struct SoundIoDeleter +{ + void operator()(SoundIo *s) const + { + soundio_destroy(s); + } +}; + +struct SoundIoDeviceDeleter +{ + void operator()(SoundIoDevice *s) const + { + soundio_device_unref(s); + } +}; + +struct SoundIoInStreamDeleter +{ + void operator()(SoundIoInStream *s) const + { + soundio_instream_destroy(s); + } +}; + +struct SoundIoRingBufferDeleter +{ + void operator()(SoundIoRingBuffer *b) const + { + soundio_ring_buffer_destroy(b); + } +}; + +using SoundIoUniquePtr = std::unique_ptr; +using SoundIoInStreamUniquePtr = std::unique_ptr; +using SoundIoRingBufferUniquePtr = std::unique_ptr; +} // namespace k4aviewer + +#endif diff --git a/cpp-projects/base/camera/kinect4/k4a/k4astaticimageproperties.h b/cpp-projects/base/camera/kinect4/k4a/k4astaticimageproperties.h new file mode 100644 index 0000000..ce39e6e --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4a/k4astaticimageproperties.h @@ -0,0 +1,104 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#ifndef K4AIMAGESIZES_H +#define K4AIMAGESIZES_H + +#include +#include + +#include + +namespace k4a +{ + +// Gets the dimensions of the color images that the color camera will produce for a +// given color resolution +// +inline std::pair GetColorDimensions(const k4a_color_resolution_t resolution) +{ + switch (resolution) + { + case K4A_COLOR_RESOLUTION_720P: + return { 1280, 720 }; + case K4A_COLOR_RESOLUTION_2160P: + return { 3840, 2160 }; + case K4A_COLOR_RESOLUTION_1440P: + return { 2560, 1440 }; + case K4A_COLOR_RESOLUTION_1080P: + return { 1920, 1080 }; + case K4A_COLOR_RESOLUTION_3072P: + return { 4096, 3072 }; + case K4A_COLOR_RESOLUTION_1536P: + return { 2048, 1536 }; + + default: + throw std::logic_error("Invalid color dimensions value!"); + } +} + +// Gets the dimensions of the depth images that the depth camera will produce for a +// given depth mode +// +inline std::pair GetDepthDimensions(const k4a_depth_mode_t depthMode) +{ + switch (depthMode) + { + case K4A_DEPTH_MODE_NFOV_2X2BINNED: + return { 320, 288 }; + case K4A_DEPTH_MODE_NFOV_UNBINNED: + return { 640, 576 }; + case K4A_DEPTH_MODE_WFOV_2X2BINNED: + return { 512, 512 }; + case K4A_DEPTH_MODE_WFOV_UNBINNED: + return { 1024, 1024 }; + case K4A_DEPTH_MODE_PASSIVE_IR: + return { 1024, 1024 }; + + default: + throw std::logic_error("Invalid depth dimensions value!"); + } +} + +// Gets the range of values that we expect to see from the depth camera +// when using a given depth mode, in millimeters +// +inline std::pair GetDepthModeRange(const k4a_depth_mode_t depthMode) +{ + switch (depthMode) + { + case K4A_DEPTH_MODE_NFOV_2X2BINNED: + return { (uint16_t)500, (uint16_t)5800 }; + case K4A_DEPTH_MODE_NFOV_UNBINNED: + return { (uint16_t)500, (uint16_t)4000 }; + case K4A_DEPTH_MODE_WFOV_2X2BINNED: + return { (uint16_t)250, (uint16_t)3000 }; + case K4A_DEPTH_MODE_WFOV_UNBINNED: + return { (uint16_t)250, (uint16_t)2500 }; + + case K4A_DEPTH_MODE_PASSIVE_IR: + default: + throw std::logic_error("Invalid 
depth mode!"); + } +} + +// Gets the expected min/max IR brightness levels that we expect to see +// from the IR camera when using a given depth mode +// +inline std::pair GetIrLevels(const k4a_depth_mode_t depthMode) +{ + switch (depthMode) + { + case K4A_DEPTH_MODE_PASSIVE_IR: + return { (uint16_t)0, (uint16_t)100 }; + + case K4A_DEPTH_MODE_OFF: + throw std::logic_error("Invalid depth mode!"); + + default: + return { (uint16_t)0, (uint16_t)1000 }; + } +} +} // namespace k4aviewer + +#endif diff --git a/cpp-projects/base/camera/kinect4/k4a/platform/windows/k4adevicecorrelator.cpp b/cpp-projects/base/camera/kinect4/k4a/platform/windows/k4adevicecorrelator.cpp new file mode 100644 index 0000000..8b494c9 --- /dev/null +++ b/cpp-projects/base/camera/kinect4/k4a/platform/windows/k4adevicecorrelator.cpp @@ -0,0 +1,298 @@ +//// Copyright (c) Microsoft Corporation. All rights reserved. +//// Licensed under the MIT License. + +//// Associated header +//// +//#include "k4adevicecorrelator.h" + +//// System headers +//// +//#include +//#include +//#include +//#include +//#include +//#include + +//// Library headers +//// + +//// Shut off windows.h's min/max macros so they don't conflict with the STL min/max functions, which +//// are used in some of the k4aviewer headers +//// +//#define NOMINMAX + +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include + +//// Project headers +//// +////#include "k4aviewererrormanager.h" +////#include "k4aviewerutil.h" + +//#define RETURN_IF_FAILED(hr) \ +// if (FAILED(hr)) \ +// { \ +// return hr; \ +// } + +//using namespace k4aviewer; + +//namespace +//{ +//// Functor to clean up COM objects that we create +//// +//template struct ComSafeDeleter +//{ +// void operator()(T *managedComObject) +// { +// if (managedComObject != nullptr) +// { +// managedComObject->Release(); +// managedComObject = nullptr; +// } +// } +//}; + +//template using ComUniquePtr = std::unique_ptr>; + +//// GUID comparer to 
allow use of std::maps of GUIDs +//// +//struct GuidComparer +//{ +// bool operator()(const GUID &a, const GUID &b) const +// { +// return a.Data1 != b.Data1 ? +// a.Data1 < b.Data1 : +// a.Data2 != b.Data2 ? a.Data2 < b.Data2 : +// a.Data3 != b.Data3 ? a.Data3 < b.Data3 : memcmp(a.Data4, b.Data4, 8) < 0; +// } +//}; + +//// Adapted from https://docs.microsoft.com/en-us/windows/desktop/CoreAudio/device-properties +//// +//HRESULT GetContainerIdToWasapiIdMap(std::map *result) +//{ +// ComUniquePtr pEnumerator; +// { +// // clang-format off +// // +// // BCDE0395-E52F-467C-8E3D-C4579291692E +// // +// constexpr CLSID mmDeviceEnumeratorGuid = +// { 0xBCDE0395, 0xE52F, 0x467C, { 0x8E, 0x3D, 0xC4, 0x57, 0x92, 0x91, 0x69, 0x2E } }; + +// // A95664D2-9614-4F35-A746-DE8DB63617E6 +// // +// constexpr IID immDeviceEnumeratorGuid = +// { 0xA95664D2, 0x9614, 0x4F35, { 0xA7, 0x46, 0xDE, 0x8D, 0xB6, 0x36, 0x17, 0xE6 } }; +// // +// // clang-format on + +// IMMDeviceEnumerator *rawEnumerator = nullptr; +// RETURN_IF_FAILED(CoCreateInstance(mmDeviceEnumeratorGuid, +// nullptr, +// CLSCTX_ALL, +// immDeviceEnumeratorGuid, +// reinterpret_cast(&rawEnumerator))); +// pEnumerator.reset(rawEnumerator); +// } + +// // Enumerate capture devices (i.e microphones) +// // +// ComUniquePtr pCollection; +// { +// IMMDeviceCollection *rawCollection = nullptr; +// RETURN_IF_FAILED(pEnumerator->EnumAudioEndpoints(eCapture, DEVICE_STATE_ACTIVE, &rawCollection)); +// pCollection.reset(rawCollection); +// } + +// UINT count; +// RETURN_IF_FAILED(pCollection->GetCount(&count)); + +// for (ULONG i = 0; i < count; i++) +// { +// ComUniquePtr pEndpoint; +// { +// IMMDevice *rawEndpoint = nullptr; +// RETURN_IF_FAILED(pCollection->Item(i, &rawEndpoint)); +// pEndpoint.reset(rawEndpoint); +// } + +// // Get the endpoint ID string (this is what libsoundio wants) +// // +// LPWSTR idComString = nullptr; +// RETURN_IF_FAILED(pEndpoint->GetId(&idComString)); + +// std::wstring idWString = idComString; +// 
CoTaskMemFree(idComString); + +// // Convert to an ASCII string, which is what libsoundio expects. +// // Note that this works by doing a narrowing conversion of the wchar_ts in the wstring +// // into to chars in the string. +// // This is not always safe. However, in this case, it is, because we know that WASAPI identifiers +// // only use letters A-Z, numbers 0-9, dashes(-), periods(.), and curly braces ({}), all of which +// // have the same numeric value for chars and wchars and don't use the upper byte of the wchar. +// // +// std::wstring_convert> converter; +// std::string idString = converter.to_bytes(idWString); + +// ComUniquePtr pProps; +// { +// IPropertyStore *rawProps = nullptr; +// RETURN_IF_FAILED(pEndpoint->OpenPropertyStore(STGM_READ, &rawProps)); +// pProps.reset(rawProps); +// } + +// // Get the endpoint's container ID +// PROPVARIANT containerIdProperty; +// PropVariantInit(&containerIdProperty); +// RETURN_IF_FAILED(pProps->GetValue(PKEY_Device_ContainerId, &containerIdProperty)); + +// GUID containerId = *containerIdProperty.puuid; + +// PropVariantClear(&containerIdProperty); + +// (*result)[containerId] = std::move(idString); +// } + +// return S_OK; +//} + +//HRESULT GetSerialNumberToContainerIdMap(std::map *result) +//{ +// // WinUSB Device {88BAE032-5A81-49F0-BC3D-A4FF138216D6} +// // see http://msdn.microsoft.com/en-us/library/windows/hardware/ff553426%28v=vs.85%29.aspx +// // +// // clang-format off +// constexpr GUID winUsbDeviceClassGuid = +// { 0x88BAE032, 0x5A81, 0x49F0, { 0xBC, 0x3D, 0xA4, 0xFF, 0x13, 0x82, 0x16, 0xD6 } }; +// // clang-format on + +// // Get list of USB devices +// // +// HDEVINFO hDevInfo = SetupDiGetClassDevs(&winUsbDeviceClassGuid, "USB", nullptr, DIGCF_ALLCLASSES | DIGCF_PRESENT); +// if (hDevInfo == INVALID_HANDLE_VALUE) +// { +// return HRESULT_FROM_WIN32(GetLastError()); +// } + +// CleanupGuard hDevInfoGuard([&hDevInfo]() { SetupDiDestroyDeviceInfoList(hDevInfo); }); + +// // Loop through the devices from 
the USB class +// // +// SP_DEVINFO_DATA deviceInfo{}; +// deviceInfo.cbSize = sizeof(deviceInfo); +// DWORD currentDeviceId = 0; +// while (SetupDiEnumDeviceInfo(hDevInfo, currentDeviceId, &deviceInfo)) +// { +// currentDeviceId++; + +// // The device path shouldn't be > this for any of the devices that we care about +// // +// char devicePathBuffer[500]; +// DWORD devicePathLength = 0; + +// if (!SetupDiGetDeviceInstanceId(hDevInfo, +// &deviceInfo, +// devicePathBuffer, +// sizeof(devicePathBuffer), +// &devicePathLength)) +// { +// continue; +// } + +// // Example string: USB\VID_045E&PID_097C\EV1-014 +// // +// const std::regex vidPidRegex(R"(USB\\VID_([0-9A-F]{4})&PID_([0-9A-F]{4})\\(.*))"); + +// std::cmatch match; +// if (!std::regex_match(devicePathBuffer, match, vidPidRegex)) +// { +// continue; +// } + +// // Extract vid/pid/serial number +// // +// // match[0] is the whole match; captures start at 1 +// // +// std::string vidStr = match[1]; +// std::string pidStr = match[2]; +// std::string serialNumber = match[3]; + +// constexpr int baseHex = 16; +// const auto vid = uint16_t(std::stoul(vidStr, nullptr, baseHex)); +// const auto pid = uint16_t(std::stoul(pidStr, nullptr, baseHex)); + +// constexpr uint16_t depthCameraVid = 0x045E; +// constexpr uint16_t depthCameraPid = 0x097C; + +// if (vid != depthCameraVid || pid != depthCameraPid) +// { +// continue; +// } + +// DEVPROPTYPE propType; +// GUID containerId{}; +// if (!SetupDiGetDevicePropertyW(hDevInfo, +// &deviceInfo, +// &DEVPKEY_Device_ContainerId, +// &propType, +// reinterpret_cast(&containerId), +// sizeof(containerId), +// nullptr, +// 0)) +// { +// return HRESULT_FROM_WIN32(GetLastError()); +// } + +// (*result)[serialNumber] = containerId; +// } + +// // SetupDiEnumDeviceInfo is expected to set the last error to ERROR_NO_MORE_ITEMS when +// // it completes successfully. If it fails for any other reason, we have a problem and want +// // to report failure. 
+// // +// const DWORD lastError = GetLastError(); +// if (lastError != ERROR_NO_MORE_ITEMS) +// { +// return HRESULT_FROM_WIN32(lastError); +// } + +// return S_OK; +//} +//} // namespace + +//bool K4ADeviceCorrelator::GetSoundIoBackendIdToSerialNumberMapping(SoundIo *soundio, +// std::map *result) +//{ +// (void)soundio; + +// std::map serialNumberToContainerIdMap; +// std::map containerIdToWasapiIdMap; + +// if (FAILED(GetSerialNumberToContainerIdMap(&serialNumberToContainerIdMap)) || +// FAILED(GetContainerIdToWasapiIdMap(&containerIdToWasapiIdMap))) +// { +// return false; +// } + +// result->clear(); + +// for (auto &serialNumberToContainerIdMapping : serialNumberToContainerIdMap) +// { +// auto containerIdToWasapiIdMapping = containerIdToWasapiIdMap.find(serialNumberToContainerIdMapping.second); +// if (containerIdToWasapiIdMapping != containerIdToWasapiIdMap.end()) +// { +// (*result)[containerIdToWasapiIdMapping->second] = serialNumberToContainerIdMapping.first; +// } +// } + +// return true; +//} diff --git a/cpp-projects/base/data/FastDifferentialCoding/fastdelta.c b/cpp-projects/base/data/FastDifferentialCoding/fastdelta.c new file mode 100644 index 0000000..702dec4 --- /dev/null +++ b/cpp-projects/base/data/FastDifferentialCoding/fastdelta.c @@ -0,0 +1,102 @@ + +#include "fastdelta.h" + +#if defined(_MSC_VER) + /* Microsoft C/C++-compatible compiler */ + #include +#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) + /* GCC-compatible compiler, targeting x86/x86-64 */ + #include +#elif defined(__GNUC__) && defined(__ARM_NEON__) + /* GCC-compatible compiler, targeting ARM with NEON */ + #include +#elif defined(__GNUC__) && defined(__IWMMXT__) + /* GCC-compatible compiler, targeting ARM with WMMX */ + #include +#elif (defined(__GNUC__) || defined(__xlC__)) && (defined(__VEC__) || defined(__ALTIVEC__)) + /* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */ + #include +#elif defined(__GNUC__) && defined(__SPE__) + /* 
// Write to output the successive differences of input:
//   output[0] = input[0] - starting_point
//   output[i] = input[i] - input[i-1]   for i >= 1
// Arithmetic is modulo 2^32, so a decreasing input simply wraps around and is
// restored exactly by compute_prefix_sum.
// There are "length" values in input and output (length may be 0).
// input and output must not partially overlap; passing the very same pointer
// for both is supported (each element is read before it is overwritten).
void compute_deltas(const uint32_t * input, size_t length, uint32_t * output, uint32_t starting_point) {
    uint32_t previous = starting_point;
    size_t i = 0;
    // The vector path needs SSE3 (lddqu), SSSE3 (alignr) and SSE4.1 (extract).
    // GCC/Clang advertise that through __SSE4_1__; MSVC exposes the intrinsics
    // on every x86/x64 build without defining the macro. Any other target
    // (ARM, PowerPC, x86 without -msse4.1) uses the scalar loop alone.
#if defined(__SSE4_1__) || (defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64)))
    const size_t vector_count = length / 4;
    __m128i prev = _mm_set1_epi32((int)starting_point);
    for(; i < vector_count; i++) {
        const __m128i curr = _mm_lddqu_si128((const __m128i *)input + i);
        // alignr(curr, prev, 12) == { prev[3], curr[0], curr[1], curr[2] }
        const __m128i delta = _mm_sub_epi32(curr, _mm_alignr_epi8(curr, prev, 12));
        _mm_storeu_si128((__m128i *)output + i, delta);
        prev = curr;
    }
    // hand the last processed value over to the scalar tail
    previous = (uint32_t)_mm_extract_epi32(prev, 3);
    i *= 4;
#endif
    for(; i < length; ++i) {
        const uint32_t curr = input[i];
        output[i] = curr - previous;
        previous = curr;
    }
}

// Replace buffer with its successive differences:
//   buffer[0] - starting_point, buffer[1] - buffer[0], ...
// There are "length" values in buffer (length may be 0).
void compute_deltas_inplace(uint32_t * buffer, size_t length, uint32_t starting_point) {
    // compute_deltas reads every element before overwriting it, so running it
    // with input == output is exactly the in-place transform.
    compute_deltas(buffer, length, buffer, starting_point);
}

// write to output the prefix sums of input (starting_point+input[0], starting_point+input[0]+input[1], ...)
// there are "length" values in input and output (length may be 0)
// input and output must not partially overlap; passing the very same pointer
// for both is supported (each element is read before it is overwritten)
//
// output[i] = starting_point + input[0] + ... + input[i]   (modulo 2^32)
// This is the inverse of compute_deltas called with the same starting_point.
void compute_prefix_sum(const uint32_t * input, size_t length, uint32_t * output, uint32_t starting_point) {
    uint32_t running_total = starting_point;
    size_t i = 0;
    // The vector path needs SSE3 (lddqu) and SSE4.1 (extract). GCC/Clang
    // advertise that through __SSE4_1__; MSVC exposes the intrinsics on every
    // x86/x64 build without defining the macro. Any other target (ARM,
    // PowerPC, x86 without -msse4.1) uses the scalar loop alone.
#if defined(__SSE4_1__) || (defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64)))
    const size_t vector_count = length / 4;
    __m128i prev = _mm_set1_epi32((int)starting_point);
    for(; i < vector_count; i++) {
        const __m128i curr = _mm_lddqu_si128((const __m128i *)input + i);
        // two shift-and-add steps turn {a,b,c,d} into {a, a+b, a+b+c, a+b+c+d}
        const __m128i pair_sums = _mm_add_epi32(_mm_slli_si128(curr, 8), curr);
        const __m128i quad_sums = _mm_add_epi32(_mm_slli_si128(pair_sums, 4), pair_sums);
        // add the running total carried in the top lane of the previous result
        prev = _mm_add_epi32(quad_sums, _mm_shuffle_epi32(prev, 0xff));
        _mm_storeu_si128((__m128i *)output + i, prev);
    }
    // hand the running total over to the scalar tail
    running_total = (uint32_t)_mm_extract_epi32(prev, 3);
    i *= 4;
#endif
    for(; i < length; ++i) {
        running_total += input[i];
        output[i] = running_total;
    }
}

// Replace buffer with its prefix sums:
//   buffer[i] = starting_point + buffer[0] + ... + buffer[i]   (modulo 2^32)
// There are "length" values in buffer (length may be 0).
void compute_prefix_sum_inplace(uint32_t * buffer, size_t length, uint32_t starting_point) {
    // compute_prefix_sum reads every element before overwriting it, so running
    // it with input == output is exactly the in-place transform.
    compute_prefix_sum(buffer, length, buffer, starting_point);
}
fast successive differences, and recover the original +* values from the fast successive differences (i.e., they compute a prefix sum) +* using fast SIMD instructions. +* +* Reference : +* Daniel Lemire, Nathan Kurz, Leonid Boytsov, SIMD Compression and the Intersection of Sorted +* Integers, Software: Practice and Experience (to appear) +* http://arxiv.org/abs/1401.6399 +*/ + +// write to output the successive differences of input (input[0]-starting_point, input[1]-input[2], ...) +// there are "length" values in input and output +// input and output must be distinct +// it can make sense to set to zero by default +void compute_deltas(const uint32_t * input, size_t length, uint32_t * output, uint32_t starting_point); + + + +// write to buffer the successive differences of buffer (buffer[0]-starting_point, buffer[1]-buffer[2], ...) +// there are "length" values in buffer +// it can make sense to set to zero by default +void compute_deltas_inplace(uint32_t * buffer, size_t length, uint32_t starting_point); + + +// write to output the successive differences of input (input[0]-starting_point, input[1]-input[2], ...) +// there are "length" values in input and output +// input and output must be distinct +void compute_prefix_sum(const uint32_t * input, size_t length, uint32_t * output, uint32_t starting_point); + + +// write to buffer the successive differences of buffer (buffer[0]-starting_point, buffer[1]-buffer[2], ...) 
+// there are "length" values in buffer +void compute_prefix_sum_inplace(uint32_t * buffer, size_t length, uint32_t starting_point); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/cpp-projects/base/data/integers_encoder.cpp b/cpp-projects/base/data/integers_encoder.cpp new file mode 100644 index 0000000..1bc893b --- /dev/null +++ b/cpp-projects/base/data/integers_encoder.cpp @@ -0,0 +1,190 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#include "integers_encoder.hpp" + +#pragma warning( disable : 4100 ) +#pragma warning( disable : 4267 ) + + + +// fastfor +#include "fastpfor/codecfactory.h" +#include "fastpfor/deltautil.h" +//#include "fastpfor/simdbinarypacking.h" + + +using namespace tool::data; + +struct IntegersEncoder::Impl{ + std::shared_ptr codec = nullptr; + Impl(){} +}; + +//void _verify64() { +// size_t inSize = in64.size(); +// std::vector encoded(in64.size() * 4); +// size_t encodeSize = encoded.size(); + +// codec->encodeArray( +// reinterpret_cast(in64.data()), +// inSize, +// encoded.data(), +// encodeSize); + +// out64.resize(inSize); +// codec->decodeArray( +// encoded.data(), +// encodeSize, +// reinterpret_cast(out64.data()), +// inSize); + +// bool passed = true; +// for (size_t i = 0; i < inSize; ++i) { +// if (in64[i] != out64[i]) { +// passed = false; +// } +// EXPECT_EQ(in64[i], out64[i]); +// } +// if (!passed) { +// std::cout << "Test failed with int64 input: "; +// for (size_t i = 0; i < inSize; ++i) { +// std::cout << in64[i] << " "; +// } +// std::cout << std::endl; +// } +//} + + +IntegersEncoder::IntegersEncoder() : m_p(std::make_unique()){ + + // std::shared_ptr( + // new CompositeCodec, VariableByte>()); + +// m_p->codec = std::shared_ptr( +// new FastPForLib::CompositeCodec, FastPForLib::VariableByte>()); + +// m_p->codec = std::shared_ptr( +// new FastPForLib::CompositeCodec, FastPForLib::VariableByte>()); + + m_p->codec = std::shared_ptr( + new FastPForLib::CompositeCodec, FastPForLib::VariableByte>()); +} + +IntegersEncoder::IntegersEncoder(int nbBits){ + if(nbBits == 2){ + m_p->codec = std::shared_ptr( + new FastPForLib::SIMDBinaryPacking()); + }else if(nbBits == 4){ + m_p->codec = std::shared_ptr( + new FastPForLib::CompositeCodec, FastPForLib::VariableByte>()); + }else if(nbBits == 8){ + m_p->codec = std::shared_ptr( + new FastPForLib::CompositeCodec, 
FastPForLib::VariableByte>()); + } +} + +IntegersEncoder::~IntegersEncoder(){} + +size_t IntegersEncoder::encode(uint32_t *inputData, size_t sizeInput, uint32_t *compressedData, size_t outputFullSize){ + + try{ + m_p->codec->encodeArray( + inputData, sizeInput, + compressedData, outputFullSize + ); + }catch(std::exception e){ + std::cerr << "Error encode array: " << e.what() << "\n"; + return 0; + }catch(...){ + std::cerr << "Error encode array: unknow error \n"; + return 0; + } + + return outputFullSize; +} + +size_t IntegersEncoder::encode(uint64_t *inputData, size_t sizeInput, uint32_t *compressedData, size_t outputFullSize){ + try{ + m_p->codec->encodeArray( + inputData, sizeInput, + compressedData, outputFullSize + ); + }catch(std::exception e){ + std::cerr << "Error encode array: " << e.what() << "\n"; + return 0; + }catch(...){ + std::cerr << "Error encode array: unknow error \n"; + return 0; + } + + return outputFullSize; +} + + + +size_t IntegersEncoder::decode(uint32_t *codedData, size_t sizeCoded, uint32_t *decodedData, size_t sizeOriginalData){ + + try{ + m_p->codec->decodeArray( + codedData, sizeCoded, + decodedData, sizeOriginalData + ); + }catch(std::exception e){ + std::cerr << "Error decode array: " << e.what() << "\n"; + return 0; + }catch(...){ + std::cerr << "Error decode array: unknow error \n"; + return 0; + } + + return sizeOriginalData; +} + +size_t IntegersEncoder::decode(uint32_t *codedData, size_t sizeCoded, uint64_t *decodedData, size_t sizeOriginalData){ + + try{ + m_p->codec->decodeArray( + codedData, sizeCoded, + decodedData, sizeOriginalData + ); + }catch(std::exception e){ + std::cerr << "Error decode array: " << e.what() << "\n"; + return 0; + }catch(...){ + std::cerr << "Error decode array: unknow error \n"; + return 0; + } + + return sizeOriginalData; +} + + +#pragma warning( default : 4100 ) +#pragma warning( default : 4267 ) + + diff --git a/cpp-projects/base/data/integers_encoder.hpp 
b/cpp-projects/base/data/integers_encoder.hpp new file mode 100644 index 0000000..a6546e9 --- /dev/null +++ b/cpp-projects/base/data/integers_encoder.hpp @@ -0,0 +1,54 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// std +#include + +namespace tool::data { + + class IntegersEncoder{ + + public: + + IntegersEncoder(); + IntegersEncoder(int nbBits); + ~IntegersEncoder(); + + size_t encode(std::uint32_t *inputData, size_t sizeInput, std::uint32_t *compressedData, size_t outputFullSize); + size_t decode(std::uint32_t *codedData, size_t sizeCoded, std::uint32_t *decodedData, size_t sizeOriginalData); + + size_t encode(std::uint64_t *inputData, size_t sizeInput, std::uint32_t *compressedData, size_t outputFullSize); + size_t decode(std::uint32_t *codedData, size_t sizeCoded, std::uint64_t *decodedData, size_t sizeOriginalData); + + private: + struct Impl; + std::unique_ptr m_p; + }; +} + diff --git a/cpp-projects/base/data/simdcomp/avx512bitpacking.c b/cpp-projects/base/data/simdcomp/avx512bitpacking.c new file mode 100644 index 0000000..ac93ae6 --- /dev/null +++ b/cpp-projects/base/data/simdcomp/avx512bitpacking.c @@ -0,0 +1,9932 @@ +#include "avx512bitpacking.h" +#ifdef __AVX512F__ + +static uint32_t maxbitas32int(const __m256i accumulator) { + const __m256i _tmp1 = + _mm256_or_si256(_mm256_srli_si256(accumulator, 8), accumulator); + const __m256i _tmp2 = _mm256_or_si256(_mm256_srli_si256(_tmp1, 4), _tmp1); + uint32_t ans1 = _mm256_extract_epi32(_tmp2, 0); + uint32_t ans2 = _mm256_extract_epi32(_tmp2, 4); + uint32_t ans = ans1 > ans2 ? ans1 : ans2; + return ans; +} + +static uint32_t avx512maxbitas32int(const __m512i accumulator) { + uint32_t ans1 = maxbitas32int(_mm512_castsi512_si256(accumulator)); + uint32_t ans2 = maxbitas32int(_mm512_extracti64x4_epi64(accumulator, 1)); + uint32_t ans = ans1 > ans2 ? 
ans1 : ans2; + return bits(ans); +} + +uint32_t avx512maxbits(const uint32_t *begin) { + const __m512i *pin = (const __m512i *)(begin); + __m512i accumulator = _mm512_loadu_si512(pin); + uint32_t k = 1; + for (; 16 * k < AVX512BlockSize; ++k) { + __m512i newvec = _mm512_loadu_si512(pin + k); + accumulator = _mm512_or_si512(accumulator, newvec); + } + return avx512maxbitas32int(accumulator); +} + +/** avx512packing **/ + +typedef void (*avx512packblockfnc)(const uint32_t *pin, __m512i *compressed); +typedef void (*avx512unpackblockfnc)(const __m512i *compressed, uint32_t *pout); + +static void avx512packblock0(const uint32_t *pin, __m512i *compressed) { + (void)compressed; + (void)pin; /* we consumed 512 32-bit integers */ +} + +/* we are going to pack 512 1-bit values, touching 1 512-bit words, using 32 + * bytes */ +static void avx512packblock1(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 1 512-bit word */ + __m512i w0; + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 1), 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 2), 2)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 3)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 4), 4)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 5)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 6), 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 7)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 8), 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 9)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 10), 10)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 11)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 12), 12)); + w0 = 
_mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 13)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 14), 14)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 15)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 16), 16)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 17)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 18), 18)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 19)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 20), 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 21)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 22), 22)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 23)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 24), 24)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 25)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 26), 26)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 27)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 28), 28)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 29)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 30), 30)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 31)); + _mm512_storeu_si512(compressed + 0, w0); +} + +/* we are going to pack 512 2-bit values, touching 2 512-bit words, using 64 + * bytes */ +static void avx512packblock2(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 2 512-bit words */ + __m512i w0, w1; + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 1), 2)); + w0 = _mm512_or_si512(w0, 
_mm512_slli_epi32(_mm512_loadu_si512(in + 2), 4)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 4), 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 10)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 6), 12)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 14)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 8), 16)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 18)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 10), 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 22)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 12), 24)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 26)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 14), 28)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 30)); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_loadu_si512(in + 16); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 18), 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 6)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 20), 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 10)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 22), 12)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 14)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 24), 16)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 18)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 26), 20)); + w1 = _mm512_or_si512(w1, 
_mm512_slli_epi32(_mm512_loadu_si512(in + 27), 22)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 28), 24)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 26)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 30), 28)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 30)); + _mm512_storeu_si512(compressed + 1, w1); +} + +/* we are going to pack 512 3-bit values, touching 3 512-bit words, using 96 + * bytes */ +static void avx512packblock3(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 3 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 1), 3)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 2), 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 9)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 4), 12)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 15)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 6), 18)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 21)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 8), 24)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 27)); + tmp = _mm512_loadu_si512(in + 10); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 1)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 12), 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 7)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 14), 10)); + w1 = 
_mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 13)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 16), 16)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 19)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 18), 22)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 25)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 20), 28)); + tmp = _mm512_loadu_si512(in + 21); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 22), 2)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 5)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 24), 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 11)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 26), 14)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 17)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 28), 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 23)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 30), 26)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 29)); + _mm512_storeu_si512(compressed + 2, w0); +} + +/* we are going to pack 512 4-bit values, touching 4 512-bit words, using 128 + * bytes */ +static void avx512packblock4(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 4 512-bit words */ + __m512i w0, w1; + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 1), 4)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 2), 8)); + w0 = _mm512_or_si512(w0, 
_mm512_slli_epi32(_mm512_loadu_si512(in + 3), 12)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 4), 16)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 6), 24)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 28)); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_loadu_si512(in + 8); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 10), 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 12)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 12), 16)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 20)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 14), 24)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 28)); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_loadu_si512(in + 16); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 4)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 18), 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 12)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 20), 16)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 22), 24)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 28)); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_loadu_si512(in + 24); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 26), 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 12)); + w1 = _mm512_or_si512(w1, 
_mm512_slli_epi32(_mm512_loadu_si512(in + 28), 16)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 20)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 30), 24)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 28)); + _mm512_storeu_si512(compressed + 3, w1); +} + +/* we are going to pack 512 5-bit values, touching 5 512-bit words, using 320 + * bytes (64 per word); inputs are OR-ed in unmasked, so each must fit in 5 bits */ +static void avx512packblock5(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 5 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 1), 5)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 2), 10)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 15)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 4), 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 25)); + tmp = _mm512_loadu_si512(in + 6); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 3)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 8), 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 13)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 10), 18)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 23)); + tmp = _mm512_loadu_si512(in + 12); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 14), 6)); + w0 
= _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 11)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 16), 16)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 21)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 18), 26)); + tmp = _mm512_loadu_si512(in + 19); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 31)); + w1 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 20), 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 9)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 22), 14)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 19)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 24), 24)); + tmp = _mm512_loadu_si512(in + 25); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 26), 2)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 7)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 28), 12)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 17)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 30), 22)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 27)); + _mm512_storeu_si512(compressed + 4, w0); +} + +/* we are going to pack 512 6-bit values, touching 6 512-bit words, using 384 + * bytes (64 per word); inputs are OR-ed in unmasked, so each must fit in 6 bits */ +static void avx512packblock6(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 6 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, 
_mm512_slli_epi32(_mm512_loadu_si512(in + 1), 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 2), 12)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 18)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 4), 24)); + tmp = _mm512_loadu_si512(in + 5); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 6), 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 10)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 8), 16)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 22)); + tmp = _mm512_loadu_si512(in + 10); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 2)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 12), 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 14)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 14), 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 26)); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_loadu_si512(in + 16); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 6)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 18), 12)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 18)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 20), 24)); + tmp = _mm512_loadu_si512(in + 21); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 22), 4)); + w0 = 
_mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 10)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 24), 16)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 22)); + tmp = _mm512_loadu_si512(in + 26); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 28), 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 14)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 30), 20)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 26)); + _mm512_storeu_si512(compressed + 5, w1); +} + +/* we are going to pack 512 7-bit values, touching 7 512-bit words, using 448 + * bytes (64 per word); inputs are OR-ed in unmasked, so each must fit in 7 bits */ +static void avx512packblock7(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 7 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 1), 7)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 2), 14)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 21)); + tmp = _mm512_loadu_si512(in + 4); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 3)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 6), 10)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 17)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 8), 24)); + tmp = _mm512_loadu_si512(in + 9); + w1 = _mm512_or_si512(w1, 
_mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 10), 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 13)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 12), 20)); + tmp = _mm512_loadu_si512(in + 13); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 14), 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 9)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 16), 16)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 23)); + tmp = _mm512_loadu_si512(in + 18); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 5)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 20), 12)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 19)); + tmp = _mm512_loadu_si512(in + 22); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 1)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 24), 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 15)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 26), 22)); + tmp = _mm512_loadu_si512(in + 27); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 28), 4)); + w0 = 
_mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 11)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 30), 18)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 25)); + _mm512_storeu_si512(compressed + 6, w0); +} + +/* we are going to pack 512 8-bit values, touching 8 512-bit words, using 512 + * bytes (64 per word); inputs are OR-ed in unmasked, so each must fit in 8 bits */ +static void avx512packblock8(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 8 512-bit words */ + __m512i w0, w1; + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 1), 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 2), 16)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 24)); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_loadu_si512(in + 4); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 6), 16)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 24)); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_loadu_si512(in + 8); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 10), 16)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 24)); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_loadu_si512(in + 12); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 14), 16)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 24)); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_loadu_si512(in + 16); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in 
+ 18), 16)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 24)); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_loadu_si512(in + 20); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 22), 16)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 24)); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_loadu_si512(in + 24); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 26), 16)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 24)); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_loadu_si512(in + 28); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 30), 16)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 24)); + _mm512_storeu_si512(compressed + 7, w1); +} + +/* we are going to pack 512 9-bit values, touching 9 512-bit words, using 576 + * bytes (64 per word); inputs are OR-ed in unmasked, so each must fit in 9 bits */ +static void avx512packblock9(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 9 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 1), 9)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 2), 18)); + tmp = _mm512_loadu_si512(in + 3); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 4), 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 13)); + w1 = _mm512_or_si512(w1, 
_mm512_slli_epi32(_mm512_loadu_si512(in + 6), 22)); + tmp = _mm512_loadu_si512(in + 7); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 8), 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 17)); + tmp = _mm512_loadu_si512(in + 10); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 3)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 12), 12)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 21)); + tmp = _mm512_loadu_si512(in + 14); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 7)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 16), 16)); + tmp = _mm512_loadu_si512(in + 17); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 25)); + w1 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 18), 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 11)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 20), 20)); + tmp = _mm512_loadu_si512(in + 21); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 22), 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 15)); + tmp = _mm512_loadu_si512(in + 24); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + 
_mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 1)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 26), 10)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 19)); + tmp = _mm512_loadu_si512(in + 28); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 5)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 30), 14)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 23)); + _mm512_storeu_si512(compressed + 8, w0); +} + +/* we are going to pack 512 10-bit values, touching 10 512-bit words, using 640 + * bytes (64 per word); inputs are OR-ed in unmasked, so each must fit in 10 bits */ +static void avx512packblock10(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 10 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 1), 10)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 2), 20)); + tmp = _mm512_loadu_si512(in + 3); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 4), 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 18)); + tmp = _mm512_loadu_si512(in + 6); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 8), 16)); + tmp = _mm512_loadu_si512(in + 9); + w0 = _mm512_or_si512(w0, 
_mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 10), 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 14)); + tmp = _mm512_loadu_si512(in + 12); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 2)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 14), 12)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 22)); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_loadu_si512(in + 16); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 10)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 18), 20)); + tmp = _mm512_loadu_si512(in + 19); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 20), 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 18)); + tmp = _mm512_loadu_si512(in + 22); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 6)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 24), 16)); + tmp = _mm512_loadu_si512(in + 25); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 26), 4)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 14)); + tmp = _mm512_loadu_si512(in + 28); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = 
_mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 30), 12)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 22)); + _mm512_storeu_si512(compressed + 9, w1); +} + +/* we are going to pack 512 11-bit values, touching 11 512-bit words, using 704 + * bytes (64 per word); inputs are OR-ed in unmasked, so each must fit in 11 bits */ +static void avx512packblock11(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 11 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 1), 11)); + tmp = _mm512_loadu_si512(in + 2); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 1)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 4), 12)); + tmp = _mm512_loadu_si512(in + 5); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 6), 2)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 13)); + tmp = _mm512_loadu_si512(in + 8); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 3)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 10), 14)); + tmp = _mm512_loadu_si512(in + 11); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 25)); + w0 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512(w0, 
_mm512_slli_epi32(_mm512_loadu_si512(in + 12), 4)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 15)); + tmp = _mm512_loadu_si512(in + 14); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 5)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 16), 16)); + tmp = _mm512_loadu_si512(in + 17); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 27)); + w0 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 18), 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 17)); + tmp = _mm512_loadu_si512(in + 20); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 7)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 22), 18)); + tmp = _mm512_loadu_si512(in + 23); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 24), 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 19)); + tmp = _mm512_loadu_si512(in + 26); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 9)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 28), 20)); + tmp = _mm512_loadu_si512(in + 29); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512(w0, 
_mm512_slli_epi32(_mm512_loadu_si512(in + 30), 10)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 21)); + _mm512_storeu_si512(compressed + 10, w0); +} + +/* we are going to pack 512 12-bit values, touching 12 512-bit words, using 768 + * bytes (64 per word); inputs are OR-ed in unmasked, so each must fit in 12 bits */ +static void avx512packblock12(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 12 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 1), 12)); + tmp = _mm512_loadu_si512(in + 2); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 4), 16)); + tmp = _mm512_loadu_si512(in + 5); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 6), 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 20)); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_loadu_si512(in + 8); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 12)); + tmp = _mm512_loadu_si512(in + 10); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 4)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 12), 16)); + tmp = _mm512_loadu_si512(in + 13); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512(w1, 
_mm512_slli_epi32(_mm512_loadu_si512(in + 14), 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 20)); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_loadu_si512(in + 16); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 12)); + tmp = _mm512_loadu_si512(in + 18); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 20), 16)); + tmp = _mm512_loadu_si512(in + 21); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 22), 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 20)); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_loadu_si512(in + 24); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 12)); + tmp = _mm512_loadu_si512(in + 26); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 4)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 28), 16)); + tmp = _mm512_loadu_si512(in + 29); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 30), 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 20)); + _mm512_storeu_si512(compressed + 11, w1); +} + +/* we are going to pack 512 13-bit values, touching 13 512-bit words, using 832 + * bytes (64 per word); inputs are OR-ed in unmasked, so each must fit in 13 bits */ +static void avx512packblock13(const uint32_t *pin, __m512i *compressed) { + const 
__m512i *in = (const __m512i *)pin; + /* we are going to touch 13 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 1), 13)); + tmp = _mm512_loadu_si512(in + 2); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 7)); + tmp = _mm512_loadu_si512(in + 4); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 6), 14)); + tmp = _mm512_loadu_si512(in + 7); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 8), 8)); + tmp = _mm512_loadu_si512(in + 9); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 21)); + w0 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 10), 2)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 15)); + tmp = _mm512_loadu_si512(in + 12); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 9)); + tmp = _mm512_loadu_si512(in + 14); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 3)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 16), 
16)); + tmp = _mm512_loadu_si512(in + 17); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 29)); + w1 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 18), 10)); + tmp = _mm512_loadu_si512(in + 19); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 20), 4)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 17)); + tmp = _mm512_loadu_si512(in + 22); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 11)); + tmp = _mm512_loadu_si512(in + 24); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 5)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 26), 18)); + tmp = _mm512_loadu_si512(in + 27); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 31)); + w1 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 28), 12)); + tmp = _mm512_loadu_si512(in + 29); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 25)); + w0 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 30), 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 19)); + _mm512_storeu_si512(compressed + 12, w0); +} + +/* we are going to pack 512 14-bit values, touching 14 512-bit words, using 896 + * bytes (64 per word); inputs are OR-ed in unmasked, so each must fit in 14 bits */ +static void avx512packblock14(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i 
*)pin; + /* we are going to touch 14 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 1), 14)); + tmp = _mm512_loadu_si512(in + 2); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 10)); + tmp = _mm512_loadu_si512(in + 4); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 6)); + tmp = _mm512_loadu_si512(in + 6); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 8), 16)); + tmp = _mm512_loadu_si512(in + 9); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 10), 12)); + tmp = _mm512_loadu_si512(in + 11); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 12), 8)); + tmp = _mm512_loadu_si512(in + 13); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 14), 4)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 18)); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_loadu_si512(in + 16); + w1 = 
_mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 14)); + tmp = _mm512_loadu_si512(in + 18); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 10)); + tmp = _mm512_loadu_si512(in + 20); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 6)); + tmp = _mm512_loadu_si512(in + 22); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 2)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 24), 16)); + tmp = _mm512_loadu_si512(in + 25); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 26), 12)); + tmp = _mm512_loadu_si512(in + 27); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 28), 8)); + tmp = _mm512_loadu_si512(in + 29); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 30), 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 18)); + _mm512_storeu_si512(compressed + 13, w1); +} + +/* we are going to pack 512 15-bit values, touching 15 512-bit words, using 960 + * bytes */ +static void avx512packblock15(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + 
/* we are going to touch 15 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 1), 15)); + tmp = _mm512_loadu_si512(in + 2); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 13)); + tmp = _mm512_loadu_si512(in + 4); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 11)); + tmp = _mm512_loadu_si512(in + 6); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 9)); + tmp = _mm512_loadu_si512(in + 8); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 7)); + tmp = _mm512_loadu_si512(in + 10); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 5)); + tmp = _mm512_loadu_si512(in + 12); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 3)); + tmp = _mm512_loadu_si512(in + 14); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 18)); + w1 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 1)); + w1 = 
_mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 16), 16)); + tmp = _mm512_loadu_si512(in + 17); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 18), 14)); + tmp = _mm512_loadu_si512(in + 19); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 29)); + w1 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 20), 12)); + tmp = _mm512_loadu_si512(in + 21); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 27)); + w0 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 22), 10)); + tmp = _mm512_loadu_si512(in + 23); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 25)); + w1 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 24), 8)); + tmp = _mm512_loadu_si512(in + 25); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 26), 6)); + tmp = _mm512_loadu_si512(in + 27); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 21)); + w1 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 28), 4)); + tmp = _mm512_loadu_si512(in + 29); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 19)); + w0 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 30), 2)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 17)); + _mm512_storeu_si512(compressed + 14, w0); +} + +/* we are going to pack 512 16-bit values, 
touching 16 512-bit words, using 1024 + * bytes */ +static void avx512packblock16(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 16 512-bit words */ + __m512i w0, w1; + w0 = _mm512_loadu_si512(in + 0); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 1), 16)); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_loadu_si512(in + 2); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 16)); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_loadu_si512(in + 4); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 16)); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_loadu_si512(in + 6); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 16)); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_loadu_si512(in + 8); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 16)); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_loadu_si512(in + 10); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 16)); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_loadu_si512(in + 12); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 16)); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_loadu_si512(in + 14); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 16)); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_loadu_si512(in + 16); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 16)); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_loadu_si512(in + 18); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 16)); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_loadu_si512(in + 20); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 16)); + _mm512_storeu_si512(compressed + 10, w0); + w1 = 
_mm512_loadu_si512(in + 22); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 16)); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_loadu_si512(in + 24); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 16)); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_loadu_si512(in + 26); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 16)); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_loadu_si512(in + 28); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 16)); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_loadu_si512(in + 30); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 16)); + _mm512_storeu_si512(compressed + 15, w1); +} + +/* we are going to pack 512 17-bit values, touching 17 512-bit words, using 1088 + * bytes */ +static void avx512packblock17(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 17 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + tmp = _mm512_loadu_si512(in + 1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 17)); + w1 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 2), 2)); + tmp = _mm512_loadu_si512(in + 3); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 19)); + w0 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 4), 4)); + tmp = _mm512_loadu_si512(in + 5); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 21)); + w1 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 6), 6)); + tmp = _mm512_loadu_si512(in + 7); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 
23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 8), 8)); + tmp = _mm512_loadu_si512(in + 9); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 25)); + w1 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 10), 10)); + tmp = _mm512_loadu_si512(in + 11); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 27)); + w0 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 12), 12)); + tmp = _mm512_loadu_si512(in + 13); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 29)); + w1 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 14), 14)); + tmp = _mm512_loadu_si512(in + 15); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_loadu_si512(in + 16); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 1)); + tmp = _mm512_loadu_si512(in + 18); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 3)); + tmp = _mm512_loadu_si512(in + 20); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 5)); + tmp = _mm512_loadu_si512(in + 22); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 11, 
w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 7)); + tmp = _mm512_loadu_si512(in + 24); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 9)); + tmp = _mm512_loadu_si512(in + 26); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 11)); + tmp = _mm512_loadu_si512(in + 28); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 13)); + tmp = _mm512_loadu_si512(in + 30); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 15, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 15)); + _mm512_storeu_si512(compressed + 16, w0); +} + +/* we are going to pack 512 18-bit values, touching 18 512-bit words, using 1152 + * bytes */ +static void avx512packblock18(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 18 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + tmp = _mm512_loadu_si512(in + 1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 18)); + w1 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 2), 4)); + tmp = _mm512_loadu_si512(in + 3); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 4), 
8)); + tmp = _mm512_loadu_si512(in + 5); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 6), 12)); + tmp = _mm512_loadu_si512(in + 7); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_loadu_si512(in + 8); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 2)); + tmp = _mm512_loadu_si512(in + 10); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 6)); + tmp = _mm512_loadu_si512(in + 12); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 10)); + tmp = _mm512_loadu_si512(in + 14); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 14)); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_loadu_si512(in + 16); + tmp = _mm512_loadu_si512(in + 17); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 18), 4)); + tmp = _mm512_loadu_si512(in + 19); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 20), 8)); 
+ tmp = _mm512_loadu_si512(in + 21); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 22), 12)); + tmp = _mm512_loadu_si512(in + 23); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_loadu_si512(in + 24); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 2)); + tmp = _mm512_loadu_si512(in + 26); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 6)); + tmp = _mm512_loadu_si512(in + 28); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 15, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 10)); + tmp = _mm512_loadu_si512(in + 30); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 14)); + _mm512_storeu_si512(compressed + 17, w1); +} + +/* we are going to pack 512 19-bit values, touching 19 512-bit words, using 1216 + * bytes */ +static void avx512packblock19(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 19 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + tmp = _mm512_loadu_si512(in + 1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 19)); + w1 = _mm512_srli_epi32(tmp, 13); + 
_mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 2), 6)); + tmp = _mm512_loadu_si512(in + 3); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 25)); + w0 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 4), 12)); + tmp = _mm512_loadu_si512(in + 5); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 31)); + w1 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_loadu_si512(in + 6); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 5)); + tmp = _mm512_loadu_si512(in + 8); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 11)); + tmp = _mm512_loadu_si512(in + 10); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_loadu_si512(in + 11); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 17)); + w1 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 12), 4)); + tmp = _mm512_loadu_si512(in + 13); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 14), 10)); + tmp = _mm512_loadu_si512(in + 15); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 29)); + w1 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_loadu_si512(in + 16); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); 
+ _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 3)); + tmp = _mm512_loadu_si512(in + 18); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 9)); + tmp = _mm512_loadu_si512(in + 20); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_loadu_si512(in + 21); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 15)); + w1 = _mm512_srli_epi32(tmp, 17); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 22), 2)); + tmp = _mm512_loadu_si512(in + 23); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 21)); + w0 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 24), 8)); + tmp = _mm512_loadu_si512(in + 25); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 14, w0); + tmp = _mm512_loadu_si512(in + 26); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 15, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 1)); + tmp = _mm512_loadu_si512(in + 28); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 7)); + tmp = _mm512_loadu_si512(in + 30); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 17, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 13)); + 
_mm512_storeu_si512(compressed + 18, w0); +} + +/* we are going to pack 512 20-bit values, touching 20 512-bit words, using 1280 + * bytes */ +static void avx512packblock20(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 20 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + tmp = _mm512_loadu_si512(in + 1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 2), 8)); + tmp = _mm512_loadu_si512(in + 3); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_loadu_si512(in + 4); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 4)); + tmp = _mm512_loadu_si512(in + 6); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 12)); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_loadu_si512(in + 8); + tmp = _mm512_loadu_si512(in + 9); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 10), 8)); + tmp = _mm512_loadu_si512(in + 11); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_loadu_si512(in + 12); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 7, w1); 
+ w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 4)); + tmp = _mm512_loadu_si512(in + 14); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 12)); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_loadu_si512(in + 16); + tmp = _mm512_loadu_si512(in + 17); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 18), 8)); + tmp = _mm512_loadu_si512(in + 19); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_loadu_si512(in + 20); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 4)); + tmp = _mm512_loadu_si512(in + 22); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 12)); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_loadu_si512(in + 24); + tmp = _mm512_loadu_si512(in + 25); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 15, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 26), 8)); + tmp = _mm512_loadu_si512(in + 27); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 16, w0); + tmp = _mm512_loadu_si512(in + 28); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 
17, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 4)); + tmp = _mm512_loadu_si512(in + 30); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 18, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 12)); + _mm512_storeu_si512(compressed + 19, w1); +} + +/* we are going to pack 512 21-bit values, touching 21 512-bit words, using 1344 + * bytes */ +static void avx512packblock21(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 21 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + tmp = _mm512_loadu_si512(in + 1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 21)); + w1 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 2), 10)); + tmp = _mm512_loadu_si512(in + 3); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_loadu_si512(in + 4); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 9)); + tmp = _mm512_loadu_si512(in + 6); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_loadu_si512(in + 7); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 19)); + w1 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 8), 8)); + tmp = _mm512_loadu_si512(in + 9); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + 
_mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_loadu_si512(in + 10); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 18)); + w1 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 7)); + tmp = _mm512_loadu_si512(in + 12); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_loadu_si512(in + 13); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 17)); + w1 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 14), 6)); + tmp = _mm512_loadu_si512(in + 15); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 27)); + w0 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 9, w1); + tmp = _mm512_loadu_si512(in + 16); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 5)); + tmp = _mm512_loadu_si512(in + 18); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_loadu_si512(in + 19); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 15)); + w1 = _mm512_srli_epi32(tmp, 17); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 20), 4)); + tmp = _mm512_loadu_si512(in + 21); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 25)); + w0 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 13, w1); + tmp = _mm512_loadu_si512(in + 22); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 14)); + w1 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 3)); + tmp = 
_mm512_loadu_si512(in + 24); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_loadu_si512(in + 25); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 13)); + w1 = _mm512_srli_epi32(tmp, 19); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 26), 2)); + tmp = _mm512_loadu_si512(in + 27); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_loadu_si512(in + 28); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 12)); + w1 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 18, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 29), 1)); + tmp = _mm512_loadu_si512(in + 30); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 19, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 11)); + _mm512_storeu_si512(compressed + 20, w0); +} + +/* we are going to pack 512 22-bit values, touching 22 512-bit words, using 1408 + * bytes */ +static void avx512packblock22(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 22 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + tmp = _mm512_loadu_si512(in + 1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_loadu_si512(in + 2); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 2)); + tmp = _mm512_loadu_si512(in + 4); + w0 = _mm512_or_si512(w0, 
_mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_loadu_si512(in + 5); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 6), 4)); + tmp = _mm512_loadu_si512(in + 7); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 4, w0); + tmp = _mm512_loadu_si512(in + 8); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 6)); + tmp = _mm512_loadu_si512(in + 10); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_loadu_si512(in + 11); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 12), 8)); + tmp = _mm512_loadu_si512(in + 13); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_loadu_si512(in + 14); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 10)); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_loadu_si512(in + 16); + tmp = _mm512_loadu_si512(in + 17); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_loadu_si512(in + 18); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 12)); + w1 = _mm512_srli_epi32(tmp, 20); + 
_mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 2)); + tmp = _mm512_loadu_si512(in + 20); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 13, w1); + tmp = _mm512_loadu_si512(in + 21); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 14)); + w1 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 22), 4)); + tmp = _mm512_loadu_si512(in + 23); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_loadu_si512(in + 24); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 6)); + tmp = _mm512_loadu_si512(in + 26); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_loadu_si512(in + 27); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 18)); + w1 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 18, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 28), 8)); + tmp = _mm512_loadu_si512(in + 29); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 19, w1); + tmp = _mm512_loadu_si512(in + 30); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 20, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 10)); + _mm512_storeu_si512(compressed + 21, w1); +} + +/* we are going to pack 512 23-bit values, touching 23 512-bit words, using 736 + * bytes */ +static void avx512packblock23(const uint32_t 
*pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 23 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + tmp = _mm512_loadu_si512(in + 1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 23)); + w1 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_loadu_si512(in + 2); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 5)); + tmp = _mm512_loadu_si512(in + 4); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_loadu_si512(in + 5); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 19)); + w0 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_loadu_si512(in + 6); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 10)); + w1 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 7), 1)); + tmp = _mm512_loadu_si512(in + 8); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_loadu_si512(in + 9); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 15)); + w1 = _mm512_srli_epi32(tmp, 17); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 10), 6)); + tmp = _mm512_loadu_si512(in + 11); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_loadu_si512(in + 12); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 8, w0); + 
tmp = _mm512_loadu_si512(in + 13); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 11)); + w0 = _mm512_srli_epi32(tmp, 21); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 14), 2)); + tmp = _mm512_loadu_si512(in + 15); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 25)); + w1 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 10, w0); + tmp = _mm512_loadu_si512(in + 16); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 17), 7)); + tmp = _mm512_loadu_si512(in + 18); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_loadu_si512(in + 19); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 21)); + w0 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 13, w1); + tmp = _mm512_loadu_si512(in + 20); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 12)); + w1 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 3)); + tmp = _mm512_loadu_si512(in + 22); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_loadu_si512(in + 23); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 17)); + w1 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 24), 8)); + tmp = _mm512_loadu_si512(in + 25); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_loadu_si512(in + 26); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + 
_mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_loadu_si512(in + 27); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 13)); + w0 = _mm512_srli_epi32(tmp, 19); + _mm512_storeu_si512(compressed + 19, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 28), 4)); + tmp = _mm512_loadu_si512(in + 29); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 20, w0); + tmp = _mm512_loadu_si512(in + 30); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 21, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 9)); + _mm512_storeu_si512(compressed + 22, w0); +} + +/* we are going to pack 512 24-bit values, touching 24 512-bit words, using 768 + * bytes */ +static void avx512packblock24(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 24 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + tmp = _mm512_loadu_si512(in + 1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_loadu_si512(in + 2); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 3), 8)); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_loadu_si512(in + 4); + tmp = _mm512_loadu_si512(in + 5); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_loadu_si512(in + 6); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512(w1, 
_mm512_slli_epi32(_mm512_loadu_si512(in + 7), 8)); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_loadu_si512(in + 8); + tmp = _mm512_loadu_si512(in + 9); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_loadu_si512(in + 10); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 11), 8)); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_loadu_si512(in + 12); + tmp = _mm512_loadu_si512(in + 13); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 9, w1); + tmp = _mm512_loadu_si512(in + 14); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 8)); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_loadu_si512(in + 16); + tmp = _mm512_loadu_si512(in + 17); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_loadu_si512(in + 18); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 8)); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_loadu_si512(in + 20); + tmp = _mm512_loadu_si512(in + 21); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_loadu_si512(in + 22); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512(w1, 
_mm512_slli_epi32(_mm512_loadu_si512(in + 23), 8)); + _mm512_storeu_si512(compressed + 17, w1); + w0 = _mm512_loadu_si512(in + 24); + tmp = _mm512_loadu_si512(in + 25); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_loadu_si512(in + 26); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 19, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 8)); + _mm512_storeu_si512(compressed + 20, w0); + w1 = _mm512_loadu_si512(in + 28); + tmp = _mm512_loadu_si512(in + 29); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_loadu_si512(in + 30); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 22, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 8)); + _mm512_storeu_si512(compressed + 23, w1); +} + +/* we are going to pack 512 25-bit values, touching 25 512-bit words, using 800 + * bytes */ +static void avx512packblock25(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 25 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + tmp = _mm512_loadu_si512(in + 1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 25)); + w1 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_loadu_si512(in + 2); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_loadu_si512(in + 3); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 11)); + w1 = _mm512_srli_epi32(tmp, 21); + _mm512_storeu_si512(compressed + 2, w0); + w1 = 
_mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 4), 4)); + tmp = _mm512_loadu_si512(in + 5); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_loadu_si512(in + 6); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 4, w0); + tmp = _mm512_loadu_si512(in + 7); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 15)); + w0 = _mm512_srli_epi32(tmp, 17); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_loadu_si512(in + 8); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 8)); + w1 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 9), 1)); + tmp = _mm512_loadu_si512(in + 10); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_loadu_si512(in + 11); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 19)); + w1 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_loadu_si512(in + 12); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 13), 5)); + tmp = _mm512_loadu_si512(in + 14); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 10, w0); + tmp = _mm512_loadu_si512(in + 15); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_loadu_si512(in + 16); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_loadu_si512(in + 17); + w1 = _mm512_or_si512(w1, 
_mm512_slli_epi32(tmp, 9)); + w0 = _mm512_srli_epi32(tmp, 23); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 18), 2)); + tmp = _mm512_loadu_si512(in + 19); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 14, w0); + tmp = _mm512_loadu_si512(in + 20); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_loadu_si512(in + 21); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 13)); + w1 = _mm512_srli_epi32(tmp, 19); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 22), 6)); + tmp = _mm512_loadu_si512(in + 23); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_loadu_si512(in + 24); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_loadu_si512(in + 25); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 17)); + w0 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 19, w1); + tmp = _mm512_loadu_si512(in + 26); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 10)); + w1 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 20, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 27), 3)); + tmp = _mm512_loadu_si512(in + 28); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_loadu_si512(in + 29); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 21)); + w1 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 22, w0); + tmp = _mm512_loadu_si512(in + 30); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = 
_mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 23, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 7)); + _mm512_storeu_si512(compressed + 24, w0); +} + +/* we are going to pack 512 26-bit values, touching 26 512-bit words, using 832 + * bytes */ +static void avx512packblock26(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 26 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + tmp = _mm512_loadu_si512(in + 1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_loadu_si512(in + 2); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_loadu_si512(in + 3); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 14)); + w1 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_loadu_si512(in + 4); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 8)); + w0 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 5), 2)); + tmp = _mm512_loadu_si512(in + 6); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 4, w0); + tmp = _mm512_loadu_si512(in + 7); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_loadu_si512(in + 8); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_loadu_si512(in + 9); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 10)); + w0 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 
7, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 10), 4)); + tmp = _mm512_loadu_si512(in + 11); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_loadu_si512(in + 12); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 9, w1); + tmp = _mm512_loadu_si512(in + 13); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 18)); + w1 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 10, w0); + tmp = _mm512_loadu_si512(in + 14); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 6)); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_loadu_si512(in + 16); + tmp = _mm512_loadu_si512(in + 17); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 13, w1); + tmp = _mm512_loadu_si512(in + 18); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 14, w0); + tmp = _mm512_loadu_si512(in + 19); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_loadu_si512(in + 20); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 8)); + w1 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 2)); + tmp = _mm512_loadu_si512(in + 22); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_loadu_si512(in + 23); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + 
_mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_loadu_si512(in + 24); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 19, w1); + tmp = _mm512_loadu_si512(in + 25); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 10)); + w1 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 20, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 26), 4)); + tmp = _mm512_loadu_si512(in + 27); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_loadu_si512(in + 28); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 22, w0); + tmp = _mm512_loadu_si512(in + 29); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 23, w1); + tmp = _mm512_loadu_si512(in + 30); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 12)); + w1 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 24, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 6)); + _mm512_storeu_si512(compressed + 25, w1); +} + +/* we are going to pack 512 27-bit values, touching 27 512-bit words, using 864 + * bytes */ +static void avx512packblock27(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 27 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + tmp = _mm512_loadu_si512(in + 1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_loadu_si512(in + 2); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 1, w1); + tmp = 
_mm512_loadu_si512(in + 3); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 17)); + w1 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_loadu_si512(in + 4); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_loadu_si512(in + 5); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 7)); + w1 = _mm512_srli_epi32(tmp, 25); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 6), 2)); + tmp = _mm512_loadu_si512(in + 7); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_loadu_si512(in + 8); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_loadu_si512(in + 9); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 19)); + w0 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_loadu_si512(in + 10); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 14)); + w1 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_loadu_si512(in + 11); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 9)); + w0 = _mm512_srli_epi32(tmp, 23); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 12), 4)); + tmp = _mm512_loadu_si512(in + 13); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 31)); + w1 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 10, w0); + tmp = _mm512_loadu_si512(in + 14); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_loadu_si512(in + 15); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 21)); + w1 = _mm512_srli_epi32(tmp, 11); + 
_mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_loadu_si512(in + 16); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 13, w1); + tmp = _mm512_loadu_si512(in + 17); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 11)); + w1 = _mm512_srli_epi32(tmp, 21); + _mm512_storeu_si512(compressed + 14, w0); + tmp = _mm512_loadu_si512(in + 18); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 6)); + w0 = _mm512_srli_epi32(tmp, 26); + _mm512_storeu_si512(compressed + 15, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 19), 1)); + tmp = _mm512_loadu_si512(in + 20); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 16, w0); + tmp = _mm512_loadu_si512(in + 21); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_loadu_si512(in + 22); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 18)); + w1 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_loadu_si512(in + 23); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 13)); + w0 = _mm512_srli_epi32(tmp, 19); + _mm512_storeu_si512(compressed + 19, w1); + tmp = _mm512_loadu_si512(in + 24); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 8)); + w1 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 20, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 25), 3)); + tmp = _mm512_loadu_si512(in + 26); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_loadu_si512(in + 27); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 25)); + w1 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 22, w0); + tmp = _mm512_loadu_si512(in + 28); + w1 = _mm512_or_si512(w1, 
_mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 23, w1); + tmp = _mm512_loadu_si512(in + 29); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 15)); + w1 = _mm512_srli_epi32(tmp, 17); + _mm512_storeu_si512(compressed + 24, w0); + tmp = _mm512_loadu_si512(in + 30); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 10)); + w0 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 25, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 5)); + _mm512_storeu_si512(compressed + 26, w0); +} + +/* we are going to pack 512 28-bit values, touching 28 512-bit words, using 896 + * bytes */ +static void avx512packblock28(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 28 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + tmp = _mm512_loadu_si512(in + 1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_loadu_si512(in + 2); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_loadu_si512(in + 3); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_loadu_si512(in + 4); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_loadu_si512(in + 5); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 12)); + w1 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 4, w0); + tmp = _mm512_loadu_si512(in + 6); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 8)); + w0 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512(w0, 
_mm512_slli_epi32(_mm512_loadu_si512(in + 7), 4)); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_loadu_si512(in + 8); + tmp = _mm512_loadu_si512(in + 9); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_loadu_si512(in + 10); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_loadu_si512(in + 11); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 9, w1); + tmp = _mm512_loadu_si512(in + 12); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 10, w0); + tmp = _mm512_loadu_si512(in + 13); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_loadu_si512(in + 14); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 8)); + w1 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 4)); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_loadu_si512(in + 16); + tmp = _mm512_loadu_si512(in + 17); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 14, w0); + tmp = _mm512_loadu_si512(in + 18); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_loadu_si512(in + 19); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 16, w0); + tmp = _mm512_loadu_si512(in + 20); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 17, w1); 
+ tmp = _mm512_loadu_si512(in + 21); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 12)); + w1 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_loadu_si512(in + 22); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 8)); + w0 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 19, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 23), 4)); + _mm512_storeu_si512(compressed + 20, w0); + w1 = _mm512_loadu_si512(in + 24); + tmp = _mm512_loadu_si512(in + 25); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_loadu_si512(in + 26); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 22, w0); + tmp = _mm512_loadu_si512(in + 27); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 23, w1); + tmp = _mm512_loadu_si512(in + 28); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 24, w0); + tmp = _mm512_loadu_si512(in + 29); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 25, w1); + tmp = _mm512_loadu_si512(in + 30); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 8)); + w1 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 26, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 4)); + _mm512_storeu_si512(compressed + 27, w1); +} + +/* we are going to pack 512 29-bit values, touching 29 512-bit words, using 928 + * bytes */ +static void avx512packblock29(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 29 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + 
w0 = _mm512_loadu_si512(in + 0); + tmp = _mm512_loadu_si512(in + 1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 29)); + w1 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_loadu_si512(in + 2); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_loadu_si512(in + 3); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 23)); + w1 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_loadu_si512(in + 4); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_loadu_si512(in + 5); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 17)); + w1 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 4, w0); + tmp = _mm512_loadu_si512(in + 6); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_loadu_si512(in + 7); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 11)); + w1 = _mm512_srli_epi32(tmp, 21); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_loadu_si512(in + 8); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 8)); + w0 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_loadu_si512(in + 9); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 5)); + w1 = _mm512_srli_epi32(tmp, 27); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 10), 2)); + tmp = _mm512_loadu_si512(in + 11); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 9, w1); + tmp = _mm512_loadu_si512(in + 12); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 10, w0); + tmp = 
_mm512_loadu_si512(in + 13); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 25)); + w0 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_loadu_si512(in + 14); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_loadu_si512(in + 15); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 19)); + w0 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 13, w1); + tmp = _mm512_loadu_si512(in + 16); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 14, w0); + tmp = _mm512_loadu_si512(in + 17); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 13)); + w0 = _mm512_srli_epi32(tmp, 19); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_loadu_si512(in + 18); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 10)); + w1 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 16, w0); + tmp = _mm512_loadu_si512(in + 19); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 7)); + w0 = _mm512_srli_epi32(tmp, 25); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_loadu_si512(in + 20); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 4)); + w1 = _mm512_srli_epi32(tmp, 28); + _mm512_storeu_si512(compressed + 18, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 21), 1)); + tmp = _mm512_loadu_si512(in + 22); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 19, w1); + tmp = _mm512_loadu_si512(in + 23); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 20, w0); + tmp = _mm512_loadu_si512(in + 24); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_loadu_si512(in + 25); + w0 
= _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 21)); + w1 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 22, w0); + tmp = _mm512_loadu_si512(in + 26); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 23, w1); + tmp = _mm512_loadu_si512(in + 27); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 15)); + w1 = _mm512_srli_epi32(tmp, 17); + _mm512_storeu_si512(compressed + 24, w0); + tmp = _mm512_loadu_si512(in + 28); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 25, w1); + tmp = _mm512_loadu_si512(in + 29); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 9)); + w1 = _mm512_srli_epi32(tmp, 23); + _mm512_storeu_si512(compressed + 26, w0); + tmp = _mm512_loadu_si512(in + 30); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 6)); + w0 = _mm512_srli_epi32(tmp, 26); + _mm512_storeu_si512(compressed + 27, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 3)); + _mm512_storeu_si512(compressed + 28, w0); +} + +/* we are going to pack 512 30-bit values, touching 30 512-bit words, using 960 + * bytes */ +static void avx512packblock30(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 30 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + tmp = _mm512_loadu_si512(in + 1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_loadu_si512(in + 2); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_loadu_si512(in + 3); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 2, w0); + 
tmp = _mm512_loadu_si512(in + 4); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_loadu_si512(in + 5); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 4, w0); + tmp = _mm512_loadu_si512(in + 6); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_loadu_si512(in + 7); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 18)); + w1 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_loadu_si512(in + 8); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_loadu_si512(in + 9); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 14)); + w1 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_loadu_si512(in + 10); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 9, w1); + tmp = _mm512_loadu_si512(in + 11); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 10)); + w1 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 10, w0); + tmp = _mm512_loadu_si512(in + 12); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 8)); + w0 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_loadu_si512(in + 13); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 6)); + w1 = _mm512_srli_epi32(tmp, 26); + _mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_loadu_si512(in + 14); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 4)); + w0 = _mm512_srli_epi32(tmp, 28); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 15), 2)); + _mm512_storeu_si512(compressed + 14, w0); + 
w1 = _mm512_loadu_si512(in + 16); + tmp = _mm512_loadu_si512(in + 17); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_loadu_si512(in + 18); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 16, w0); + tmp = _mm512_loadu_si512(in + 19); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_loadu_si512(in + 20); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_loadu_si512(in + 21); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 19, w1); + tmp = _mm512_loadu_si512(in + 22); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 20, w0); + tmp = _mm512_loadu_si512(in + 23); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_loadu_si512(in + 24); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 22, w0); + tmp = _mm512_loadu_si512(in + 25); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 23, w1); + tmp = _mm512_loadu_si512(in + 26); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 12)); + w1 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 24, w0); + tmp = _mm512_loadu_si512(in + 27); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 10)); + w0 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 25, w1); + tmp = _mm512_loadu_si512(in + 28); + w0 = _mm512_or_si512(w0, 
_mm512_slli_epi32(tmp, 8)); + w1 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 26, w0); + tmp = _mm512_loadu_si512(in + 29); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 6)); + w0 = _mm512_srli_epi32(tmp, 26); + _mm512_storeu_si512(compressed + 27, w1); + tmp = _mm512_loadu_si512(in + 30); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 4)); + w1 = _mm512_srli_epi32(tmp, 28); + _mm512_storeu_si512(compressed + 28, w0); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 2)); + _mm512_storeu_si512(compressed + 29, w1); +} + +/* we are going to pack 512 31-bit values, touching 31 512-bit words, using 992 + * bytes */ +static void avx512packblock31(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 31 512-bit words */ + __m512i w0, w1; + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_loadu_si512(in + 0); + tmp = _mm512_loadu_si512(in + 1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 31)); + w1 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_loadu_si512(in + 2); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_loadu_si512(in + 3); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 29)); + w1 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_loadu_si512(in + 4); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_loadu_si512(in + 5); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 4, w0); + tmp = _mm512_loadu_si512(in + 6); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_loadu_si512(in + 7); 
+ w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 25)); + w1 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_loadu_si512(in + 8); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_loadu_si512(in + 9); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 23)); + w1 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_loadu_si512(in + 10); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 9, w1); + tmp = _mm512_loadu_si512(in + 11); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 21)); + w1 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 10, w0); + tmp = _mm512_loadu_si512(in + 12); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_loadu_si512(in + 13); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 19)); + w1 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_loadu_si512(in + 14); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 13, w1); + tmp = _mm512_loadu_si512(in + 15); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 17)); + w1 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 14, w0); + tmp = _mm512_loadu_si512(in + 16); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_loadu_si512(in + 17); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 15)); + w1 = _mm512_srli_epi32(tmp, 17); + _mm512_storeu_si512(compressed + 16, w0); + tmp = _mm512_loadu_si512(in + 18); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = _mm512_srli_epi32(tmp, 18); + 
_mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_loadu_si512(in + 19); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 13)); + w1 = _mm512_srli_epi32(tmp, 19); + _mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_loadu_si512(in + 20); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 19, w1); + tmp = _mm512_loadu_si512(in + 21); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 11)); + w1 = _mm512_srli_epi32(tmp, 21); + _mm512_storeu_si512(compressed + 20, w0); + tmp = _mm512_loadu_si512(in + 22); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 10)); + w0 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_loadu_si512(in + 23); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 9)); + w1 = _mm512_srli_epi32(tmp, 23); + _mm512_storeu_si512(compressed + 22, w0); + tmp = _mm512_loadu_si512(in + 24); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 8)); + w0 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 23, w1); + tmp = _mm512_loadu_si512(in + 25); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 7)); + w1 = _mm512_srli_epi32(tmp, 25); + _mm512_storeu_si512(compressed + 24, w0); + tmp = _mm512_loadu_si512(in + 26); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 6)); + w0 = _mm512_srli_epi32(tmp, 26); + _mm512_storeu_si512(compressed + 25, w1); + tmp = _mm512_loadu_si512(in + 27); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 5)); + w1 = _mm512_srli_epi32(tmp, 27); + _mm512_storeu_si512(compressed + 26, w0); + tmp = _mm512_loadu_si512(in + 28); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 4)); + w0 = _mm512_srli_epi32(tmp, 28); + _mm512_storeu_si512(compressed + 27, w1); + tmp = _mm512_loadu_si512(in + 29); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 3)); + w1 = _mm512_srli_epi32(tmp, 29); + _mm512_storeu_si512(compressed + 28, w0); + tmp = _mm512_loadu_si512(in + 30); + w1 = _mm512_or_si512(w1, 
_mm512_slli_epi32(tmp, 2)); + w0 = _mm512_srli_epi32(tmp, 30); + _mm512_storeu_si512(compressed + 29, w1); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(_mm512_loadu_si512(in + 31), 1)); + _mm512_storeu_si512(compressed + 30, w0); +} + +/* we are going to pack 512 32-bit values, touching 32 512-bit words, using 1024 + * bytes */ +static void avx512packblock32(const uint32_t *pin, __m512i *compressed) { + const __m512i *in = (const __m512i *)pin; + /* we are going to touch 32 512-bit words */ + __m512i w0, w1; + w0 = _mm512_loadu_si512(in + 0); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_loadu_si512(in + 1); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_loadu_si512(in + 2); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_loadu_si512(in + 3); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_loadu_si512(in + 4); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_loadu_si512(in + 5); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_loadu_si512(in + 6); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_loadu_si512(in + 7); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_loadu_si512(in + 8); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_loadu_si512(in + 9); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_loadu_si512(in + 10); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_loadu_si512(in + 11); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_loadu_si512(in + 12); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_loadu_si512(in + 13); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_loadu_si512(in + 14); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_loadu_si512(in + 15); + _mm512_storeu_si512(compressed + 15, w1); + w0 = _mm512_loadu_si512(in + 16); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_loadu_si512(in + 17); + _mm512_storeu_si512(compressed + 17, w1); + w0 = _mm512_loadu_si512(in + 18); + 
_mm512_storeu_si512(compressed + 18, w0); + w1 = _mm512_loadu_si512(in + 19); + _mm512_storeu_si512(compressed + 19, w1); + w0 = _mm512_loadu_si512(in + 20); + _mm512_storeu_si512(compressed + 20, w0); + w1 = _mm512_loadu_si512(in + 21); + _mm512_storeu_si512(compressed + 21, w1); + w0 = _mm512_loadu_si512(in + 22); + _mm512_storeu_si512(compressed + 22, w0); + w1 = _mm512_loadu_si512(in + 23); + _mm512_storeu_si512(compressed + 23, w1); + w0 = _mm512_loadu_si512(in + 24); + _mm512_storeu_si512(compressed + 24, w0); + w1 = _mm512_loadu_si512(in + 25); + _mm512_storeu_si512(compressed + 25, w1); + w0 = _mm512_loadu_si512(in + 26); + _mm512_storeu_si512(compressed + 26, w0); + w1 = _mm512_loadu_si512(in + 27); + _mm512_storeu_si512(compressed + 27, w1); + w0 = _mm512_loadu_si512(in + 28); + _mm512_storeu_si512(compressed + 28, w0); + w1 = _mm512_loadu_si512(in + 29); + _mm512_storeu_si512(compressed + 29, w1); + w0 = _mm512_loadu_si512(in + 30); + _mm512_storeu_si512(compressed + 30, w0); + w1 = _mm512_loadu_si512(in + 31); + _mm512_storeu_si512(compressed + 31, w1); +} + +static void avx512packblockmask0(const uint32_t *pin, __m512i *compressed) { + (void)compressed; + (void)pin; /* we consumed 512 32-bit integers */ +} + +/* we are going to pack 512 1-bit values, touching 1 512-bit words, using 32 + * bytes */ +static void avx512packblockmask1(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 1 512-bit word */ + __m512i w0; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), 1)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 2)), 2)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), 3)); + w0 = _mm512_or_si512( + w0, + 
_mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 4)), 4)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), 5)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 6)), 6)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), 7)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 8)), 8)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), 9)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 10)), + 10)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 11)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 12)), + 12)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 13)), + 13)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)), + 14)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 15)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 16)), + 16)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 17)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)), + 18)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 19)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 20)), + 20)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)), + 21)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, 
_mm512_loadu_si512(in + 22)), + 22)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 23)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 24)), + 24)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 25)), + 25)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 26)), + 26)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 27)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 28)), + 28)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 29)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 30)), + 30)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 31)); + _mm512_storeu_si512(compressed + 0, w0); +} + +/* we are going to pack 512 2-bit values, touching 2 512-bit words, using 64 + * bytes */ +static void avx512packblockmask2(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 2 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(3); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), 2)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 2)), 4)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), 6)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 4)), 8)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), + 10)); + w0 = 
_mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 6)), + 12)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), + 14)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 8)), + 16)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), + 18)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 10)), + 20)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 22)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 12)), + 24)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 13)), + 26)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)), + 28)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 30)); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 2)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)), + 4)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 6)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 20)), + 8)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)), + 10)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 22)), + 12)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 14)); + w1 = _mm512_or_si512( + w1, 
_mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 24)), + 16)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 25)), + 18)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 26)), + 20)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 22)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 28)), + 24)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 26)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 30)), + 28)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 30)); + _mm512_storeu_si512(compressed + 1, w1); +} + +/* we are going to pack 512 3-bit values, touching 3 512-bit words, using 96 + * bytes */ +static void avx512packblockmask3(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 3 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(7); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), 3)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 2)), 6)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), 9)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 4)), + 12)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), + 15)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 6)), + 18)); + w0 = _mm512_or_si512( 
+ w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), + 21)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 8)), + 24)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), + 27)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 1)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 12)), + 4)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 13)), + 7)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)), + 10)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 13)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 16)), + 16)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 19)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)), + 22)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 25)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 20)), + 28)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 21)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 22)), + 2)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 5)); + w0 = 
_mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 24)), + 8)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 25)), + 11)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 26)), + 14)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 17)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 28)), + 20)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 23)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 30)), + 26)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 29)); + _mm512_storeu_si512(compressed + 2, w0); +} + +/* we are going to pack 512 4-bit values, touching 4 512-bit words, using 128 + * bytes */ +static void avx512packblockmask4(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 4 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(15); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), 4)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 2)), 8)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), + 12)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 4)), + 16)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), + 20)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 6)), + 24)); + w0 = _mm512_or_si512( + w0, 
_mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), + 28)); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), 4)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 10)), + 8)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 12)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 12)), + 16)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 13)), + 20)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)), + 24)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 28)); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 4)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)), + 8)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 12)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 20)), + 16)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)), + 20)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 22)), + 24)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 28)); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in 
+ 25)), + 4)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 26)), + 8)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 12)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 28)), + 16)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 20)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 30)), + 24)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 28)); + _mm512_storeu_si512(compressed + 3, w1); +} + +/* we are going to pack 512 5-bit values, touching 5 512-bit words, using 160 + * bytes */ +static void avx512packblockmask5(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 5 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(31); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), 5)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 2)), + 10)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), + 15)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 4)), + 20)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), + 25)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), 3)); + 
w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 8)), 8)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), + 13)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 10)), + 18)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 23)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 13)), + 1)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)), + 6)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 11)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 16)), + 16)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 21)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)), + 26)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 19)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 31)); + w1 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 20)), + 4)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)), + 9)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 22)), + 14)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 19)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 
24)), + 24)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 25)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 26)), + 2)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 7)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 28)), + 12)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 17)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 30)), + 22)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 27)); + _mm512_storeu_si512(compressed + 4, w0); +} + +/* we are going to pack 512 6-bit values, touching 6 512-bit words, using 384 + * bytes (6 words x 64 bytes each). + * pin: the 512 inputs, read as 32 unaligned 512-bit loads; only the low 6 bits + * of each 32-bit lane are kept. + * compressed: receives the 6 packed words through unaligned stores. */ +static void avx512packblockmask6(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 6 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(63); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), 6)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 2)), + 12)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), + 18)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 4)), + 24)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 5)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + 
w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 6)), 4)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), + 10)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 8)), + 16)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), + 22)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 2)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 12)), + 8)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 13)), + 14)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)), + 20)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 26)); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 6)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)), + 12)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 18)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 20)), + 24)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 21)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 22)), + 4)); + w0 = 
_mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 10)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 24)), + 16)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 25)), + 22)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 2)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 28)), + 8)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 14)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 30)), + 20)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 26)); + _mm512_storeu_si512(compressed + 5, w1); +} + +/* we are going to pack 512 7-bit values, touching 7 512-bit words, using 448 + * bytes (7 words x 64 bytes each). + * pin: the 512 inputs, read as 32 unaligned 512-bit loads; only the low 7 bits + * of each 32-bit lane are kept. + * compressed: receives the 7 packed words through unaligned stores. */ +static void avx512packblockmask7(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 7 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(127); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), 7)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 2)), + 14)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), + 21)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = 
_mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), 3)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 6)), + 10)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), + 17)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 8)), + 24)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 9)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 10)), + 6)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 13)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 12)), + 20)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 13)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)), + 2)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 9)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 16)), + 16)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 23)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 5)); + w0 = _mm512_or_si512( + w0, 
_mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 20)), + 12)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)), + 19)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 22)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 1)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 24)), + 8)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 25)), + 15)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 26)), + 22)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 27)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 28)), + 4)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 11)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 30)), + 18)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 25)); + _mm512_storeu_si512(compressed + 6, w0); +} + +/* we are going to pack 512 8-bit values, touching 8 512-bit words, using 512 + * bytes (8 words x 64 bytes each). + * pin: the 512 inputs, read as 32 unaligned 512-bit loads; only the low 8 bits + * of each 32-bit lane are kept. + * compressed: receives the 8 packed words through unaligned stores. */ +static void avx512packblockmask8(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 8 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(255); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), 8)); + w0 = 
_mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 2)), + 16)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), + 24)); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), 8)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 6)), + 16)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), + 24)); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), 8)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 10)), + 16)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 24)); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 13)), + 8)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)), + 16)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 24)); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 8)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)), + 16)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 24)); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_and_si512(mask, 
_mm512_loadu_si512(in + 20)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)), + 8)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 22)), + 16)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 24)); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 25)), + 8)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 26)), + 16)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 24)); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 8)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 30)), + 16)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 24)); + _mm512_storeu_si512(compressed + 7, w1); +} + +/* we are going to pack 512 9-bit values, touching 9 512-bit words, using 576 + * bytes (9 words x 64 bytes each). + * pin: the 512 inputs, read as 32 unaligned 512-bit loads; only the low 9 bits + * of each 32-bit lane are kept. + * compressed: receives the 9 packed words through unaligned stores. */ +static void avx512packblockmask9(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 9 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(511); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), 9)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 2)), + 18)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 3)); + w0 = 
_mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 4)), 4)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), + 13)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 6)), + 22)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 7)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 8)), 8)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), + 17)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 3)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 12)), + 12)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 13)), + 21)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 7)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 16)), + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 17)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 25)); + w1 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 4, w0); + w1 = 
_mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)), + 2)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 11)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 20)), + 20)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 21)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 22)), + 6)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 15)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 25)), + 1)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 26)), + 10)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 19)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 5)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 30)), + 14)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 23)); + _mm512_storeu_si512(compressed + 8, w0); +} + +/* we are going to pack 512 10-bit values, touching 10 512-bit words, using 640 + * bytes (10 words x 64 bytes each). + * pin: the 512 inputs, read as 32 unaligned 512-bit loads; only the low 10 + * bits of each 32-bit lane are kept. + * compressed: receives the 10 packed words through unaligned stores. */ +static void avx512packblockmask10(const uint32_t *pin, __m512i *compressed) { + /* we 
are going to touch 10 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(1023); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), + 10)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 2)), + 20)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 3)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 4)), 8)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), + 18)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), 6)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 8)), + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 9)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 10)), + 4)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 14)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 
13)), + 2)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)), + 12)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 22)); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 10)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)), + 20)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 19)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 20)), + 8)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)), + 18)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 22)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 6)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 24)), + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 25)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 26)), + 4)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 14)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 8, w0); + w1 = 
_mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 2)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 30)), + 12)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 22)); + _mm512_storeu_si512(compressed + 9, w1); +} + +/* we are going to pack 512 11-bit values, touching 11 512-bit words, using 704 + * bytes (11 words x 64 bytes each). + * pin: the 512 inputs, read as 32 unaligned 512-bit loads; only the low 11 + * bits of each 32-bit lane are kept. + * compressed: receives the 11 packed words through unaligned stores. */ +static void avx512packblockmask11(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 11 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(2047); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), + 11)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), 1)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 4)), + 12)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 5)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 6)), 2)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), + 13)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512( + w1, + 
_mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), 3)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 10)), + 14)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 11)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 25)); + w0 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 12)), + 4)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 13)), + 15)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 5)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 16)), + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 17)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 27)); + w0 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)), + 6)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 17)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)), + 7)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 22)), + 18)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 23)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + 
_mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 24)), + 8)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 25)), + 19)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 9)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 28)), + 20)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 29)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 30)), + 10)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 21)); + _mm512_storeu_si512(compressed + 10, w0); +} + +/* we are going to pack 512 12-bit values, touching 12 512-bit words, using 768 + * bytes (12 words x 64 bytes each). + * pin: the 512 inputs, read as 32 unaligned 512-bit loads; only the low 12 + * bits of each 32-bit lane are kept. + * compressed: receives the 12 packed words through unaligned stores. */ +static void avx512packblockmask12(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 12 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(4095); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), + 12)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), 4)); + w1 = 
_mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 4)), + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 5)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 6)), 8)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), + 20)); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), + 12)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 4)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 12)), + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 13)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)), + 8)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 20)); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 12)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512( + w1, 
_mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 4)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 20)), + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 21)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 22)), + 8)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 20)); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 25)), + 12)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 4)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 28)), + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 29)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 30)), + 8)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 20)); + _mm512_storeu_si512(compressed + 11, w1); +} + +/* we are going to pack 512 13-bit values, touching 13 512-bit words, using 416 + * bytes */ +static void avx512packblockmask13(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 13 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = 
_mm512_set1_epi32(8191); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), + 13)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), 7)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), 1)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 6)), + 14)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 7)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 8)), 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 9)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 21)); + w0 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 10)), + 2)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 15)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 13)), + 9)); + tmp = 
_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 3)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 16)), + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 17)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 29)); + w1 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)), + 10)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 19)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 20)), + 4)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)), + 17)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 22)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 11)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 25)), + 5)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 26)), + 18)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 27)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 31)); + w1 = _mm512_srli_epi32(tmp, 1); + 
_mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 28)), + 12)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 29)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 25)); + w0 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 30)), + 6)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 19)); + _mm512_storeu_si512(compressed + 12, w0); +} + +/* we are going to pack 512 14-bit values, touching 14 512-bit words, using 448 + * bytes */ +static void avx512packblockmask14(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 14 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(16383); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), + 14)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), + 10)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), 6)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512( + w1, + 
_mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), 2)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 8)), + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 9)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 10)), + 12)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 11)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 12)), + 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 13)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)), + 4)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 18)); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 14)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 10)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, 
_mm512_loadu_si512(in + 21)), + 6)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 22)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 2)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 24)), + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 25)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 26)), + 12)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 27)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 28)), + 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 29)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 30)), + 4)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 18)); + _mm512_storeu_si512(compressed + 13, w1); +} + +/* we are going to pack 512 15-bit values, touching 15 512-bit words, using 480 + * bytes */ +static void avx512packblockmask15(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 15 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(32767); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, 
_mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), + 15)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), + 13)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), + 11)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), 9)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), 7)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 5)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 13)), + 3)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 
18)); + w1 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 1)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 16)), + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 17)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)), + 14)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 19)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 29)); + w1 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 20)), + 12)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 21)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 27)); + w0 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 22)), + 10)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 23)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 25)); + w1 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 24)), + 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 25)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 26)), + 6)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 27)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 21)); + w1 = _mm512_srli_epi32(tmp, 11); + 
_mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 28)), + 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 29)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 19)); + w0 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 30)), + 2)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 17)); + _mm512_storeu_si512(compressed + 14, w0); +} + +/* we are going to pack 512 16-bit values, touching 16 512-bit words, using 512 + * bytes */ +static void avx512packblockmask16(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 16 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(65535); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 1)), + 16)); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), + 16)); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), + 16)); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), + 16)); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), + 16)); + _mm512_storeu_si512(compressed + 4, w0); + w1 = 
_mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 16)); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 13)), + 16)); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 16)); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 16)); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 16)); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 20)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)), + 16)); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 22)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 16)); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 25)), + 16)); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 16)); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w0 = _mm512_or_si512( + w0, 
_mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 16)); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 16)); + _mm512_storeu_si512(compressed + 15, w1); +} + +/* we are going to pack 512 17-bit values, touching 17 512-bit words, using 544 + * bytes */ +static void avx512packblockmask17(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 17 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(131071); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 17)); + w1 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 2)), 2)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 3)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 19)); + w0 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 4)), 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 5)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 21)); + w1 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 6)), 6)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 7)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 8)), 8)); + tmp 
= _mm512_and_si512(mask, _mm512_loadu_si512(in + 9)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 25)); + w1 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 10)), + 10)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 11)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 27)); + w0 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 12)), + 12)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 13)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 29)); + w1 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)), + 14)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 15)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 1)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 3)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)), 
+ 5)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 22)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 7)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 25)), + 9)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 11)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 13)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 15, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 15)); + _mm512_storeu_si512(compressed + 16, w0); +} + +/* we are going to pack 512 18-bit values, touching 18 512-bit words, using 576 + * bytes */ +static void avx512packblockmask18(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 18 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(262143); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, 
_mm512_loadu_si512(in + 0)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 18)); + w1 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 2)), 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 3)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 4)), 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 5)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 6)), + 12)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 7)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), 2)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 6)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, 
_mm512_loadu_si512(in + 13)), + 10)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 14)); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 17)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)), + 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 19)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 20)), + 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 21)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 22)), + 12)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 23)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 25)), + 2)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = 
_mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 6)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 15, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 10)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 14)); + _mm512_storeu_si512(compressed + 17, w1); +} + +/* we are going to pack 512 19-bit values, touching 19 512-bit words, using 608 + * bytes */ +static void avx512packblockmask19(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 19 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(524287); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 19)); + w1 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 2)), 6)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 3)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 25)); + w0 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 4)), + 12)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 5)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 
31)); + w1 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), 5)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), + 11)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 11)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 17)); + w1 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 12)), + 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 13)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)), + 10)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 15)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 29)); + w1 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 3)); + tmp = 
_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 9)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 20)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 21)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 15)); + w1 = _mm512_srli_epi32(tmp, 17); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 22)), + 2)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 23)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 21)); + w0 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 24)), + 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 25)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 14, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 15, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 1)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 7)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w1 = _mm512_or_si512(w1, 
_mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 17, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 13)); + _mm512_storeu_si512(compressed + 18, w0); +} + +/* Packs 512 20-bit values into 20 512-bit words (20 * 64 = 1280 bytes): pin supplies 32 vectors of 16 32-bit lanes, read unaligned and masked + * to their low 20 bits; compressed receives the 20 packed vectors through unaligned stores */ +static void avx512packblockmask20(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 20 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(1048575); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 2)), 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 3)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), + 12)); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 9)); + w1 =
_mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 10)), + 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 11)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 13)), + 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 12)); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 17)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)), + 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 19)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)), + 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 
22)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 12)); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 25)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 15, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 26)), + 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 27)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 16, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 17, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 18, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 12)); + _mm512_storeu_si512(compressed + 19, w1); +} + +/* Packs 512 21-bit values into 21 512-bit words (21 * 64 = 1344 bytes): pin supplies 32 vectors of 16 32-bit lanes, read unaligned and masked + * to their low 21 bits; compressed receives the 21 packed vectors through unaligned stores */ +static void avx512packblockmask21(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 21 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(2097151); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + tmp = _mm512_and_si512(mask,
_mm512_loadu_si512(in + 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 21)); + w1 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 2)), + 10)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 3)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), 9)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 7)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 19)); + w1 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 8)), 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 9)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 18)); + w1 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 7)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_and_si512(mask, 
_mm512_loadu_si512(in + 13)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 17)); + w1 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)), + 6)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 15)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 27)); + w0 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 9, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 5)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 19)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 15)); + w1 = _mm512_srli_epi32(tmp, 17); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 20)), + 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 21)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 25)); + w0 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 13, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 22)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 14)); + w1 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 3)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_and_si512(mask, 
_mm512_loadu_si512(in + 25)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 13)); + w1 = _mm512_srli_epi32(tmp, 19); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 26)), + 2)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 27)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 12)); + w1 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 18, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 29)), + 1)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 19, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 11)); + _mm512_storeu_si512(compressed + 20, w0); +} + +/* Packs 512 22-bit values into 22 512-bit words (22 * 64 = 1408 bytes): pin supplies 32 vectors of 16 32-bit lanes, read unaligned and masked + * to their low 22 bits; compressed receives the 22 packed vectors through unaligned stores */ +static void avx512packblockmask22(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 22 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(4194303); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, +
_mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), 2)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 5)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 6)), 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 7)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 4, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), 6)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 11)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 12)), + 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 13)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512( + w0, 
_mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 10)); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 17)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 12)); + w1 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 2)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 20)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 13, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 21)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 14)); + w1 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 22)), + 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 23)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 25)), + 6)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 27)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 18)); + w1 = 
_mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 18, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 28)), + 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 29)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 19, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 20, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 10)); + _mm512_storeu_si512(compressed + 21, w1); +} + +/* Packs 512 23-bit values into 23 512-bit words (23 * 64 = 1472 bytes): pin supplies 32 vectors of 16 32-bit lanes, read unaligned and masked + * to their low 23 bits; compressed receives the 23 packed vectors through unaligned stores */ +static void avx512packblockmask23(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 23 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(8388607); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 23)); + w1 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), 5)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 5)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 19)); + w0 =
_mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 10)); + w1 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), 1)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 9)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 15)); + w1 = _mm512_srli_epi32(tmp, 17); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 10)), + 6)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 11)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 13)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 11)); + w0 = _mm512_srli_epi32(tmp, 21); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 14)), + 2)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 15)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 25)); + w1 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 10, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_or_si512( + w0, 
_mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 17)), + 7)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 19)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 21)); + w0 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 13, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 12)); + w1 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)), + 3)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 22)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 23)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 17)); + w1 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 24)), + 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 25)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 27)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 13)); + w0 = _mm512_srli_epi32(tmp, 19); + _mm512_storeu_si512(compressed + 19, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 28)), + 4)); + tmp = _mm512_and_si512(mask, 
_mm512_loadu_si512(in + 29)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 20, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 21, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 9)); + _mm512_storeu_si512(compressed + 22, w0); +} + +/* Packs 512 24-bit values into 24 512-bit words (24 * 64 = 1536 bytes): pin supplies 32 vectors of 16 32-bit lanes, read unaligned and masked + * to their low 24 bits; compressed receives the 24 packed vectors through unaligned stores */ +static void avx512packblockmask24(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 24 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(16777215); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 3)), 8)); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 5)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512( + w1, +
_mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), 8)); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 9)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)), + 8)); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 13)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 9, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 8)); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 17)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 8)); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 20)); + tmp = 
_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 22)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 8)); + _mm512_storeu_si512(compressed + 17, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 25)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 19, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 8)); + _mm512_storeu_si512(compressed + 20, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 29)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 22, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 8)); + _mm512_storeu_si512(compressed + 23, w1); +} + +/* Packs 512 25-bit values into 25 512-bit words (25 * 64 = 1600 bytes): pin supplies 32 vectors of 16 32-bit lanes, read unaligned and masked + * to their low 25 bits; compressed receives the 25 packed vectors through unaligned stores */ +static void avx512packblockmask25(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 25 512-bit words */ + __m512i w0, w1; + const __m512i
*in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(33554431); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 25)); + w1 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 3)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 11)); + w1 = _mm512_srli_epi32(tmp, 21); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 4)), 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 5)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 4, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 7)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 15)); + w0 = _mm512_srli_epi32(tmp, 17); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 8)); + w1 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 9)), 1)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_and_si512(mask, 
_mm512_loadu_si512(in + 11)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 19)); + w1 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 13)), + 5)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 10, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 15)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 17)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 9)); + w0 = _mm512_srli_epi32(tmp, 23); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 18)), + 2)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 19)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 14, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 20)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 21)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 13)); + w1 = _mm512_srli_epi32(tmp, 19); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512( + w1, 
_mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 22)), + 6)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 23)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 25)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 17)); + w0 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 19, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 10)); + w1 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 20, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 27)), + 3)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 29)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 21)); + w1 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 22, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 23, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 7)); + _mm512_storeu_si512(compressed + 24, w0); +} + +/* we are going to pack 512 26-bit values, touching 26 512-bit words, using 832 + * bytes */ +static void avx512packblockmask26(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 26 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + 
const __m512i mask = _mm512_set1_epi32(67108863); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 3)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 14)); + w1 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 8)); + w0 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 5)), 2)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 4, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 7)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 9)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 10)); + w0 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 10)), + 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 11)); + w0 = 
_mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 9, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 13)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 18)); + w1 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 10, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 11, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 6)); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 17)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 13, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 14, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 19)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 8)); + w1 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)), + 2)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 22)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 
17, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 23)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 19, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 25)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 10)); + w1 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 20, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 26)), + 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 27)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 22, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 29)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 23, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 12)); + w1 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 24, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 6)); + _mm512_storeu_si512(compressed + 25, w1); +} + +/* we are going to pack 512 27-bit values (the low 27 bits of each uint32_t + * read from pin), touching 27 512-bit words of compressed, using 1728 bytes */ +static void avx512packblockmask27(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 27 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(134217727); + __m512i tmp; /*
used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 3)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 17)); + w1 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 5)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 7)); + w1 = _mm512_srli_epi32(tmp, 25); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_or_si512( + w1, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 6)), 2)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 7)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 29)); + w0 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 9)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 19)); + w0 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 14)); + w1 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_and_si512(mask, 
_mm512_loadu_si512(in + 11)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 9)); + w0 = _mm512_srli_epi32(tmp, 23); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 12)), + 4)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 13)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 31)); + w1 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 10, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 15)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 21)); + w1 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 13, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 17)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 11)); + w1 = _mm512_srli_epi32(tmp, 21); + _mm512_storeu_si512(compressed + 14, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 6)); + w0 = _mm512_srli_epi32(tmp, 26); + _mm512_storeu_si512(compressed + 15, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 19)), + 1)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 16, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 21)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 23)); + w0 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 
22)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 18)); + w1 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 23)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 13)); + w0 = _mm512_srli_epi32(tmp, 19); + _mm512_storeu_si512(compressed + 19, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 8)); + w1 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 20, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 25)), + 3)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 27)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 25)); + w1 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 22, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 23, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 29)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 15)); + w1 = _mm512_srli_epi32(tmp, 17); + _mm512_storeu_si512(compressed + 24, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 10)); + w0 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 25, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 5)); + _mm512_storeu_si512(compressed + 26, w0); +} + +/* we are going to pack 512 28-bit values (the low 28 bits of each uint32_t + * read from pin), touching 28 512-bit words of compressed, using 1792 bytes */ +static void avx512packblockmask28(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 28 512-bit words */ +
__m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(268435455); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 3)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 5)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 12)); + w1 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 4, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 8)); + w0 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_or_si512( + w0, + _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 7)), 4)); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 9)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 8, w0); + tmp = 
_mm512_and_si512(mask, _mm512_loadu_si512(in + 11)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 9, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 10, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 13)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 8)); + w1 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 4)); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 17)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 14, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 19)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 16, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 20)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 21)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 12)); + w1 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_and_si512(mask, 
_mm512_loadu_si512(in + 22)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 8)); + w0 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 19, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 23)), + 4)); + _mm512_storeu_si512(compressed + 20, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 25)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 22, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 27)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 23, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 24, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 29)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 25, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 8)); + w1 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 26, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 4)); + _mm512_storeu_si512(compressed + 27, w1); +} + +/* we are going to pack 512 29-bit values (the low 29 bits of each uint32_t + * read from pin), touching 29 512-bit words of compressed, using 1856 bytes */ +static void avx512packblockmask29(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 29 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const
__m512i mask = _mm512_set1_epi32(536870911); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 29)); + w1 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 3)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 23)); + w1 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 5)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 17)); + w1 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 4, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 7)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 11)); + w1 = _mm512_srli_epi32(tmp, 21); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 8)); + w0 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 9)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 5)); + w1 = _mm512_srli_epi32(tmp, 27); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 
10)), + 2)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 11)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 31)); + w0 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 9, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 10, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 13)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 25)); + w0 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 15)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 19)); + w0 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 13, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 14, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 17)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 13)); + w0 = _mm512_srli_epi32(tmp, 19); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 10)); + w1 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 16, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 19)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 7)); + w0 = _mm512_srli_epi32(tmp, 25); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 4)); + w1 = _mm512_srli_epi32(tmp, 28); + _mm512_storeu_si512(compressed + 18, w0); + w1 = _mm512_or_si512( + w1, 
_mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 21)), + 1)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 22)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 19, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 23)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 20, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 25)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 21)); + w1 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 22, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 23, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 27)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 15)); + w1 = _mm512_srli_epi32(tmp, 17); + _mm512_storeu_si512(compressed + 24, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 25, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 29)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 9)); + w1 = _mm512_srli_epi32(tmp, 23); + _mm512_storeu_si512(compressed + 26, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 6)); + w0 = _mm512_srli_epi32(tmp, 26); + _mm512_storeu_si512(compressed + 27, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 3)); + _mm512_storeu_si512(compressed + 28, w0); +} + +/*
we are going to pack 512 30-bit values (the low 30 bits of each uint32_t + * read from pin), touching 30 512-bit words of compressed, using 1920 bytes */ +static void avx512packblockmask30(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 30 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(1073741823); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 30)); + w1 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 3)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 26)); + w1 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 5)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 22)); + w1 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 4, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 7)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 18)); + w1 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 7, w1); +
tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 9)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 14)); + w1 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 9, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 11)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 10)); + w1 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 10, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 8)); + w0 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 13)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 6)); + w1 = _mm512_srli_epi32(tmp, 26); + _mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 4)); + w0 = _mm512_srli_epi32(tmp, 28); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 15)), + 2)); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 17)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 28)); + w1 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 16, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 19)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_and_si512(mask, 
_mm512_loadu_si512(in + 20)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 24)); + w1 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 21)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 19, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 22)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 20)); + w1 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 20, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 23)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 16)); + w1 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 22, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 25)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 23, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 12)); + w1 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 24, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 27)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 10)); + w0 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 25, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 8)); + w1 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 26, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 29)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 6)); + w0 = _mm512_srli_epi32(tmp, 26); + _mm512_storeu_si512(compressed + 27, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w0 = 
_mm512_or_si512(w0, _mm512_slli_epi32(tmp, 4)); + w1 = _mm512_srli_epi32(tmp, 28); + _mm512_storeu_si512(compressed + 28, w0); + w1 = _mm512_or_si512( + w1, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 2)); + _mm512_storeu_si512(compressed + 29, w1); +} + +/* we are going to pack 512 31-bit values (the low 31 bits of each uint32_t + * read from pin), touching 31 512-bit words of compressed, using 1984 bytes */ +static void avx512packblockmask31(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 31 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + const __m512i mask = _mm512_set1_epi32(2147483647); + __m512i tmp; /* used to store inputs at word boundary */ + w0 = _mm512_and_si512(mask, _mm512_loadu_si512(in + 0)); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 1)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 31)); + w1 = _mm512_srli_epi32(tmp, 1); + _mm512_storeu_si512(compressed + 0, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 2)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 30)); + w0 = _mm512_srli_epi32(tmp, 2); + _mm512_storeu_si512(compressed + 1, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 3)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 29)); + w1 = _mm512_srli_epi32(tmp, 3); + _mm512_storeu_si512(compressed + 2, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 4)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 28)); + w0 = _mm512_srli_epi32(tmp, 4); + _mm512_storeu_si512(compressed + 3, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 5)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 27)); + w1 = _mm512_srli_epi32(tmp, 5); + _mm512_storeu_si512(compressed + 4, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 6)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 26)); + w0 = _mm512_srli_epi32(tmp, 6); + _mm512_storeu_si512(compressed + 5, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 7)); + w0 = _mm512_or_si512(w0,
_mm512_slli_epi32(tmp, 25)); + w1 = _mm512_srli_epi32(tmp, 7); + _mm512_storeu_si512(compressed + 6, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 8)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 24)); + w0 = _mm512_srli_epi32(tmp, 8); + _mm512_storeu_si512(compressed + 7, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 9)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 23)); + w1 = _mm512_srli_epi32(tmp, 9); + _mm512_storeu_si512(compressed + 8, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 10)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 22)); + w0 = _mm512_srli_epi32(tmp, 10); + _mm512_storeu_si512(compressed + 9, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 11)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 21)); + w1 = _mm512_srli_epi32(tmp, 11); + _mm512_storeu_si512(compressed + 10, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 12)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 20)); + w0 = _mm512_srli_epi32(tmp, 12); + _mm512_storeu_si512(compressed + 11, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 13)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 19)); + w1 = _mm512_srli_epi32(tmp, 13); + _mm512_storeu_si512(compressed + 12, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 14)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 18)); + w0 = _mm512_srli_epi32(tmp, 14); + _mm512_storeu_si512(compressed + 13, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 15)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 17)); + w1 = _mm512_srli_epi32(tmp, 15); + _mm512_storeu_si512(compressed + 14, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 16)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 16)); + w0 = _mm512_srli_epi32(tmp, 16); + _mm512_storeu_si512(compressed + 15, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 17)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 15)); + w1 = 
_mm512_srli_epi32(tmp, 17); + _mm512_storeu_si512(compressed + 16, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 18)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 14)); + w0 = _mm512_srli_epi32(tmp, 18); + _mm512_storeu_si512(compressed + 17, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 19)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 13)); + w1 = _mm512_srli_epi32(tmp, 19); + _mm512_storeu_si512(compressed + 18, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 20)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 12)); + w0 = _mm512_srli_epi32(tmp, 20); + _mm512_storeu_si512(compressed + 19, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 21)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 11)); + w1 = _mm512_srli_epi32(tmp, 21); + _mm512_storeu_si512(compressed + 20, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 22)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 10)); + w0 = _mm512_srli_epi32(tmp, 22); + _mm512_storeu_si512(compressed + 21, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 23)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 9)); + w1 = _mm512_srli_epi32(tmp, 23); + _mm512_storeu_si512(compressed + 22, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 24)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 8)); + w0 = _mm512_srli_epi32(tmp, 24); + _mm512_storeu_si512(compressed + 23, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 25)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 7)); + w1 = _mm512_srli_epi32(tmp, 25); + _mm512_storeu_si512(compressed + 24, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 26)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 6)); + w0 = _mm512_srli_epi32(tmp, 26); + _mm512_storeu_si512(compressed + 25, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 27)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 5)); + w1 = _mm512_srli_epi32(tmp, 27); + 
_mm512_storeu_si512(compressed + 26, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 28)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 4)); + w0 = _mm512_srli_epi32(tmp, 28); + _mm512_storeu_si512(compressed + 27, w1); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 29)); + w0 = _mm512_or_si512(w0, _mm512_slli_epi32(tmp, 3)); + w1 = _mm512_srli_epi32(tmp, 29); + _mm512_storeu_si512(compressed + 28, w0); + tmp = _mm512_and_si512(mask, _mm512_loadu_si512(in + 30)); + w1 = _mm512_or_si512(w1, _mm512_slli_epi32(tmp, 2)); + w0 = _mm512_srli_epi32(tmp, 30); + _mm512_storeu_si512(compressed + 29, w1); + w0 = _mm512_or_si512( + w0, _mm512_slli_epi32(_mm512_and_si512(mask, _mm512_loadu_si512(in + 31)), + 1)); + _mm512_storeu_si512(compressed + 30, w0); +} + +/* we are going to pack 512 32-bit values, touching 32 512-bit words, using 1024 + * bytes */ +static void avx512packblockmask32(const uint32_t *pin, __m512i *compressed) { + /* we are going to touch 32 512-bit words */ + __m512i w0, w1; + const __m512i *in = (const __m512i *)pin; + w0 = _mm512_loadu_si512(in + 0); + _mm512_storeu_si512(compressed + 0, w0); + w1 = _mm512_loadu_si512(in + 1); + _mm512_storeu_si512(compressed + 1, w1); + w0 = _mm512_loadu_si512(in + 2); + _mm512_storeu_si512(compressed + 2, w0); + w1 = _mm512_loadu_si512(in + 3); + _mm512_storeu_si512(compressed + 3, w1); + w0 = _mm512_loadu_si512(in + 4); + _mm512_storeu_si512(compressed + 4, w0); + w1 = _mm512_loadu_si512(in + 5); + _mm512_storeu_si512(compressed + 5, w1); + w0 = _mm512_loadu_si512(in + 6); + _mm512_storeu_si512(compressed + 6, w0); + w1 = _mm512_loadu_si512(in + 7); + _mm512_storeu_si512(compressed + 7, w1); + w0 = _mm512_loadu_si512(in + 8); + _mm512_storeu_si512(compressed + 8, w0); + w1 = _mm512_loadu_si512(in + 9); + _mm512_storeu_si512(compressed + 9, w1); + w0 = _mm512_loadu_si512(in + 10); + _mm512_storeu_si512(compressed + 10, w0); + w1 = _mm512_loadu_si512(in + 11); + _mm512_storeu_si512(compressed + 
11, w1); + w0 = _mm512_loadu_si512(in + 12); + _mm512_storeu_si512(compressed + 12, w0); + w1 = _mm512_loadu_si512(in + 13); + _mm512_storeu_si512(compressed + 13, w1); + w0 = _mm512_loadu_si512(in + 14); + _mm512_storeu_si512(compressed + 14, w0); + w1 = _mm512_loadu_si512(in + 15); + _mm512_storeu_si512(compressed + 15, w1); + w0 = _mm512_loadu_si512(in + 16); + _mm512_storeu_si512(compressed + 16, w0); + w1 = _mm512_loadu_si512(in + 17); + _mm512_storeu_si512(compressed + 17, w1); + w0 = _mm512_loadu_si512(in + 18); + _mm512_storeu_si512(compressed + 18, w0); + w1 = _mm512_loadu_si512(in + 19); + _mm512_storeu_si512(compressed + 19, w1); + w0 = _mm512_loadu_si512(in + 20); + _mm512_storeu_si512(compressed + 20, w0); + w1 = _mm512_loadu_si512(in + 21); + _mm512_storeu_si512(compressed + 21, w1); + w0 = _mm512_loadu_si512(in + 22); + _mm512_storeu_si512(compressed + 22, w0); + w1 = _mm512_loadu_si512(in + 23); + _mm512_storeu_si512(compressed + 23, w1); + w0 = _mm512_loadu_si512(in + 24); + _mm512_storeu_si512(compressed + 24, w0); + w1 = _mm512_loadu_si512(in + 25); + _mm512_storeu_si512(compressed + 25, w1); + w0 = _mm512_loadu_si512(in + 26); + _mm512_storeu_si512(compressed + 26, w0); + w1 = _mm512_loadu_si512(in + 27); + _mm512_storeu_si512(compressed + 27, w1); + w0 = _mm512_loadu_si512(in + 28); + _mm512_storeu_si512(compressed + 28, w0); + w1 = _mm512_loadu_si512(in + 29); + _mm512_storeu_si512(compressed + 29, w1); + w0 = _mm512_loadu_si512(in + 30); + _mm512_storeu_si512(compressed + 30, w0); + w1 = _mm512_loadu_si512(in + 31); + _mm512_storeu_si512(compressed + 31, w1); +} + +static void avx512unpackblock0(const __m512i *compressed, uint32_t *pout) { /* unpacks a 0-bit block: every output value is zero */ + (void)compressed; /* unused: a 0-bit block carries no payload to read */ + memset(pout, 0, 512 * sizeof(uint32_t)); /* memset counts bytes; clear all 512 outputs, as the other unpackers write out + 0 .. out + 31 */ +} + +/* we packed 512 1-bit values, touching 1 512-bit words, using 32 bytes */ +static void avx512unpackblock1(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 1 512-bit word */ + __m512i w0; + __m512i *out = (__m512i *)pout; + const 
__m512i mask = _mm512_set1_epi32(1); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 1))); + _mm512_storeu_si512(out + 2, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 2))); + _mm512_storeu_si512(out + 3, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 3))); + _mm512_storeu_si512(out + 4, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + _mm512_storeu_si512(out + 5, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 5))); + _mm512_storeu_si512(out + 6, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 6))); + _mm512_storeu_si512(out + 7, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 7))); + _mm512_storeu_si512(out + 8, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + _mm512_storeu_si512(out + 9, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 9))); + _mm512_storeu_si512(out + 10, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 10))); + _mm512_storeu_si512(out + 11, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 11))); + _mm512_storeu_si512(out + 12, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 12))); + _mm512_storeu_si512(out + 13, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 13))); + _mm512_storeu_si512(out + 14, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 14))); + _mm512_storeu_si512(out + 15, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 15))); + _mm512_storeu_si512(out + 16, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + _mm512_storeu_si512(out + 17, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 17))); + _mm512_storeu_si512(out + 18, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 18))); + _mm512_storeu_si512(out + 19, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 19))); + _mm512_storeu_si512(out + 20, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 20))); + _mm512_storeu_si512(out + 21, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 21))); + _mm512_storeu_si512(out + 22, + _mm512_and_si512(mask, 
_mm512_srli_epi32(w0, 22))); + _mm512_storeu_si512(out + 23, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 23))); + _mm512_storeu_si512(out + 24, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 24))); + _mm512_storeu_si512(out + 25, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 25))); + _mm512_storeu_si512(out + 26, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 26))); + _mm512_storeu_si512(out + 27, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 27))); + _mm512_storeu_si512(out + 28, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 28))); + _mm512_storeu_si512(out + 29, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 29))); + _mm512_storeu_si512(out + 30, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 30))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 31)); +} + +/* we packed 512 2-bit values, touching 2 512-bit words, using 64 bytes */ +static void avx512unpackblock2(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 2 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(3); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 2))); + _mm512_storeu_si512(out + 2, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + _mm512_storeu_si512(out + 3, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 6))); + _mm512_storeu_si512(out + 4, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + _mm512_storeu_si512(out + 5, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 10))); + _mm512_storeu_si512(out + 6, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 12))); + _mm512_storeu_si512(out + 7, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 14))); + _mm512_storeu_si512(out + 8, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + _mm512_storeu_si512(out + 9, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 18))); + _mm512_storeu_si512(out + 10, + _mm512_and_si512(mask, 
_mm512_srli_epi32(w0, 20))); + _mm512_storeu_si512(out + 11, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 22))); + _mm512_storeu_si512(out + 12, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 24))); + _mm512_storeu_si512(out + 13, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 26))); + _mm512_storeu_si512(out + 14, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 28))); + _mm512_storeu_si512(out + 15, _mm512_srli_epi32(w0, 30)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512(out + 16, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 17, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 2))); + _mm512_storeu_si512(out + 18, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + _mm512_storeu_si512(out + 19, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 6))); + _mm512_storeu_si512(out + 20, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + _mm512_storeu_si512(out + 21, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 10))); + _mm512_storeu_si512(out + 22, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 12))); + _mm512_storeu_si512(out + 23, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 14))); + _mm512_storeu_si512(out + 24, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); + _mm512_storeu_si512(out + 25, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 18))); + _mm512_storeu_si512(out + 26, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 20))); + _mm512_storeu_si512(out + 27, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 22))); + _mm512_storeu_si512(out + 28, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 24))); + _mm512_storeu_si512(out + 29, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 26))); + _mm512_storeu_si512(out + 30, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 28))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w1, 30)); +} + +/* we packed 512 3-bit values, touching 3 512-bit words, using 96 bytes */ +static void avx512unpackblock3(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 3 512-bit words 
*/ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(7); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 3))); + _mm512_storeu_si512(out + 2, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 6))); + _mm512_storeu_si512(out + 3, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 9))); + _mm512_storeu_si512(out + 4, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 12))); + _mm512_storeu_si512(out + 5, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 15))); + _mm512_storeu_si512(out + 6, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 18))); + _mm512_storeu_si512(out + 7, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 21))); + _mm512_storeu_si512(out + 8, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 24))); + _mm512_storeu_si512(out + 9, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 27))); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 10, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 30), + _mm512_slli_epi32(w1, 2)))); + _mm512_storeu_si512(out + 11, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 1))); + _mm512_storeu_si512(out + 12, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + _mm512_storeu_si512(out + 13, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 7))); + _mm512_storeu_si512(out + 14, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 10))); + _mm512_storeu_si512(out + 15, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 13))); + _mm512_storeu_si512(out + 16, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); + _mm512_storeu_si512(out + 17, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 19))); + _mm512_storeu_si512(out + 18, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 22))); + _mm512_storeu_si512(out + 19, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 25))); + _mm512_storeu_si512(out + 20, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 28))); + w0 
= _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 21, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 31), + _mm512_slli_epi32(w0, 1)))); + _mm512_storeu_si512(out + 22, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 2))); + _mm512_storeu_si512(out + 23, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 5))); + _mm512_storeu_si512(out + 24, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + _mm512_storeu_si512(out + 25, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 11))); + _mm512_storeu_si512(out + 26, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 14))); + _mm512_storeu_si512(out + 27, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 17))); + _mm512_storeu_si512(out + 28, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 20))); + _mm512_storeu_si512(out + 29, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 23))); + _mm512_storeu_si512(out + 30, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 26))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 29)); +} + +/* we packed 512 4-bit values, touching 4 512-bit words, using 128 bytes */ +static void avx512unpackblock4(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 4 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(15); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + _mm512_storeu_si512(out + 2, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + _mm512_storeu_si512(out + 3, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 12))); + _mm512_storeu_si512(out + 4, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + _mm512_storeu_si512(out + 5, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 20))); + _mm512_storeu_si512(out + 6, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 24))); + _mm512_storeu_si512(out + 7, _mm512_srli_epi32(w0, 28)); + w1 = 
_mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512(out + 8, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 9, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + _mm512_storeu_si512(out + 10, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + _mm512_storeu_si512(out + 11, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 12))); + _mm512_storeu_si512(out + 12, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); + _mm512_storeu_si512(out + 13, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 20))); + _mm512_storeu_si512(out + 14, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 24))); + _mm512_storeu_si512(out + 15, _mm512_srli_epi32(w1, 28)); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512(out + 16, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 17, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + _mm512_storeu_si512(out + 18, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + _mm512_storeu_si512(out + 19, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 12))); + _mm512_storeu_si512(out + 20, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + _mm512_storeu_si512(out + 21, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 20))); + _mm512_storeu_si512(out + 22, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 24))); + _mm512_storeu_si512(out + 23, _mm512_srli_epi32(w0, 28)); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512(out + 24, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 25, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + _mm512_storeu_si512(out + 26, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + _mm512_storeu_si512(out + 27, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 12))); + _mm512_storeu_si512(out + 28, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); + _mm512_storeu_si512(out + 29, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 20))); + _mm512_storeu_si512(out + 30, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 24))); + _mm512_storeu_si512(out + 31, 
_mm512_srli_epi32(w1, 28)); +} + +/* we packed 512 5-bit values, touching 5 512-bit words, using 160 bytes */ +static void avx512unpackblock5(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 5 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(31); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 5))); + _mm512_storeu_si512(out + 2, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 10))); + _mm512_storeu_si512(out + 3, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 15))); + _mm512_storeu_si512(out + 4, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 20))); + _mm512_storeu_si512(out + 5, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 25))); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 6, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 30), + _mm512_slli_epi32(w1, 2)))); + _mm512_storeu_si512(out + 7, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 3))); + _mm512_storeu_si512(out + 8, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + _mm512_storeu_si512(out + 9, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 13))); + _mm512_storeu_si512(out + 10, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 18))); + _mm512_storeu_si512(out + 11, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 23))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 12, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + _mm512_storeu_si512(out + 13, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 1))); + _mm512_storeu_si512(out + 14, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 6))); + _mm512_storeu_si512(out + 15, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 11))); + _mm512_storeu_si512(out + 16, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + 
_mm512_storeu_si512(out + 17, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 21))); + _mm512_storeu_si512(out + 18, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 26))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 19, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 31), + _mm512_slli_epi32(w1, 1)))); + _mm512_storeu_si512(out + 20, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + _mm512_storeu_si512(out + 21, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 9))); + _mm512_storeu_si512(out + 22, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 14))); + _mm512_storeu_si512(out + 23, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 19))); + _mm512_storeu_si512(out + 24, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 24))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 25, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 29), + _mm512_slli_epi32(w0, 3)))); + _mm512_storeu_si512(out + 26, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 2))); + _mm512_storeu_si512(out + 27, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 7))); + _mm512_storeu_si512(out + 28, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 12))); + _mm512_storeu_si512(out + 29, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 17))); + _mm512_storeu_si512(out + 30, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 22))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 27)); +} + +/* we packed 512 6-bit values, touching 6 512-bit words, using 192 bytes */ +static void avx512unpackblock6(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 6 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(63); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 6))); + _mm512_storeu_si512(out + 2, + _mm512_and_si512(mask, 
_mm512_srli_epi32(w0, 12))); + _mm512_storeu_si512(out + 3, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 18))); + _mm512_storeu_si512(out + 4, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 24))); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 5, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 30), + _mm512_slli_epi32(w1, 2)))); + _mm512_storeu_si512(out + 6, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + _mm512_storeu_si512(out + 7, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 10))); + _mm512_storeu_si512(out + 8, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); + _mm512_storeu_si512(out + 9, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 22))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 10, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + _mm512_storeu_si512(out + 11, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 2))); + _mm512_storeu_si512(out + 12, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + _mm512_storeu_si512(out + 13, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 14))); + _mm512_storeu_si512(out + 14, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 20))); + _mm512_storeu_si512(out + 15, _mm512_srli_epi32(w0, 26)); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512(out + 16, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 17, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 6))); + _mm512_storeu_si512(out + 18, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 12))); + _mm512_storeu_si512(out + 19, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 18))); + _mm512_storeu_si512(out + 20, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 24))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 21, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 30), + _mm512_slli_epi32(w0, 2)))); + _mm512_storeu_si512(out + 22, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 
4))); + _mm512_storeu_si512(out + 23, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 10))); + _mm512_storeu_si512(out + 24, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + _mm512_storeu_si512(out + 25, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 22))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 26, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + _mm512_storeu_si512(out + 27, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 2))); + _mm512_storeu_si512(out + 28, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + _mm512_storeu_si512(out + 29, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 14))); + _mm512_storeu_si512(out + 30, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 20))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w1, 26)); +} + +/* we packed 512 7-bit values, touching 7 512-bit words, using 224 bytes */ +static void avx512unpackblock7(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 7 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(127); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 7))); + _mm512_storeu_si512(out + 2, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 14))); + _mm512_storeu_si512(out + 3, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 21))); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 4, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + _mm512_storeu_si512(out + 5, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 3))); + _mm512_storeu_si512(out + 6, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 10))); + _mm512_storeu_si512(out + 7, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 17))); + _mm512_storeu_si512(out + 8, + _mm512_and_si512(mask, 
_mm512_srli_epi32(w1, 24))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 9, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 31), + _mm512_slli_epi32(w0, 1)))); + _mm512_storeu_si512(out + 10, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 6))); + _mm512_storeu_si512(out + 11, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 13))); + _mm512_storeu_si512(out + 12, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 20))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 13, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 27), + _mm512_slli_epi32(w1, 5)))); + _mm512_storeu_si512(out + 14, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 2))); + _mm512_storeu_si512(out + 15, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 9))); + _mm512_storeu_si512(out + 16, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); + _mm512_storeu_si512(out + 17, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 23))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 18, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 30), + _mm512_slli_epi32(w0, 2)))); + _mm512_storeu_si512(out + 19, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 5))); + _mm512_storeu_si512(out + 20, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 12))); + _mm512_storeu_si512(out + 21, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 19))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 22, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 26), + _mm512_slli_epi32(w1, 6)))); + _mm512_storeu_si512(out + 23, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 1))); + _mm512_storeu_si512(out + 24, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + _mm512_storeu_si512(out + 25, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 15))); + _mm512_storeu_si512(out + 26, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 22))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( 
+ out + 27, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 29), + _mm512_slli_epi32(w0, 3)))); + _mm512_storeu_si512(out + 28, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + _mm512_storeu_si512(out + 29, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 11))); + _mm512_storeu_si512(out + 30, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 18))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 25)); +} + +/* we packed 512 8-bit values, touching 8 512-bit words, using 256 bytes */ +static void avx512unpackblock8(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 8 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(255); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + _mm512_storeu_si512(out + 2, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + _mm512_storeu_si512(out + 3, _mm512_srli_epi32(w0, 24)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512(out + 4, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 5, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + _mm512_storeu_si512(out + 6, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); + _mm512_storeu_si512(out + 7, _mm512_srli_epi32(w1, 24)); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512(out + 8, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 9, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + _mm512_storeu_si512(out + 10, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + _mm512_storeu_si512(out + 11, _mm512_srli_epi32(w0, 24)); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512(out + 12, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 13, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + _mm512_storeu_si512(out + 14, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); 
+ _mm512_storeu_si512(out + 15, _mm512_srli_epi32(w1, 24)); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512(out + 16, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 17, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + _mm512_storeu_si512(out + 18, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + _mm512_storeu_si512(out + 19, _mm512_srli_epi32(w0, 24)); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512(out + 20, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 21, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + _mm512_storeu_si512(out + 22, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); + _mm512_storeu_si512(out + 23, _mm512_srli_epi32(w1, 24)); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512(out + 24, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 25, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + _mm512_storeu_si512(out + 26, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + _mm512_storeu_si512(out + 27, _mm512_srli_epi32(w0, 24)); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512(out + 28, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 29, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + _mm512_storeu_si512(out + 30, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w1, 24)); +} + +/* we packed 512 9-bit values, touching 9 512-bit words, using 288 bytes */ +static void avx512unpackblock9(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 9 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(511); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 9))); + _mm512_storeu_si512(out + 2, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 18))); + w1 = 
_mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 3, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 27), + _mm512_slli_epi32(w1, 5)))); + _mm512_storeu_si512(out + 4, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + _mm512_storeu_si512(out + 5, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 13))); + _mm512_storeu_si512(out + 6, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 22))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 7, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 31), + _mm512_slli_epi32(w0, 1)))); + _mm512_storeu_si512(out + 8, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + _mm512_storeu_si512(out + 9, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 17))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 10, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 26), + _mm512_slli_epi32(w1, 6)))); + _mm512_storeu_si512(out + 11, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 3))); + _mm512_storeu_si512(out + 12, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 12))); + _mm512_storeu_si512(out + 13, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 21))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 14, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 30), + _mm512_slli_epi32(w0, 2)))); + _mm512_storeu_si512(out + 15, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 7))); + _mm512_storeu_si512(out + 16, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 17, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 25), + _mm512_slli_epi32(w1, 7)))); + _mm512_storeu_si512(out + 18, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 2))); + _mm512_storeu_si512(out + 19, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 11))); + _mm512_storeu_si512(out + 20, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 20))); + w0 = 
_mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 21, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 29), + _mm512_slli_epi32(w0, 3)))); + _mm512_storeu_si512(out + 22, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 6))); + _mm512_storeu_si512(out + 23, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 15))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 24, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + _mm512_storeu_si512(out + 25, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 1))); + _mm512_storeu_si512(out + 26, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 10))); + _mm512_storeu_si512(out + 27, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 19))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 28, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + _mm512_storeu_si512(out + 29, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 5))); + _mm512_storeu_si512(out + 30, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 14))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 23)); +} + +/* we packed 512 10-bit values, touching 10 512-bit words, using 640 bytes (10 * 64) */ +static void avx512unpackblock10(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 10 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(1023); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 10))); + _mm512_storeu_si512(out + 2, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 20))); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 3, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 30), + _mm512_slli_epi32(w1, 2)))); + _mm512_storeu_si512(out + 4, + _mm512_and_si512(mask,
_mm512_srli_epi32(w1, 8))); + _mm512_storeu_si512(out + 5, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 18))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 6, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + _mm512_storeu_si512(out + 7, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 6))); + _mm512_storeu_si512(out + 8, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 9, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 26), + _mm512_slli_epi32(w1, 6)))); + _mm512_storeu_si512(out + 10, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + _mm512_storeu_si512(out + 11, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 14))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 12, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + _mm512_storeu_si512(out + 13, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 2))); + _mm512_storeu_si512(out + 14, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 12))); + _mm512_storeu_si512(out + 15, _mm512_srli_epi32(w0, 22)); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512(out + 16, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 17, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 10))); + _mm512_storeu_si512(out + 18, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 20))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 19, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 30), + _mm512_slli_epi32(w0, 2)))); + _mm512_storeu_si512(out + 20, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + _mm512_storeu_si512(out + 21, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 18))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 22, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + 
_mm512_slli_epi32(w1, 4)))); + _mm512_storeu_si512(out + 23, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 6))); + _mm512_storeu_si512(out + 24, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 25, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 26), + _mm512_slli_epi32(w0, 6)))); + _mm512_storeu_si512(out + 26, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + _mm512_storeu_si512(out + 27, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 14))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 28, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + _mm512_storeu_si512(out + 29, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 2))); + _mm512_storeu_si512(out + 30, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 12))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w1, 22)); +} + +/* we packed 512 11-bit values, touching 11 512-bit words, using 704 bytes (11 * 64) */ +static void avx512unpackblock11(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 11 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(2047); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 11))); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 2, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 22), + _mm512_slli_epi32(w1, 10)))); + _mm512_storeu_si512(out + 3, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 1))); + _mm512_storeu_si512(out + 4, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 12))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 5, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 23), + _mm512_slli_epi32(w0, 9)))); + _mm512_storeu_si512(out +
6, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 2))); + _mm512_storeu_si512(out + 7, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 13))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 8, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + _mm512_storeu_si512(out + 9, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 3))); + _mm512_storeu_si512(out + 10, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 14))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 11, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 25), + _mm512_slli_epi32(w0, 7)))); + _mm512_storeu_si512(out + 12, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + _mm512_storeu_si512(out + 13, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 15))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 14, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 26), + _mm512_slli_epi32(w1, 6)))); + _mm512_storeu_si512(out + 15, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 5))); + _mm512_storeu_si512(out + 16, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 17, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 27), + _mm512_slli_epi32(w0, 5)))); + _mm512_storeu_si512(out + 18, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 6))); + _mm512_storeu_si512(out + 19, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 17))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 20, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + _mm512_storeu_si512(out + 21, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 7))); + _mm512_storeu_si512(out + 22, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 18))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 23, + _mm512_and_si512(mask, 
_mm512_or_si512(_mm512_srli_epi32(w1, 29), + _mm512_slli_epi32(w0, 3)))); + _mm512_storeu_si512(out + 24, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + _mm512_storeu_si512(out + 25, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 19))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 26, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 30), + _mm512_slli_epi32(w1, 2)))); + _mm512_storeu_si512(out + 27, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 9))); + _mm512_storeu_si512(out + 28, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 20))); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 29, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 31), + _mm512_slli_epi32(w0, 1)))); + _mm512_storeu_si512(out + 30, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 10))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 21)); +} + +/* we packed 512 12-bit values, touching 12 512-bit words, using 768 bytes (12 * 64) */ +static void avx512unpackblock12(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 12 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(4095); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 12))); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 2, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + _mm512_storeu_si512(out + 3, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + _mm512_storeu_si512(out + 4, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 5, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + _mm512_storeu_si512(out + 6, + _mm512_and_si512(mask,
_mm512_srli_epi32(w0, 8))); + _mm512_storeu_si512(out + 7, _mm512_srli_epi32(w0, 20)); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512(out + 8, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 9, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 12))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 10, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + _mm512_storeu_si512(out + 11, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + _mm512_storeu_si512(out + 12, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 13, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + _mm512_storeu_si512(out + 14, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + _mm512_storeu_si512(out + 15, _mm512_srli_epi32(w1, 20)); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512(out + 16, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 17, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 12))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 18, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + _mm512_storeu_si512(out + 19, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + _mm512_storeu_si512(out + 20, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 21, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + _mm512_storeu_si512(out + 22, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + _mm512_storeu_si512(out + 23, _mm512_srli_epi32(w0, 20)); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512(out + 24, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 25, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 12))); + w0 
= _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 26, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + _mm512_storeu_si512(out + 27, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + _mm512_storeu_si512(out + 28, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 29, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + _mm512_storeu_si512(out + 30, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w1, 20)); +} + +/* we packed 512 13-bit values, touching 13 512-bit words, using 832 bytes (13 * 64) */ +static void avx512unpackblock13(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 13 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(8191); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 13))); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 2, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 26), + _mm512_slli_epi32(w1, 6)))); + _mm512_storeu_si512(out + 3, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 7))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 4, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 20), + _mm512_slli_epi32(w0, 12)))); + _mm512_storeu_si512(out + 5, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 1))); + _mm512_storeu_si512(out + 6, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 14))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 7, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 27), + _mm512_slli_epi32(w1, 5)))); + _mm512_storeu_si512(out + 8, + _mm512_and_si512(mask,
_mm512_srli_epi32(w1, 8))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 9, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 21), + _mm512_slli_epi32(w0, 11)))); + _mm512_storeu_si512(out + 10, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 2))); + _mm512_storeu_si512(out + 11, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 15))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 12, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + _mm512_storeu_si512(out + 13, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 9))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 14, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 22), + _mm512_slli_epi32(w0, 10)))); + _mm512_storeu_si512(out + 15, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 3))); + _mm512_storeu_si512(out + 16, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 17, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 29), + _mm512_slli_epi32(w1, 3)))); + _mm512_storeu_si512(out + 18, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 10))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 19, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 23), + _mm512_slli_epi32(w0, 9)))); + _mm512_storeu_si512(out + 20, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + _mm512_storeu_si512(out + 21, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 17))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 22, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 30), + _mm512_slli_epi32(w1, 2)))); + _mm512_storeu_si512(out + 23, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 11))); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 24, + _mm512_and_si512(mask, 
_mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + _mm512_storeu_si512(out + 25, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 5))); + _mm512_storeu_si512(out + 26, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 18))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 27, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 31), + _mm512_slli_epi32(w1, 1)))); + _mm512_storeu_si512(out + 28, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 12))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 29, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 25), + _mm512_slli_epi32(w0, 7)))); + _mm512_storeu_si512(out + 30, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 6))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 19)); +} + +/* we packed 512 14-bit values, touching 14 512-bit words, using 896 bytes (14 * 64) */ +static void avx512unpackblock14(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 14 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(16383); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 14))); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 2, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + _mm512_storeu_si512(out + 3, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 10))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 4, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + _mm512_storeu_si512(out + 5, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 6))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 6, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 20), +
_mm512_slli_epi32(w1, 12)))); + _mm512_storeu_si512(out + 7, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 2))); + _mm512_storeu_si512(out + 8, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 9, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 30), + _mm512_slli_epi32(w0, 2)))); + _mm512_storeu_si512(out + 10, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 12))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 11, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 26), + _mm512_slli_epi32(w1, 6)))); + _mm512_storeu_si512(out + 12, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 13, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 22), + _mm512_slli_epi32(w0, 10)))); + _mm512_storeu_si512(out + 14, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + _mm512_storeu_si512(out + 15, _mm512_srli_epi32(w0, 18)); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512(out + 16, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 17, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 14))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 18, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + _mm512_storeu_si512(out + 19, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 10))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 20, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + _mm512_storeu_si512(out + 21, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 6))); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 22, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 20), + _mm512_slli_epi32(w0, 12)))); + _mm512_storeu_si512(out + 23, + _mm512_and_si512(mask, 
_mm512_srli_epi32(w0, 2))); + _mm512_storeu_si512(out + 24, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 16))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 25, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 30), + _mm512_slli_epi32(w1, 2)))); + _mm512_storeu_si512(out + 26, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 12))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 27, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 26), + _mm512_slli_epi32(w0, 6)))); + _mm512_storeu_si512(out + 28, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 29, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 22), + _mm512_slli_epi32(w1, 10)))); + _mm512_storeu_si512(out + 30, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w1, 18)); +} + +/* we packed 512 15-bit values, touching 15 512-bit words, using 960 bytes (15 * 64) */ +static void avx512unpackblock15(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 15 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(32767); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 15))); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 2, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 30), + _mm512_slli_epi32(w1, 2)))); + _mm512_storeu_si512(out + 3, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 13))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 4, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + _mm512_storeu_si512(out + 5, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 11))); + w1 =
_mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 6, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 26), + _mm512_slli_epi32(w1, 6)))); + _mm512_storeu_si512(out + 7, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 9))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 8, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + _mm512_storeu_si512(out + 9, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 7))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 10, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 22), + _mm512_slli_epi32(w1, 10)))); + _mm512_storeu_si512(out + 11, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 5))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 12, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 20), + _mm512_slli_epi32(w0, 12)))); + _mm512_storeu_si512(out + 13, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 3))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 14, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 18), + _mm512_slli_epi32(w1, 14)))); + _mm512_storeu_si512(out + 15, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 1))); + _mm512_storeu_si512(out + 16, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 16))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 17, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 31), + _mm512_slli_epi32(w0, 1)))); + _mm512_storeu_si512(out + 18, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 14))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 19, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 29), + _mm512_slli_epi32(w1, 3)))); + _mm512_storeu_si512(out + 20, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 12))); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 21, + 
_mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 27), + _mm512_slli_epi32(w0, 5)))); + _mm512_storeu_si512(out + 22, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 10))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 23, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 25), + _mm512_slli_epi32(w1, 7)))); + _mm512_storeu_si512(out + 24, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 25, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 23), + _mm512_slli_epi32(w0, 9)))); + _mm512_storeu_si512(out + 26, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 6))); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 27, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 21), + _mm512_slli_epi32(w1, 11)))); + _mm512_storeu_si512(out + 28, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + w0 = _mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512( + out + 29, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 19), + _mm512_slli_epi32(w0, 13)))); + _mm512_storeu_si512(out + 30, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 2))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 17)); +} + +/* we packed 512 16-bit values, touching 16 512-bit words, using 1024 bytes (16 * 64) */ +static void avx512unpackblock16(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 16 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(65535); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 1, _mm512_srli_epi32(w0, 16)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512(out + 2, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 3, _mm512_srli_epi32(w1, 16)); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512(out +
4, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 5, _mm512_srli_epi32(w0, 16)); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512(out + 6, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 7, _mm512_srli_epi32(w1, 16)); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512(out + 8, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 9, _mm512_srli_epi32(w0, 16)); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512(out + 10, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 11, _mm512_srli_epi32(w1, 16)); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512(out + 12, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 13, _mm512_srli_epi32(w0, 16)); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512(out + 14, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 15, _mm512_srli_epi32(w1, 16)); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512(out + 16, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 17, _mm512_srli_epi32(w0, 16)); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512(out + 18, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 19, _mm512_srli_epi32(w1, 16)); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512(out + 20, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 21, _mm512_srli_epi32(w0, 16)); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512(out + 22, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 23, _mm512_srli_epi32(w1, 16)); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512(out + 24, _mm512_and_si512(mask, w0)); + _mm512_storeu_si512(out + 25, _mm512_srli_epi32(w0, 16)); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512(out + 26, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 27, _mm512_srli_epi32(w1, 16)); + w0 = _mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512(out + 28, _mm512_and_si512(mask, w0)); 
+ _mm512_storeu_si512(out + 29, _mm512_srli_epi32(w0, 16)); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512(out + 30, _mm512_and_si512(mask, w1)); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w1, 16)); +} + +/* we packed 512 17-bit values, touching 17 512-bit words, using 1088 bytes (17 * 64) */ +static void avx512unpackblock17(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 17 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(131071); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 1, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 17), + _mm512_slli_epi32(w1, 15)))); + _mm512_storeu_si512(out + 2, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 2))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 3, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 19), + _mm512_slli_epi32(w0, 13)))); + _mm512_storeu_si512(out + 4, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 5, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 21), + _mm512_slli_epi32(w1, 11)))); + _mm512_storeu_si512(out + 6, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 6))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 7, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 23), + _mm512_slli_epi32(w0, 9)))); + _mm512_storeu_si512(out + 8, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 9, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 25), + _mm512_slli_epi32(w1, 7)))); + _mm512_storeu_si512(out + 10, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 10))); + w0 = _mm512_loadu_si512(compressed + 6); +
_mm512_storeu_si512( + out + 11, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 27), + _mm512_slli_epi32(w0, 5)))); + _mm512_storeu_si512(out + 12, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 12))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 13, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 29), + _mm512_slli_epi32(w1, 3)))); + _mm512_storeu_si512(out + 14, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 14))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 15, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 31), + _mm512_slli_epi32(w0, 1)))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 16, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + _mm512_storeu_si512(out + 17, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 1))); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 18, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 18), + _mm512_slli_epi32(w0, 14)))); + _mm512_storeu_si512(out + 19, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 3))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 20, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 20), + _mm512_slli_epi32(w1, 12)))); + _mm512_storeu_si512(out + 21, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 5))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 22, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 22), + _mm512_slli_epi32(w0, 10)))); + _mm512_storeu_si512(out + 23, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 7))); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 24, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + _mm512_storeu_si512(out + 25, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 9))); + w0 = 
_mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512( + out + 26, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 26), + _mm512_slli_epi32(w0, 6)))); + _mm512_storeu_si512(out + 27, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 11))); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512( + out + 28, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + _mm512_storeu_si512(out + 29, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 13))); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512( + out + 30, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 30), + _mm512_slli_epi32(w0, 2)))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 15)); +} + +/* we packed 512 18-bit values, touching 18 512-bit words, using 1152 bytes (18 * 64) */ +static void avx512unpackblock18(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 18 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(262143); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 1, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 18), + _mm512_slli_epi32(w1, 14)))); + _mm512_storeu_si512(out + 2, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 3, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 22), + _mm512_slli_epi32(w0, 10)))); + _mm512_storeu_si512(out + 4, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 5, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 26), + _mm512_slli_epi32(w1, 6)))); + _mm512_storeu_si512(out + 6, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 12))); + w0 = _mm512_loadu_si512(compressed + 4); +
_mm512_storeu_si512( + out + 7, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 30), + _mm512_slli_epi32(w0, 2)))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 8, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + _mm512_storeu_si512(out + 9, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 2))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 10, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 20), + _mm512_slli_epi32(w0, 12)))); + _mm512_storeu_si512(out + 11, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 6))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 12, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + _mm512_storeu_si512(out + 13, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 10))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 14, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + _mm512_storeu_si512(out + 15, _mm512_srli_epi32(w0, 14)); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512(out + 16, _mm512_and_si512(mask, w1)); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 17, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 18), + _mm512_slli_epi32(w0, 14)))); + _mm512_storeu_si512(out + 18, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 19, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 22), + _mm512_slli_epi32(w1, 10)))); + _mm512_storeu_si512(out + 20, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 21, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 26), + _mm512_slli_epi32(w0, 6)))); + _mm512_storeu_si512(out + 22, + 
_mm512_and_si512(mask, _mm512_srli_epi32(w0, 12))); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 23, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 30), + _mm512_slli_epi32(w1, 2)))); + w0 = _mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512( + out + 24, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 16), + _mm512_slli_epi32(w0, 16)))); + _mm512_storeu_si512(out + 25, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 2))); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512( + out + 26, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 20), + _mm512_slli_epi32(w1, 12)))); + _mm512_storeu_si512(out + 27, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 6))); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512( + out + 28, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + _mm512_storeu_si512(out + 29, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 10))); + w1 = _mm512_loadu_si512(compressed + 17); + _mm512_storeu_si512( + out + 30, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w1, 14)); +} + +/* Unpacks 512 19-bit values from 19 512-bit words (19 * 64 = 1216 bytes) at `compressed` into 512 zero-extended 32-bit integers (2048 bytes) at `pout`, using unaligned loads and stores. Each of the 16 32-bit lanes is an independent bit stream: lane j of output word k holds the k-th value of lane j. A store whose value ends exactly on a 32-bit boundary skips the mask because the logical right shift already zero-fills the upper bits. */ +static void avx512unpackblock19(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 19 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(524287); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 1, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 19), + _mm512_slli_epi32(w1, 13)))); + _mm512_storeu_si512(out + 2, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 6))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( +
out + 3, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 25), + _mm512_slli_epi32(w0, 7)))); + _mm512_storeu_si512(out + 4, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 12))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 5, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 31), + _mm512_slli_epi32(w1, 1)))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 6, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 18), + _mm512_slli_epi32(w0, 14)))); + _mm512_storeu_si512(out + 7, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 5))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 8, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + _mm512_storeu_si512(out + 9, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 11))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 10, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 30), + _mm512_slli_epi32(w0, 2)))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 11, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 17), + _mm512_slli_epi32(w1, 15)))); + _mm512_storeu_si512(out + 12, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 13, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 23), + _mm512_slli_epi32(w0, 9)))); + _mm512_storeu_si512(out + 14, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 10))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 15, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 29), + _mm512_slli_epi32(w1, 3)))); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 16, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 16), + _mm512_slli_epi32(w0, 16)))); + _mm512_storeu_si512(out + 17, +
_mm512_and_si512(mask, _mm512_srli_epi32(w0, 3))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 18, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 22), + _mm512_slli_epi32(w1, 10)))); + _mm512_storeu_si512(out + 19, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 9))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 20, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 21, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 15), + _mm512_slli_epi32(w1, 17)))); + _mm512_storeu_si512(out + 22, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 2))); + w0 = _mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512( + out + 23, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 21), + _mm512_slli_epi32(w0, 11)))); + _mm512_storeu_si512(out + 24, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512( + out + 25, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 27), + _mm512_slli_epi32(w1, 5)))); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512( + out + 26, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 14), + _mm512_slli_epi32(w0, 18)))); + _mm512_storeu_si512(out + 27, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 1))); + w1 = _mm512_loadu_si512(compressed + 17); + _mm512_storeu_si512( + out + 28, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 20), + _mm512_slli_epi32(w1, 12)))); + _mm512_storeu_si512(out + 29, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 7))); + w0 = _mm512_loadu_si512(compressed + 18); + _mm512_storeu_si512( + out + 30, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 26), + _mm512_slli_epi32(w0, 6)))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 13)); +} + +/* Unpacks 512 20-bit
values from 20 512-bit words (20 * 64 = 1280 bytes) at `compressed` into 512 zero-extended 32-bit integers (2048 bytes) at `pout`, using unaligned loads and stores. Each of the 16 32-bit lanes is an independent bit stream: lane j of output word k holds the k-th value of lane j. A store whose value ends exactly on a 32-bit boundary skips the mask because the logical right shift already zero-fills the upper bits. */ +static void avx512unpackblock20(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 20 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(1048575); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 1, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 20), + _mm512_slli_epi32(w1, 12)))); + _mm512_storeu_si512(out + 2, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 3, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 4, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + _mm512_storeu_si512(out + 5, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 6, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + _mm512_storeu_si512(out + 7, _mm512_srli_epi32(w0, 12)); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512(out + 8, _mm512_and_si512(mask, w1)); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 9, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 20), + _mm512_slli_epi32(w0, 12)))); + _mm512_storeu_si512(out + 10, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 11, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 12, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1,
16), + _mm512_slli_epi32(w0, 16)))); + _mm512_storeu_si512(out + 13, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 14, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + _mm512_storeu_si512(out + 15, _mm512_srli_epi32(w1, 12)); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512(out + 16, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 17, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 20), + _mm512_slli_epi32(w1, 12)))); + _mm512_storeu_si512(out + 18, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 19, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 20, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + _mm512_storeu_si512(out + 21, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + w0 = _mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512( + out + 22, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + _mm512_storeu_si512(out + 23, _mm512_srli_epi32(w0, 12)); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512(out + 24, _mm512_and_si512(mask, w1)); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512( + out + 25, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 20), + _mm512_slli_epi32(w0, 12)))); + _mm512_storeu_si512(out + 26, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + w1 = _mm512_loadu_si512(compressed + 17); + _mm512_storeu_si512( + out + 27, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + w0 = _mm512_loadu_si512(compressed + 18);
+ _mm512_storeu_si512( + out + 28, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 16), + _mm512_slli_epi32(w0, 16)))); + _mm512_storeu_si512(out + 29, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + w1 = _mm512_loadu_si512(compressed + 19); + _mm512_storeu_si512( + out + 30, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w1, 12)); +} + +/* Unpacks 512 21-bit values from 21 512-bit words (21 * 64 = 1344 bytes) at `compressed` into 512 zero-extended 32-bit integers (2048 bytes) at `pout`, using unaligned loads and stores. Each of the 16 32-bit lanes is an independent bit stream: lane j of output word k holds the k-th value of lane j. A store whose value ends exactly on a 32-bit boundary skips the mask because the logical right shift already zero-fills the upper bits. */ +static void avx512unpackblock21(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 21 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(2097151); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 1, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 21), + _mm512_slli_epi32(w1, 11)))); + _mm512_storeu_si512(out + 2, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 10))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 3, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 31), + _mm512_slli_epi32(w0, 1)))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 4, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 20), + _mm512_slli_epi32(w1, 12)))); + _mm512_storeu_si512(out + 5, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 9))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 6, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 30), + _mm512_slli_epi32(w0, 2)))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 7, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 19), + _mm512_slli_epi32(w1, 13)))); + _mm512_storeu_si512(out + 8, + _mm512_and_si512(mask, _mm512_srli_epi32(w1,
8))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 9, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 29), + _mm512_slli_epi32(w0, 3)))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 10, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 18), + _mm512_slli_epi32(w1, 14)))); + _mm512_storeu_si512(out + 11, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 7))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 12, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 13, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 17), + _mm512_slli_epi32(w1, 15)))); + _mm512_storeu_si512(out + 14, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 6))); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 15, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 27), + _mm512_slli_epi32(w0, 5)))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 16, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + _mm512_storeu_si512(out + 17, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 5))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 18, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 26), + _mm512_slli_epi32(w0, 6)))); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 19, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 15), + _mm512_slli_epi32(w1, 17)))); + _mm512_storeu_si512(out + 20, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + w0 = _mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512( + out + 21, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 25), + _mm512_slli_epi32(w0, 7)))); + w1 = _mm512_loadu_si512(compressed + 15); +
_mm512_storeu_si512( + out + 22, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 14), + _mm512_slli_epi32(w1, 18)))); + _mm512_storeu_si512(out + 23, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 3))); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512( + out + 24, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + w1 = _mm512_loadu_si512(compressed + 17); + _mm512_storeu_si512( + out + 25, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 13), + _mm512_slli_epi32(w1, 19)))); + _mm512_storeu_si512(out + 26, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 2))); + w0 = _mm512_loadu_si512(compressed + 18); + _mm512_storeu_si512( + out + 27, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 23), + _mm512_slli_epi32(w0, 9)))); + w1 = _mm512_loadu_si512(compressed + 19); + _mm512_storeu_si512( + out + 28, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 12), + _mm512_slli_epi32(w1, 20)))); + _mm512_storeu_si512(out + 29, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 1))); + w0 = _mm512_loadu_si512(compressed + 20); + _mm512_storeu_si512( + out + 30, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 22), + _mm512_slli_epi32(w0, 10)))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 11)); +} + +/* Unpacks 512 22-bit values from 22 512-bit words (22 * 64 = 1408 bytes) at `compressed` into 512 zero-extended 32-bit integers (2048 bytes) at `pout`, using unaligned loads and stores. Each of the 16 32-bit lanes is an independent bit stream: lane j of output word k holds the k-th value of lane j. A store whose value ends exactly on a 32-bit boundary skips the mask because the logical right shift already zero-fills the upper bits. */ +static void avx512unpackblock22(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 22 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(4194303); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 1, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 22), + _mm512_slli_epi32(w1, 10)))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( +
out + 2, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 12), + _mm512_slli_epi32(w0, 20)))); + _mm512_storeu_si512(out + 3, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 2))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 4, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 5, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 14), + _mm512_slli_epi32(w0, 18)))); + _mm512_storeu_si512(out + 6, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 7, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 26), + _mm512_slli_epi32(w1, 6)))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 8, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 16), + _mm512_slli_epi32(w0, 16)))); + _mm512_storeu_si512(out + 9, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 6))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 10, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 11, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 18), + _mm512_slli_epi32(w0, 14)))); + _mm512_storeu_si512(out + 12, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 8))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 13, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 30), + _mm512_slli_epi32(w1, 2)))); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 14, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 20), + _mm512_slli_epi32(w0, 12)))); + _mm512_storeu_si512(out + 15, _mm512_srli_epi32(w0, 10)); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512(out + 16,
_mm512_and_si512(mask, w1)); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 17, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 22), + _mm512_slli_epi32(w0, 10)))); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 18, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 12), + _mm512_slli_epi32(w1, 20)))); + _mm512_storeu_si512(out + 19, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 2))); + w0 = _mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512( + out + 20, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512( + out + 21, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 14), + _mm512_slli_epi32(w1, 18)))); + _mm512_storeu_si512(out + 22, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512( + out + 23, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 26), + _mm512_slli_epi32(w0, 6)))); + w1 = _mm512_loadu_si512(compressed + 17); + _mm512_storeu_si512( + out + 24, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + _mm512_storeu_si512(out + 25, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 6))); + w0 = _mm512_loadu_si512(compressed + 18); + _mm512_storeu_si512( + out + 26, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + w1 = _mm512_loadu_si512(compressed + 19); + _mm512_storeu_si512( + out + 27, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 18), + _mm512_slli_epi32(w1, 14)))); + _mm512_storeu_si512(out + 28, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + w0 = _mm512_loadu_si512(compressed + 20); + _mm512_storeu_si512( + out + 29, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 30), + _mm512_slli_epi32(w0, 2)))); + w1 =
_mm512_loadu_si512(compressed + 21); + _mm512_storeu_si512( + out + 30, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 20), + _mm512_slli_epi32(w1, 12)))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w1, 10)); +} + +/* Unpacks 512 23-bit values from 23 512-bit words (23 * 64 = 1472 bytes) at `compressed` into 512 zero-extended 32-bit integers (2048 bytes) at `pout`, using unaligned loads and stores. Each of the 16 32-bit lanes is an independent bit stream: lane j of output word k holds the k-th value of lane j. A store whose value ends exactly on a 32-bit boundary skips the mask because the logical right shift already zero-fills the upper bits. */ +static void avx512unpackblock23(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 23 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(8388607); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 1, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 23), + _mm512_slli_epi32(w1, 9)))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 2, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 14), + _mm512_slli_epi32(w0, 18)))); + _mm512_storeu_si512(out + 3, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 5))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 4, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 5, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 19), + _mm512_slli_epi32(w0, 13)))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 6, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 10), + _mm512_slli_epi32(w1, 22)))); + _mm512_storeu_si512(out + 7, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 1))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 8, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 9, + _mm512_and_si512(mask,
_mm512_or_si512(_mm512_srli_epi32(w0, 15), + _mm512_slli_epi32(w1, 17)))); + _mm512_storeu_si512(out + 10, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 6))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 11, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 29), + _mm512_slli_epi32(w0, 3)))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 12, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 20), + _mm512_slli_epi32(w1, 12)))); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 13, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 11), + _mm512_slli_epi32(w0, 21)))); + _mm512_storeu_si512(out + 14, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 2))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 15, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 25), + _mm512_slli_epi32(w1, 7)))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 16, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 16), + _mm512_slli_epi32(w0, 16)))); + _mm512_storeu_si512(out + 17, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 7))); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 18, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 30), + _mm512_slli_epi32(w1, 2)))); + w0 = _mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512( + out + 19, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 21), + _mm512_slli_epi32(w0, 11)))); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512( + out + 20, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 12), + _mm512_slli_epi32(w1, 20)))); + _mm512_storeu_si512(out + 21, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 3))); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512( + out + 22, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 26), +
_mm512_slli_epi32(w0, 6)))); + w1 = _mm512_loadu_si512(compressed + 17); + _mm512_storeu_si512( + out + 23, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 17), + _mm512_slli_epi32(w1, 15)))); + _mm512_storeu_si512(out + 24, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 8))); + w0 = _mm512_loadu_si512(compressed + 18); + _mm512_storeu_si512( + out + 25, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 31), + _mm512_slli_epi32(w0, 1)))); + w1 = _mm512_loadu_si512(compressed + 19); + _mm512_storeu_si512( + out + 26, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 22), + _mm512_slli_epi32(w1, 10)))); + w0 = _mm512_loadu_si512(compressed + 20); + _mm512_storeu_si512( + out + 27, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 13), + _mm512_slli_epi32(w0, 19)))); + _mm512_storeu_si512(out + 28, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + w1 = _mm512_loadu_si512(compressed + 21); + _mm512_storeu_si512( + out + 29, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 27), + _mm512_slli_epi32(w1, 5)))); + w0 = _mm512_loadu_si512(compressed + 22); + _mm512_storeu_si512( + out + 30, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 18), + _mm512_slli_epi32(w0, 14)))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 9)); +} + +/* Unpacks 512 24-bit values from 24 512-bit words (24 * 64 = 1536 bytes) at `compressed` into 512 zero-extended 32-bit integers (2048 bytes) at `pout`, using unaligned loads and stores. Each of the 16 32-bit lanes is an independent bit stream: lane j of output word k holds the k-th value of lane j. A store whose value ends exactly on a 32-bit boundary skips the mask because the logical right shift already zero-fills the upper bits. */ +static void avx512unpackblock24(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 24 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(16777215); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 1, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 2, +
_mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 16), + _mm512_slli_epi32(w0, 16)))); + _mm512_storeu_si512(out + 3, _mm512_srli_epi32(w0, 8)); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512(out + 4, _mm512_and_si512(mask, w1)); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 5, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 6, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + _mm512_storeu_si512(out + 7, _mm512_srli_epi32(w1, 8)); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512(out + 8, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 9, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 10, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 16), + _mm512_slli_epi32(w0, 16)))); + _mm512_storeu_si512(out + 11, _mm512_srli_epi32(w0, 8)); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512(out + 12, _mm512_and_si512(mask, w1)); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 13, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 14, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + _mm512_storeu_si512(out + 15, _mm512_srli_epi32(w1, 8)); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512(out + 16, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 17, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + w0 =
_mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512( + out + 18, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 16), + _mm512_slli_epi32(w0, 16)))); + _mm512_storeu_si512(out + 19, _mm512_srli_epi32(w0, 8)); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512(out + 20, _mm512_and_si512(mask, w1)); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512( + out + 21, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + w1 = _mm512_loadu_si512(compressed + 17); + _mm512_storeu_si512( + out + 22, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + _mm512_storeu_si512(out + 23, _mm512_srli_epi32(w1, 8)); + w0 = _mm512_loadu_si512(compressed + 18); + _mm512_storeu_si512(out + 24, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 19); + _mm512_storeu_si512( + out + 25, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + w0 = _mm512_loadu_si512(compressed + 20); + _mm512_storeu_si512( + out + 26, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 16), + _mm512_slli_epi32(w0, 16)))); + _mm512_storeu_si512(out + 27, _mm512_srli_epi32(w0, 8)); + w1 = _mm512_loadu_si512(compressed + 21); + _mm512_storeu_si512(out + 28, _mm512_and_si512(mask, w1)); + w0 = _mm512_loadu_si512(compressed + 22); + _mm512_storeu_si512( + out + 29, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + w1 = _mm512_loadu_si512(compressed + 23); + _mm512_storeu_si512( + out + 30, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w1, 8)); +} + +/* Unpacks 512 25-bit values from 25 512-bit words (25 * 64 = 1600 bytes) at `compressed` into 512 zero-extended 32-bit integers (2048 bytes) at `pout`, using unaligned loads and stores. Each of the 16 32-bit lanes is an independent bit stream: lane j of output word k holds the k-th value of lane j. A store whose value ends exactly on a 32-bit boundary skips the mask because the logical right shift already zero-fills the upper bits. */ +static void avx512unpackblock25(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 25 512-bit
words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(33554431); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 1, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 25), + _mm512_slli_epi32(w1, 7)))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 2, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 18), + _mm512_slli_epi32(w0, 14)))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 3, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 11), + _mm512_slli_epi32(w1, 21)))); + _mm512_storeu_si512(out + 4, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 5, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 29), + _mm512_slli_epi32(w0, 3)))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 6, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 22), + _mm512_slli_epi32(w1, 10)))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 7, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 15), + _mm512_slli_epi32(w0, 17)))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 8, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 8), + _mm512_slli_epi32(w1, 24)))); + _mm512_storeu_si512(out + 9, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 1))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 10, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 26), + _mm512_slli_epi32(w0, 6)))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 11, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 19), + _mm512_slli_epi32(w1, 13)))); + w0 =
_mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 12, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 12), + _mm512_slli_epi32(w0, 20)))); + _mm512_storeu_si512(out + 13, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 5))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 14, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 30), + _mm512_slli_epi32(w1, 2)))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 15, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 23), + _mm512_slli_epi32(w0, 9)))); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 16, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + w0 = _mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512( + out + 17, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 9), + _mm512_slli_epi32(w0, 23)))); + _mm512_storeu_si512(out + 18, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 2))); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512( + out + 19, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 27), + _mm512_slli_epi32(w1, 5)))); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512( + out + 20, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 20), + _mm512_slli_epi32(w0, 12)))); + w1 = _mm512_loadu_si512(compressed + 17); + _mm512_storeu_si512( + out + 21, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 13), + _mm512_slli_epi32(w1, 19)))); + _mm512_storeu_si512(out + 22, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 6))); + w0 = _mm512_loadu_si512(compressed + 18); + _mm512_storeu_si512( + out + 23, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 31), + _mm512_slli_epi32(w0, 1)))); + w1 = _mm512_loadu_si512(compressed + 19); + _mm512_storeu_si512( + out + 24, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0,
24), + _mm512_slli_epi32(w1, 8)))); + w0 = _mm512_loadu_si512(compressed + 20); + _mm512_storeu_si512( + out + 25, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 17), + _mm512_slli_epi32(w0, 15)))); + w1 = _mm512_loadu_si512(compressed + 21); + _mm512_storeu_si512( + out + 26, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 10), + _mm512_slli_epi32(w1, 22)))); + _mm512_storeu_si512(out + 27, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 3))); + w0 = _mm512_loadu_si512(compressed + 22); + _mm512_storeu_si512( + out + 28, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + w1 = _mm512_loadu_si512(compressed + 23); + _mm512_storeu_si512( + out + 29, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 21), + _mm512_slli_epi32(w1, 11)))); + w0 = _mm512_loadu_si512(compressed + 24); + _mm512_storeu_si512( + out + 30, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 14), + _mm512_slli_epi32(w0, 18)))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 7)); +} + +/* we packed 512 26-bit values, touching 26 512-bit words, using 1664 bytes (26 words x 64 bytes); each 32-bit lane carries its own stream of 32 values, and the 512 unpacked uint32_t are written to pout with unaligned 512-bit stores */ +static void avx512unpackblock26(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 26 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(67108863); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 1, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 26), + _mm512_slli_epi32(w1, 6)))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 2, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 20), + _mm512_slli_epi32(w0, 12)))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 3, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 14), + 
_mm512_slli_epi32(w1, 18)))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 4, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 8), + _mm512_slli_epi32(w0, 24)))); + _mm512_storeu_si512(out + 5, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 2))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 6, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 7, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 22), + _mm512_slli_epi32(w0, 10)))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 8, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 9, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 10), + _mm512_slli_epi32(w0, 22)))); + _mm512_storeu_si512(out + 10, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 11, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 30), + _mm512_slli_epi32(w1, 2)))); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 12, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 13, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 18), + _mm512_slli_epi32(w1, 14)))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 14, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 12), + _mm512_slli_epi32(w0, 20)))); + _mm512_storeu_si512(out + 15, _mm512_srli_epi32(w0, 6)); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512(out + 16, _mm512_and_si512(mask, w1)); + w0 = _mm512_loadu_si512(compressed + 
14); + _mm512_storeu_si512( + out + 17, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 26), + _mm512_slli_epi32(w0, 6)))); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512( + out + 18, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 20), + _mm512_slli_epi32(w1, 12)))); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512( + out + 19, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 14), + _mm512_slli_epi32(w0, 18)))); + w1 = _mm512_loadu_si512(compressed + 17); + _mm512_storeu_si512( + out + 20, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 8), + _mm512_slli_epi32(w1, 24)))); + _mm512_storeu_si512(out + 21, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 2))); + w0 = _mm512_loadu_si512(compressed + 18); + _mm512_storeu_si512( + out + 22, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + w1 = _mm512_loadu_si512(compressed + 19); + _mm512_storeu_si512( + out + 23, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 22), + _mm512_slli_epi32(w1, 10)))); + w0 = _mm512_loadu_si512(compressed + 20); + _mm512_storeu_si512( + out + 24, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 16), + _mm512_slli_epi32(w0, 16)))); + w1 = _mm512_loadu_si512(compressed + 21); + _mm512_storeu_si512( + out + 25, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 10), + _mm512_slli_epi32(w1, 22)))); + _mm512_storeu_si512(out + 26, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 4))); + w0 = _mm512_loadu_si512(compressed + 22); + _mm512_storeu_si512( + out + 27, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 30), + _mm512_slli_epi32(w0, 2)))); + w1 = _mm512_loadu_si512(compressed + 23); + _mm512_storeu_si512( + out + 28, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + w0 = _mm512_loadu_si512(compressed + 24); + _mm512_storeu_si512( + out + 29, + 
_mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 18), + _mm512_slli_epi32(w0, 14)))); + w1 = _mm512_loadu_si512(compressed + 25); + _mm512_storeu_si512( + out + 30, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 12), + _mm512_slli_epi32(w1, 20)))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w1, 6)); +} + +/* we packed 512 27-bit values, touching 27 512-bit words, using 1728 bytes (27 words x 64 bytes); each 32-bit lane carries its own stream of 32 values, and the 512 unpacked uint32_t are written to pout with unaligned 512-bit stores */ +static void avx512unpackblock27(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 27 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(134217727); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 1, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 27), + _mm512_slli_epi32(w1, 5)))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 2, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 22), + _mm512_slli_epi32(w0, 10)))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 3, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 17), + _mm512_slli_epi32(w1, 15)))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 4, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 12), + _mm512_slli_epi32(w0, 20)))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 5, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 7), + _mm512_slli_epi32(w1, 25)))); + _mm512_storeu_si512(out + 6, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 2))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 7, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 29), + _mm512_slli_epi32(w0, 3)))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 8, + _mm512_and_si512(mask, 
_mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 9, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 19), + _mm512_slli_epi32(w0, 13)))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 10, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 14), + _mm512_slli_epi32(w1, 18)))); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 11, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 9), + _mm512_slli_epi32(w0, 23)))); + _mm512_storeu_si512(out + 12, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 4))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 13, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 31), + _mm512_slli_epi32(w1, 1)))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 14, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 26), + _mm512_slli_epi32(w0, 6)))); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 15, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 21), + _mm512_slli_epi32(w1, 11)))); + w0 = _mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512( + out + 16, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 16), + _mm512_slli_epi32(w0, 16)))); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512( + out + 17, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 11), + _mm512_slli_epi32(w1, 21)))); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512( + out + 18, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 6), + _mm512_slli_epi32(w0, 26)))); + _mm512_storeu_si512(out + 19, + _mm512_and_si512(mask, _mm512_srli_epi32(w0, 1))); + w1 = _mm512_loadu_si512(compressed + 17); + _mm512_storeu_si512( + out + 20, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + 
_mm512_slli_epi32(w1, 4)))); + w0 = _mm512_loadu_si512(compressed + 18); + _mm512_storeu_si512( + out + 21, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 23), + _mm512_slli_epi32(w0, 9)))); + w1 = _mm512_loadu_si512(compressed + 19); + _mm512_storeu_si512( + out + 22, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 18), + _mm512_slli_epi32(w1, 14)))); + w0 = _mm512_loadu_si512(compressed + 20); + _mm512_storeu_si512( + out + 23, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 13), + _mm512_slli_epi32(w0, 19)))); + w1 = _mm512_loadu_si512(compressed + 21); + _mm512_storeu_si512( + out + 24, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 8), + _mm512_slli_epi32(w1, 24)))); + _mm512_storeu_si512(out + 25, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 3))); + w0 = _mm512_loadu_si512(compressed + 22); + _mm512_storeu_si512( + out + 26, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 30), + _mm512_slli_epi32(w0, 2)))); + w1 = _mm512_loadu_si512(compressed + 23); + _mm512_storeu_si512( + out + 27, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 25), + _mm512_slli_epi32(w1, 7)))); + w0 = _mm512_loadu_si512(compressed + 24); + _mm512_storeu_si512( + out + 28, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 20), + _mm512_slli_epi32(w0, 12)))); + w1 = _mm512_loadu_si512(compressed + 25); + _mm512_storeu_si512( + out + 29, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 15), + _mm512_slli_epi32(w1, 17)))); + w0 = _mm512_loadu_si512(compressed + 26); + _mm512_storeu_si512( + out + 30, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 10), + _mm512_slli_epi32(w0, 22)))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 5)); +} + +/* we packed 512 28-bit values, touching 28 512-bit words, using 1792 bytes (28 words x 64 bytes); each 32-bit lane carries its own stream of 32 values, and the 512 unpacked uint32_t are written to pout with unaligned 512-bit stores */ +static void avx512unpackblock28(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 28 512-bit words */ + __m512i 
w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(268435455); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 1, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 2, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 3, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 20), + _mm512_slli_epi32(w1, 12)))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 4, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 16), + _mm512_slli_epi32(w0, 16)))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 5, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 12), + _mm512_slli_epi32(w1, 20)))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 6, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 8), + _mm512_slli_epi32(w0, 24)))); + _mm512_storeu_si512(out + 7, _mm512_srli_epi32(w0, 4)); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512(out + 8, _mm512_and_si512(mask, w1)); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 9, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 10, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 11, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 20), + _mm512_slli_epi32(w0, 12)))); + w1 = _mm512_loadu_si512(compressed + 11); + 
_mm512_storeu_si512( + out + 12, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 13, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 12), + _mm512_slli_epi32(w0, 20)))); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 14, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 8), + _mm512_slli_epi32(w1, 24)))); + _mm512_storeu_si512(out + 15, _mm512_srli_epi32(w1, 4)); + w0 = _mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512(out + 16, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512( + out + 17, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512( + out + 18, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + w1 = _mm512_loadu_si512(compressed + 17); + _mm512_storeu_si512( + out + 19, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 20), + _mm512_slli_epi32(w1, 12)))); + w0 = _mm512_loadu_si512(compressed + 18); + _mm512_storeu_si512( + out + 20, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 16), + _mm512_slli_epi32(w0, 16)))); + w1 = _mm512_loadu_si512(compressed + 19); + _mm512_storeu_si512( + out + 21, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 12), + _mm512_slli_epi32(w1, 20)))); + w0 = _mm512_loadu_si512(compressed + 20); + _mm512_storeu_si512( + out + 22, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 8), + _mm512_slli_epi32(w0, 24)))); + _mm512_storeu_si512(out + 23, _mm512_srli_epi32(w0, 4)); + w1 = _mm512_loadu_si512(compressed + 21); + _mm512_storeu_si512(out + 24, _mm512_and_si512(mask, w1)); + w0 = _mm512_loadu_si512(compressed + 22); + _mm512_storeu_si512( + out + 25, + _mm512_and_si512(mask, 
_mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + w1 = _mm512_loadu_si512(compressed + 23); + _mm512_storeu_si512( + out + 26, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + w0 = _mm512_loadu_si512(compressed + 24); + _mm512_storeu_si512( + out + 27, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 20), + _mm512_slli_epi32(w0, 12)))); + w1 = _mm512_loadu_si512(compressed + 25); + _mm512_storeu_si512( + out + 28, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + w0 = _mm512_loadu_si512(compressed + 26); + _mm512_storeu_si512( + out + 29, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 12), + _mm512_slli_epi32(w0, 20)))); + w1 = _mm512_loadu_si512(compressed + 27); + _mm512_storeu_si512( + out + 30, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 8), + _mm512_slli_epi32(w1, 24)))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w1, 4)); +} + +/* we packed 512 29-bit values, touching 29 512-bit words, using 1856 bytes (29 words x 64 bytes); each 32-bit lane carries its own stream of 32 values, and the 512 unpacked uint32_t are written to pout with unaligned 512-bit stores */ +static void avx512unpackblock29(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 29 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(536870911); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 1, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 29), + _mm512_slli_epi32(w1, 3)))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 2, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 26), + _mm512_slli_epi32(w0, 6)))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 3, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 23), + _mm512_slli_epi32(w1, 9)))); + w0 = 
_mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 4, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 20), + _mm512_slli_epi32(w0, 12)))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 5, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 17), + _mm512_slli_epi32(w1, 15)))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 6, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 14), + _mm512_slli_epi32(w0, 18)))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 7, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 11), + _mm512_slli_epi32(w1, 21)))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 8, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 8), + _mm512_slli_epi32(w0, 24)))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 9, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 5), + _mm512_slli_epi32(w1, 27)))); + _mm512_storeu_si512(out + 10, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 2))); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 11, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 31), + _mm512_slli_epi32(w0, 1)))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 12, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 13, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 25), + _mm512_slli_epi32(w0, 7)))); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 14, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 22), + _mm512_slli_epi32(w1, 10)))); + w0 = _mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512( + out + 15, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 19), + 
_mm512_slli_epi32(w0, 13)))); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512( + out + 16, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512( + out + 17, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 13), + _mm512_slli_epi32(w0, 19)))); + w1 = _mm512_loadu_si512(compressed + 17); + _mm512_storeu_si512( + out + 18, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 10), + _mm512_slli_epi32(w1, 22)))); + w0 = _mm512_loadu_si512(compressed + 18); + _mm512_storeu_si512( + out + 19, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 7), + _mm512_slli_epi32(w0, 25)))); + w1 = _mm512_loadu_si512(compressed + 19); + _mm512_storeu_si512( + out + 20, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 4), + _mm512_slli_epi32(w1, 28)))); + _mm512_storeu_si512(out + 21, + _mm512_and_si512(mask, _mm512_srli_epi32(w1, 1))); + w0 = _mm512_loadu_si512(compressed + 20); + _mm512_storeu_si512( + out + 22, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 30), + _mm512_slli_epi32(w0, 2)))); + w1 = _mm512_loadu_si512(compressed + 21); + _mm512_storeu_si512( + out + 23, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 27), + _mm512_slli_epi32(w1, 5)))); + w0 = _mm512_loadu_si512(compressed + 22); + _mm512_storeu_si512( + out + 24, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + w1 = _mm512_loadu_si512(compressed + 23); + _mm512_storeu_si512( + out + 25, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 21), + _mm512_slli_epi32(w1, 11)))); + w0 = _mm512_loadu_si512(compressed + 24); + _mm512_storeu_si512( + out + 26, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 18), + _mm512_slli_epi32(w0, 14)))); + w1 = _mm512_loadu_si512(compressed + 25); + _mm512_storeu_si512( + out + 27, + 
_mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 15), + _mm512_slli_epi32(w1, 17)))); + w0 = _mm512_loadu_si512(compressed + 26); + _mm512_storeu_si512( + out + 28, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 12), + _mm512_slli_epi32(w0, 20)))); + w1 = _mm512_loadu_si512(compressed + 27); + _mm512_storeu_si512( + out + 29, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 9), + _mm512_slli_epi32(w1, 23)))); + w0 = _mm512_loadu_si512(compressed + 28); + _mm512_storeu_si512( + out + 30, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 6), + _mm512_slli_epi32(w0, 26)))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 3)); +} + +/* we packed 512 30-bit values, touching 30 512-bit words, using 1920 bytes (30 words x 64 bytes); each 32-bit lane carries its own stream of 32 values, and the 512 unpacked uint32_t are written to pout with unaligned 512-bit stores */ +static void avx512unpackblock30(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 30 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(1073741823); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 1, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 30), + _mm512_slli_epi32(w1, 2)))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 2, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 3, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 26), + _mm512_slli_epi32(w1, 6)))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 4, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 5, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 22), + _mm512_slli_epi32(w1, 10)))); + w0 = 
_mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 6, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 20), + _mm512_slli_epi32(w0, 12)))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 7, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 18), + _mm512_slli_epi32(w1, 14)))); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 8, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 16), + _mm512_slli_epi32(w0, 16)))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 9, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 14), + _mm512_slli_epi32(w1, 18)))); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 10, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 12), + _mm512_slli_epi32(w0, 20)))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 11, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 10), + _mm512_slli_epi32(w1, 22)))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 12, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 8), + _mm512_slli_epi32(w0, 24)))); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 13, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 6), + _mm512_slli_epi32(w1, 26)))); + w0 = _mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512( + out + 14, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 4), + _mm512_slli_epi32(w0, 28)))); + _mm512_storeu_si512(out + 15, _mm512_srli_epi32(w0, 2)); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512(out + 16, _mm512_and_si512(mask, w1)); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512( + out + 17, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 30), + _mm512_slli_epi32(w0, 2)))); + w1 = _mm512_loadu_si512(compressed + 17); + 
_mm512_storeu_si512( + out + 18, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 28), + _mm512_slli_epi32(w1, 4)))); + w0 = _mm512_loadu_si512(compressed + 18); + _mm512_storeu_si512( + out + 19, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 26), + _mm512_slli_epi32(w0, 6)))); + w1 = _mm512_loadu_si512(compressed + 19); + _mm512_storeu_si512( + out + 20, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 24), + _mm512_slli_epi32(w1, 8)))); + w0 = _mm512_loadu_si512(compressed + 20); + _mm512_storeu_si512( + out + 21, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 22), + _mm512_slli_epi32(w0, 10)))); + w1 = _mm512_loadu_si512(compressed + 21); + _mm512_storeu_si512( + out + 22, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 20), + _mm512_slli_epi32(w1, 12)))); + w0 = _mm512_loadu_si512(compressed + 22); + _mm512_storeu_si512( + out + 23, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 18), + _mm512_slli_epi32(w0, 14)))); + w1 = _mm512_loadu_si512(compressed + 23); + _mm512_storeu_si512( + out + 24, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 16), + _mm512_slli_epi32(w1, 16)))); + w0 = _mm512_loadu_si512(compressed + 24); + _mm512_storeu_si512( + out + 25, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 14), + _mm512_slli_epi32(w0, 18)))); + w1 = _mm512_loadu_si512(compressed + 25); + _mm512_storeu_si512( + out + 26, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 12), + _mm512_slli_epi32(w1, 20)))); + w0 = _mm512_loadu_si512(compressed + 26); + _mm512_storeu_si512( + out + 27, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 10), + _mm512_slli_epi32(w0, 22)))); + w1 = _mm512_loadu_si512(compressed + 27); + _mm512_storeu_si512( + out + 28, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 8), + _mm512_slli_epi32(w1, 24)))); + w0 = _mm512_loadu_si512(compressed + 28); + _mm512_storeu_si512( + out + 29, + 
_mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 6), + _mm512_slli_epi32(w0, 26)))); + w1 = _mm512_loadu_si512(compressed + 29); + _mm512_storeu_si512( + out + 30, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 4), + _mm512_slli_epi32(w1, 28)))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w1, 2)); +} + +/* we packed 512 31-bit values, touching 31 512-bit words, using 992 bytes */ +static void avx512unpackblock31(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 31 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + const __m512i mask = _mm512_set1_epi32(2147483647); + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, _mm512_and_si512(mask, w0)); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512( + out + 1, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 31), + _mm512_slli_epi32(w1, 1)))); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512( + out + 2, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 30), + _mm512_slli_epi32(w0, 2)))); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512( + out + 3, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 29), + _mm512_slli_epi32(w1, 3)))); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512( + out + 4, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 28), + _mm512_slli_epi32(w0, 4)))); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512( + out + 5, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 27), + _mm512_slli_epi32(w1, 5)))); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512( + out + 6, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 26), + _mm512_slli_epi32(w0, 6)))); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512( + out + 7, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 25), + _mm512_slli_epi32(w1, 7)))); + w0 = 
_mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512( + out + 8, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 24), + _mm512_slli_epi32(w0, 8)))); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512( + out + 9, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 23), + _mm512_slli_epi32(w1, 9)))); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512( + out + 10, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 22), + _mm512_slli_epi32(w0, 10)))); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512( + out + 11, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 21), + _mm512_slli_epi32(w1, 11)))); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512( + out + 12, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 20), + _mm512_slli_epi32(w0, 12)))); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512( + out + 13, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 19), + _mm512_slli_epi32(w1, 13)))); + w0 = _mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512( + out + 14, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 18), + _mm512_slli_epi32(w0, 14)))); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512( + out + 15, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 17), + _mm512_slli_epi32(w1, 15)))); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512( + out + 16, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 16), + _mm512_slli_epi32(w0, 16)))); + w1 = _mm512_loadu_si512(compressed + 17); + _mm512_storeu_si512( + out + 17, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 15), + _mm512_slli_epi32(w1, 17)))); + w0 = _mm512_loadu_si512(compressed + 18); + _mm512_storeu_si512( + out + 18, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 14), + _mm512_slli_epi32(w0, 18)))); + w1 = _mm512_loadu_si512(compressed + 19); 
+ _mm512_storeu_si512( + out + 19, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 13), + _mm512_slli_epi32(w1, 19)))); + w0 = _mm512_loadu_si512(compressed + 20); + _mm512_storeu_si512( + out + 20, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 12), + _mm512_slli_epi32(w0, 20)))); + w1 = _mm512_loadu_si512(compressed + 21); + _mm512_storeu_si512( + out + 21, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 11), + _mm512_slli_epi32(w1, 21)))); + w0 = _mm512_loadu_si512(compressed + 22); + _mm512_storeu_si512( + out + 22, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 10), + _mm512_slli_epi32(w0, 22)))); + w1 = _mm512_loadu_si512(compressed + 23); + _mm512_storeu_si512( + out + 23, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 9), + _mm512_slli_epi32(w1, 23)))); + w0 = _mm512_loadu_si512(compressed + 24); + _mm512_storeu_si512( + out + 24, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 8), + _mm512_slli_epi32(w0, 24)))); + w1 = _mm512_loadu_si512(compressed + 25); + _mm512_storeu_si512( + out + 25, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 7), + _mm512_slli_epi32(w1, 25)))); + w0 = _mm512_loadu_si512(compressed + 26); + _mm512_storeu_si512( + out + 26, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 6), + _mm512_slli_epi32(w0, 26)))); + w1 = _mm512_loadu_si512(compressed + 27); + _mm512_storeu_si512( + out + 27, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 5), + _mm512_slli_epi32(w1, 27)))); + w0 = _mm512_loadu_si512(compressed + 28); + _mm512_storeu_si512( + out + 28, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 4), + _mm512_slli_epi32(w0, 28)))); + w1 = _mm512_loadu_si512(compressed + 29); + _mm512_storeu_si512( + out + 29, + _mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w0, 3), + _mm512_slli_epi32(w1, 29)))); + w0 = _mm512_loadu_si512(compressed + 30); + _mm512_storeu_si512( + out + 30, + 
_mm512_and_si512(mask, _mm512_or_si512(_mm512_srli_epi32(w1, 2), + _mm512_slli_epi32(w0, 30)))); + _mm512_storeu_si512(out + 31, _mm512_srli_epi32(w0, 1)); +} + +/* we packed 512 32-bit values, touching 32 512-bit words, using 1024 bytes */ +static void avx512unpackblock32(const __m512i *compressed, uint32_t *pout) { + /* we are going to access 32 512-bit words */ + __m512i w0, w1; + __m512i *out = (__m512i *)pout; + w0 = _mm512_loadu_si512(compressed); + _mm512_storeu_si512(out + 0, w0); + w1 = _mm512_loadu_si512(compressed + 1); + _mm512_storeu_si512(out + 1, w1); + w0 = _mm512_loadu_si512(compressed + 2); + _mm512_storeu_si512(out + 2, w0); + w1 = _mm512_loadu_si512(compressed + 3); + _mm512_storeu_si512(out + 3, w1); + w0 = _mm512_loadu_si512(compressed + 4); + _mm512_storeu_si512(out + 4, w0); + w1 = _mm512_loadu_si512(compressed + 5); + _mm512_storeu_si512(out + 5, w1); + w0 = _mm512_loadu_si512(compressed + 6); + _mm512_storeu_si512(out + 6, w0); + w1 = _mm512_loadu_si512(compressed + 7); + _mm512_storeu_si512(out + 7, w1); + w0 = _mm512_loadu_si512(compressed + 8); + _mm512_storeu_si512(out + 8, w0); + w1 = _mm512_loadu_si512(compressed + 9); + _mm512_storeu_si512(out + 9, w1); + w0 = _mm512_loadu_si512(compressed + 10); + _mm512_storeu_si512(out + 10, w0); + w1 = _mm512_loadu_si512(compressed + 11); + _mm512_storeu_si512(out + 11, w1); + w0 = _mm512_loadu_si512(compressed + 12); + _mm512_storeu_si512(out + 12, w0); + w1 = _mm512_loadu_si512(compressed + 13); + _mm512_storeu_si512(out + 13, w1); + w0 = _mm512_loadu_si512(compressed + 14); + _mm512_storeu_si512(out + 14, w0); + w1 = _mm512_loadu_si512(compressed + 15); + _mm512_storeu_si512(out + 15, w1); + w0 = _mm512_loadu_si512(compressed + 16); + _mm512_storeu_si512(out + 16, w0); + w1 = _mm512_loadu_si512(compressed + 17); + _mm512_storeu_si512(out + 17, w1); + w0 = _mm512_loadu_si512(compressed + 18); + _mm512_storeu_si512(out + 18, w0); + w1 = _mm512_loadu_si512(compressed + 19); + 
_mm512_storeu_si512(out + 19, w1); + w0 = _mm512_loadu_si512(compressed + 20); + _mm512_storeu_si512(out + 20, w0); + w1 = _mm512_loadu_si512(compressed + 21); + _mm512_storeu_si512(out + 21, w1); + w0 = _mm512_loadu_si512(compressed + 22); + _mm512_storeu_si512(out + 22, w0); + w1 = _mm512_loadu_si512(compressed + 23); + _mm512_storeu_si512(out + 23, w1); + w0 = _mm512_loadu_si512(compressed + 24); + _mm512_storeu_si512(out + 24, w0); + w1 = _mm512_loadu_si512(compressed + 25); + _mm512_storeu_si512(out + 25, w1); + w0 = _mm512_loadu_si512(compressed + 26); + _mm512_storeu_si512(out + 26, w0); + w1 = _mm512_loadu_si512(compressed + 27); + _mm512_storeu_si512(out + 27, w1); + w0 = _mm512_loadu_si512(compressed + 28); + _mm512_storeu_si512(out + 28, w0); + w1 = _mm512_loadu_si512(compressed + 29); + _mm512_storeu_si512(out + 29, w1); + w0 = _mm512_loadu_si512(compressed + 30); + _mm512_storeu_si512(out + 30, w0); + w1 = _mm512_loadu_si512(compressed + 31); + _mm512_storeu_si512(out + 31, w1); +} + +static avx512packblockfnc avx512funcPackArr[] = { + &avx512packblock0, &avx512packblock1, &avx512packblock2, + &avx512packblock3, &avx512packblock4, &avx512packblock5, + &avx512packblock6, &avx512packblock7, &avx512packblock8, + &avx512packblock9, &avx512packblock10, &avx512packblock11, + &avx512packblock12, &avx512packblock13, &avx512packblock14, + &avx512packblock15, &avx512packblock16, &avx512packblock17, + &avx512packblock18, &avx512packblock19, &avx512packblock20, + &avx512packblock21, &avx512packblock22, &avx512packblock23, + &avx512packblock24, &avx512packblock25, &avx512packblock26, + &avx512packblock27, &avx512packblock28, &avx512packblock29, + &avx512packblock30, &avx512packblock31, &avx512packblock32}; +static avx512packblockfnc avx512funcPackMaskArr[] = { + &avx512packblockmask0, &avx512packblockmask1, &avx512packblockmask2, + &avx512packblockmask3, &avx512packblockmask4, &avx512packblockmask5, + &avx512packblockmask6, &avx512packblockmask7, 
&avx512packblockmask8, + &avx512packblockmask9, &avx512packblockmask10, &avx512packblockmask11, + &avx512packblockmask12, &avx512packblockmask13, &avx512packblockmask14, + &avx512packblockmask15, &avx512packblockmask16, &avx512packblockmask17, + &avx512packblockmask18, &avx512packblockmask19, &avx512packblockmask20, + &avx512packblockmask21, &avx512packblockmask22, &avx512packblockmask23, + &avx512packblockmask24, &avx512packblockmask25, &avx512packblockmask26, + &avx512packblockmask27, &avx512packblockmask28, &avx512packblockmask29, + &avx512packblockmask30, &avx512packblockmask31, &avx512packblockmask32}; +static avx512unpackblockfnc avx512funcUnpackArr[] = { + &avx512unpackblock0, &avx512unpackblock1, &avx512unpackblock2, + &avx512unpackblock3, &avx512unpackblock4, &avx512unpackblock5, + &avx512unpackblock6, &avx512unpackblock7, &avx512unpackblock8, + &avx512unpackblock9, &avx512unpackblock10, &avx512unpackblock11, + &avx512unpackblock12, &avx512unpackblock13, &avx512unpackblock14, + &avx512unpackblock15, &avx512unpackblock16, &avx512unpackblock17, + &avx512unpackblock18, &avx512unpackblock19, &avx512unpackblock20, + &avx512unpackblock21, &avx512unpackblock22, &avx512unpackblock23, + &avx512unpackblock24, &avx512unpackblock25, &avx512unpackblock26, + &avx512unpackblock27, &avx512unpackblock28, &avx512unpackblock29, + &avx512unpackblock30, &avx512unpackblock31, &avx512unpackblock32}; +/** avx512packing **/ + +/* reads 512 values from "in", writes "bit" 512-bit vectors to "out" */ +void avx512pack(const uint32_t *in, __m512i *out, const uint32_t bit) { + avx512funcPackMaskArr[bit](in, out); +} + +/* reads 512 values from "in", writes "bit" 512-bit vectors to "out" */ +void avx512packwithoutmask(const uint32_t *in, __m512i *out, + const uint32_t bit) { + avx512funcPackArr[bit](in, out); +} + +/* reads "bit" 512-bit vectors from "in", writes 512 values to "out" */ +void avx512unpack(const __m512i *in, uint32_t *out, const uint32_t bit) { + 
avx512funcUnpackArr[bit](in, out); +} + +#endif /* __AVX512F__ */ diff --git a/cpp-projects/base/data/simdcomp/avx512bitpacking.h b/cpp-projects/base/data/simdcomp/avx512bitpacking.h new file mode 100644 index 0000000..b6c39d9 --- /dev/null +++ b/cpp-projects/base/data/simdcomp/avx512bitpacking.h @@ -0,0 +1,37 @@ +/** + * This code is released under a BSD License. + */ + +#ifndef INCLUDE_AVX512BITPACKING_H_ +#define INCLUDE_AVX512BITPACKING_H_ + +#ifdef __AVX512F__ + +#include "portability.h" + +/* AVX512 is required */ +#include +/* for memset */ +#include + +#include "simdcomputil.h" + +enum { AVX512BlockSize = 512 }; + +/* max integer logarithm over a range of AVX512BlockSize integers (512 integer) + */ +uint32_t avx512maxbits(const uint32_t *begin); + +/* reads 512 values from "in", writes "bit" 512-bit vectors to "out" */ +void avx512pack(const uint32_t *in, __m512i *out, const uint32_t bit); + +/* reads 512 values from "in", writes "bit" 512-bit vectors to "out" */ +void avx512packwithoutmask(const uint32_t *in, __m512i *out, + const uint32_t bit); + +/* reads "bit" 512-bit vectors from "in", writes 512 values to "out" */ +void avx512unpack(const __m512i *in, uint32_t *out, const uint32_t bit); + +#endif /* __AVX512F__ */ + +#endif /* INCLUDE_AVX512BITPACKING_H_ */ diff --git a/cpp-projects/base/data/simdcomp/avxbitpacking.c b/cpp-projects/base/data/simdcomp/avxbitpacking.c new file mode 100644 index 0000000..0f25b94 --- /dev/null +++ b/cpp-projects/base/data/simdcomp/avxbitpacking.c @@ -0,0 +1,9920 @@ +#include "avxbitpacking.h" +#ifdef __AVX2__ + +static uint32_t maxbitas32int(const __m256i accumulator) { + const __m256i _tmp1 = + _mm256_or_si256(_mm256_srli_si256(accumulator, 8), accumulator); + const __m256i _tmp2 = _mm256_or_si256(_mm256_srli_si256(_tmp1, 4), _tmp1); + uint32_t ans1 = _mm256_extract_epi32(_tmp2, 0); + uint32_t ans2 = _mm256_extract_epi32(_tmp2, 4); + uint32_t ans = ans1 > ans2 ? 
ans1 : ans2; + return bits(ans); +} + +uint32_t avxmaxbits(const uint32_t *begin) { + const __m256i *pin = (const __m256i *)(begin); + __m256i accumulator = _mm256_lddqu_si256(pin); + uint32_t k = 1; + for (; 8 * k < AVXBlockSize; ++k) { + __m256i newvec = _mm256_lddqu_si256(pin + k); + accumulator = _mm256_or_si256(accumulator, newvec); + } + return maxbitas32int(accumulator); +} + +/** avxpacking **/ + +typedef void (*avxpackblockfnc)(const uint32_t *pin, __m256i *compressed); +typedef void (*avxunpackblockfnc)(const __m256i *compressed, uint32_t *pout); + +static void avxpackblock0(const uint32_t *pin, __m256i *compressed) { + (void)compressed; + (void)pin; /* we consumed 256 32-bit integers */ +} + +/* we are going to pack 256 1-bit values, touching 1 256-bit words, using 16 + * bytes */ +static void avxpackblock1(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 1 256-bit word */ + __m256i w0; + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 2), 2)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 3)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 4), 4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 5)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 6), 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 7)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 8), 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 9)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 10), 10)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 11)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 12), 12)); + w0 = _mm256_or_si256(w0, 
_mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 13)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 14), 14)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 15)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 16), 16)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 17), 17)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 18), 18)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 19)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 20), 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 21)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 22), 22)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 23)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 24), 24)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 25), 25)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 26), 26)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 27)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 28), 28)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 29)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 30), 30)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 31)); + _mm256_storeu_si256(compressed + 0, w0); +} + +/* we are going to pack 256 2-bit values, touching 2 256-bit words, using 32 + * bytes */ +static void avxpackblock2(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 2 256-bit words */ + __m256i w0, w1; + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 2)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 2), 
4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 4), 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 10)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 6), 12)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 14)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 8), 16)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 18)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 10), 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 22)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 12), 24)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 26)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 14), 28)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 30)); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_lddqu_si256(in + 16); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 17), 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 18), 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 6)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 20), 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 10)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 22), 12)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 14)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 24), 16)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 25), 18)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 26), 20)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 22)); + w1 = 
_mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 28), 24)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 26)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 30), 28)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 30)); + _mm256_storeu_si256(compressed + 1, w1); +} + +/* we are going to pack 256 3-bit values, touching 3 256-bit words, using 48 + * bytes */ +static void avxpackblock3(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 3 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 3)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 2), 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 9)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 4), 12)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 15)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 6), 18)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 21)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 8), 24)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 27)); + tmp = _mm256_lddqu_si256(in + 10); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 1)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 12), 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 7)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 14), 10)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 13)); 
+ w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 16), 16)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 17), 19)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 18), 22)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 25)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 20), 28)); + tmp = _mm256_lddqu_si256(in + 21); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 22), 2)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 5)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 24), 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 25), 11)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 26), 14)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 17)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 28), 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 23)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 30), 26)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 29)); + _mm256_storeu_si256(compressed + 2, w0); +} + +/* we are going to pack 256 4-bit values, touching 4 256-bit words, using 64 + * bytes */ +static void avxpackblock4(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 4 256-bit words */ + __m256i w0, w1; + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 2), 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 12)); + w0 = _mm256_or_si256(w0, 
_mm256_slli_epi32(_mm256_lddqu_si256(in + 4), 16)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 6), 24)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 28)); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_lddqu_si256(in + 8); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 10), 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 12)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 12), 16)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 20)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 14), 24)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 28)); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_lddqu_si256(in + 16); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 17), 4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 18), 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 12)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 20), 16)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 22), 24)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 28)); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_lddqu_si256(in + 24); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 25), 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 26), 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 12)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 28), 16)); + w1 = _mm256_or_si256(w1, 
_mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 20)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 30), 24)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 28)); + _mm256_storeu_si256(compressed + 3, w1); +} + +/* we are going to pack 256 5-bit values, touching 5 256-bit words, using 80 + * bytes */ +static void avxpackblock5(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 5 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 5)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 2), 10)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 15)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 4), 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 25)); + tmp = _mm256_lddqu_si256(in + 6); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 3)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 8), 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 13)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 10), 18)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 23)); + tmp = _mm256_lddqu_si256(in + 12); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 14), 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 11)); + w0 = 
_mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 16), 16)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 17), 21)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 18), 26)); + tmp = _mm256_lddqu_si256(in + 19); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 31)); + w1 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 20), 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 9)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 22), 14)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 19)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 24), 24)); + tmp = _mm256_lddqu_si256(in + 25); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 26), 2)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 7)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 28), 12)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 17)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 30), 22)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 27)); + _mm256_storeu_si256(compressed + 4, w0); +} + +/* we are going to pack 256 6-bit values, touching 6 256-bit words, using 96 + * bytes */ +static void avxpackblock6(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 6 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 6)); + w0 = _mm256_or_si256(w0, 
_mm256_slli_epi32(_mm256_lddqu_si256(in + 2), 12)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 18)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 4), 24)); + tmp = _mm256_lddqu_si256(in + 5); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 6), 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 10)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 8), 16)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 22)); + tmp = _mm256_lddqu_si256(in + 10); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 2)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 12), 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 14)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 14), 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 26)); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_lddqu_si256(in + 16); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 17), 6)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 18), 12)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 18)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 20), 24)); + tmp = _mm256_lddqu_si256(in + 21); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 22), 4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 10)); + w0 = 
_mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 24), 16)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 25), 22)); + tmp = _mm256_lddqu_si256(in + 26); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 28), 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 14)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 30), 20)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 26)); + _mm256_storeu_si256(compressed + 5, w1); +} + +/* we are going to pack 256 7-bit values, touching 7 256-bit words, using 112 + * bytes */ +static void avxpackblock7(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 7 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 7)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 2), 14)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 21)); + tmp = _mm256_lddqu_si256(in + 4); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 3)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 6), 10)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 17)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 8), 24)); + tmp = _mm256_lddqu_si256(in + 9); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + 
_mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 10), 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 13)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 12), 20)); + tmp = _mm256_lddqu_si256(in + 13); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 14), 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 9)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 16), 16)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 17), 23)); + tmp = _mm256_lddqu_si256(in + 18); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 5)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 20), 12)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 19)); + tmp = _mm256_lddqu_si256(in + 22); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 1)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 24), 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 25), 15)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 26), 22)); + tmp = _mm256_lddqu_si256(in + 27); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 28), 4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 11)); + w0 
= _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 30), 18)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 25)); + _mm256_storeu_si256(compressed + 6, w0); +} + +/* we are going to pack 256 8-bit values, touching 8 256-bit words, using 128 + * bytes */ +static void avxpackblock8(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 8 256-bit words */ + __m256i w0, w1; + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 2), 16)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 24)); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_lddqu_si256(in + 4); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 6), 16)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 24)); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_lddqu_si256(in + 8); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 10), 16)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 24)); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_lddqu_si256(in + 12); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 14), 16)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 24)); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_lddqu_si256(in + 16); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 17), 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 18), 16)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 
19), 24)); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_lddqu_si256(in + 20); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 22), 16)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 24)); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_lddqu_si256(in + 24); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 25), 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 26), 16)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 24)); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_lddqu_si256(in + 28); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 30), 16)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 24)); + _mm256_storeu_si256(compressed + 7, w1); +} + +/* we are going to pack 256 9-bit values, touching 9 256-bit words, using 288 + * bytes */ +static void avxpackblock9(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 9 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 9)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 2), 18)); + tmp = _mm256_lddqu_si256(in + 3); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 4), 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 13)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 6), 22)); + tmp = _mm256_lddqu_si256(in + 7); + w1 = 
_mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 8), 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 17)); + tmp = _mm256_lddqu_si256(in + 10); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 3)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 12), 12)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 21)); + tmp = _mm256_lddqu_si256(in + 14); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 7)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 16), 16)); + tmp = _mm256_lddqu_si256(in + 17); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 25)); + w1 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 18), 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 11)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 20), 20)); + tmp = _mm256_lddqu_si256(in + 21); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 22), 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 15)); + tmp = _mm256_lddqu_si256(in + 24); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 
25), 1)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 26), 10)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 19)); + tmp = _mm256_lddqu_si256(in + 28); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 5)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 30), 14)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 23)); + _mm256_storeu_si256(compressed + 8, w0); +} + +/* we are going to pack 256 10-bit values, touching 10 256-bit words, using 320 + * bytes */ +static void avxpackblock10(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 10 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 10)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 2), 20)); + tmp = _mm256_lddqu_si256(in + 3); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 4), 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 18)); + tmp = _mm256_lddqu_si256(in + 6); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 8), 16)); + tmp = _mm256_lddqu_si256(in + 9); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 2, w0); + w1 = 
_mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 10), 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 14)); + tmp = _mm256_lddqu_si256(in + 12); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 2)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 14), 12)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 22)); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_lddqu_si256(in + 16); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 17), 10)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 18), 20)); + tmp = _mm256_lddqu_si256(in + 19); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 20), 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 18)); + tmp = _mm256_lddqu_si256(in + 22); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 6)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 24), 16)); + tmp = _mm256_lddqu_si256(in + 25); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 26), 4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 14)); + tmp = _mm256_lddqu_si256(in + 28); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256(w1, 
_mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 30), 12)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 22)); + _mm256_storeu_si256(compressed + 9, w1); +} + +/* we are going to pack 256 11-bit values, touching 11 256-bit words, using 352 + * bytes */ +static void avxpackblock11(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 11 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 11)); + tmp = _mm256_lddqu_si256(in + 2); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 1)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 4), 12)); + tmp = _mm256_lddqu_si256(in + 5); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 6), 2)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 13)); + tmp = _mm256_lddqu_si256(in + 8); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 3)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 10), 14)); + tmp = _mm256_lddqu_si256(in + 11); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 25)); + w0 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 12), 4)); + w0 = _mm256_or_si256(w0, 
_mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 15)); + tmp = _mm256_lddqu_si256(in + 14); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 5)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 16), 16)); + tmp = _mm256_lddqu_si256(in + 17); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 27)); + w0 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 18), 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 17)); + tmp = _mm256_lddqu_si256(in + 20); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 7)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 22), 18)); + tmp = _mm256_lddqu_si256(in + 23); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 24), 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 25), 19)); + tmp = _mm256_lddqu_si256(in + 26); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 9)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 28), 20)); + tmp = _mm256_lddqu_si256(in + 29); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 30), 10)); + w0 = _mm256_or_si256(w0, 
_mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 21)); + _mm256_storeu_si256(compressed + 10, w0); +} + +/* we are going to pack 256 12-bit values, touching 12 256-bit words, using 384 + * bytes */ +static void avxpackblock12(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 12 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 12)); + tmp = _mm256_lddqu_si256(in + 2); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 4), 16)); + tmp = _mm256_lddqu_si256(in + 5); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 6), 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 20)); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_lddqu_si256(in + 8); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 12)); + tmp = _mm256_lddqu_si256(in + 10); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 12), 16)); + tmp = _mm256_lddqu_si256(in + 13); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 14), 8)); + w1 = _mm256_or_si256(w1, 
_mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 20)); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_lddqu_si256(in + 16); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 17), 12)); + tmp = _mm256_lddqu_si256(in + 18); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 20), 16)); + tmp = _mm256_lddqu_si256(in + 21); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 22), 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 20)); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_lddqu_si256(in + 24); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 25), 12)); + tmp = _mm256_lddqu_si256(in + 26); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 28), 16)); + tmp = _mm256_lddqu_si256(in + 29); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 30), 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 20)); + _mm256_storeu_si256(compressed + 11, w1); +} + +/* we are going to pack 256 13-bit values, touching 13 256-bit words, using 416 + * bytes */ +static void avxpackblock13(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 13 256-bit words */ + 
__m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 13)); + tmp = _mm256_lddqu_si256(in + 2); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 7)); + tmp = _mm256_lddqu_si256(in + 4); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 6), 14)); + tmp = _mm256_lddqu_si256(in + 7); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 8), 8)); + tmp = _mm256_lddqu_si256(in + 9); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 21)); + w0 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 10), 2)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 15)); + tmp = _mm256_lddqu_si256(in + 12); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 9)); + tmp = _mm256_lddqu_si256(in + 14); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 3)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 16), 16)); + tmp = _mm256_lddqu_si256(in + 17); + w0 = _mm256_or_si256(w0, 
_mm256_slli_epi32(tmp, 29)); + w1 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 18), 10)); + tmp = _mm256_lddqu_si256(in + 19); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 20), 4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 17)); + tmp = _mm256_lddqu_si256(in + 22); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 11)); + tmp = _mm256_lddqu_si256(in + 24); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 25), 5)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 26), 18)); + tmp = _mm256_lddqu_si256(in + 27); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 31)); + w1 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 28), 12)); + tmp = _mm256_lddqu_si256(in + 29); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 25)); + w0 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 30), 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 19)); + _mm256_storeu_si256(compressed + 12, w0); +} + +/* we are going to pack 256 14-bit values, touching 14 256-bit words, using 448 + * bytes */ +static void avxpackblock14(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 14 256-bit words */ + __m256i w0, w1; 
+ __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 14)); + tmp = _mm256_lddqu_si256(in + 2); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 10)); + tmp = _mm256_lddqu_si256(in + 4); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 6)); + tmp = _mm256_lddqu_si256(in + 6); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 8), 16)); + tmp = _mm256_lddqu_si256(in + 9); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 10), 12)); + tmp = _mm256_lddqu_si256(in + 11); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 12), 8)); + tmp = _mm256_lddqu_si256(in + 13); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 14), 4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 18)); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_lddqu_si256(in + 16); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 17), 14)); + tmp = 
_mm256_lddqu_si256(in + 18); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 10)); + tmp = _mm256_lddqu_si256(in + 20); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 6)); + tmp = _mm256_lddqu_si256(in + 22); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 2)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 24), 16)); + tmp = _mm256_lddqu_si256(in + 25); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 26), 12)); + tmp = _mm256_lddqu_si256(in + 27); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 28), 8)); + tmp = _mm256_lddqu_si256(in + 29); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 30), 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 18)); + _mm256_storeu_si256(compressed + 13, w1); +} + +/* we are going to pack 256 15-bit values, touching 15 256-bit words, using 480 + * bytes */ +static void avxpackblock15(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 15 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used 
to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 15)); + tmp = _mm256_lddqu_si256(in + 2); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 13)); + tmp = _mm256_lddqu_si256(in + 4); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 11)); + tmp = _mm256_lddqu_si256(in + 6); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 9)); + tmp = _mm256_lddqu_si256(in + 8); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 7)); + tmp = _mm256_lddqu_si256(in + 10); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 5)); + tmp = _mm256_lddqu_si256(in + 12); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 3)); + tmp = _mm256_lddqu_si256(in + 14); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 18)); + w1 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 1)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 16), 16)); + tmp = _mm256_lddqu_si256(in 
+ 17); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 18), 14)); + tmp = _mm256_lddqu_si256(in + 19); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 29)); + w1 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 20), 12)); + tmp = _mm256_lddqu_si256(in + 21); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 27)); + w0 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 22), 10)); + tmp = _mm256_lddqu_si256(in + 23); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 25)); + w1 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 24), 8)); + tmp = _mm256_lddqu_si256(in + 25); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 26), 6)); + tmp = _mm256_lddqu_si256(in + 27); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 21)); + w1 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 28), 4)); + tmp = _mm256_lddqu_si256(in + 29); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 19)); + w0 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 30), 2)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 17)); + _mm256_storeu_si256(compressed + 14, w0); +} + +/* we are going to pack 256 16-bit values, touching 16 256-bit words, using 512 + * bytes */ +static void avxpackblock16(const uint32_t *pin, 
__m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 16 256-bit words */ + __m256i w0, w1; + w0 = _mm256_lddqu_si256(in + 0); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 1), 16)); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_lddqu_si256(in + 2); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 16)); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_lddqu_si256(in + 4); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 16)); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_lddqu_si256(in + 6); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 16)); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_lddqu_si256(in + 8); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 16)); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_lddqu_si256(in + 10); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 16)); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_lddqu_si256(in + 12); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 16)); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_lddqu_si256(in + 14); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 16)); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_lddqu_si256(in + 16); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 17), 16)); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_lddqu_si256(in + 18); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 16)); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_lddqu_si256(in + 20); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 16)); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_lddqu_si256(in + 22); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 16)); + 
_mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_lddqu_si256(in + 24); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 25), 16)); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_lddqu_si256(in + 26); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 16)); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_lddqu_si256(in + 28); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 16)); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_lddqu_si256(in + 30); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 16)); + _mm256_storeu_si256(compressed + 15, w1); +} + +/* we are going to pack 256 17-bit values, touching 17 256-bit words, using 544 + * bytes */ +static void avxpackblock17(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 17 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + tmp = _mm256_lddqu_si256(in + 1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 17)); + w1 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 2), 2)); + tmp = _mm256_lddqu_si256(in + 3); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 19)); + w0 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 4), 4)); + tmp = _mm256_lddqu_si256(in + 5); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 21)); + w1 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 6), 6)); + tmp = _mm256_lddqu_si256(in + 7); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256(w0, 
_mm256_slli_epi32(_mm256_lddqu_si256(in + 8), 8)); + tmp = _mm256_lddqu_si256(in + 9); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 25)); + w1 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 10), 10)); + tmp = _mm256_lddqu_si256(in + 11); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 27)); + w0 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 12), 12)); + tmp = _mm256_lddqu_si256(in + 13); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 29)); + w1 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 14), 14)); + tmp = _mm256_lddqu_si256(in + 15); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_lddqu_si256(in + 16); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 17), 1)); + tmp = _mm256_lddqu_si256(in + 18); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 3)); + tmp = _mm256_lddqu_si256(in + 20); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 5)); + tmp = _mm256_lddqu_si256(in + 22); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 7)); + tmp = 
_mm256_lddqu_si256(in + 24); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 25), 9)); + tmp = _mm256_lddqu_si256(in + 26); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 11)); + tmp = _mm256_lddqu_si256(in + 28); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 13)); + tmp = _mm256_lddqu_si256(in + 30); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 15, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 15)); + _mm256_storeu_si256(compressed + 16, w0); +} + +/* we are going to pack 256 18-bit values, touching 18 256-bit words, using 576 + * bytes (18 words x 32 bytes) */ +static void avxpackblock18(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 18 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + tmp = _mm256_lddqu_si256(in + 1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 18)); + w1 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 2), 4)); + tmp = _mm256_lddqu_si256(in + 3); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 4), 8)); + tmp = _mm256_lddqu_si256(in + 5); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 
26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 6), 12)); + tmp = _mm256_lddqu_si256(in + 7); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_lddqu_si256(in + 8); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 2)); + tmp = _mm256_lddqu_si256(in + 10); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 6)); + tmp = _mm256_lddqu_si256(in + 12); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 10)); + tmp = _mm256_lddqu_si256(in + 14); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 14)); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_lddqu_si256(in + 16); + tmp = _mm256_lddqu_si256(in + 17); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 18), 4)); + tmp = _mm256_lddqu_si256(in + 19); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 20), 8)); + tmp = _mm256_lddqu_si256(in + 21); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 
26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 22), 12)); + tmp = _mm256_lddqu_si256(in + 23); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_lddqu_si256(in + 24); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 25), 2)); + tmp = _mm256_lddqu_si256(in + 26); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 6)); + tmp = _mm256_lddqu_si256(in + 28); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 15, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 10)); + tmp = _mm256_lddqu_si256(in + 30); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 14)); + _mm256_storeu_si256(compressed + 17, w1); +} + +/* we are going to pack 256 19-bit values, touching 19 256-bit words, using 608 + * bytes (19 words x 32 bytes) */ +static void avxpackblock19(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 19 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + tmp = _mm256_lddqu_si256(in + 1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 19)); + w1 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 2), 
6)); + tmp = _mm256_lddqu_si256(in + 3); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 25)); + w0 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 4), 12)); + tmp = _mm256_lddqu_si256(in + 5); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 31)); + w1 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_lddqu_si256(in + 6); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 5)); + tmp = _mm256_lddqu_si256(in + 8); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 11)); + tmp = _mm256_lddqu_si256(in + 10); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_lddqu_si256(in + 11); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 17)); + w1 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 12), 4)); + tmp = _mm256_lddqu_si256(in + 13); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 14), 10)); + tmp = _mm256_lddqu_si256(in + 15); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 29)); + w1 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_lddqu_si256(in + 16); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 
17), 3)); + tmp = _mm256_lddqu_si256(in + 18); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 9)); + tmp = _mm256_lddqu_si256(in + 20); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_lddqu_si256(in + 21); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 15)); + w1 = _mm256_srli_epi32(tmp, 17); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 22), 2)); + tmp = _mm256_lddqu_si256(in + 23); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 21)); + w0 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 24), 8)); + tmp = _mm256_lddqu_si256(in + 25); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 14, w0); + tmp = _mm256_lddqu_si256(in + 26); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 15, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 1)); + tmp = _mm256_lddqu_si256(in + 28); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 7)); + tmp = _mm256_lddqu_si256(in + 30); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 17, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 13)); + _mm256_storeu_si256(compressed + 18, w0); +} + +/* we are going to pack 256 20-bit values, touching 20 256-bit words, using 640 + * bytes (20 words x 32 bytes) 
*/ +static void avxpackblock20(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 20 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + tmp = _mm256_lddqu_si256(in + 1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 2), 8)); + tmp = _mm256_lddqu_si256(in + 3); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_lddqu_si256(in + 4); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 4)); + tmp = _mm256_lddqu_si256(in + 6); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 12)); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_lddqu_si256(in + 8); + tmp = _mm256_lddqu_si256(in + 9); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 10), 8)); + tmp = _mm256_lddqu_si256(in + 11); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_lddqu_si256(in + 12); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 4)); + tmp = _mm256_lddqu_si256(in + 14); + w0 = 
_mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 12)); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_lddqu_si256(in + 16); + tmp = _mm256_lddqu_si256(in + 17); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 18), 8)); + tmp = _mm256_lddqu_si256(in + 19); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_lddqu_si256(in + 20); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 4)); + tmp = _mm256_lddqu_si256(in + 22); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 12)); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_lddqu_si256(in + 24); + tmp = _mm256_lddqu_si256(in + 25); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 15, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 26), 8)); + tmp = _mm256_lddqu_si256(in + 27); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 16, w0); + tmp = _mm256_lddqu_si256(in + 28); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 17, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 4)); + tmp = _mm256_lddqu_si256(in + 30); 
+ w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 18, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 12)); + _mm256_storeu_si256(compressed + 19, w1); +} + +/* we are going to pack 256 21-bit values, touching 21 256-bit words, using 672 + * bytes (21 words x 32 bytes) */ +static void avxpackblock21(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 21 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + tmp = _mm256_lddqu_si256(in + 1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 21)); + w1 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 2), 10)); + tmp = _mm256_lddqu_si256(in + 3); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_lddqu_si256(in + 4); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 9)); + tmp = _mm256_lddqu_si256(in + 6); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_lddqu_si256(in + 7); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 19)); + w1 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 8), 8)); + tmp = _mm256_lddqu_si256(in + 9); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_lddqu_si256(in + 10); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 18)); + w1 = 
_mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 7)); + tmp = _mm256_lddqu_si256(in + 12); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_lddqu_si256(in + 13); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 17)); + w1 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 14), 6)); + tmp = _mm256_lddqu_si256(in + 15); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 27)); + w0 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 9, w1); + tmp = _mm256_lddqu_si256(in + 16); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 17), 5)); + tmp = _mm256_lddqu_si256(in + 18); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_lddqu_si256(in + 19); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 15)); + w1 = _mm256_srli_epi32(tmp, 17); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 20), 4)); + tmp = _mm256_lddqu_si256(in + 21); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 25)); + w0 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 13, w1); + tmp = _mm256_lddqu_si256(in + 22); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 14)); + w1 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 3)); + tmp = _mm256_lddqu_si256(in + 24); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 
15, w1); + tmp = _mm256_lddqu_si256(in + 25); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 13)); + w1 = _mm256_srli_epi32(tmp, 19); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 26), 2)); + tmp = _mm256_lddqu_si256(in + 27); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_lddqu_si256(in + 28); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 12)); + w1 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 18, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 29), 1)); + tmp = _mm256_lddqu_si256(in + 30); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 19, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 11)); + _mm256_storeu_si256(compressed + 20, w0); +} + +/* we are going to pack 256 22-bit values, touching 22 256-bit words, using 704 + * bytes (22 words x 32 bytes) */ +static void avxpackblock22(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 22 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + tmp = _mm256_lddqu_si256(in + 1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_lddqu_si256(in + 2); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 2)); + tmp = _mm256_lddqu_si256(in + 4); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_lddqu_si256(in + 5); + w1 = 
_mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 6), 4)); + tmp = _mm256_lddqu_si256(in + 7); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 4, w0); + tmp = _mm256_lddqu_si256(in + 8); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 6)); + tmp = _mm256_lddqu_si256(in + 10); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_lddqu_si256(in + 11); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 12), 8)); + tmp = _mm256_lddqu_si256(in + 13); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_lddqu_si256(in + 14); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 10)); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_lddqu_si256(in + 16); + tmp = _mm256_lddqu_si256(in + 17); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_lddqu_si256(in + 18); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 12)); + w1 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 2)); + tmp = _mm256_lddqu_si256(in + 
20); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 13, w1); + tmp = _mm256_lddqu_si256(in + 21); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 14)); + w1 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 22), 4)); + tmp = _mm256_lddqu_si256(in + 23); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_lddqu_si256(in + 24); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 25), 6)); + tmp = _mm256_lddqu_si256(in + 26); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_lddqu_si256(in + 27); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 18)); + w1 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 18, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 28), 8)); + tmp = _mm256_lddqu_si256(in + 29); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 19, w1); + tmp = _mm256_lddqu_si256(in + 30); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 20, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 10)); + _mm256_storeu_si256(compressed + 21, w1); +} + +/* we are going to pack 256 23-bit values, touching 23 256-bit words, using 736 + * bytes (23 words x 32 bytes) */ +static void avxpackblock23(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 23 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* 
used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + tmp = _mm256_lddqu_si256(in + 1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 23)); + w1 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_lddqu_si256(in + 2); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 5)); + tmp = _mm256_lddqu_si256(in + 4); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_lddqu_si256(in + 5); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 19)); + w0 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_lddqu_si256(in + 6); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 10)); + w1 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 1)); + tmp = _mm256_lddqu_si256(in + 8); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_lddqu_si256(in + 9); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 15)); + w1 = _mm256_srli_epi32(tmp, 17); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 10), 6)); + tmp = _mm256_lddqu_si256(in + 11); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_lddqu_si256(in + 12); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_lddqu_si256(in + 13); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 11)); + w0 = _mm256_srli_epi32(tmp, 21); + _mm256_storeu_si256(compressed 
+ 9, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 14), 2)); + tmp = _mm256_lddqu_si256(in + 15); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 25)); + w1 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_lddqu_si256(in + 16); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 17), 7)); + tmp = _mm256_lddqu_si256(in + 18); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_lddqu_si256(in + 19); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 21)); + w0 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 13, w1); + tmp = _mm256_lddqu_si256(in + 20); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 12)); + w1 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 3)); + tmp = _mm256_lddqu_si256(in + 22); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_lddqu_si256(in + 23); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 17)); + w1 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 24), 8)); + tmp = _mm256_lddqu_si256(in + 25); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_lddqu_si256(in + 26); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 18, w0); + tmp = _mm256_lddqu_si256(in + 27); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 13)); + w0 = _mm256_srli_epi32(tmp, 19); + 
_mm256_storeu_si256(compressed + 19, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 28), 4)); + tmp = _mm256_lddqu_si256(in + 29); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 20, w0); + tmp = _mm256_lddqu_si256(in + 30); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 21, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 9)); + _mm256_storeu_si256(compressed + 22, w0); +} + +/* we are going to pack 256 24-bit values, touching 24 256-bit words, using 768 + * bytes (24 words x 32 bytes) */ +static void avxpackblock24(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 24 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + tmp = _mm256_lddqu_si256(in + 1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_lddqu_si256(in + 2); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 3), 8)); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_lddqu_si256(in + 4); + tmp = _mm256_lddqu_si256(in + 5); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_lddqu_si256(in + 6); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 8)); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_lddqu_si256(in + 8); + tmp = _mm256_lddqu_si256(in + 9); + w0 = 
_mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_lddqu_si256(in + 10); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 11), 8)); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_lddqu_si256(in + 12); + tmp = _mm256_lddqu_si256(in + 13); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 9, w1); + tmp = _mm256_lddqu_si256(in + 14); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 8)); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_lddqu_si256(in + 16); + tmp = _mm256_lddqu_si256(in + 17); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_lddqu_si256(in + 18); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 8)); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_lddqu_si256(in + 20); + tmp = _mm256_lddqu_si256(in + 21); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_lddqu_si256(in + 22); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 8)); + _mm256_storeu_si256(compressed + 17, w1); + w0 = _mm256_lddqu_si256(in + 24); + tmp = _mm256_lddqu_si256(in + 25); + 
w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 18, w0); + tmp = _mm256_lddqu_si256(in + 26); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 19, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 8)); + _mm256_storeu_si256(compressed + 20, w0); + w1 = _mm256_lddqu_si256(in + 28); + tmp = _mm256_lddqu_si256(in + 29); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_lddqu_si256(in + 30); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 22, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 8)); + _mm256_storeu_si256(compressed + 23, w1); +} + +/* we are going to pack 256 25-bit values, touching 25 256-bit words, using 400 + * bytes */ +static void avxpackblock25(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 25 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + tmp = _mm256_lddqu_si256(in + 1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 25)); + w1 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_lddqu_si256(in + 2); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_lddqu_si256(in + 3); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 11)); + w1 = _mm256_srli_epi32(tmp, 21); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 4), 4)); + tmp = _mm256_lddqu_si256(in + 5); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = 
_mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_lddqu_si256(in + 6); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 4, w0); + tmp = _mm256_lddqu_si256(in + 7); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 15)); + w0 = _mm256_srli_epi32(tmp, 17); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_lddqu_si256(in + 8); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 8)); + w1 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 9), 1)); + tmp = _mm256_lddqu_si256(in + 10); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_lddqu_si256(in + 11); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 19)); + w1 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_lddqu_si256(in + 12); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 13), 5)); + tmp = _mm256_lddqu_si256(in + 14); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_lddqu_si256(in + 15); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_lddqu_si256(in + 16); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_lddqu_si256(in + 17); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 9)); + w0 = _mm256_srli_epi32(tmp, 23); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in 
+ 18), 2)); + tmp = _mm256_lddqu_si256(in + 19); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 14, w0); + tmp = _mm256_lddqu_si256(in + 20); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_lddqu_si256(in + 21); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 13)); + w1 = _mm256_srli_epi32(tmp, 19); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 22), 6)); + tmp = _mm256_lddqu_si256(in + 23); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_lddqu_si256(in + 24); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 18, w0); + tmp = _mm256_lddqu_si256(in + 25); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 17)); + w0 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 19, w1); + tmp = _mm256_lddqu_si256(in + 26); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 10)); + w1 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 20, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 27), 3)); + tmp = _mm256_lddqu_si256(in + 28); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_lddqu_si256(in + 29); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 21)); + w1 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 22, w0); + tmp = _mm256_lddqu_si256(in + 30); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 23, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 7)); + _mm256_storeu_si256(compressed + 
24, w0); +} + +/* we are going to pack 256 26-bit values, touching 26 256-bit words, using 416 + * bytes */ +static void avxpackblock26(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 26 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + tmp = _mm256_lddqu_si256(in + 1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_lddqu_si256(in + 2); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_lddqu_si256(in + 3); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 14)); + w1 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_lddqu_si256(in + 4); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 8)); + w0 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 5), 2)); + tmp = _mm256_lddqu_si256(in + 6); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 4, w0); + tmp = _mm256_lddqu_si256(in + 7); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_lddqu_si256(in + 8); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_lddqu_si256(in + 9); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 10)); + w0 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 10), 4)); + tmp = _mm256_lddqu_si256(in + 11); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = 
_mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_lddqu_si256(in + 12); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 9, w1); + tmp = _mm256_lddqu_si256(in + 13); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 18)); + w1 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_lddqu_si256(in + 14); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 6)); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_lddqu_si256(in + 16); + tmp = _mm256_lddqu_si256(in + 17); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 13, w1); + tmp = _mm256_lddqu_si256(in + 18); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 14, w0); + tmp = _mm256_lddqu_si256(in + 19); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_lddqu_si256(in + 20); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 8)); + w1 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 2)); + tmp = _mm256_lddqu_si256(in + 22); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_lddqu_si256(in + 23); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 18, w0); + tmp = _mm256_lddqu_si256(in + 24); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + 
_mm256_storeu_si256(compressed + 19, w1); + tmp = _mm256_lddqu_si256(in + 25); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 10)); + w1 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 20, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 26), 4)); + tmp = _mm256_lddqu_si256(in + 27); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_lddqu_si256(in + 28); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 22, w0); + tmp = _mm256_lddqu_si256(in + 29); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 23, w1); + tmp = _mm256_lddqu_si256(in + 30); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 12)); + w1 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 24, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 6)); + _mm256_storeu_si256(compressed + 25, w1); +} + +/* we are going to pack 256 27-bit values, touching 27 256-bit words, using 432 + * bytes */ +static void avxpackblock27(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 27 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + tmp = _mm256_lddqu_si256(in + 1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_lddqu_si256(in + 2); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_lddqu_si256(in + 3); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 17)); + w1 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 2, w0); + tmp = 
_mm256_lddqu_si256(in + 4); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_lddqu_si256(in + 5); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 7)); + w1 = _mm256_srli_epi32(tmp, 25); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 6), 2)); + tmp = _mm256_lddqu_si256(in + 7); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_lddqu_si256(in + 8); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_lddqu_si256(in + 9); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 19)); + w0 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_lddqu_si256(in + 10); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 14)); + w1 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_lddqu_si256(in + 11); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 9)); + w0 = _mm256_srli_epi32(tmp, 23); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 12), 4)); + tmp = _mm256_lddqu_si256(in + 13); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 31)); + w1 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_lddqu_si256(in + 14); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_lddqu_si256(in + 15); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 21)); + w1 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_lddqu_si256(in + 16); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + 
_mm256_storeu_si256(compressed + 13, w1); + tmp = _mm256_lddqu_si256(in + 17); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 11)); + w1 = _mm256_srli_epi32(tmp, 21); + _mm256_storeu_si256(compressed + 14, w0); + tmp = _mm256_lddqu_si256(in + 18); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 6)); + w0 = _mm256_srli_epi32(tmp, 26); + _mm256_storeu_si256(compressed + 15, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 19), 1)); + tmp = _mm256_lddqu_si256(in + 20); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 16, w0); + tmp = _mm256_lddqu_si256(in + 21); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_lddqu_si256(in + 22); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 18)); + w1 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 18, w0); + tmp = _mm256_lddqu_si256(in + 23); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 13)); + w0 = _mm256_srli_epi32(tmp, 19); + _mm256_storeu_si256(compressed + 19, w1); + tmp = _mm256_lddqu_si256(in + 24); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 8)); + w1 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 20, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 25), 3)); + tmp = _mm256_lddqu_si256(in + 26); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_lddqu_si256(in + 27); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 25)); + w1 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 22, w0); + tmp = _mm256_lddqu_si256(in + 28); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 23, w1); + tmp = _mm256_lddqu_si256(in + 29); + w0 = _mm256_or_si256(w0, 
_mm256_slli_epi32(tmp, 15)); + w1 = _mm256_srli_epi32(tmp, 17); + _mm256_storeu_si256(compressed + 24, w0); + tmp = _mm256_lddqu_si256(in + 30); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 10)); + w0 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 25, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 5)); + _mm256_storeu_si256(compressed + 26, w0); +} + +/* we are going to pack 256 28-bit values, touching 28 256-bit words, using 448 + * bytes */ +static void avxpackblock28(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 28 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + tmp = _mm256_lddqu_si256(in + 1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_lddqu_si256(in + 2); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_lddqu_si256(in + 3); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_lddqu_si256(in + 4); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_lddqu_si256(in + 5); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 12)); + w1 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 4, w0); + tmp = _mm256_lddqu_si256(in + 6); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 8)); + w0 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 7), 4)); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_lddqu_si256(in + 8); + tmp = _mm256_lddqu_si256(in + 9); + w1 = 
_mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_lddqu_si256(in + 10); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_lddqu_si256(in + 11); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 9, w1); + tmp = _mm256_lddqu_si256(in + 12); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_lddqu_si256(in + 13); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_lddqu_si256(in + 14); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 8)); + w1 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 4)); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_lddqu_si256(in + 16); + tmp = _mm256_lddqu_si256(in + 17); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 14, w0); + tmp = _mm256_lddqu_si256(in + 18); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_lddqu_si256(in + 19); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 16, w0); + tmp = _mm256_lddqu_si256(in + 20); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_lddqu_si256(in + 21); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 12)); + w1 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 18, w0); 
+ tmp = _mm256_lddqu_si256(in + 22); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 8)); + w0 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 19, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 23), 4)); + _mm256_storeu_si256(compressed + 20, w0); + w1 = _mm256_lddqu_si256(in + 24); + tmp = _mm256_lddqu_si256(in + 25); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_lddqu_si256(in + 26); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 22, w0); + tmp = _mm256_lddqu_si256(in + 27); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 23, w1); + tmp = _mm256_lddqu_si256(in + 28); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 24, w0); + tmp = _mm256_lddqu_si256(in + 29); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 25, w1); + tmp = _mm256_lddqu_si256(in + 30); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 8)); + w1 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 26, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 4)); + _mm256_storeu_si256(compressed + 27, w1); +} + +/* we are going to pack 256 29-bit values, touching 29 256-bit words, using 464 + * bytes */ +static void avxpackblock29(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 29 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + tmp = _mm256_lddqu_si256(in + 1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 29)); + w1 = _mm256_srli_epi32(tmp, 3); + 
_mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_lddqu_si256(in + 2); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_lddqu_si256(in + 3); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 23)); + w1 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_lddqu_si256(in + 4); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_lddqu_si256(in + 5); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 17)); + w1 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 4, w0); + tmp = _mm256_lddqu_si256(in + 6); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_lddqu_si256(in + 7); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 11)); + w1 = _mm256_srli_epi32(tmp, 21); + _mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_lddqu_si256(in + 8); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 8)); + w0 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_lddqu_si256(in + 9); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 5)); + w1 = _mm256_srli_epi32(tmp, 27); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 10), 2)); + tmp = _mm256_lddqu_si256(in + 11); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 9, w1); + tmp = _mm256_lddqu_si256(in + 12); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_lddqu_si256(in + 13); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 25)); + w0 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 11, w1); + tmp = 
_mm256_lddqu_si256(in + 14); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_lddqu_si256(in + 15); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 19)); + w0 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 13, w1); + tmp = _mm256_lddqu_si256(in + 16); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 14, w0); + tmp = _mm256_lddqu_si256(in + 17); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 13)); + w0 = _mm256_srli_epi32(tmp, 19); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_lddqu_si256(in + 18); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 10)); + w1 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 16, w0); + tmp = _mm256_lddqu_si256(in + 19); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 7)); + w0 = _mm256_srli_epi32(tmp, 25); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_lddqu_si256(in + 20); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 4)); + w1 = _mm256_srli_epi32(tmp, 28); + _mm256_storeu_si256(compressed + 18, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 21), 1)); + tmp = _mm256_lddqu_si256(in + 22); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 19, w1); + tmp = _mm256_lddqu_si256(in + 23); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 20, w0); + tmp = _mm256_lddqu_si256(in + 24); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_lddqu_si256(in + 25); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 21)); + w1 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 22, w0); + tmp = _mm256_lddqu_si256(in + 26); + w1 
= _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 23, w1); + tmp = _mm256_lddqu_si256(in + 27); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 15)); + w1 = _mm256_srli_epi32(tmp, 17); + _mm256_storeu_si256(compressed + 24, w0); + tmp = _mm256_lddqu_si256(in + 28); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 25, w1); + tmp = _mm256_lddqu_si256(in + 29); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 9)); + w1 = _mm256_srli_epi32(tmp, 23); + _mm256_storeu_si256(compressed + 26, w0); + tmp = _mm256_lddqu_si256(in + 30); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 6)); + w0 = _mm256_srli_epi32(tmp, 26); + _mm256_storeu_si256(compressed + 27, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 3)); + _mm256_storeu_si256(compressed + 28, w0); +} + +/* we are going to pack 256 30-bit values, touching 30 256-bit words, using 480 + * bytes */ +static void avxpackblock30(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 30 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + tmp = _mm256_lddqu_si256(in + 1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_lddqu_si256(in + 2); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_lddqu_si256(in + 3); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_lddqu_si256(in + 4); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 3, w1); + tmp = 
_mm256_lddqu_si256(in + 5); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 4, w0); + tmp = _mm256_lddqu_si256(in + 6); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_lddqu_si256(in + 7); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 18)); + w1 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_lddqu_si256(in + 8); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_lddqu_si256(in + 9); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 14)); + w1 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_lddqu_si256(in + 10); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 9, w1); + tmp = _mm256_lddqu_si256(in + 11); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 10)); + w1 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_lddqu_si256(in + 12); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 8)); + w0 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_lddqu_si256(in + 13); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 6)); + w1 = _mm256_srli_epi32(tmp, 26); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_lddqu_si256(in + 14); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 4)); + w0 = _mm256_srli_epi32(tmp, 28); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 15), 2)); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_lddqu_si256(in + 16); + tmp = _mm256_lddqu_si256(in + 17); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + 
_mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_lddqu_si256(in + 18); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 16, w0); + tmp = _mm256_lddqu_si256(in + 19); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_lddqu_si256(in + 20); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 18, w0); + tmp = _mm256_lddqu_si256(in + 21); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 19, w1); + tmp = _mm256_lddqu_si256(in + 22); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 20, w0); + tmp = _mm256_lddqu_si256(in + 23); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_lddqu_si256(in + 24); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 22, w0); + tmp = _mm256_lddqu_si256(in + 25); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 23, w1); + tmp = _mm256_lddqu_si256(in + 26); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 12)); + w1 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 24, w0); + tmp = _mm256_lddqu_si256(in + 27); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 10)); + w0 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 25, w1); + tmp = _mm256_lddqu_si256(in + 28); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 8)); + w1 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 26, w0); + tmp = _mm256_lddqu_si256(in + 29); + w1 = _mm256_or_si256(w1, 
_mm256_slli_epi32(tmp, 6)); + w0 = _mm256_srli_epi32(tmp, 26); + _mm256_storeu_si256(compressed + 27, w1); + tmp = _mm256_lddqu_si256(in + 30); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 4)); + w1 = _mm256_srli_epi32(tmp, 28); + _mm256_storeu_si256(compressed + 28, w0); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 2)); + _mm256_storeu_si256(compressed + 29, w1); +} + +/* we are going to pack 256 31-bit values, touching 31 256-bit words, using 496 + * bytes */ +static void avxpackblock31(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 31 256-bit words */ + __m256i w0, w1; + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_lddqu_si256(in + 0); + tmp = _mm256_lddqu_si256(in + 1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 31)); + w1 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_lddqu_si256(in + 2); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_lddqu_si256(in + 3); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 29)); + w1 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_lddqu_si256(in + 4); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_lddqu_si256(in + 5); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 4, w0); + tmp = _mm256_lddqu_si256(in + 6); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_lddqu_si256(in + 7); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 25)); + w1 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_lddqu_si256(in + 8); + w1 
= _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_lddqu_si256(in + 9); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 23)); + w1 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_lddqu_si256(in + 10); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 9, w1); + tmp = _mm256_lddqu_si256(in + 11); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 21)); + w1 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_lddqu_si256(in + 12); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_lddqu_si256(in + 13); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 19)); + w1 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_lddqu_si256(in + 14); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 13, w1); + tmp = _mm256_lddqu_si256(in + 15); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 17)); + w1 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 14, w0); + tmp = _mm256_lddqu_si256(in + 16); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_lddqu_si256(in + 17); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 15)); + w1 = _mm256_srli_epi32(tmp, 17); + _mm256_storeu_si256(compressed + 16, w0); + tmp = _mm256_lddqu_si256(in + 18); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_lddqu_si256(in + 19); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 13)); + w1 = _mm256_srli_epi32(tmp, 19); + 
_mm256_storeu_si256(compressed + 18, w0); + tmp = _mm256_lddqu_si256(in + 20); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 19, w1); + tmp = _mm256_lddqu_si256(in + 21); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 11)); + w1 = _mm256_srli_epi32(tmp, 21); + _mm256_storeu_si256(compressed + 20, w0); + tmp = _mm256_lddqu_si256(in + 22); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 10)); + w0 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_lddqu_si256(in + 23); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 9)); + w1 = _mm256_srli_epi32(tmp, 23); + _mm256_storeu_si256(compressed + 22, w0); + tmp = _mm256_lddqu_si256(in + 24); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 8)); + w0 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 23, w1); + tmp = _mm256_lddqu_si256(in + 25); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 7)); + w1 = _mm256_srli_epi32(tmp, 25); + _mm256_storeu_si256(compressed + 24, w0); + tmp = _mm256_lddqu_si256(in + 26); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 6)); + w0 = _mm256_srli_epi32(tmp, 26); + _mm256_storeu_si256(compressed + 25, w1); + tmp = _mm256_lddqu_si256(in + 27); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 5)); + w1 = _mm256_srli_epi32(tmp, 27); + _mm256_storeu_si256(compressed + 26, w0); + tmp = _mm256_lddqu_si256(in + 28); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 4)); + w0 = _mm256_srli_epi32(tmp, 28); + _mm256_storeu_si256(compressed + 27, w1); + tmp = _mm256_lddqu_si256(in + 29); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 3)); + w1 = _mm256_srli_epi32(tmp, 29); + _mm256_storeu_si256(compressed + 28, w0); + tmp = _mm256_lddqu_si256(in + 30); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 2)); + w0 = _mm256_srli_epi32(tmp, 30); + _mm256_storeu_si256(compressed + 29, w1); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(_mm256_lddqu_si256(in + 31), 
1)); + _mm256_storeu_si256(compressed + 30, w0); +} + +/* we are going to pack 256 32-bit values, touching 32 256-bit words, using 512 + * bytes */ +static void avxpackblock32(const uint32_t *pin, __m256i *compressed) { + const __m256i *in = (const __m256i *)pin; + /* we are going to touch 32 256-bit words */ + __m256i w0, w1; + w0 = _mm256_lddqu_si256(in + 0); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_lddqu_si256(in + 1); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_lddqu_si256(in + 2); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_lddqu_si256(in + 3); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_lddqu_si256(in + 4); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_lddqu_si256(in + 5); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_lddqu_si256(in + 6); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_lddqu_si256(in + 7); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_lddqu_si256(in + 8); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_lddqu_si256(in + 9); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_lddqu_si256(in + 10); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_lddqu_si256(in + 11); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_lddqu_si256(in + 12); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_lddqu_si256(in + 13); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_lddqu_si256(in + 14); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_lddqu_si256(in + 15); + _mm256_storeu_si256(compressed + 15, w1); + w0 = _mm256_lddqu_si256(in + 16); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_lddqu_si256(in + 17); + _mm256_storeu_si256(compressed + 17, w1); + w0 = _mm256_lddqu_si256(in + 18); + _mm256_storeu_si256(compressed + 18, w0); + w1 = _mm256_lddqu_si256(in + 19); + _mm256_storeu_si256(compressed + 19, w1); + w0 = _mm256_lddqu_si256(in + 20); + _mm256_storeu_si256(compressed + 20, w0); + w1 = 
_mm256_lddqu_si256(in + 21); + _mm256_storeu_si256(compressed + 21, w1); + w0 = _mm256_lddqu_si256(in + 22); + _mm256_storeu_si256(compressed + 22, w0); + w1 = _mm256_lddqu_si256(in + 23); + _mm256_storeu_si256(compressed + 23, w1); + w0 = _mm256_lddqu_si256(in + 24); + _mm256_storeu_si256(compressed + 24, w0); + w1 = _mm256_lddqu_si256(in + 25); + _mm256_storeu_si256(compressed + 25, w1); + w0 = _mm256_lddqu_si256(in + 26); + _mm256_storeu_si256(compressed + 26, w0); + w1 = _mm256_lddqu_si256(in + 27); + _mm256_storeu_si256(compressed + 27, w1); + w0 = _mm256_lddqu_si256(in + 28); + _mm256_storeu_si256(compressed + 28, w0); + w1 = _mm256_lddqu_si256(in + 29); + _mm256_storeu_si256(compressed + 29, w1); + w0 = _mm256_lddqu_si256(in + 30); + _mm256_storeu_si256(compressed + 30, w0); + w1 = _mm256_lddqu_si256(in + 31); + _mm256_storeu_si256(compressed + 31, w1); +} + +static void avxpackblockmask0(const uint32_t *pin, __m256i *compressed) { + (void)compressed; + (void)pin; /* we consumed 256 32-bit integers */ +} + +/* we are going to pack 256 1-bit values, touching 1 256-bit words, using 16 + * bytes */ +static void avxpackblockmask1(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 1 256-bit word */ + __m256i w0; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), 1)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)), 2)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), 3)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)), 4)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), 5)); + w0 = _mm256_or_si256( + w0, + 
_mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)), 6)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), 7)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)), 8)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)), 9)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)), + 10)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 11)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)), + 12)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)), + 13)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)), + 14)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 15)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)), + 16)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 17)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)), + 18)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 19)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)), + 20)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 21)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)), + 22)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 23)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, 
_mm256_lddqu_si256(in + 24)), + 24)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 25)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)), + 26)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 27)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)), + 28)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 29)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)), + 30)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 31)); + _mm256_storeu_si256(compressed + 0, w0); +} + +/* we are going to pack 256 2-bit values, touching 2 256-bit words, using 32 + * bytes */ +static void avxpackblockmask2(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 2 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(3); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), 2)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)), 4)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), 6)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)), 8)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), + 10)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)), + 12)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), + 14)); + w0 = 
_mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)), + 16)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)), + 18)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)), + 20)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 22)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)), + 24)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)), + 26)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)), + 28)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 30)); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 2)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)), + 4)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 6)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)), + 8)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 10)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)), + 12)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 14)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)), + 16)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 18)); + w1 = _mm256_or_si256( + w1, 
_mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)), + 20)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 22)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)), + 24)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 26)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)), + 28)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 30)); + _mm256_storeu_si256(compressed + 1, w1); +} + +/* we are going to pack 256 3-bit values, touching 3 256-bit words, using 48 + * bytes */ +static void avxpackblockmask3(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 3 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(7); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), 3)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)), 6)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), 9)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)), + 12)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), + 15)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)), + 18)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), + 21)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)), + 24)); + w0 = _mm256_or_si256( + w0, 
_mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)), + 27)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 1)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)), + 4)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)), + 7)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)), + 10)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 13)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)), + 16)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 19)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)), + 22)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 25)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)), + 28)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)), + 2)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 5)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)), + 8)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 11)); + w0 = 
_mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)), + 14)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 17)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)), + 20)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 23)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)), + 26)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 29)); + _mm256_storeu_si256(compressed + 2, w0); +} + +/* we are going to pack 256 4-bit values, touching 4 256-bit words, using 64 + * bytes */ +static void avxpackblockmask4(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 4 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(15); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), 4)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)), 8)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), + 12)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)), + 16)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), + 20)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)), + 24)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), + 28)); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w1 = _mm256_or_si256( + w1, + 
_mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)), 4)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)), + 8)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 12)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)), + 16)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)), + 20)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)), + 24)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 28)); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 4)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)), + 8)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 12)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)), + 16)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 20)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)), + 24)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 28)); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 4)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)), + 8)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, 
_mm256_lddqu_si256(in + 27)), + 12)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)), + 16)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 20)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)), + 24)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 28)); + _mm256_storeu_si256(compressed + 3, w1); +} + +/* we are going to pack 256 5-bit values, touching 5 256-bit words, using 80 + * bytes */ +static void avxpackblockmask5(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 5 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(31); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), 5)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)), + 10)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), + 15)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)), + 20)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), + 25)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), 3)); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)), 8)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in 
+ 9)), + 13)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)), + 18)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 23)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)), + 1)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)), + 6)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 11)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)), + 16)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 21)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)), + 26)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 31)); + w1 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)), + 4)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 9)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)), + 14)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 19)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)), + 24)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed 
+ 3, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)), + 2)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 7)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)), + 12)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 17)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)), + 22)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 27)); + _mm256_storeu_si256(compressed + 4, w0); +} + +/* we are going to pack 256 6-bit values, touching 6 256-bit words, using 96 + * bytes */ +static void avxpackblockmask6(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 6 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(63); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), 6)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)), + 12)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), + 18)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)), + 24)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)), 4)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), + 10)); + w1 = 
_mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)), + 16)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)), + 22)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 2)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)), + 8)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)), + 14)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)), + 20)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 26)); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 6)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)), + 12)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 18)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)), + 24)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)), + 4)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 10)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)), + 
16)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 22)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 2)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)), + 8)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 14)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)), + 20)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 26)); + _mm256_storeu_si256(compressed + 5, w1); +} + +/* we are going to pack 256 7-bit values, touching 7 256-bit words, using 112 + * bytes */ +static void avxpackblockmask7(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 7 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(127); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), 7)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)), + 14)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), + 21)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), 3)); + w1 = _mm256_or_si256( + w1, 
_mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)), + 10)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), + 17)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)), + 24)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)), + 6)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 13)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)), + 20)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)), + 2)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 9)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)), + 16)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 23)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 5)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)), + 12)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 19)); + tmp = _mm256_and_si256(mask, 
_mm256_lddqu_si256(in + 22)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 1)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)), + 8)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 15)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)), + 22)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)), + 4)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 11)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)), + 18)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 25)); + _mm256_storeu_si256(compressed + 6, w0); +} + +/* we are going to pack 256 8-bit values, touching 8 256-bit words, using 256 + * bytes (8 words of 32 bytes each). + * + * pin is read as 32 unaligned 256-bit loads (in + 0 .. in + 31); every 32-bit + * lane is masked to its low 8 bits, so any higher input bits are dropped. + * The packed words are written with unaligned stores to compressed + 0 .. + * compressed + 7. */ +static void avxpackblockmask8(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 8 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(255); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), 8)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)), + 16)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), + 24)); +
_mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), 8)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)), + 16)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), + 24)); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)), 8)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)), + 16)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 24)); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)), + 8)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)), + 16)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 24)); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 8)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)), + 16)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 24)); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 8)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask,
_mm256_lddqu_si256(in + 22)), + 16)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 24)); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 8)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)), + 16)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 24)); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 8)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)), + 16)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 24)); + _mm256_storeu_si256(compressed + 7, w1); +} + +/* we are going to pack 256 9-bit values, touching 9 256-bit words, using 288 + * bytes (9 words of 32 bytes each). + * + * pin is read as 32 unaligned 256-bit loads (in + 0 .. in + 31); every 32-bit + * lane is masked to its low 9 bits, so any higher input bits are dropped. + * The packed words are written with unaligned stores to compressed + 0 .. + * compressed + 8. + * A value that straddles a 32-bit lane boundary goes through tmp: its low bits + * finish the current word and its remaining high bits start the next word. */ +static void avxpackblockmask9(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 9 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(511); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), 9)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)), + 18)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in +
4)), 4)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), + 13)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)), + 22)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)), 8)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)), + 17)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 3)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)), + 12)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)), + 21)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 7)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)), + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 25)); + w1 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)), + 2)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 11)); + w1 = _mm256_or_si256(
+ w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)), + 20)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)), + 6)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 15)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 1)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)), + 10)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 19)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 5)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)), + 14)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 23)); + _mm256_storeu_si256(compressed + 8, w0); +} + +/* we are going to pack 256 10-bit values, touching 10 256-bit words, using 320 + * bytes (10 words of 32 bytes each). + * + * pin is read as 32 unaligned 256-bit loads (in + 0 .. in + 31); every 32-bit + * lane is masked to its low 10 bits, so any higher input bits are dropped. + * The packed words are written with unaligned stores to compressed + 0 .. + * compressed + 9. + * A value that straddles a 32-bit lane boundary goes through tmp: its low bits + * finish the current word and its remaining high bits start the next word. */ +static void avxpackblockmask10(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 10 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(1023); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask,
_mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), + 10)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)), + 20)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)), 8)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), + 18)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), 6)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)), + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)), + 4)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 14)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)), + 2)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)), + 12)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 22)); +
_mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 10)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)), + 20)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)), + 8)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 18)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 6)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)), + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)), + 4)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 14)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 2)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)), + 12)); + w1 = _mm256_or_si256( + w1,
_mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 22)); + _mm256_storeu_si256(compressed + 9, w1); +} + +/* we are going to pack 256 11-bit values, touching 11 256-bit words, using 352 + * bytes (11 words of 32 bytes each). + * + * pin is read as 32 unaligned 256-bit loads (in + 0 .. in + 31); every 32-bit + * lane is masked to its low 11 bits, so any higher input bits are dropped. + * The packed words are written with unaligned stores to compressed + 0 .. + * compressed + 10. + * A value that straddles a 32-bit lane boundary goes through tmp: its low bits + * finish the current word and its remaining high bits start the next word. */ +static void avxpackblockmask11(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 11 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(2047); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), + 11)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), 1)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)), + 12)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)), 2)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), + 13)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)), 3)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)), + 14)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)); + w1 = _mm256_or_si256(w1,
_mm256_slli_epi32(tmp, 25)); + w0 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)), + 4)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)), + 15)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 5)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)), + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 27)); + w0 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)), + 6)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 17)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 7)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)), + 18)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)), + 8)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 19)); + tmp = _mm256_and_si256(mask,
_mm256_lddqu_si256(in + 26)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 9)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)), + 20)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)), + 10)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 21)); + _mm256_storeu_si256(compressed + 10, w0); +} + +/* we are going to pack 256 12-bit values, touching 12 256-bit words, using 384 + * bytes (12 words of 32 bytes each). + * + * pin is read as 32 unaligned 256-bit loads (in + 0 .. in + 31); every 32-bit + * lane is masked to its low 12 bits, so any higher input bits are dropped. + * The packed words are written with unaligned stores to compressed + 0 .. + * compressed + 11. + * A value that straddles a 32-bit lane boundary goes through tmp: its low bits + * finish the current word and its remaining high bits start the next word. */ +static void avxpackblockmask12(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 12 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(4095); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), + 12)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), 4)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)), + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 1, w1); + w0
= _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)), 8)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), + 20)); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)), + 12)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 4)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)), + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)), + 8)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 20)); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 12)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 4)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)), + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 =
_mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)), + 8)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 20)); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 12)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 4)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)), + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)), + 8)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 20)); + _mm256_storeu_si256(compressed + 11, w1); +} + +/* we are going to pack 256 13-bit values, touching 13 256-bit words, using 416 + * bytes (13 words of 32 bytes each). + * + * pin is read as 32 unaligned 256-bit loads (in + 0 .. in + 31); every 32-bit + * lane is masked to its low 13 bits, so any higher input bits are dropped. + * The packed words are written with unaligned stores to compressed + 0 .. + * compressed + 12. + * A value that straddles a 32-bit lane boundary goes through tmp: its low bits + * finish the current word and its remaining high bits start the next word. */ +static void avxpackblockmask13(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 13 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(8191); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), + 13)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w0 =
_mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), 7)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), 1)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)), + 14)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)), 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 21)); + w0 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)), + 2)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 15)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)), + 9)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 3)); + w0 = _mm256_or_si256( + w0,
_mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)), + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 29)); + w1 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)), + 10)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)), + 4)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 17)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 11)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 5)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)), + 18)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 31)); + w1 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)), + 12)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 25)); + w0 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed +
11, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)), + 6)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 19)); + _mm256_storeu_si256(compressed + 12, w0); +} + +/* we are going to pack 256 14-bit values, touching 14 256-bit words, using 448 + * bytes (14 words of 32 bytes each). + * + * pin is read as 32 unaligned 256-bit loads (in + 0 .. in + 31); every 32-bit + * lane is masked to its low 14 bits, so any higher input bits are dropped. + * The packed words are written with unaligned stores to compressed + 0 .. + * compressed + 13. + * A value that straddles a 32-bit lane boundary goes through tmp: its low bits + * finish the current word and its remaining high bits start the next word. */ +static void avxpackblockmask14(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 14 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(16383); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), + 14)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), + 10)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), 6)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), 2)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)), + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); +
_mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)), + 12)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)), + 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)), + 4)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 18)); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 14)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 10)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 6)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 2)); + w0 =
_mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)), + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)), + 12)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)), + 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)), + 4)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 18)); + _mm256_storeu_si256(compressed + 13, w1); +} + +/* we are going to pack 256 15-bit values, touching 15 256-bit words, using 480 + * bytes (15 words of 32 bytes each). + * + * pin is read as 32 unaligned 256-bit loads (in + 0 .. in + 31); every 32-bit + * lane is masked to its low 15 bits, so any higher input bits are dropped. + * The packed words are written with unaligned stores to compressed + 0 .. + * compressed + 14. + * A value that straddles a 32-bit lane boundary goes through tmp: its low bits + * finish the current word and its remaining high bits start the next word. */ +static void avxpackblockmask15(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 15 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(32767); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), + 15)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask,
_mm256_lddqu_si256(in + 3)), + 13)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), + 11)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), 9)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)), 7)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 5)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)), + 3)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 18)); + w1 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 1)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)), + 16)); + tmp = _mm256_and_si256(mask,
_mm256_lddqu_si256(in + 17)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)), + 14)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 29)); + w1 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)), + 12)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 27)); + w0 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)), + 10)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 25)); + w1 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)), + 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)), + 6)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 21)); + w1 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)), + 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 19)); + w0 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 13, w1); + w0 =
_mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)), + 2)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 17)); + _mm256_storeu_si256(compressed + 14, w0); +} + +/* we are going to pack 256 16-bit values, touching 16 256-bit words, using 256 + * bytes */ +static void avxpackblockmask16(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 16 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(65535); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)), + 16)); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), + 16)); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), + 16)); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), + 16)); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)), + 16)); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 16)); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, 
_mm256_lddqu_si256(in + 13)), + 16)); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 16)); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 16)); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 16)); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 16)); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 16)); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 16)); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 16)); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 16)); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 16)); + _mm256_storeu_si256(compressed + 15, w1); +} + 
+/* we are going to pack 256 17-bit values, touching 17 256-bit words, using 272 + * bytes */ +static void avxpackblockmask17(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 17 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(131071); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 17)); + w1 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)), 2)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 19)); + w0 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)), 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 21)); + w1 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)), 6)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)), 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 25)); + w1 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)), + 10)); + tmp = _mm256_and_si256(mask, 
_mm256_lddqu_si256(in + 11)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 27)); + w0 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)), + 12)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 29)); + w1 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)), + 14)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 1)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 3)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 5)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 7)); + tmp = 
_mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 9)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 11)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 13)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 15, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 15)); + _mm256_storeu_si256(compressed + 16, w0); +} + +/* we are going to pack 256 18-bit values, touching 18 256-bit words, using 288 + * bytes */ +static void avxpackblockmask18(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 18 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(262143); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 18)); + w1 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)), 
4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)), 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)), + 12)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)), 2)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 6)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)), + 10)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 
15)), + 14)); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)), + 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)), + 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)), + 12)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 2)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 6)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + 
_mm256_storeu_si256(compressed + 15, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 10)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 14)); + _mm256_storeu_si256(compressed + 17, w1); +} + +/* we are going to pack 256 19-bit values, touching 19 256-bit words, using 304 + * bytes */ +static void avxpackblockmask19(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 19 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(524287); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 19)); + w1 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)), 6)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 25)); + w0 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)), + 12)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 31)); + w1 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256( + w0, + 
_mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), 5)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)), + 11)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 17)); + w1 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)), + 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)), + 10)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 29)); + w1 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 3)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 9)); + tmp = 
_mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 15)); + w1 = _mm256_srli_epi32(tmp, 17); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)), + 2)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 21)); + w0 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)), + 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 14, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 15, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 1)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 7)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 17, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 13)); + _mm256_storeu_si256(compressed + 18, w0); +} + +/* we are going to pack 256 20-bit values, 
touching 20 256-bit words, using 320 + * bytes */ +static void avxpackblockmask20(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 20 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(1048575); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)), 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), + 12)); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)), + 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)); + w0 = _mm256_or_si256(w0, 
_mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)), + 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 12)); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)), + 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 12)); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_and_si256(mask, 
_mm256_lddqu_si256(in + 24)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 15, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)), + 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 16, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 17, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 18, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 12)); + _mm256_storeu_si256(compressed + 19, w1); +} + +/* we are going to pack 256 21-bit values, touching 21 256-bit words, using 336 + * bytes */ +static void avxpackblockmask21(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 21 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(2097151); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 21)); + w1 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)), + 10)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)); + 
w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), 9)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 19)); + w1 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)), 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 18)); + w1 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 7)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 17)); + w1 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)), + 6)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)); + w1 = 
_mm256_or_si256(w1, _mm256_slli_epi32(tmp, 27)); + w0 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 9, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 5)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 15)); + w1 = _mm256_srli_epi32(tmp, 17); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)), + 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 25)); + w0 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 13, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 14)); + w1 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 3)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 13)); + w1 = _mm256_srli_epi32(tmp, 19); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)), + 2)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)); + w1 = 
_mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 12)); + w1 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 18, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)), + 1)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 19, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 11)); + _mm256_storeu_si256(compressed + 20, w0); +} + +/* we are going to pack 256 22-bit values, touching 22 256-bit words, using 352 + * bytes */ +static void avxpackblockmask22(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 22 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(4194303); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), 2)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)); + w1 = 
_mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)), 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 4, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)), 6)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)), + 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 10)); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + 
_mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 12)); + w1 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 2)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 13, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 14)); + w1 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)), + 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 6)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 18)); + w1 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 18, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)), + 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + 
_mm256_storeu_si256(compressed + 19, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 20, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 10)); + _mm256_storeu_si256(compressed + 21, w1); +} + +/* we are going to pack 256 23-bit values, touching 23 256-bit words, using 736 + * bytes (23 words x 32 bytes per 256-bit word). + * Each loaded input is ANDed with the 23-bit mask, so only the low 23 bits of + * every 32-bit input are packed. */ +static void avxpackblockmask23(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 23 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(8388607); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 23)); + w1 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), 5)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 19)); + w0 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 10)); + w1 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, 
_mm256_lddqu_si256(in + 7)), 1)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 15)); + w1 = _mm256_srli_epi32(tmp, 17); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)), + 6)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 11)); + w0 = _mm256_srli_epi32(tmp, 21); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)), + 2)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 25)); + w1 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)), + 7)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)); + w1 = 
_mm256_or_si256(w1, _mm256_slli_epi32(tmp, 21)); + w0 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 13, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 12)); + w1 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 3)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 17)); + w1 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)), + 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 18, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 13)); + w0 = _mm256_srli_epi32(tmp, 19); + _mm256_storeu_si256(compressed + 19, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)), + 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 20, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + 
_mm256_storeu_si256(compressed + 21, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 9)); + _mm256_storeu_si256(compressed + 22, w0); +} + +/* we are going to pack 256 24-bit values, touching 24 256-bit words, using 768 + * bytes (24 words x 32 bytes per 256-bit word). + * Each loaded input is ANDed with the 24-bit mask, so only the low 24 bits of + * every 32-bit input are packed. */ +static void avxpackblockmask24(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 24 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(16777215); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)), 8)); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), 8)); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + 
_mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)), + 8)); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 9, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 8)); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 8)); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); 
+ _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 8)); + _mm256_storeu_si256(compressed + 17, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 18, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 19, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 8)); + _mm256_storeu_si256(compressed + 20, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 22, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 8)); + _mm256_storeu_si256(compressed + 23, w1); +} + +/* we are going to pack 256 25-bit values, touching 25 256-bit words, using 800 + * bytes (25 words x 32 bytes per 256-bit word). + * Each loaded input is ANDed with the 25-bit mask, so only the low 25 bits of + * every 32-bit input are packed. */ +static void avxpackblockmask25(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 25 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(33554431); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 25)); + w1 = 
_mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 11)); + w1 = _mm256_srli_epi32(tmp, 21); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)), 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 4, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 15)); + w0 = _mm256_srli_epi32(tmp, 17); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 8)); + w1 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)), 1)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 19)); + w1 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + 
_mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)), + 5)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 9)); + w0 = _mm256_srli_epi32(tmp, 23); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)), + 2)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 14, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 13)); + w1 = _mm256_srli_epi32(tmp, 19); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)), + 6)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w0 = 
_mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 18, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 17)); + w0 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 19, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 10)); + w1 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 20, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)), + 3)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 21)); + w1 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 22, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 23, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 7)); + _mm256_storeu_si256(compressed + 24, w0); +} + +/* we are going to pack 256 26-bit values, touching 26 256-bit words, using 832 + * bytes (26 words x 32 bytes per 256-bit word). + * Each loaded input is ANDed with the 26-bit mask, so only the low 26 bits of + * every 32-bit input are packed. */ +static void avxpackblockmask26(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 26 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(67108863); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + 
_mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 14)); + w1 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 8)); + w0 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)), 2)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 4, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 10)); + w0 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)), + 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 9, w1); 
+ tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 18)); + w1 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 6)); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 13, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 14, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 8)); + w1 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 2)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 18, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 
16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 19, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 10)); + w1 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 20, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)), + 4)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 22, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 23, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 12)); + w1 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 24, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 6)); + _mm256_storeu_si256(compressed + 25, w1); +} + +/* we are going to pack 256 27-bit values, touching 27 256-bit words, using 864 + * bytes (27 words x 32 bytes per 256-bit word). + * Each loaded input is ANDed with the 27-bit mask, so only the low 27 bits of + * every 32-bit input are packed. */ +static void avxpackblockmask27(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 27 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(134217727); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 0, w0); + tmp = 
_mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 17)); + w1 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 7)); + w1 = _mm256_srli_epi32(tmp, 25); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_or_si256( + w1, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)), 2)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 29)); + w0 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 19)); + w0 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 14)); + w1 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 9)); + w0 = _mm256_srli_epi32(tmp, 23); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)), + 4)); + tmp = _mm256_and_si256(mask, 
_mm256_lddqu_si256(in + 13)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 31)); + w1 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 21)); + w1 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 13, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 11)); + w1 = _mm256_srli_epi32(tmp, 21); + _mm256_storeu_si256(compressed + 14, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 6)); + w0 = _mm256_srli_epi32(tmp, 26); + _mm256_storeu_si256(compressed + 15, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)), + 1)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 16, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 23)); + w0 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 18)); + w1 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 18, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 13)); + w0 = _mm256_srli_epi32(tmp, 19); + 
_mm256_storeu_si256(compressed + 19, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 8)); + w1 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 20, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)), + 3)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 25)); + w1 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 22, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 23, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 15)); + w1 = _mm256_srli_epi32(tmp, 17); + _mm256_storeu_si256(compressed + 24, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 10)); + w0 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 25, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 5)); + _mm256_storeu_si256(compressed + 26, w0); +} + +/* we are going to pack 256 28-bit values, touching 28 256-bit words, using 896 + * bytes (28 words x 32 bytes per 256-bit word) */ +static void avxpackblockmask28(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 28 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(268435455); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)); + w0 = 
_mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 12)); + w1 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 4, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 8)); + w0 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_or_si256( + w0, + _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)), 4)); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 9, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 
16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 8)); + w1 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 4)); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 14, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 16, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 12)); + w1 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 18, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 8)); + w0 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 19, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)), + 4)); + 
_mm256_storeu_si256(compressed + 20, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 22, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 23, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 24, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 25, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 8)); + w1 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 26, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 4)); + _mm256_storeu_si256(compressed + 27, w1); +} + +/* we are going to pack 256 29-bit values, touching 29 256-bit words, using 928 + * bytes (29 x 32); each input is masked to its low 29 bits before packing */ +static void avxpackblockmask29(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 29 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(536870911); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 29)); + 
w1 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 23)); + w1 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 17)); + w1 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 4, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 11)); + w1 = _mm256_srli_epi32(tmp, 21); + _mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 8)); + w0 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 5)); + w1 = _mm256_srli_epi32(tmp, 27); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)), + 2)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 31)); + w0 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 9, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w0 = 
_mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 25)); + w0 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 19)); + w0 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 13, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 14, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 13)); + w0 = _mm256_srli_epi32(tmp, 19); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 10)); + w1 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 16, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 7)); + w0 = _mm256_srli_epi32(tmp, 25); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 4)); + w1 = _mm256_srli_epi32(tmp, 28); + _mm256_storeu_si256(compressed + 18, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)), + 1)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 19, w1); + 
tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 20, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 21)); + w1 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 22, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 23, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 15)); + w1 = _mm256_srli_epi32(tmp, 17); + _mm256_storeu_si256(compressed + 24, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 25, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 9)); + w1 = _mm256_srli_epi32(tmp, 23); + _mm256_storeu_si256(compressed + 26, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 6)); + w0 = _mm256_srli_epi32(tmp, 26); + _mm256_storeu_si256(compressed + 27, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 3)); + _mm256_storeu_si256(compressed + 28, w0); +} + +/* we are going to pack 256 30-bit values, touching 30 256-bit words, using 960 + * bytes (30 x 32); each input is masked to its low 30 bits before packing */ +static void avxpackblockmask30(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 30 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + 
const __m256i mask = _mm256_set1_epi32(1073741823); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 30)); + w1 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 26)); + w1 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 22)); + w1 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 4, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 18)); + w1 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 7, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 14)); + w1 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w1 = _mm256_or_si256(w1, 
_mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 9, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 10)); + w1 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 8)); + w0 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 6)); + w1 = _mm256_srli_epi32(tmp, 26); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 4)); + w0 = _mm256_srli_epi32(tmp, 28); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)), + 2)); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 28)); + w1 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 16, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 19)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 24)); + w1 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 18, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + 
w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 19, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 20)); + w1 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 20, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 16)); + w1 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 22, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 23, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 12)); + w1 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 24, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 10)); + w0 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 25, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 8)); + w1 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 26, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 6)); + w0 = _mm256_srli_epi32(tmp, 26); + _mm256_storeu_si256(compressed + 27, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 4)); + w1 = _mm256_srli_epi32(tmp, 28); + _mm256_storeu_si256(compressed + 28, w0); + w1 = _mm256_or_si256( + w1, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 2)); + _mm256_storeu_si256(compressed + 29, w1); +} + +/* 
we are going to pack 256 31-bit values, touching 31 256-bit words, using 992 + * bytes (31 x 32); each input is masked to its low 31 bits before packing */ +static void avxpackblockmask31(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 31 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + const __m256i mask = _mm256_set1_epi32(2147483647); + __m256i tmp; /* used to store inputs at word boundary */ + w0 = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 0)); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 1)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 31)); + w1 = _mm256_srli_epi32(tmp, 1); + _mm256_storeu_si256(compressed + 0, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 2)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 30)); + w0 = _mm256_srli_epi32(tmp, 2); + _mm256_storeu_si256(compressed + 1, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 3)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 29)); + w1 = _mm256_srli_epi32(tmp, 3); + _mm256_storeu_si256(compressed + 2, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 4)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 28)); + w0 = _mm256_srli_epi32(tmp, 4); + _mm256_storeu_si256(compressed + 3, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 5)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 27)); + w1 = _mm256_srli_epi32(tmp, 5); + _mm256_storeu_si256(compressed + 4, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 6)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 26)); + w0 = _mm256_srli_epi32(tmp, 6); + _mm256_storeu_si256(compressed + 5, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 7)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 25)); + w1 = _mm256_srli_epi32(tmp, 7); + _mm256_storeu_si256(compressed + 6, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 8)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 24)); + w0 = _mm256_srli_epi32(tmp, 8); + _mm256_storeu_si256(compressed + 7, w1); + tmp = 
_mm256_and_si256(mask, _mm256_lddqu_si256(in + 9)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 23)); + w1 = _mm256_srli_epi32(tmp, 9); + _mm256_storeu_si256(compressed + 8, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 10)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 22)); + w0 = _mm256_srli_epi32(tmp, 10); + _mm256_storeu_si256(compressed + 9, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 11)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 21)); + w1 = _mm256_srli_epi32(tmp, 11); + _mm256_storeu_si256(compressed + 10, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 12)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 20)); + w0 = _mm256_srli_epi32(tmp, 12); + _mm256_storeu_si256(compressed + 11, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 13)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 19)); + w1 = _mm256_srli_epi32(tmp, 13); + _mm256_storeu_si256(compressed + 12, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 14)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 18)); + w0 = _mm256_srli_epi32(tmp, 14); + _mm256_storeu_si256(compressed + 13, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 15)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 17)); + w1 = _mm256_srli_epi32(tmp, 15); + _mm256_storeu_si256(compressed + 14, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 16)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 16)); + w0 = _mm256_srli_epi32(tmp, 16); + _mm256_storeu_si256(compressed + 15, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 17)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 15)); + w1 = _mm256_srli_epi32(tmp, 17); + _mm256_storeu_si256(compressed + 16, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 18)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 14)); + w0 = _mm256_srli_epi32(tmp, 18); + _mm256_storeu_si256(compressed + 17, w1); + tmp = _mm256_and_si256(mask, 
_mm256_lddqu_si256(in + 19)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 13)); + w1 = _mm256_srli_epi32(tmp, 19); + _mm256_storeu_si256(compressed + 18, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 20)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 12)); + w0 = _mm256_srli_epi32(tmp, 20); + _mm256_storeu_si256(compressed + 19, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 21)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 11)); + w1 = _mm256_srli_epi32(tmp, 21); + _mm256_storeu_si256(compressed + 20, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 22)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 10)); + w0 = _mm256_srli_epi32(tmp, 22); + _mm256_storeu_si256(compressed + 21, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 23)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 9)); + w1 = _mm256_srli_epi32(tmp, 23); + _mm256_storeu_si256(compressed + 22, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 24)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 8)); + w0 = _mm256_srli_epi32(tmp, 24); + _mm256_storeu_si256(compressed + 23, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 25)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 7)); + w1 = _mm256_srli_epi32(tmp, 25); + _mm256_storeu_si256(compressed + 24, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 26)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 6)); + w0 = _mm256_srli_epi32(tmp, 26); + _mm256_storeu_si256(compressed + 25, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 27)); + w0 = _mm256_or_si256(w0, _mm256_slli_epi32(tmp, 5)); + w1 = _mm256_srli_epi32(tmp, 27); + _mm256_storeu_si256(compressed + 26, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 28)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 4)); + w0 = _mm256_srli_epi32(tmp, 28); + _mm256_storeu_si256(compressed + 27, w1); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 29)); + w0 = 
_mm256_or_si256(w0, _mm256_slli_epi32(tmp, 3)); + w1 = _mm256_srli_epi32(tmp, 29); + _mm256_storeu_si256(compressed + 28, w0); + tmp = _mm256_and_si256(mask, _mm256_lddqu_si256(in + 30)); + w1 = _mm256_or_si256(w1, _mm256_slli_epi32(tmp, 2)); + w0 = _mm256_srli_epi32(tmp, 30); + _mm256_storeu_si256(compressed + 29, w1); + w0 = _mm256_or_si256( + w0, _mm256_slli_epi32(_mm256_and_si256(mask, _mm256_lddqu_si256(in + 31)), + 1)); + _mm256_storeu_si256(compressed + 30, w0); +} + +/* we are going to pack 256 32-bit values, touching 32 256-bit words, using 1024 + * bytes (32 x 32); the values are copied word for word, no masking is applied */ +static void avxpackblockmask32(const uint32_t *pin, __m256i *compressed) { + /* we are going to touch 32 256-bit words */ + __m256i w0, w1; + const __m256i *in = (const __m256i *)pin; + w0 = _mm256_lddqu_si256(in + 0); + _mm256_storeu_si256(compressed + 0, w0); + w1 = _mm256_lddqu_si256(in + 1); + _mm256_storeu_si256(compressed + 1, w1); + w0 = _mm256_lddqu_si256(in + 2); + _mm256_storeu_si256(compressed + 2, w0); + w1 = _mm256_lddqu_si256(in + 3); + _mm256_storeu_si256(compressed + 3, w1); + w0 = _mm256_lddqu_si256(in + 4); + _mm256_storeu_si256(compressed + 4, w0); + w1 = _mm256_lddqu_si256(in + 5); + _mm256_storeu_si256(compressed + 5, w1); + w0 = _mm256_lddqu_si256(in + 6); + _mm256_storeu_si256(compressed + 6, w0); + w1 = _mm256_lddqu_si256(in + 7); + _mm256_storeu_si256(compressed + 7, w1); + w0 = _mm256_lddqu_si256(in + 8); + _mm256_storeu_si256(compressed + 8, w0); + w1 = _mm256_lddqu_si256(in + 9); + _mm256_storeu_si256(compressed + 9, w1); + w0 = _mm256_lddqu_si256(in + 10); + _mm256_storeu_si256(compressed + 10, w0); + w1 = _mm256_lddqu_si256(in + 11); + _mm256_storeu_si256(compressed + 11, w1); + w0 = _mm256_lddqu_si256(in + 12); + _mm256_storeu_si256(compressed + 12, w0); + w1 = _mm256_lddqu_si256(in + 13); + _mm256_storeu_si256(compressed + 13, w1); + w0 = _mm256_lddqu_si256(in + 14); + _mm256_storeu_si256(compressed + 14, w0); + w1 = _mm256_lddqu_si256(in + 15); + 
_mm256_storeu_si256(compressed + 15, w1); + w0 = _mm256_lddqu_si256(in + 16); + _mm256_storeu_si256(compressed + 16, w0); + w1 = _mm256_lddqu_si256(in + 17); + _mm256_storeu_si256(compressed + 17, w1); + w0 = _mm256_lddqu_si256(in + 18); + _mm256_storeu_si256(compressed + 18, w0); + w1 = _mm256_lddqu_si256(in + 19); + _mm256_storeu_si256(compressed + 19, w1); + w0 = _mm256_lddqu_si256(in + 20); + _mm256_storeu_si256(compressed + 20, w0); + w1 = _mm256_lddqu_si256(in + 21); + _mm256_storeu_si256(compressed + 21, w1); + w0 = _mm256_lddqu_si256(in + 22); + _mm256_storeu_si256(compressed + 22, w0); + w1 = _mm256_lddqu_si256(in + 23); + _mm256_storeu_si256(compressed + 23, w1); + w0 = _mm256_lddqu_si256(in + 24); + _mm256_storeu_si256(compressed + 24, w0); + w1 = _mm256_lddqu_si256(in + 25); + _mm256_storeu_si256(compressed + 25, w1); + w0 = _mm256_lddqu_si256(in + 26); + _mm256_storeu_si256(compressed + 26, w0); + w1 = _mm256_lddqu_si256(in + 27); + _mm256_storeu_si256(compressed + 27, w1); + w0 = _mm256_lddqu_si256(in + 28); + _mm256_storeu_si256(compressed + 28, w0); + w1 = _mm256_lddqu_si256(in + 29); + _mm256_storeu_si256(compressed + 29, w1); + w0 = _mm256_lddqu_si256(in + 30); + _mm256_storeu_si256(compressed + 30, w0); + w1 = _mm256_lddqu_si256(in + 31); + _mm256_storeu_si256(compressed + 31, w1); +} + +/* we packed 256 0-bit values: nothing is read from compressed, the 256 + * 32-bit outputs (1024 bytes) are simply set to zero */ +static void avxunpackblock0(const __m256i *compressed, uint32_t *pout) { + (void)compressed; + /* memset counts bytes, so scale the 256 values by their element size */ + memset(pout, 0, 256 * sizeof(uint32_t)); +} + +/* we packed 256 1-bit values, touching 1 256-bit word, using 32 bytes */ +static void avxunpackblock1(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 1 256-bit word */ + __m256i w0; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(1); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 1))); + _mm256_storeu_si256(out + 2, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 
2))); + _mm256_storeu_si256(out + 3, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 3))); + _mm256_storeu_si256(out + 4, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + _mm256_storeu_si256(out + 5, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 5))); + _mm256_storeu_si256(out + 6, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 6))); + _mm256_storeu_si256(out + 7, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 7))); + _mm256_storeu_si256(out + 8, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + _mm256_storeu_si256(out + 9, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 9))); + _mm256_storeu_si256(out + 10, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 10))); + _mm256_storeu_si256(out + 11, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 11))); + _mm256_storeu_si256(out + 12, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 12))); + _mm256_storeu_si256(out + 13, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 13))); + _mm256_storeu_si256(out + 14, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 14))); + _mm256_storeu_si256(out + 15, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 15))); + _mm256_storeu_si256(out + 16, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + _mm256_storeu_si256(out + 17, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 17))); + _mm256_storeu_si256(out + 18, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 18))); + _mm256_storeu_si256(out + 19, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 19))); + _mm256_storeu_si256(out + 20, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 20))); + _mm256_storeu_si256(out + 21, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 21))); + _mm256_storeu_si256(out + 22, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 22))); + _mm256_storeu_si256(out + 23, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 23))); + _mm256_storeu_si256(out + 24, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 24))); + _mm256_storeu_si256(out + 25, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 25))); + _mm256_storeu_si256(out + 
26, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 26))); + _mm256_storeu_si256(out + 27, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 27))); + _mm256_storeu_si256(out + 28, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 28))); + _mm256_storeu_si256(out + 29, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 29))); + _mm256_storeu_si256(out + 30, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 30))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 31)); +} + +/* we packed 256 2-bit values, touching 2 256-bit words, using 64 bytes + * (2 x 32); 256 32-bit values are written to pout */ +static void avxunpackblock2(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 2 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(3); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 2))); + _mm256_storeu_si256(out + 2, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + _mm256_storeu_si256(out + 3, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 6))); + _mm256_storeu_si256(out + 4, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + _mm256_storeu_si256(out + 5, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 10))); + _mm256_storeu_si256(out + 6, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 12))); + _mm256_storeu_si256(out + 7, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 14))); + _mm256_storeu_si256(out + 8, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + _mm256_storeu_si256(out + 9, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 18))); + _mm256_storeu_si256(out + 10, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 20))); + _mm256_storeu_si256(out + 11, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 22))); + _mm256_storeu_si256(out + 12, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 24))); + _mm256_storeu_si256(out + 13, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 26))); + _mm256_storeu_si256(out + 14, + 
_mm256_and_si256(mask, _mm256_srli_epi32(w0, 28))); + _mm256_storeu_si256(out + 15, _mm256_srli_epi32(w0, 30)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256(out + 16, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 17, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 2))); + _mm256_storeu_si256(out + 18, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + _mm256_storeu_si256(out + 19, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 6))); + _mm256_storeu_si256(out + 20, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + _mm256_storeu_si256(out + 21, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 10))); + _mm256_storeu_si256(out + 22, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 12))); + _mm256_storeu_si256(out + 23, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 14))); + _mm256_storeu_si256(out + 24, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + _mm256_storeu_si256(out + 25, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 18))); + _mm256_storeu_si256(out + 26, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 20))); + _mm256_storeu_si256(out + 27, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 22))); + _mm256_storeu_si256(out + 28, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 24))); + _mm256_storeu_si256(out + 29, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 26))); + _mm256_storeu_si256(out + 30, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 28))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w1, 30)); +} + +/* we packed 256 3-bit values, touching 3 256-bit words, using 96 bytes + * (3 x 32); 256 32-bit values are written to pout */ +static void avxunpackblock3(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 3 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(7); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 3))); + _mm256_storeu_si256(out + 2, + 
_mm256_and_si256(mask, _mm256_srli_epi32(w0, 6))); + _mm256_storeu_si256(out + 3, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 9))); + _mm256_storeu_si256(out + 4, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 12))); + _mm256_storeu_si256(out + 5, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 15))); + _mm256_storeu_si256(out + 6, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 18))); + _mm256_storeu_si256(out + 7, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 21))); + _mm256_storeu_si256(out + 8, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 24))); + _mm256_storeu_si256(out + 9, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 27))); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 10, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 30), + _mm256_slli_epi32(w1, 2)))); + _mm256_storeu_si256(out + 11, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 1))); + _mm256_storeu_si256(out + 12, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + _mm256_storeu_si256(out + 13, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 7))); + _mm256_storeu_si256(out + 14, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 10))); + _mm256_storeu_si256(out + 15, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 13))); + _mm256_storeu_si256(out + 16, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + _mm256_storeu_si256(out + 17, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 19))); + _mm256_storeu_si256(out + 18, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 22))); + _mm256_storeu_si256(out + 19, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 25))); + _mm256_storeu_si256(out + 20, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 28))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 21, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 31), + _mm256_slli_epi32(w0, 1)))); + _mm256_storeu_si256(out + 22, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 2))); + _mm256_storeu_si256(out + 23, + _mm256_and_si256(mask, 
_mm256_srli_epi32(w0, 5))); + _mm256_storeu_si256(out + 24, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + _mm256_storeu_si256(out + 25, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 11))); + _mm256_storeu_si256(out + 26, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 14))); + _mm256_storeu_si256(out + 27, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 17))); + _mm256_storeu_si256(out + 28, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 20))); + _mm256_storeu_si256(out + 29, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 23))); + _mm256_storeu_si256(out + 30, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 26))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 29)); +} + +/* we packed 256 4-bit values, touching 4 256-bit words, using 128 bytes (4 x 32); avxunpackblock4 expands them into 256 uint32_t at pout through 32 unaligned 256-bit stores */ +static void avxunpackblock4(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 4 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(15); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + _mm256_storeu_si256(out + 2, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + _mm256_storeu_si256(out + 3, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 12))); + _mm256_storeu_si256(out + 4, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + _mm256_storeu_si256(out + 5, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 20))); + _mm256_storeu_si256(out + 6, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 24))); + _mm256_storeu_si256(out + 7, _mm256_srli_epi32(w0, 28)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256(out + 8, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 9, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + _mm256_storeu_si256(out + 10, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + _mm256_storeu_si256(out + 11, + _mm256_and_si256(mask, 
_mm256_srli_epi32(w1, 12))); + _mm256_storeu_si256(out + 12, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + _mm256_storeu_si256(out + 13, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 20))); + _mm256_storeu_si256(out + 14, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 24))); + _mm256_storeu_si256(out + 15, _mm256_srli_epi32(w1, 28)); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256(out + 16, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 17, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + _mm256_storeu_si256(out + 18, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + _mm256_storeu_si256(out + 19, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 12))); + _mm256_storeu_si256(out + 20, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + _mm256_storeu_si256(out + 21, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 20))); + _mm256_storeu_si256(out + 22, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 24))); + _mm256_storeu_si256(out + 23, _mm256_srli_epi32(w0, 28)); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256(out + 24, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 25, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + _mm256_storeu_si256(out + 26, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + _mm256_storeu_si256(out + 27, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 12))); + _mm256_storeu_si256(out + 28, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + _mm256_storeu_si256(out + 29, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 20))); + _mm256_storeu_si256(out + 30, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 24))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w1, 28)); +} + +/* we packed 256 5-bit values, touching 5 256-bit words, using 160 bytes (5 x 32); avxunpackblock5 expands them into 256 uint32_t at pout through 32 unaligned 256-bit stores */ +static void avxunpackblock5(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 5 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = 
_mm256_set1_epi32(31); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 5))); + _mm256_storeu_si256(out + 2, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 10))); + _mm256_storeu_si256(out + 3, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 15))); + _mm256_storeu_si256(out + 4, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 20))); + _mm256_storeu_si256(out + 5, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 25))); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 6, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 30), + _mm256_slli_epi32(w1, 2)))); + _mm256_storeu_si256(out + 7, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 3))); + _mm256_storeu_si256(out + 8, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + _mm256_storeu_si256(out + 9, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 13))); + _mm256_storeu_si256(out + 10, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 18))); + _mm256_storeu_si256(out + 11, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 23))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 12, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + _mm256_storeu_si256(out + 13, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 1))); + _mm256_storeu_si256(out + 14, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 6))); + _mm256_storeu_si256(out + 15, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 11))); + _mm256_storeu_si256(out + 16, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + _mm256_storeu_si256(out + 17, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 21))); + _mm256_storeu_si256(out + 18, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 26))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 19, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 31), + 
_mm256_slli_epi32(w1, 1)))); + _mm256_storeu_si256(out + 20, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + _mm256_storeu_si256(out + 21, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 9))); + _mm256_storeu_si256(out + 22, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 14))); + _mm256_storeu_si256(out + 23, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 19))); + _mm256_storeu_si256(out + 24, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 24))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 25, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 29), + _mm256_slli_epi32(w0, 3)))); + _mm256_storeu_si256(out + 26, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 2))); + _mm256_storeu_si256(out + 27, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 7))); + _mm256_storeu_si256(out + 28, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 12))); + _mm256_storeu_si256(out + 29, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 17))); + _mm256_storeu_si256(out + 30, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 22))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 27)); +} + +/* we packed 256 6-bit values, touching 6 256-bit words, using 192 bytes (6 x 32); avxunpackblock6 expands them into 256 uint32_t at pout through 32 unaligned 256-bit stores */ +static void avxunpackblock6(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 6 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(63); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 6))); + _mm256_storeu_si256(out + 2, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 12))); + _mm256_storeu_si256(out + 3, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 18))); + _mm256_storeu_si256(out + 4, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 24))); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 5, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 
30), + _mm256_slli_epi32(w1, 2)))); + _mm256_storeu_si256(out + 6, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + _mm256_storeu_si256(out + 7, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 10))); + _mm256_storeu_si256(out + 8, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + _mm256_storeu_si256(out + 9, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 22))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 10, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + _mm256_storeu_si256(out + 11, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 2))); + _mm256_storeu_si256(out + 12, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + _mm256_storeu_si256(out + 13, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 14))); + _mm256_storeu_si256(out + 14, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 20))); + _mm256_storeu_si256(out + 15, _mm256_srli_epi32(w0, 26)); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256(out + 16, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 17, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 6))); + _mm256_storeu_si256(out + 18, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 12))); + _mm256_storeu_si256(out + 19, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 18))); + _mm256_storeu_si256(out + 20, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 24))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 21, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 30), + _mm256_slli_epi32(w0, 2)))); + _mm256_storeu_si256(out + 22, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + _mm256_storeu_si256(out + 23, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 10))); + _mm256_storeu_si256(out + 24, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + _mm256_storeu_si256(out + 25, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 22))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 26, 
+ _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + _mm256_storeu_si256(out + 27, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 2))); + _mm256_storeu_si256(out + 28, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + _mm256_storeu_si256(out + 29, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 14))); + _mm256_storeu_si256(out + 30, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 20))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w1, 26)); +} + +/* we packed 256 7-bit values, touching 7 256-bit words, using 224 bytes (7 x 32); avxunpackblock7 expands them into 256 uint32_t at pout through 32 unaligned 256-bit stores */ +static void avxunpackblock7(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 7 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(127); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 7))); + _mm256_storeu_si256(out + 2, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 14))); + _mm256_storeu_si256(out + 3, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 21))); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 4, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + _mm256_storeu_si256(out + 5, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 3))); + _mm256_storeu_si256(out + 6, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 10))); + _mm256_storeu_si256(out + 7, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 17))); + _mm256_storeu_si256(out + 8, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 24))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 9, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 31), + _mm256_slli_epi32(w0, 1)))); + _mm256_storeu_si256(out + 10, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 6))); + _mm256_storeu_si256(out + 11, + _mm256_and_si256(mask, 
_mm256_srli_epi32(w0, 13))); + _mm256_storeu_si256(out + 12, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 20))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 13, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 27), + _mm256_slli_epi32(w1, 5)))); + _mm256_storeu_si256(out + 14, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 2))); + _mm256_storeu_si256(out + 15, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 9))); + _mm256_storeu_si256(out + 16, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + _mm256_storeu_si256(out + 17, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 23))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 18, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 30), + _mm256_slli_epi32(w0, 2)))); + _mm256_storeu_si256(out + 19, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 5))); + _mm256_storeu_si256(out + 20, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 12))); + _mm256_storeu_si256(out + 21, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 19))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 22, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 26), + _mm256_slli_epi32(w1, 6)))); + _mm256_storeu_si256(out + 23, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 1))); + _mm256_storeu_si256(out + 24, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + _mm256_storeu_si256(out + 25, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 15))); + _mm256_storeu_si256(out + 26, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 22))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 27, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 29), + _mm256_slli_epi32(w0, 3)))); + _mm256_storeu_si256(out + 28, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + _mm256_storeu_si256(out + 29, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 11))); + _mm256_storeu_si256(out + 30, + _mm256_and_si256(mask, 
_mm256_srli_epi32(w0, 18))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 25)); +} + +/* we packed 256 8-bit values, touching 8 256-bit words, using 256 bytes (8 x 32); avxunpackblock8 expands them into 256 uint32_t at pout through 32 unaligned 256-bit stores */ +static void avxunpackblock8(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 8 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(255); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + _mm256_storeu_si256(out + 2, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + _mm256_storeu_si256(out + 3, _mm256_srli_epi32(w0, 24)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256(out + 4, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 5, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + _mm256_storeu_si256(out + 6, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + _mm256_storeu_si256(out + 7, _mm256_srli_epi32(w1, 24)); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256(out + 8, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 9, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + _mm256_storeu_si256(out + 10, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + _mm256_storeu_si256(out + 11, _mm256_srli_epi32(w0, 24)); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256(out + 12, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 13, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + _mm256_storeu_si256(out + 14, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + _mm256_storeu_si256(out + 15, _mm256_srli_epi32(w1, 24)); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256(out + 16, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 17, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + _mm256_storeu_si256(out + 18, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + 
_mm256_storeu_si256(out + 19, _mm256_srli_epi32(w0, 24)); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256(out + 20, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 21, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + _mm256_storeu_si256(out + 22, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + _mm256_storeu_si256(out + 23, _mm256_srli_epi32(w1, 24)); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256(out + 24, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 25, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + _mm256_storeu_si256(out + 26, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + _mm256_storeu_si256(out + 27, _mm256_srli_epi32(w0, 24)); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256(out + 28, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 29, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + _mm256_storeu_si256(out + 30, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w1, 24)); +} + +/* we packed 256 9-bit values, touching 9 256-bit words, using 288 bytes (9 x 32); avxunpackblock9 expands them into 256 uint32_t at pout through 32 unaligned 256-bit stores */ +static void avxunpackblock9(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 9 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(511); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 9))); + _mm256_storeu_si256(out + 2, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 18))); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 3, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 27), + _mm256_slli_epi32(w1, 5)))); + _mm256_storeu_si256(out + 4, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + _mm256_storeu_si256(out + 5, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 13))); + 
_mm256_storeu_si256(out + 6, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 22))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 7, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 31), + _mm256_slli_epi32(w0, 1)))); + _mm256_storeu_si256(out + 8, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + _mm256_storeu_si256(out + 9, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 17))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 10, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 26), + _mm256_slli_epi32(w1, 6)))); + _mm256_storeu_si256(out + 11, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 3))); + _mm256_storeu_si256(out + 12, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 12))); + _mm256_storeu_si256(out + 13, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 21))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 14, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 30), + _mm256_slli_epi32(w0, 2)))); + _mm256_storeu_si256(out + 15, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 7))); + _mm256_storeu_si256(out + 16, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 17, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 25), + _mm256_slli_epi32(w1, 7)))); + _mm256_storeu_si256(out + 18, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 2))); + _mm256_storeu_si256(out + 19, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 11))); + _mm256_storeu_si256(out + 20, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 20))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 21, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 29), + _mm256_slli_epi32(w0, 3)))); + _mm256_storeu_si256(out + 22, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 6))); + _mm256_storeu_si256(out + 23, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 15))); + w1 = 
_mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 24, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + _mm256_storeu_si256(out + 25, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 1))); + _mm256_storeu_si256(out + 26, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 10))); + _mm256_storeu_si256(out + 27, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 19))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 28, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + _mm256_storeu_si256(out + 29, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 5))); + _mm256_storeu_si256(out + 30, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 14))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 23)); +} + +/* we packed 256 10-bit values, touching 10 256-bit words, using 320 bytes (10 x 32); avxunpackblock10 expands them into 256 uint32_t at pout through 32 unaligned 256-bit stores */ +static void avxunpackblock10(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 10 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(1023); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 10))); + _mm256_storeu_si256(out + 2, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 20))); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 3, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 30), + _mm256_slli_epi32(w1, 2)))); + _mm256_storeu_si256(out + 4, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + _mm256_storeu_si256(out + 5, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 18))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 6, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + _mm256_storeu_si256(out + 7, + _mm256_and_si256(mask, 
_mm256_srli_epi32(w0, 6))); + _mm256_storeu_si256(out + 8, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 9, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 26), + _mm256_slli_epi32(w1, 6)))); + _mm256_storeu_si256(out + 10, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + _mm256_storeu_si256(out + 11, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 14))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 12, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + _mm256_storeu_si256(out + 13, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 2))); + _mm256_storeu_si256(out + 14, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 12))); + _mm256_storeu_si256(out + 15, _mm256_srli_epi32(w0, 22)); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256(out + 16, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 17, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 10))); + _mm256_storeu_si256(out + 18, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 20))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 19, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 30), + _mm256_slli_epi32(w0, 2)))); + _mm256_storeu_si256(out + 20, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + _mm256_storeu_si256(out + 21, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 18))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 22, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + _mm256_storeu_si256(out + 23, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 6))); + _mm256_storeu_si256(out + 24, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 25, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 26), + 
_mm256_slli_epi32(w0, 6)))); + _mm256_storeu_si256(out + 26, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + _mm256_storeu_si256(out + 27, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 14))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 28, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + _mm256_storeu_si256(out + 29, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 2))); + _mm256_storeu_si256(out + 30, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 12))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w1, 22)); +} + +/* we packed 256 11-bit values, touching 11 256-bit words, using 352 bytes (11 x 32); avxunpackblock11 expands them into 256 uint32_t at pout through 32 unaligned 256-bit stores */ +static void avxunpackblock11(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 11 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(2047); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 11))); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 2, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 22), + _mm256_slli_epi32(w1, 10)))); + _mm256_storeu_si256(out + 3, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 1))); + _mm256_storeu_si256(out + 4, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 12))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 5, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 23), + _mm256_slli_epi32(w0, 9)))); + _mm256_storeu_si256(out + 6, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 2))); + _mm256_storeu_si256(out + 7, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 13))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 8, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + _mm256_storeu_si256(out + 9, + 
_mm256_and_si256(mask, _mm256_srli_epi32(w1, 3))); + _mm256_storeu_si256(out + 10, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 14))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 11, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 25), + _mm256_slli_epi32(w0, 7)))); + _mm256_storeu_si256(out + 12, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + _mm256_storeu_si256(out + 13, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 15))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 14, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 26), + _mm256_slli_epi32(w1, 6)))); + _mm256_storeu_si256(out + 15, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 5))); + _mm256_storeu_si256(out + 16, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 17, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 27), + _mm256_slli_epi32(w0, 5)))); + _mm256_storeu_si256(out + 18, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 6))); + _mm256_storeu_si256(out + 19, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 17))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 20, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + _mm256_storeu_si256(out + 21, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 7))); + _mm256_storeu_si256(out + 22, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 18))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 23, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 29), + _mm256_slli_epi32(w0, 3)))); + _mm256_storeu_si256(out + 24, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + _mm256_storeu_si256(out + 25, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 19))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 26, + _mm256_and_si256(mask, 
_mm256_or_si256(_mm256_srli_epi32(w0, 30), + _mm256_slli_epi32(w1, 2)))); + _mm256_storeu_si256(out + 27, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 9))); + _mm256_storeu_si256(out + 28, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 20))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 29, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 31), + _mm256_slli_epi32(w0, 1)))); + _mm256_storeu_si256(out + 30, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 10))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 21)); +} + +/* we packed 256 12-bit values, touching 12 256-bit words, using 384 bytes (12 x 32); avxunpackblock12 expands them into 256 uint32_t at pout through 32 unaligned 256-bit stores */ +static void avxunpackblock12(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 12 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(4095); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 12))); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 2, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + _mm256_storeu_si256(out + 3, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + _mm256_storeu_si256(out + 4, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 5, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + _mm256_storeu_si256(out + 6, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + _mm256_storeu_si256(out + 7, _mm256_srli_epi32(w0, 20)); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256(out + 8, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 9, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 12))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 10, + 
_mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + _mm256_storeu_si256(out + 11, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + _mm256_storeu_si256(out + 12, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 13, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + _mm256_storeu_si256(out + 14, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + _mm256_storeu_si256(out + 15, _mm256_srli_epi32(w1, 20)); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256(out + 16, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 17, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 12))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 18, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + _mm256_storeu_si256(out + 19, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + _mm256_storeu_si256(out + 20, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 21, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + _mm256_storeu_si256(out + 22, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + _mm256_storeu_si256(out + 23, _mm256_srli_epi32(w0, 20)); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256(out + 24, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 25, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 12))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 26, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + _mm256_storeu_si256(out + 27, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + _mm256_storeu_si256(out + 28, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + w1 = 
_mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 29, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + _mm256_storeu_si256(out + 30, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w1, 20)); +} + +/* we packed 256 13-bit values, touching 13 256-bit words, using 416 bytes (13 x 32 bytes) */ +static void avxunpackblock13(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 13 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(8191); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 13))); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 2, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 26), + _mm256_slli_epi32(w1, 6)))); + _mm256_storeu_si256(out + 3, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 7))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 4, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 20), + _mm256_slli_epi32(w0, 12)))); + _mm256_storeu_si256(out + 5, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 1))); + _mm256_storeu_si256(out + 6, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 14))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 7, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 27), + _mm256_slli_epi32(w1, 5)))); + _mm256_storeu_si256(out + 8, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 9, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 21), + _mm256_slli_epi32(w0, 11)))); + _mm256_storeu_si256(out + 10, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 2))); + _mm256_storeu_si256(out + 11, + _mm256_and_si256(mask, +
_mm256_srli_epi32(w0, 15))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 12, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + _mm256_storeu_si256(out + 13, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 9))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 14, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 22), + _mm256_slli_epi32(w0, 10)))); + _mm256_storeu_si256(out + 15, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 3))); + _mm256_storeu_si256(out + 16, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 17, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 29), + _mm256_slli_epi32(w1, 3)))); + _mm256_storeu_si256(out + 18, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 10))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 19, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 23), + _mm256_slli_epi32(w0, 9)))); + _mm256_storeu_si256(out + 20, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + _mm256_storeu_si256(out + 21, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 17))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 22, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 30), + _mm256_slli_epi32(w1, 2)))); + _mm256_storeu_si256(out + 23, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 11))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 24, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + _mm256_storeu_si256(out + 25, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 5))); + _mm256_storeu_si256(out + 26, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 18))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 27, + _mm256_and_si256(mask, 
_mm256_or_si256(_mm256_srli_epi32(w0, 31), + _mm256_slli_epi32(w1, 1)))); + _mm256_storeu_si256(out + 28, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 12))); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 29, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 25), + _mm256_slli_epi32(w0, 7)))); + _mm256_storeu_si256(out + 30, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 6))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 19)); +} + +/* we packed 256 14-bit values, touching 14 256-bit words, using 448 bytes (14 x 32 bytes) */ +static void avxunpackblock14(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 14 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(16383); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 14))); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 2, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + _mm256_storeu_si256(out + 3, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 10))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 4, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + _mm256_storeu_si256(out + 5, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 6))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 6, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 20), + _mm256_slli_epi32(w1, 12)))); + _mm256_storeu_si256(out + 7, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 2))); + _mm256_storeu_si256(out + 8, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 9, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 30), +
_mm256_slli_epi32(w0, 2)))); + _mm256_storeu_si256(out + 10, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 12))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 11, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 26), + _mm256_slli_epi32(w1, 6)))); + _mm256_storeu_si256(out + 12, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 13, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 22), + _mm256_slli_epi32(w0, 10)))); + _mm256_storeu_si256(out + 14, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + _mm256_storeu_si256(out + 15, _mm256_srli_epi32(w0, 18)); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256(out + 16, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 17, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 14))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 18, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + _mm256_storeu_si256(out + 19, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 10))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 20, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + _mm256_storeu_si256(out + 21, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 6))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 22, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 20), + _mm256_slli_epi32(w0, 12)))); + _mm256_storeu_si256(out + 23, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 2))); + _mm256_storeu_si256(out + 24, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 16))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 25, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 30), + _mm256_slli_epi32(w1, 2)))); + _mm256_storeu_si256(out + 26, + _mm256_and_si256(mask, 
_mm256_srli_epi32(w1, 12))); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 27, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 26), + _mm256_slli_epi32(w0, 6)))); + _mm256_storeu_si256(out + 28, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 29, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 22), + _mm256_slli_epi32(w1, 10)))); + _mm256_storeu_si256(out + 30, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w1, 18)); +} + +/* we packed 256 15-bit values, touching 15 256-bit words, using 480 bytes (15 x 32 bytes) */ +static void avxunpackblock15(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 15 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(32767); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 15))); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 2, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 30), + _mm256_slli_epi32(w1, 2)))); + _mm256_storeu_si256(out + 3, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 13))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 4, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + _mm256_storeu_si256(out + 5, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 11))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 6, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 26), + _mm256_slli_epi32(w1, 6)))); + _mm256_storeu_si256(out + 7, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 9))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 8, + _mm256_and_si256(mask, +
_mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + _mm256_storeu_si256(out + 9, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 7))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 10, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 22), + _mm256_slli_epi32(w1, 10)))); + _mm256_storeu_si256(out + 11, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 5))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 12, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 20), + _mm256_slli_epi32(w0, 12)))); + _mm256_storeu_si256(out + 13, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 3))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 14, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 18), + _mm256_slli_epi32(w1, 14)))); + _mm256_storeu_si256(out + 15, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 1))); + _mm256_storeu_si256(out + 16, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 16))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 17, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 31), + _mm256_slli_epi32(w0, 1)))); + _mm256_storeu_si256(out + 18, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 14))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 19, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 29), + _mm256_slli_epi32(w1, 3)))); + _mm256_storeu_si256(out + 20, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 12))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 21, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 27), + _mm256_slli_epi32(w0, 5)))); + _mm256_storeu_si256(out + 22, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 10))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 23, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 25), + _mm256_slli_epi32(w1, 
7)))); + _mm256_storeu_si256(out + 24, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 25, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 23), + _mm256_slli_epi32(w0, 9)))); + _mm256_storeu_si256(out + 26, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 6))); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 27, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 21), + _mm256_slli_epi32(w1, 11)))); + _mm256_storeu_si256(out + 28, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256( + out + 29, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 19), + _mm256_slli_epi32(w0, 13)))); + _mm256_storeu_si256(out + 30, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 2))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 17)); +} + +/* we packed 256 16-bit values, touching 16 256-bit words, using 512 bytes (16 x 32 bytes) */ +static void avxunpackblock16(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 16 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(65535); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 1, _mm256_srli_epi32(w0, 16)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256(out + 2, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 3, _mm256_srli_epi32(w1, 16)); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256(out + 4, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 5, _mm256_srli_epi32(w0, 16)); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256(out + 6, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 7, _mm256_srli_epi32(w1, 16)); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256(out + 8, _mm256_and_si256(mask, w0)); +
_mm256_storeu_si256(out + 9, _mm256_srli_epi32(w0, 16)); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256(out + 10, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 11, _mm256_srli_epi32(w1, 16)); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256(out + 12, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 13, _mm256_srli_epi32(w0, 16)); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256(out + 14, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 15, _mm256_srli_epi32(w1, 16)); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256(out + 16, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 17, _mm256_srli_epi32(w0, 16)); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256(out + 18, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 19, _mm256_srli_epi32(w1, 16)); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256(out + 20, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 21, _mm256_srli_epi32(w0, 16)); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256(out + 22, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 23, _mm256_srli_epi32(w1, 16)); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256(out + 24, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 25, _mm256_srli_epi32(w0, 16)); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256(out + 26, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 27, _mm256_srli_epi32(w1, 16)); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256(out + 28, _mm256_and_si256(mask, w0)); + _mm256_storeu_si256(out + 29, _mm256_srli_epi32(w0, 16)); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256(out + 30, _mm256_and_si256(mask, w1)); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w1, 16)); +} + +/* we packed 256 17-bit values, touching 17 256-bit words, using 544 bytes (17 x 32 bytes) */ +static void avxunpackblock17(const __m256i
*compressed, uint32_t *pout) { + /* we are going to access 17 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(131071); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 1, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 17), + _mm256_slli_epi32(w1, 15)))); + _mm256_storeu_si256(out + 2, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 2))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 3, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 19), + _mm256_slli_epi32(w0, 13)))); + _mm256_storeu_si256(out + 4, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 5, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 21), + _mm256_slli_epi32(w1, 11)))); + _mm256_storeu_si256(out + 6, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 6))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 7, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 23), + _mm256_slli_epi32(w0, 9)))); + _mm256_storeu_si256(out + 8, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 9, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 25), + _mm256_slli_epi32(w1, 7)))); + _mm256_storeu_si256(out + 10, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 10))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 11, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 27), + _mm256_slli_epi32(w0, 5)))); + _mm256_storeu_si256(out + 12, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 12))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 13, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 29), + 
_mm256_slli_epi32(w1, 3)))); + _mm256_storeu_si256(out + 14, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 14))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 15, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 31), + _mm256_slli_epi32(w0, 1)))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 16, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + _mm256_storeu_si256(out + 17, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 1))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 18, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 18), + _mm256_slli_epi32(w0, 14)))); + _mm256_storeu_si256(out + 19, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 3))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 20, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 20), + _mm256_slli_epi32(w1, 12)))); + _mm256_storeu_si256(out + 21, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 5))); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 22, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 22), + _mm256_slli_epi32(w0, 10)))); + _mm256_storeu_si256(out + 23, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 7))); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 24, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + _mm256_storeu_si256(out + 25, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 9))); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256( + out + 26, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 26), + _mm256_slli_epi32(w0, 6)))); + _mm256_storeu_si256(out + 27, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 11))); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256( + out + 28, + _mm256_and_si256(mask, 
_mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + _mm256_storeu_si256(out + 29, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 13))); + w0 = _mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256( + out + 30, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 30), + _mm256_slli_epi32(w0, 2)))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 15)); +} + +/* we packed 256 18-bit values, touching 18 256-bit words, using 576 bytes (18 x 32 bytes) */ +static void avxunpackblock18(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 18 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(262143); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 1, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 18), + _mm256_slli_epi32(w1, 14)))); + _mm256_storeu_si256(out + 2, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 3, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 22), + _mm256_slli_epi32(w0, 10)))); + _mm256_storeu_si256(out + 4, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 5, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 26), + _mm256_slli_epi32(w1, 6)))); + _mm256_storeu_si256(out + 6, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 12))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 7, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 30), + _mm256_slli_epi32(w0, 2)))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 8, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + _mm256_storeu_si256(out + 9, + _mm256_and_si256(mask, +
_mm256_srli_epi32(w1, 2))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 10, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 20), + _mm256_slli_epi32(w0, 12)))); + _mm256_storeu_si256(out + 11, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 6))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 12, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + _mm256_storeu_si256(out + 13, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 10))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 14, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + _mm256_storeu_si256(out + 15, _mm256_srli_epi32(w0, 14)); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256(out + 16, _mm256_and_si256(mask, w1)); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 17, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 18), + _mm256_slli_epi32(w0, 14)))); + _mm256_storeu_si256(out + 18, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 19, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 22), + _mm256_slli_epi32(w1, 10)))); + _mm256_storeu_si256(out + 20, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 21, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 26), + _mm256_slli_epi32(w0, 6)))); + _mm256_storeu_si256(out + 22, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 12))); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 23, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 30), + _mm256_slli_epi32(w1, 2)))); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256( + out + 24, + _mm256_and_si256(mask, 
_mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + _mm256_storeu_si256(out + 25, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 2))); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256( + out + 26, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 20), + _mm256_slli_epi32(w1, 12)))); + _mm256_storeu_si256(out + 27, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 6))); + w0 = _mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256( + out + 28, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + _mm256_storeu_si256(out + 29, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 10))); + w1 = _mm256_lddqu_si256(compressed + 17); + _mm256_storeu_si256( + out + 30, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w1, 14)); +} + +/* we packed 256 19-bit values, touching 19 256-bit words, using 608 bytes (19 x 32 bytes) */ +static void avxunpackblock19(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 19 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(524287); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 1, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 19), + _mm256_slli_epi32(w1, 13)))); + _mm256_storeu_si256(out + 2, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 6))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 3, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 25), + _mm256_slli_epi32(w0, 7)))); + _mm256_storeu_si256(out + 4, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 12))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 5, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, +
31), + _mm256_slli_epi32(w1, 1)))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 6, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 18), + _mm256_slli_epi32(w0, 14)))); + _mm256_storeu_si256(out + 7, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 5))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 8, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + _mm256_storeu_si256(out + 9, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 11))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 10, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 30), + _mm256_slli_epi32(w0, 2)))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 11, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 17), + _mm256_slli_epi32(w1, 15)))); + _mm256_storeu_si256(out + 12, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 13, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 23), + _mm256_slli_epi32(w0, 9)))); + _mm256_storeu_si256(out + 14, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 10))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 15, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 29), + _mm256_slli_epi32(w1, 3)))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 16, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + _mm256_storeu_si256(out + 17, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 3))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 18, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 22), + _mm256_slli_epi32(w1, 10)))); + _mm256_storeu_si256(out + 19, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 9))); + w0 = 
_mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 20, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 21, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 15), + _mm256_slli_epi32(w1, 17)))); + _mm256_storeu_si256(out + 22, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 2))); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256( + out + 23, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 21), + _mm256_slli_epi32(w0, 11)))); + _mm256_storeu_si256(out + 24, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256( + out + 25, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 27), + _mm256_slli_epi32(w1, 5)))); + w0 = _mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256( + out + 26, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 14), + _mm256_slli_epi32(w0, 18)))); + _mm256_storeu_si256(out + 27, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 1))); + w1 = _mm256_lddqu_si256(compressed + 17); + _mm256_storeu_si256( + out + 28, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 20), + _mm256_slli_epi32(w1, 12)))); + _mm256_storeu_si256(out + 29, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 7))); + w0 = _mm256_lddqu_si256(compressed + 18); + _mm256_storeu_si256( + out + 30, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 26), + _mm256_slli_epi32(w0, 6)))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 13)); +} + +/* we packed 256 20-bit values, touching 20 256-bit words, using 640 bytes (20 x 32 bytes) */ +static void avxunpackblock20(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 20 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(1048575); + w0 = _mm256_lddqu_si256(compressed); +
_mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 1, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 20), + _mm256_slli_epi32(w1, 12)))); + _mm256_storeu_si256(out + 2, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 3, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 4, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + _mm256_storeu_si256(out + 5, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 6, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + _mm256_storeu_si256(out + 7, _mm256_srli_epi32(w0, 12)); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256(out + 8, _mm256_and_si256(mask, w1)); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 9, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 20), + _mm256_slli_epi32(w0, 12)))); + _mm256_storeu_si256(out + 10, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 11, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 12, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + _mm256_storeu_si256(out + 13, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 14, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + _mm256_storeu_si256(out 
+ 15, _mm256_srli_epi32(w1, 12)); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256(out + 16, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 17, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 20), + _mm256_slli_epi32(w1, 12)))); + _mm256_storeu_si256(out + 18, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 19, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 20, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + _mm256_storeu_si256(out + 21, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256( + out + 22, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + _mm256_storeu_si256(out + 23, _mm256_srli_epi32(w0, 12)); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256(out + 24, _mm256_and_si256(mask, w1)); + w0 = _mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256( + out + 25, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 20), + _mm256_slli_epi32(w0, 12)))); + _mm256_storeu_si256(out + 26, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + w1 = _mm256_lddqu_si256(compressed + 17); + _mm256_storeu_si256( + out + 27, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + w0 = _mm256_lddqu_si256(compressed + 18); + _mm256_storeu_si256( + out + 28, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + _mm256_storeu_si256(out + 29, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + w1 = _mm256_lddqu_si256(compressed + 19); + _mm256_storeu_si256( + out + 30, + _mm256_and_si256(mask, 
_mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w1, 12)); +} + +/* we packed 256 21-bit values, touching 21 256-bit words, using 672 bytes */ +static void avxunpackblock21(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 21 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(2097151); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 1, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 21), + _mm256_slli_epi32(w1, 11)))); + _mm256_storeu_si256(out + 2, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 10))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 3, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 31), + _mm256_slli_epi32(w0, 1)))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 4, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 20), + _mm256_slli_epi32(w1, 12)))); + _mm256_storeu_si256(out + 5, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 9))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 6, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 30), + _mm256_slli_epi32(w0, 2)))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 7, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 19), + _mm256_slli_epi32(w1, 13)))); + _mm256_storeu_si256(out + 8, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 9, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 29), + _mm256_slli_epi32(w0, 3)))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 10, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 
18), + _mm256_slli_epi32(w1, 14)))); + _mm256_storeu_si256(out + 11, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 7))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 12, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 13, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 17), + _mm256_slli_epi32(w1, 15)))); + _mm256_storeu_si256(out + 14, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 6))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 15, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 27), + _mm256_slli_epi32(w0, 5)))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 16, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + _mm256_storeu_si256(out + 17, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 5))); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 18, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 26), + _mm256_slli_epi32(w0, 6)))); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 19, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 15), + _mm256_slli_epi32(w1, 17)))); + _mm256_storeu_si256(out + 20, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256( + out + 21, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 25), + _mm256_slli_epi32(w0, 7)))); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256( + out + 22, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 14), + _mm256_slli_epi32(w1, 18)))); + _mm256_storeu_si256(out + 23, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 3))); + w0 = _mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256( + out + 24, + _mm256_and_si256(mask, 
_mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + w1 = _mm256_lddqu_si256(compressed + 17); + _mm256_storeu_si256( + out + 25, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 13), + _mm256_slli_epi32(w1, 19)))); + _mm256_storeu_si256(out + 26, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 2))); + w0 = _mm256_lddqu_si256(compressed + 18); + _mm256_storeu_si256( + out + 27, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 23), + _mm256_slli_epi32(w0, 9)))); + w1 = _mm256_lddqu_si256(compressed + 19); + _mm256_storeu_si256( + out + 28, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 12), + _mm256_slli_epi32(w1, 20)))); + _mm256_storeu_si256(out + 29, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 1))); + w0 = _mm256_lddqu_si256(compressed + 20); + _mm256_storeu_si256( + out + 30, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 22), + _mm256_slli_epi32(w0, 10)))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 11)); +} + +/* we packed 256 22-bit values, touching 22 256-bit words, using 704 bytes */ +static void avxunpackblock22(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 22 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(4194303); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 1, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 22), + _mm256_slli_epi32(w1, 10)))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 2, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 12), + _mm256_slli_epi32(w0, 20)))); + _mm256_storeu_si256(out + 3, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 2))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 4, + _mm256_and_si256(mask, 
_mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 5, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 14), + _mm256_slli_epi32(w0, 18)))); + _mm256_storeu_si256(out + 6, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 7, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 26), + _mm256_slli_epi32(w1, 6)))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 8, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + _mm256_storeu_si256(out + 9, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 6))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 10, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 11, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 18), + _mm256_slli_epi32(w0, 14)))); + _mm256_storeu_si256(out + 12, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 8))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 13, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 30), + _mm256_slli_epi32(w1, 2)))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 14, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 20), + _mm256_slli_epi32(w0, 12)))); + _mm256_storeu_si256(out + 15, _mm256_srli_epi32(w0, 10)); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256(out + 16, _mm256_and_si256(mask, w1)); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 17, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 22), + _mm256_slli_epi32(w0, 10)))); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 18, + 
_mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 12), + _mm256_slli_epi32(w1, 20)))); + _mm256_storeu_si256(out + 19, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 2))); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256( + out + 20, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256( + out + 21, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 14), + _mm256_slli_epi32(w1, 18)))); + _mm256_storeu_si256(out + 22, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + w0 = _mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256( + out + 23, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 26), + _mm256_slli_epi32(w0, 6)))); + w1 = _mm256_lddqu_si256(compressed + 17); + _mm256_storeu_si256( + out + 24, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + _mm256_storeu_si256(out + 25, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 6))); + w0 = _mm256_lddqu_si256(compressed + 18); + _mm256_storeu_si256( + out + 26, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + w1 = _mm256_lddqu_si256(compressed + 19); + _mm256_storeu_si256( + out + 27, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 18), + _mm256_slli_epi32(w1, 14)))); + _mm256_storeu_si256(out + 28, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + w0 = _mm256_lddqu_si256(compressed + 20); + _mm256_storeu_si256( + out + 29, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 30), + _mm256_slli_epi32(w0, 2)))); + w1 = _mm256_lddqu_si256(compressed + 21); + _mm256_storeu_si256( + out + 30, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 20), + _mm256_slli_epi32(w1, 12)))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w1, 10)); +} + +/* we packed 256 23-bit values, touching 23 256-bit words, using 
736 bytes */ +static void avxunpackblock23(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 23 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(8388607); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 1, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 23), + _mm256_slli_epi32(w1, 9)))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 2, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 14), + _mm256_slli_epi32(w0, 18)))); + _mm256_storeu_si256(out + 3, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 5))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 4, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 5, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 19), + _mm256_slli_epi32(w0, 13)))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 6, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 10), + _mm256_slli_epi32(w1, 22)))); + _mm256_storeu_si256(out + 7, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 1))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 8, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 9, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 15), + _mm256_slli_epi32(w1, 17)))); + _mm256_storeu_si256(out + 10, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 6))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 11, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 29), + _mm256_slli_epi32(w0, 
3)))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 12, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 20), + _mm256_slli_epi32(w1, 12)))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 13, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 11), + _mm256_slli_epi32(w0, 21)))); + _mm256_storeu_si256(out + 14, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 2))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 15, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 25), + _mm256_slli_epi32(w1, 7)))); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 16, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + _mm256_storeu_si256(out + 17, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 7))); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 18, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 30), + _mm256_slli_epi32(w1, 2)))); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256( + out + 19, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 21), + _mm256_slli_epi32(w0, 11)))); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256( + out + 20, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 12), + _mm256_slli_epi32(w1, 20)))); + _mm256_storeu_si256(out + 21, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 3))); + w0 = _mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256( + out + 22, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 26), + _mm256_slli_epi32(w0, 6)))); + w1 = _mm256_lddqu_si256(compressed + 17); + _mm256_storeu_si256( + out + 23, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 17), + _mm256_slli_epi32(w1, 15)))); + _mm256_storeu_si256(out + 24, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 8))); + w0 = _mm256_lddqu_si256(compressed + 18); + 
_mm256_storeu_si256( + out + 25, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 31), + _mm256_slli_epi32(w0, 1)))); + w1 = _mm256_lddqu_si256(compressed + 19); + _mm256_storeu_si256( + out + 26, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 22), + _mm256_slli_epi32(w1, 10)))); + w0 = _mm256_lddqu_si256(compressed + 20); + _mm256_storeu_si256( + out + 27, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 13), + _mm256_slli_epi32(w0, 19)))); + _mm256_storeu_si256(out + 28, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + w1 = _mm256_lddqu_si256(compressed + 21); + _mm256_storeu_si256( + out + 29, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 27), + _mm256_slli_epi32(w1, 5)))); + w0 = _mm256_lddqu_si256(compressed + 22); + _mm256_storeu_si256( + out + 30, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 18), + _mm256_slli_epi32(w0, 14)))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 9)); +} + +/* we packed 256 24-bit values, touching 24 256-bit words, using 768 bytes */ +static void avxunpackblock24(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 24 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(16777215); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 1, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 2, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + _mm256_storeu_si256(out + 3, _mm256_srli_epi32(w0, 8)); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256(out + 4, _mm256_and_si256(mask, w1)); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 5, + 
_mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 6, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + _mm256_storeu_si256(out + 7, _mm256_srli_epi32(w1, 8)); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256(out + 8, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 9, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 10, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + _mm256_storeu_si256(out + 11, _mm256_srli_epi32(w0, 8)); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256(out + 12, _mm256_and_si256(mask, w1)); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 13, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 14, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + _mm256_storeu_si256(out + 15, _mm256_srli_epi32(w1, 8)); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256(out + 16, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 17, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256( + out + 18, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + _mm256_storeu_si256(out + 19, _mm256_srli_epi32(w0, 8)); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256(out + 20, _mm256_and_si256(mask, w1)); + w0 
= _mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256( + out + 21, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + w1 = _mm256_lddqu_si256(compressed + 17); + _mm256_storeu_si256( + out + 22, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + _mm256_storeu_si256(out + 23, _mm256_srli_epi32(w1, 8)); + w0 = _mm256_lddqu_si256(compressed + 18); + _mm256_storeu_si256(out + 24, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 19); + _mm256_storeu_si256( + out + 25, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + w0 = _mm256_lddqu_si256(compressed + 20); + _mm256_storeu_si256( + out + 26, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + _mm256_storeu_si256(out + 27, _mm256_srli_epi32(w0, 8)); + w1 = _mm256_lddqu_si256(compressed + 21); + _mm256_storeu_si256(out + 28, _mm256_and_si256(mask, w1)); + w0 = _mm256_lddqu_si256(compressed + 22); + _mm256_storeu_si256( + out + 29, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + w1 = _mm256_lddqu_si256(compressed + 23); + _mm256_storeu_si256( + out + 30, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w1, 8)); +} + +/* we packed 256 25-bit values, touching 25 256-bit words, using 800 bytes */ +static void avxunpackblock25(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 25 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(33554431); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 1, + _mm256_and_si256(mask, 
_mm256_or_si256(_mm256_srli_epi32(w0, 25), + _mm256_slli_epi32(w1, 7)))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 2, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 18), + _mm256_slli_epi32(w0, 14)))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 3, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 11), + _mm256_slli_epi32(w1, 21)))); + _mm256_storeu_si256(out + 4, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 5, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 29), + _mm256_slli_epi32(w0, 3)))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 6, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 22), + _mm256_slli_epi32(w1, 10)))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 7, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 15), + _mm256_slli_epi32(w0, 17)))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 8, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 8), + _mm256_slli_epi32(w1, 24)))); + _mm256_storeu_si256(out + 9, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 1))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 10, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 26), + _mm256_slli_epi32(w0, 6)))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 11, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 19), + _mm256_slli_epi32(w1, 13)))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 12, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 12), + _mm256_slli_epi32(w0, 20)))); + _mm256_storeu_si256(out + 13, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 5))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 14, 
+ _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 30), + _mm256_slli_epi32(w1, 2)))); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 15, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 23), + _mm256_slli_epi32(w0, 9)))); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 16, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256( + out + 17, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 9), + _mm256_slli_epi32(w0, 23)))); + _mm256_storeu_si256(out + 18, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 2))); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256( + out + 19, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 27), + _mm256_slli_epi32(w1, 5)))); + w0 = _mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256( + out + 20, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 20), + _mm256_slli_epi32(w0, 12)))); + w1 = _mm256_lddqu_si256(compressed + 17); + _mm256_storeu_si256( + out + 21, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 13), + _mm256_slli_epi32(w1, 19)))); + _mm256_storeu_si256(out + 22, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 6))); + w0 = _mm256_lddqu_si256(compressed + 18); + _mm256_storeu_si256( + out + 23, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 31), + _mm256_slli_epi32(w0, 1)))); + w1 = _mm256_lddqu_si256(compressed + 19); + _mm256_storeu_si256( + out + 24, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + w0 = _mm256_lddqu_si256(compressed + 20); + _mm256_storeu_si256( + out + 25, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 17), + _mm256_slli_epi32(w0, 15)))); + w1 = _mm256_lddqu_si256(compressed + 21); + _mm256_storeu_si256( + out + 26, + _mm256_and_si256(mask, 
_mm256_or_si256(_mm256_srli_epi32(w0, 10), + _mm256_slli_epi32(w1, 22)))); + _mm256_storeu_si256(out + 27, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 3))); + w0 = _mm256_lddqu_si256(compressed + 22); + _mm256_storeu_si256( + out + 28, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + w1 = _mm256_lddqu_si256(compressed + 23); + _mm256_storeu_si256( + out + 29, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 21), + _mm256_slli_epi32(w1, 11)))); + w0 = _mm256_lddqu_si256(compressed + 24); + _mm256_storeu_si256( + out + 30, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 14), + _mm256_slli_epi32(w0, 18)))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 7)); +} + +/* we packed 256 26-bit values, touching 26 256-bit words, using 832 bytes */ +static void avxunpackblock26(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 26 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(67108863); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 1, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 26), + _mm256_slli_epi32(w1, 6)))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 2, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 20), + _mm256_slli_epi32(w0, 12)))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 3, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 14), + _mm256_slli_epi32(w1, 18)))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 4, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 8), + _mm256_slli_epi32(w0, 24)))); + _mm256_storeu_si256(out + 5, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 2))); + w1 = _mm256_lddqu_si256(compressed + 5); + 
_mm256_storeu_si256( + out + 6, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 7, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 22), + _mm256_slli_epi32(w0, 10)))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 8, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 9, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 10), + _mm256_slli_epi32(w0, 22)))); + _mm256_storeu_si256(out + 10, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 11, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 30), + _mm256_slli_epi32(w1, 2)))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 12, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 13, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 18), + _mm256_slli_epi32(w1, 14)))); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 14, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 12), + _mm256_slli_epi32(w0, 20)))); + _mm256_storeu_si256(out + 15, _mm256_srli_epi32(w0, 6)); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256(out + 16, _mm256_and_si256(mask, w1)); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256( + out + 17, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 26), + _mm256_slli_epi32(w0, 6)))); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256( + out + 18, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 20), + _mm256_slli_epi32(w1, 12)))); + w0 = 
_mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256( + out + 19, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 14), + _mm256_slli_epi32(w0, 18)))); + w1 = _mm256_lddqu_si256(compressed + 17); + _mm256_storeu_si256( + out + 20, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 8), + _mm256_slli_epi32(w1, 24)))); + _mm256_storeu_si256(out + 21, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 2))); + w0 = _mm256_lddqu_si256(compressed + 18); + _mm256_storeu_si256( + out + 22, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + w1 = _mm256_lddqu_si256(compressed + 19); + _mm256_storeu_si256( + out + 23, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 22), + _mm256_slli_epi32(w1, 10)))); + w0 = _mm256_lddqu_si256(compressed + 20); + _mm256_storeu_si256( + out + 24, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + w1 = _mm256_lddqu_si256(compressed + 21); + _mm256_storeu_si256( + out + 25, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 10), + _mm256_slli_epi32(w1, 22)))); + _mm256_storeu_si256(out + 26, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 4))); + w0 = _mm256_lddqu_si256(compressed + 22); + _mm256_storeu_si256( + out + 27, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 30), + _mm256_slli_epi32(w0, 2)))); + w1 = _mm256_lddqu_si256(compressed + 23); + _mm256_storeu_si256( + out + 28, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + w0 = _mm256_lddqu_si256(compressed + 24); + _mm256_storeu_si256( + out + 29, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 18), + _mm256_slli_epi32(w0, 14)))); + w1 = _mm256_lddqu_si256(compressed + 25); + _mm256_storeu_si256( + out + 30, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 12), + _mm256_slli_epi32(w1, 20)))); + _mm256_storeu_si256(out + 31, 
_mm256_srli_epi32(w1, 6)); +} + +/* we packed 256 27-bit values, touching 27 256-bit words, using 864 bytes */ +static void avxunpackblock27(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 27 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(134217727); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 1, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 27), + _mm256_slli_epi32(w1, 5)))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 2, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 22), + _mm256_slli_epi32(w0, 10)))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 3, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 17), + _mm256_slli_epi32(w1, 15)))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 4, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 12), + _mm256_slli_epi32(w0, 20)))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 5, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 7), + _mm256_slli_epi32(w1, 25)))); + _mm256_storeu_si256(out + 6, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 2))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 7, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 29), + _mm256_slli_epi32(w0, 3)))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 8, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 9, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 19), + _mm256_slli_epi32(w0, 13)))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( 
+ out + 10, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 14), + _mm256_slli_epi32(w1, 18)))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 11, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 9), + _mm256_slli_epi32(w0, 23)))); + _mm256_storeu_si256(out + 12, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 4))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 13, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 31), + _mm256_slli_epi32(w1, 1)))); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 14, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 26), + _mm256_slli_epi32(w0, 6)))); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 15, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 21), + _mm256_slli_epi32(w1, 11)))); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256( + out + 16, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256( + out + 17, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 11), + _mm256_slli_epi32(w1, 21)))); + w0 = _mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256( + out + 18, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 6), + _mm256_slli_epi32(w0, 26)))); + _mm256_storeu_si256(out + 19, + _mm256_and_si256(mask, _mm256_srli_epi32(w0, 1))); + w1 = _mm256_lddqu_si256(compressed + 17); + _mm256_storeu_si256( + out + 20, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + w0 = _mm256_lddqu_si256(compressed + 18); + _mm256_storeu_si256( + out + 21, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 23), + _mm256_slli_epi32(w0, 9)))); + w1 = _mm256_lddqu_si256(compressed + 19); + _mm256_storeu_si256( + out + 22, + _mm256_and_si256(mask, 
_mm256_or_si256(_mm256_srli_epi32(w0, 18), + _mm256_slli_epi32(w1, 14)))); + w0 = _mm256_lddqu_si256(compressed + 20); + _mm256_storeu_si256( + out + 23, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 13), + _mm256_slli_epi32(w0, 19)))); + w1 = _mm256_lddqu_si256(compressed + 21); + _mm256_storeu_si256( + out + 24, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 8), + _mm256_slli_epi32(w1, 24)))); + _mm256_storeu_si256(out + 25, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 3))); + w0 = _mm256_lddqu_si256(compressed + 22); + _mm256_storeu_si256( + out + 26, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 30), + _mm256_slli_epi32(w0, 2)))); + w1 = _mm256_lddqu_si256(compressed + 23); + _mm256_storeu_si256( + out + 27, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 25), + _mm256_slli_epi32(w1, 7)))); + w0 = _mm256_lddqu_si256(compressed + 24); + _mm256_storeu_si256( + out + 28, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 20), + _mm256_slli_epi32(w0, 12)))); + w1 = _mm256_lddqu_si256(compressed + 25); + _mm256_storeu_si256( + out + 29, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 15), + _mm256_slli_epi32(w1, 17)))); + w0 = _mm256_lddqu_si256(compressed + 26); + _mm256_storeu_si256( + out + 30, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 10), + _mm256_slli_epi32(w0, 22)))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 5)); +} + +/* we packed 256 28-bit values, touching 28 256-bit words, using 896 bytes */ +static void avxunpackblock28(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 28 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(268435455); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 1, + _mm256_and_si256(mask, 
_mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 2, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 3, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 20), + _mm256_slli_epi32(w1, 12)))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 4, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 5, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 12), + _mm256_slli_epi32(w1, 20)))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 6, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 8), + _mm256_slli_epi32(w0, 24)))); + _mm256_storeu_si256(out + 7, _mm256_srli_epi32(w0, 4)); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256(out + 8, _mm256_and_si256(mask, w1)); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 9, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 10, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 11, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 20), + _mm256_slli_epi32(w0, 12)))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 12, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 13, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 12), + _mm256_slli_epi32(w0, 
20)))); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 14, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 8), + _mm256_slli_epi32(w1, 24)))); + _mm256_storeu_si256(out + 15, _mm256_srli_epi32(w1, 4)); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256(out + 16, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256( + out + 17, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + w0 = _mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256( + out + 18, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + w1 = _mm256_lddqu_si256(compressed + 17); + _mm256_storeu_si256( + out + 19, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 20), + _mm256_slli_epi32(w1, 12)))); + w0 = _mm256_lddqu_si256(compressed + 18); + _mm256_storeu_si256( + out + 20, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + w1 = _mm256_lddqu_si256(compressed + 19); + _mm256_storeu_si256( + out + 21, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 12), + _mm256_slli_epi32(w1, 20)))); + w0 = _mm256_lddqu_si256(compressed + 20); + _mm256_storeu_si256( + out + 22, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 8), + _mm256_slli_epi32(w0, 24)))); + _mm256_storeu_si256(out + 23, _mm256_srli_epi32(w0, 4)); + w1 = _mm256_lddqu_si256(compressed + 21); + _mm256_storeu_si256(out + 24, _mm256_and_si256(mask, w1)); + w0 = _mm256_lddqu_si256(compressed + 22); + _mm256_storeu_si256( + out + 25, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + w1 = _mm256_lddqu_si256(compressed + 23); + _mm256_storeu_si256( + out + 26, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + w0 = _mm256_lddqu_si256(compressed + 24); + 
_mm256_storeu_si256( + out + 27, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 20), + _mm256_slli_epi32(w0, 12)))); + w1 = _mm256_lddqu_si256(compressed + 25); + _mm256_storeu_si256( + out + 28, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + w0 = _mm256_lddqu_si256(compressed + 26); + _mm256_storeu_si256( + out + 29, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 12), + _mm256_slli_epi32(w0, 20)))); + w1 = _mm256_lddqu_si256(compressed + 27); + _mm256_storeu_si256( + out + 30, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 8), + _mm256_slli_epi32(w1, 24)))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w1, 4)); +} + +/* we packed 256 29-bit values, touching 29 256-bit words, using 928 bytes */ +static void avxunpackblock29(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 29 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(536870911); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 1, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 29), + _mm256_slli_epi32(w1, 3)))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 2, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 26), + _mm256_slli_epi32(w0, 6)))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 3, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 23), + _mm256_slli_epi32(w1, 9)))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 4, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 20), + _mm256_slli_epi32(w0, 12)))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 5, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 17), + 
_mm256_slli_epi32(w1, 15)))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 6, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 14), + _mm256_slli_epi32(w0, 18)))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 7, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 11), + _mm256_slli_epi32(w1, 21)))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 8, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 8), + _mm256_slli_epi32(w0, 24)))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 9, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 5), + _mm256_slli_epi32(w1, 27)))); + _mm256_storeu_si256(out + 10, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 2))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 11, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 31), + _mm256_slli_epi32(w0, 1)))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 12, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 13, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 25), + _mm256_slli_epi32(w0, 7)))); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 14, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 22), + _mm256_slli_epi32(w1, 10)))); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256( + out + 15, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 19), + _mm256_slli_epi32(w0, 13)))); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256( + out + 16, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + w0 = _mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256( + out + 17, + _mm256_and_si256(mask, 
_mm256_or_si256(_mm256_srli_epi32(w1, 13), + _mm256_slli_epi32(w0, 19)))); + w1 = _mm256_lddqu_si256(compressed + 17); + _mm256_storeu_si256( + out + 18, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 10), + _mm256_slli_epi32(w1, 22)))); + w0 = _mm256_lddqu_si256(compressed + 18); + _mm256_storeu_si256( + out + 19, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 7), + _mm256_slli_epi32(w0, 25)))); + w1 = _mm256_lddqu_si256(compressed + 19); + _mm256_storeu_si256( + out + 20, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 4), + _mm256_slli_epi32(w1, 28)))); + _mm256_storeu_si256(out + 21, + _mm256_and_si256(mask, _mm256_srli_epi32(w1, 1))); + w0 = _mm256_lddqu_si256(compressed + 20); + _mm256_storeu_si256( + out + 22, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 30), + _mm256_slli_epi32(w0, 2)))); + w1 = _mm256_lddqu_si256(compressed + 21); + _mm256_storeu_si256( + out + 23, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 27), + _mm256_slli_epi32(w1, 5)))); + w0 = _mm256_lddqu_si256(compressed + 22); + _mm256_storeu_si256( + out + 24, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + w1 = _mm256_lddqu_si256(compressed + 23); + _mm256_storeu_si256( + out + 25, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 21), + _mm256_slli_epi32(w1, 11)))); + w0 = _mm256_lddqu_si256(compressed + 24); + _mm256_storeu_si256( + out + 26, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 18), + _mm256_slli_epi32(w0, 14)))); + w1 = _mm256_lddqu_si256(compressed + 25); + _mm256_storeu_si256( + out + 27, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 15), + _mm256_slli_epi32(w1, 17)))); + w0 = _mm256_lddqu_si256(compressed + 26); + _mm256_storeu_si256( + out + 28, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 12), + _mm256_slli_epi32(w0, 20)))); + w1 = _mm256_lddqu_si256(compressed + 27); + 
_mm256_storeu_si256( + out + 29, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 9), + _mm256_slli_epi32(w1, 23)))); + w0 = _mm256_lddqu_si256(compressed + 28); + _mm256_storeu_si256( + out + 30, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 6), + _mm256_slli_epi32(w0, 26)))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 3)); +} + +/* we packed 256 30-bit values, touching 30 256-bit words, using 960 bytes */ +static void avxunpackblock30(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 30 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(1073741823); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 1, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 30), + _mm256_slli_epi32(w1, 2)))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 2, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 3, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 26), + _mm256_slli_epi32(w1, 6)))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 4, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 5, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 22), + _mm256_slli_epi32(w1, 10)))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 6, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 20), + _mm256_slli_epi32(w0, 12)))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 7, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 18), + 
_mm256_slli_epi32(w1, 14)))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 8, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 9, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 14), + _mm256_slli_epi32(w1, 18)))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 10, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 12), + _mm256_slli_epi32(w0, 20)))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 11, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 10), + _mm256_slli_epi32(w1, 22)))); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 12, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 8), + _mm256_slli_epi32(w0, 24)))); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 13, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 6), + _mm256_slli_epi32(w1, 26)))); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256( + out + 14, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 4), + _mm256_slli_epi32(w0, 28)))); + _mm256_storeu_si256(out + 15, _mm256_srli_epi32(w0, 2)); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256(out + 16, _mm256_and_si256(mask, w1)); + w0 = _mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256( + out + 17, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 30), + _mm256_slli_epi32(w0, 2)))); + w1 = _mm256_lddqu_si256(compressed + 17); + _mm256_storeu_si256( + out + 18, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 28), + _mm256_slli_epi32(w1, 4)))); + w0 = _mm256_lddqu_si256(compressed + 18); + _mm256_storeu_si256( + out + 19, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 26), + _mm256_slli_epi32(w0, 6)))); + w1 = 
_mm256_lddqu_si256(compressed + 19); + _mm256_storeu_si256( + out + 20, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 24), + _mm256_slli_epi32(w1, 8)))); + w0 = _mm256_lddqu_si256(compressed + 20); + _mm256_storeu_si256( + out + 21, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 22), + _mm256_slli_epi32(w0, 10)))); + w1 = _mm256_lddqu_si256(compressed + 21); + _mm256_storeu_si256( + out + 22, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 20), + _mm256_slli_epi32(w1, 12)))); + w0 = _mm256_lddqu_si256(compressed + 22); + _mm256_storeu_si256( + out + 23, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 18), + _mm256_slli_epi32(w0, 14)))); + w1 = _mm256_lddqu_si256(compressed + 23); + _mm256_storeu_si256( + out + 24, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 16), + _mm256_slli_epi32(w1, 16)))); + w0 = _mm256_lddqu_si256(compressed + 24); + _mm256_storeu_si256( + out + 25, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 14), + _mm256_slli_epi32(w0, 18)))); + w1 = _mm256_lddqu_si256(compressed + 25); + _mm256_storeu_si256( + out + 26, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 12), + _mm256_slli_epi32(w1, 20)))); + w0 = _mm256_lddqu_si256(compressed + 26); + _mm256_storeu_si256( + out + 27, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 10), + _mm256_slli_epi32(w0, 22)))); + w1 = _mm256_lddqu_si256(compressed + 27); + _mm256_storeu_si256( + out + 28, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 8), + _mm256_slli_epi32(w1, 24)))); + w0 = _mm256_lddqu_si256(compressed + 28); + _mm256_storeu_si256( + out + 29, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 6), + _mm256_slli_epi32(w0, 26)))); + w1 = _mm256_lddqu_si256(compressed + 29); + _mm256_storeu_si256( + out + 30, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 4), + _mm256_slli_epi32(w1, 28)))); + _mm256_storeu_si256(out + 31, 
_mm256_srli_epi32(w1, 2)); +} + +/* we packed 256 31-bit values, touching 31 256-bit words, using 992 bytes */ +static void avxunpackblock31(const __m256i *compressed, uint32_t *pout) { + /* we are going to access 31 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + const __m256i mask = _mm256_set1_epi32(2147483647); + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, _mm256_and_si256(mask, w0)); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256( + out + 1, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 31), + _mm256_slli_epi32(w1, 1)))); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256( + out + 2, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 30), + _mm256_slli_epi32(w0, 2)))); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256( + out + 3, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 29), + _mm256_slli_epi32(w1, 3)))); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256( + out + 4, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 28), + _mm256_slli_epi32(w0, 4)))); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256( + out + 5, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 27), + _mm256_slli_epi32(w1, 5)))); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256( + out + 6, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 26), + _mm256_slli_epi32(w0, 6)))); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256( + out + 7, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 25), + _mm256_slli_epi32(w1, 7)))); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256( + out + 8, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 24), + _mm256_slli_epi32(w0, 8)))); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256( + out + 9, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 23), + 
_mm256_slli_epi32(w1, 9)))); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256( + out + 10, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 22), + _mm256_slli_epi32(w0, 10)))); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256( + out + 11, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 21), + _mm256_slli_epi32(w1, 11)))); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256( + out + 12, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 20), + _mm256_slli_epi32(w0, 12)))); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256( + out + 13, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 19), + _mm256_slli_epi32(w1, 13)))); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256( + out + 14, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 18), + _mm256_slli_epi32(w0, 14)))); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256( + out + 15, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 17), + _mm256_slli_epi32(w1, 15)))); + w0 = _mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256( + out + 16, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 16), + _mm256_slli_epi32(w0, 16)))); + w1 = _mm256_lddqu_si256(compressed + 17); + _mm256_storeu_si256( + out + 17, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 15), + _mm256_slli_epi32(w1, 17)))); + w0 = _mm256_lddqu_si256(compressed + 18); + _mm256_storeu_si256( + out + 18, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 14), + _mm256_slli_epi32(w0, 18)))); + w1 = _mm256_lddqu_si256(compressed + 19); + _mm256_storeu_si256( + out + 19, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 13), + _mm256_slli_epi32(w1, 19)))); + w0 = _mm256_lddqu_si256(compressed + 20); + _mm256_storeu_si256( + out + 20, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 12), + _mm256_slli_epi32(w0, 20)))); + 
w1 = _mm256_lddqu_si256(compressed + 21); + _mm256_storeu_si256( + out + 21, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 11), + _mm256_slli_epi32(w1, 21)))); + w0 = _mm256_lddqu_si256(compressed + 22); + _mm256_storeu_si256( + out + 22, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 10), + _mm256_slli_epi32(w0, 22)))); + w1 = _mm256_lddqu_si256(compressed + 23); + _mm256_storeu_si256( + out + 23, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 9), + _mm256_slli_epi32(w1, 23)))); + w0 = _mm256_lddqu_si256(compressed + 24); + _mm256_storeu_si256( + out + 24, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 8), + _mm256_slli_epi32(w0, 24)))); + w1 = _mm256_lddqu_si256(compressed + 25); + _mm256_storeu_si256( + out + 25, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 7), + _mm256_slli_epi32(w1, 25)))); + w0 = _mm256_lddqu_si256(compressed + 26); + _mm256_storeu_si256( + out + 26, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 6), + _mm256_slli_epi32(w0, 26)))); + w1 = _mm256_lddqu_si256(compressed + 27); + _mm256_storeu_si256( + out + 27, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 5), + _mm256_slli_epi32(w1, 27)))); + w0 = _mm256_lddqu_si256(compressed + 28); + _mm256_storeu_si256( + out + 28, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 4), + _mm256_slli_epi32(w0, 28)))); + w1 = _mm256_lddqu_si256(compressed + 29); + _mm256_storeu_si256( + out + 29, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w0, 3), + _mm256_slli_epi32(w1, 29)))); + w0 = _mm256_lddqu_si256(compressed + 30); + _mm256_storeu_si256( + out + 30, + _mm256_and_si256(mask, _mm256_or_si256(_mm256_srli_epi32(w1, 2), + _mm256_slli_epi32(w0, 30)))); + _mm256_storeu_si256(out + 31, _mm256_srli_epi32(w0, 1)); +} + +/* we packed 256 32-bit values, touching 32 256-bit words, using 1024 bytes */ +static void avxunpackblock32(const __m256i *compressed, uint32_t *pout) { + 
/* we are going to access 32 256-bit words */ + __m256i w0, w1; + __m256i *out = (__m256i *)pout; + w0 = _mm256_lddqu_si256(compressed); + _mm256_storeu_si256(out + 0, w0); + w1 = _mm256_lddqu_si256(compressed + 1); + _mm256_storeu_si256(out + 1, w1); + w0 = _mm256_lddqu_si256(compressed + 2); + _mm256_storeu_si256(out + 2, w0); + w1 = _mm256_lddqu_si256(compressed + 3); + _mm256_storeu_si256(out + 3, w1); + w0 = _mm256_lddqu_si256(compressed + 4); + _mm256_storeu_si256(out + 4, w0); + w1 = _mm256_lddqu_si256(compressed + 5); + _mm256_storeu_si256(out + 5, w1); + w0 = _mm256_lddqu_si256(compressed + 6); + _mm256_storeu_si256(out + 6, w0); + w1 = _mm256_lddqu_si256(compressed + 7); + _mm256_storeu_si256(out + 7, w1); + w0 = _mm256_lddqu_si256(compressed + 8); + _mm256_storeu_si256(out + 8, w0); + w1 = _mm256_lddqu_si256(compressed + 9); + _mm256_storeu_si256(out + 9, w1); + w0 = _mm256_lddqu_si256(compressed + 10); + _mm256_storeu_si256(out + 10, w0); + w1 = _mm256_lddqu_si256(compressed + 11); + _mm256_storeu_si256(out + 11, w1); + w0 = _mm256_lddqu_si256(compressed + 12); + _mm256_storeu_si256(out + 12, w0); + w1 = _mm256_lddqu_si256(compressed + 13); + _mm256_storeu_si256(out + 13, w1); + w0 = _mm256_lddqu_si256(compressed + 14); + _mm256_storeu_si256(out + 14, w0); + w1 = _mm256_lddqu_si256(compressed + 15); + _mm256_storeu_si256(out + 15, w1); + w0 = _mm256_lddqu_si256(compressed + 16); + _mm256_storeu_si256(out + 16, w0); + w1 = _mm256_lddqu_si256(compressed + 17); + _mm256_storeu_si256(out + 17, w1); + w0 = _mm256_lddqu_si256(compressed + 18); + _mm256_storeu_si256(out + 18, w0); + w1 = _mm256_lddqu_si256(compressed + 19); + _mm256_storeu_si256(out + 19, w1); + w0 = _mm256_lddqu_si256(compressed + 20); + _mm256_storeu_si256(out + 20, w0); + w1 = _mm256_lddqu_si256(compressed + 21); + _mm256_storeu_si256(out + 21, w1); + w0 = _mm256_lddqu_si256(compressed + 22); + _mm256_storeu_si256(out + 22, w0); + w1 = _mm256_lddqu_si256(compressed + 23); + 
_mm256_storeu_si256(out + 23, w1); + w0 = _mm256_lddqu_si256(compressed + 24); + _mm256_storeu_si256(out + 24, w0); + w1 = _mm256_lddqu_si256(compressed + 25); + _mm256_storeu_si256(out + 25, w1); + w0 = _mm256_lddqu_si256(compressed + 26); + _mm256_storeu_si256(out + 26, w0); + w1 = _mm256_lddqu_si256(compressed + 27); + _mm256_storeu_si256(out + 27, w1); + w0 = _mm256_lddqu_si256(compressed + 28); + _mm256_storeu_si256(out + 28, w0); + w1 = _mm256_lddqu_si256(compressed + 29); + _mm256_storeu_si256(out + 29, w1); + w0 = _mm256_lddqu_si256(compressed + 30); + _mm256_storeu_si256(out + 30, w0); + w1 = _mm256_lddqu_si256(compressed + 31); + _mm256_storeu_si256(out + 31, w1); +} + +static avxpackblockfnc avxfuncPackArr[] = { + &avxpackblock0, &avxpackblock1, &avxpackblock2, &avxpackblock3, + &avxpackblock4, &avxpackblock5, &avxpackblock6, &avxpackblock7, + &avxpackblock8, &avxpackblock9, &avxpackblock10, &avxpackblock11, + &avxpackblock12, &avxpackblock13, &avxpackblock14, &avxpackblock15, + &avxpackblock16, &avxpackblock17, &avxpackblock18, &avxpackblock19, + &avxpackblock20, &avxpackblock21, &avxpackblock22, &avxpackblock23, + &avxpackblock24, &avxpackblock25, &avxpackblock26, &avxpackblock27, + &avxpackblock28, &avxpackblock29, &avxpackblock30, &avxpackblock31, + &avxpackblock32}; +static avxpackblockfnc avxfuncPackMaskArr[] = { + &avxpackblockmask0, &avxpackblockmask1, &avxpackblockmask2, + &avxpackblockmask3, &avxpackblockmask4, &avxpackblockmask5, + &avxpackblockmask6, &avxpackblockmask7, &avxpackblockmask8, + &avxpackblockmask9, &avxpackblockmask10, &avxpackblockmask11, + &avxpackblockmask12, &avxpackblockmask13, &avxpackblockmask14, + &avxpackblockmask15, &avxpackblockmask16, &avxpackblockmask17, + &avxpackblockmask18, &avxpackblockmask19, &avxpackblockmask20, + &avxpackblockmask21, &avxpackblockmask22, &avxpackblockmask23, + &avxpackblockmask24, &avxpackblockmask25, &avxpackblockmask26, + &avxpackblockmask27, &avxpackblockmask28, &avxpackblockmask29, + 
&avxpackblockmask30, &avxpackblockmask31, &avxpackblockmask32}; +static avxunpackblockfnc avxfuncUnpackArr[] = { + &avxunpackblock0, &avxunpackblock1, &avxunpackblock2, &avxunpackblock3, + &avxunpackblock4, &avxunpackblock5, &avxunpackblock6, &avxunpackblock7, + &avxunpackblock8, &avxunpackblock9, &avxunpackblock10, &avxunpackblock11, + &avxunpackblock12, &avxunpackblock13, &avxunpackblock14, &avxunpackblock15, + &avxunpackblock16, &avxunpackblock17, &avxunpackblock18, &avxunpackblock19, + &avxunpackblock20, &avxunpackblock21, &avxunpackblock22, &avxunpackblock23, + &avxunpackblock24, &avxunpackblock25, &avxunpackblock26, &avxunpackblock27, + &avxunpackblock28, &avxunpackblock29, &avxunpackblock30, &avxunpackblock31, + &avxunpackblock32}; +/** avxpacking **/ + +/* reads 256 values from "in", writes "bit" 256-bit vectors to "out" */ +void avxpack(const uint32_t *in, __m256i *out, const uint32_t bit) { + avxfuncPackMaskArr[bit](in, out); +} + +/* reads 256 values from "in", writes "bit" 256-bit vectors to "out" */ +void avxpackwithoutmask(const uint32_t *in, __m256i *out, const uint32_t bit) { + avxfuncPackArr[bit](in, out); +} + +/* reads "bit" 256-bit vectors from "in", writes 256 values to "out" */ +void avxunpack(const __m256i *in, uint32_t *out, const uint32_t bit) { + avxfuncUnpackArr[bit](in, out); +} + +#endif /* __AVX2__ */ diff --git a/cpp-projects/base/data/simdcomp/avxbitpacking.h b/cpp-projects/base/data/simdcomp/avxbitpacking.h new file mode 100644 index 0000000..f1aefb6 --- /dev/null +++ b/cpp-projects/base/data/simdcomp/avxbitpacking.h @@ -0,0 +1,35 @@ +/** + * This code is released under a BSD License. 
+ */ + +#ifndef INCLUDE_AVXBITPACKING_H_ +#define INCLUDE_AVXBITPACKING_H_ + +#ifdef __AVX2__ + +#include "portability.h" + +/* AVX2 is required */ +#include <immintrin.h> +/* for memset */ +#include <string.h> + +#include "simdcomputil.h" + +enum { AVXBlockSize = 256 }; + +/* max integer logarithm over a range of AVXBlockSize integers (256 integers) */ +uint32_t avxmaxbits(const uint32_t *begin); + +/* reads 256 values from "in", writes "bit" 256-bit vectors to "out" */ +void avxpack(const uint32_t *in, __m256i *out, const uint32_t bit); + +/* reads 256 values from "in", writes "bit" 256-bit vectors to "out" */ +void avxpackwithoutmask(const uint32_t *in, __m256i *out, const uint32_t bit); + +/* reads "bit" 256-bit vectors from "in", writes 256 values to "out" */ +void avxunpack(const __m256i *in, uint32_t *out, const uint32_t bit); + +#endif /* __AVX2__ */ + +#endif /* INCLUDE_AVXBITPACKING_H_ */ diff --git a/cpp-projects/base/data/simdcomp/portability.h b/cpp-projects/base/data/simdcomp/portability.h new file mode 100644 index 0000000..51c0670 --- /dev/null +++ b/cpp-projects/base/data/simdcomp/portability.h @@ -0,0 +1,81 @@ +/** + * This code is released under a BSD License. 
+ */ +#ifndef SIMDBITCOMPAT_H_ +#define SIMDBITCOMPAT_H_ + +#include <iso646.h> /* mostly for Microsoft compilers */ +#include <string.h> + +#if SIMDCOMP_DEBUG +#define SIMDCOMP_ALWAYS_INLINE inline +#define SIMDCOMP_NEVER_INLINE +#define SIMDCOMP_PURE +#else +#if defined(__GNUC__) +#if __GNUC__ >= 3 +#define SIMDCOMP_ALWAYS_INLINE inline __attribute__((always_inline)) +#define SIMDCOMP_NEVER_INLINE __attribute__((noinline)) +#define SIMDCOMP_PURE __attribute__((pure)) +#else +#define SIMDCOMP_ALWAYS_INLINE inline +#define SIMDCOMP_NEVER_INLINE +#define SIMDCOMP_PURE +#endif +#elif defined(_MSC_VER) +#define SIMDCOMP_ALWAYS_INLINE __forceinline +#define SIMDCOMP_NEVER_INLINE +#define SIMDCOMP_PURE +#else +#if __has_attribute(always_inline) +#define SIMDCOMP_ALWAYS_INLINE inline __attribute__((always_inline)) +#else +#define SIMDCOMP_ALWAYS_INLINE inline +#endif +#if __has_attribute(noinline) +#define SIMDCOMP_NEVER_INLINE __attribute__((noinline)) +#else +#define SIMDCOMP_NEVER_INLINE +#endif +#if __has_attribute(pure) +#define SIMDCOMP_PURE __attribute__((pure)) +#else +#define SIMDCOMP_PURE +#endif +#endif +#endif + +#if defined(_MSC_VER) && _MSC_VER < 1600 +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +typedef signed char int8_t; +#else +#include <stdint.h> /* part of Visual Studio 2010 and better, others likely anyway */ +#endif + +#if defined(_MSC_VER) +#define SIMDCOMP_ALIGNED(x) __declspec(align(x)) +#else +#if defined(__GNUC__) +#define SIMDCOMP_ALIGNED(x) __attribute__((aligned(x))) +#endif +#endif + +#if defined(_MSC_VER) +#include <intrin.h> +/* 64-bit needs extending */ +#define SIMDCOMP_CTZ(result, mask) \ + do { \ + unsigned long index; \ + if (!_BitScanForward(&(index), (mask))) { \ + (result) = 32U; \ + } else { \ + (result) = (uint32_t)(index); \ + } \ + } while (0) +#else +#include <x86intrin.h> +#define SIMDCOMP_CTZ(result, mask) result = __builtin_ctz(mask) +#endif + +#endif /* SIMDBITCOMPAT_H_ */ diff --git a/cpp-projects/base/data/simdcomp/simdbitpacking.c 
b/cpp-projects/base/data/simdcomp/simdbitpacking.c new file mode 100644 index 0000000..717abde --- /dev/null +++ b/cpp-projects/base/data/simdcomp/simdbitpacking.c @@ -0,0 +1,14399 @@ +/** + * This code is released under a BSD License. + */ +#include "simdbitpacking.h" + +static void SIMD_nullunpacker32(const __m128i *_in, uint32_t *out) { + (void)_in; + memset(out, 0, 32 * 4 * 4); +} + +static void __SIMD_fastpackwithoutmask1_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 15)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask2_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = 
_mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = 
_mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask3_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = 
_mm_srli_epi32(InReg, 3 - 1); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 3 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 26)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask5_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 3); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 1); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask6_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = 
_mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_loadu_si128(++in); + + 
OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask7_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 3); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = 
_mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 5); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 1); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 1)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask9_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 3); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 7); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 1); + InReg = 
_mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 5); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask10_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); 
+ _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask11_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 1); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 3); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 5); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 7); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + 
_mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 9); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask12_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); 
+ _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask13_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 7); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 1); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 9); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 3); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 11); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 5); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = 
_mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask14_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = 
_mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask15_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 13); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 11); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = 
_mm_srli_epi32(InReg, 15 - 9); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 7); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 5); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 3); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 1); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; 
+ OutReg = _mm_srli_epi32(InReg, 15 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask17_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = 
_mm_srli_epi32(InReg, 17 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 1); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 3); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 5); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 7); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 9); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 11); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 13); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 15); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask18_32(const uint32_t *_in, __m128i *out) { + const 
__m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); 
+ ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask19_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 18); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 5); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 11); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 17); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 3); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 9); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 15); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + 
OutReg = _mm_srli_epi32(InReg, 19 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 1); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 7); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 13); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask20_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = 
_mm_srli_epi32(InReg, 20 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + 
++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask21_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 9); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 19); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 18); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 7); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 17); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 5); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 15); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 3); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 13); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + 
OutReg = _mm_srli_epi32(InReg, 21 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 1); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 11); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask22_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 18); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = 
_mm_srli_epi32(InReg, 22 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 18); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask23_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 
14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 5); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 19); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 1); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 15); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 11); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + 
_mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 7); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 21); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 3); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 17); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 22); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 13); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 4); + InReg = 
_mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 18); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 9); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask24_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, 
OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + 
_mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask25_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 18); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 11); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = 
_mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 22); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 15); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 1); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 19); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 5); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 23); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 9); + InReg = 
_mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 13); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 24); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 17); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 3); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 21); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + 
_mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 7); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask26_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 22); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 24); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 18); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 22); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 24); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 18); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask27_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 
22); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 17); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 7); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 24); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 19); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 9); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 26); + InReg = 
_mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 21); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 11); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 1); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 23); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 18); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 13); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 3); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = 
_mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 25); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 15); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 5); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask28_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask29_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 26); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 23); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 17); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 11); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 5); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); 
+ _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 28); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 25); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 22); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 19); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 13); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 7); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 1); + InReg = 
_mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 27); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 24); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 21); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 18); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 15); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 9); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 3); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask30_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = 
_mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 28); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 26); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 24); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 22); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 18); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = 
_mm_srli_epi32(InReg, 30 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 28); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 26); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 24); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 22); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 18); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = 
_mm_srli_epi32(InReg, 30 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask31_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 30); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 29); + InReg = 
_mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 28); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 27); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 26); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 25); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 24); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 23); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 22); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 21); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 20); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 19); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 18); + InReg = 
_mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 17); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 16); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 15); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 14); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 13); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 12); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 11); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 10); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 9); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 8); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 7); + InReg = _mm_loadu_si128(++in); + 
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 6); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 5); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 4); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 3); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 2); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 1); + InReg = _mm_loadu_si128(++in); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask32_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = 
_mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = 
_mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpackwithoutmask4_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg; + uint32_t outer; + + for (outer = 0; outer < 4; ++outer) { + InReg = _mm_loadu_si128(in); + OutReg = InReg; + + InReg = _mm_loadu_si128(in + 1); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + + InReg = _mm_loadu_si128(in + 2); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + + InReg = _mm_loadu_si128(in + 3); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + + InReg = _mm_loadu_si128(in + 4); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + + InReg = _mm_loadu_si128(in + 5); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + + InReg = _mm_loadu_si128(in + 6); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + + InReg = _mm_loadu_si128(in + 7); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + + in += 8; + } +} + +static void __SIMD_fastpackwithoutmask8_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg; + uint32_t outer; + + for (outer = 0; outer < 8; ++outer) { + InReg = _mm_loadu_si128(in); + OutReg = InReg; + + InReg = _mm_loadu_si128(in + 1); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + + InReg = _mm_loadu_si128(in + 2); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + + InReg = _mm_loadu_si128(in + 3); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + + in += 4; + } 
+} + +static void __SIMD_fastpackwithoutmask16_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + __m128i InReg; + uint32_t outer; + + for (outer = 0; outer < 16; ++outer) { + InReg = _mm_loadu_si128(in); + OutReg = InReg; + + InReg = _mm_loadu_si128(in + 1); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + + in += 2; + } +} + +static void __SIMD_fastpack1_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 1) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = 
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + InReg = 
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack2_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 2) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 26)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack3_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 3) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 3 - 1); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 3 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = 
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack5_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 5) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 3); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 
23)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 1); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = 
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack6_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 6) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 
22)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 22)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack7_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 7) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 3); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 
17)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 5); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + 
_mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 1); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack9_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 9) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + 
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 3); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 7); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 1); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 5); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack10_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U 
<< 10) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 10 
- 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack11_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 11) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 1); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 3); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = 
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 5); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 7); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = 
_mm_srli_epi32(InReg, 11 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 9); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack12_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 12) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = 
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), 
mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack13_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 13) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), 
mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 7); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 1); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 9); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), 
mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 3); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 11); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 5); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + 
_mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack14_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 14) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 
2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack15_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 15) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + InReg = 
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 13); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 11); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 9); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 7); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 5); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 3); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); 
+ _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 1); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack17_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 17) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 1); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 3); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 5); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 7); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 9); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 11); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 13); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 15); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); +} + +static void 
__SIMD_fastpack18_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 18) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); 
+ + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack19_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 19) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + 
OutReg = _mm_srli_epi32(InReg, 19 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 18); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 5); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 11); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 17); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = 
_mm_srli_epi32(InReg, 19 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 3); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 9); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 15); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 1); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 1)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 7); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 13); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); +} +/* __SIMD_fastpack20_32: packs the low 20 bits of 128 uint32_t values into 20 __m128i words. The code reads 32 unaligned 128-bit vectors starting at _in and writes 20 unaligned 128-bit vectors starting at out. Packing is done independently in each 32-bit lane, least-significant bits first: every masked input vector is shifted left to the current bit offset and OR-ed into OutReg, and OutReg is stored once its 32 bits are filled. When a value does not fit in the bits that remain, _mm_srli_epi32(InReg, 20 - r) starts the next output word with the r high bits that did not fit. The bit offset returns to 0 after every 8 inputs (5 outputs), where OutReg is simply reloaded from InReg. */ +static void __SIMD_fastpack20_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; +/* mask has the low 20 bits set in each 32-bit lane; higher input bits are dropped before packing */ + const __m128i mask = _mm_set1_epi32((1U << 20) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 -
12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = 
_mm_srli_epi32(InReg, 20 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); +} +/* __SIMD_fastpack21_32: packs the low 21 bits of 128 uint32_t values into 21 __m128i words. The code reads 32 unaligned 128-bit vectors starting at _in and writes 21 unaligned 128-bit vectors starting at out. Packing is done independently in each 32-bit lane, least-significant bits first: every masked input vector is shifted left to the current bit offset and OR-ed into OutReg, and OutReg is stored once its 32 bits are filled. When a value does not fit in the bits that remain, _mm_srli_epi32(InReg, 21 - r) starts the next output word with the r high bits that did not fit. */ +static void
__SIMD_fastpack21_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; +/* mask has the low 21 bits set in each 32-bit lane; higher input bits are dropped before packing */ + const __m128i mask = _mm_set1_epi32((1U << 21) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 20); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 9); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 19); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 18); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg,
21 - 7); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 17); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 5); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 15); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + 
_mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 3); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 13); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 1); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 11); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); +} +/* __SIMD_fastpack22_32: packs the low 22 bits of 128 uint32_t values into 22 __m128i words. The code reads 32 unaligned 128-bit vectors starting at _in and writes 22 unaligned 128-bit vectors starting at out. Packing is done independently in each 32-bit lane, least-significant bits first: every masked input vector is shifted left to the current bit offset and OR-ed into OutReg, and OutReg is stored once its 32 bits are filled. When a value does not fit in the bits that remain, _mm_srli_epi32(InReg, 22 - r) starts the next output word with the r high bits that did not fit. The bit offset returns to 0 after 16 inputs (11 outputs), where OutReg is simply reloaded from InReg. */ +static void __SIMD_fastpack22_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; +/* mask has the low 22 bits set in each 32-bit lane; higher input bits are dropped before packing */ + const __m128i mask = _mm_set1_epi32((1U << 22) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out,
OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 18); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 20); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 6); 
+ InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 18); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 20); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); +} +/* __SIMD_fastpack23_32: packs the low 23 bits of 128 uint32_t values into 23 __m128i words. The code reads 32 unaligned 128-bit vectors starting at _in and writes 23 unaligned 128-bit vectors starting at out. Packing is done independently in each 32-bit lane, least-significant bits first: every masked input vector is shifted left to the current bit offset and OR-ed into OutReg, and OutReg is stored once its 32 bits are filled. When a value does not fit in the bits that remain, _mm_srli_epi32(InReg, 23 - r) starts the next output word with the r high bits that did not fit. */ +static void __SIMD_fastpack23_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; +/* mask has the low 23 bits set in each 32-bit lane; higher input bits are dropped before packing */ + const __m128i mask = _mm_set1_epi32((1U << 23) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 5); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg =
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 19); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 1); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 15); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 20); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 11); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + 
OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 7); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 21); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 3); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 17); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 22); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 13); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 18); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 9); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); +} +/* __SIMD_fastpack24_32: packs the low 24 bits of 128 uint32_t values into 24 __m128i words. The code reads 32 unaligned 128-bit vectors starting at _in and writes 24 unaligned 128-bit vectors starting at out. Packing is done independently in each 32-bit lane, least-significant bits first: every masked input vector is shifted left to the current bit offset and OR-ed into OutReg, and OutReg is stored once its 32 bits are filled. When a value does not fit in the bits that remain, _mm_srli_epi32(InReg, 24 - r) starts the next output word with the r high bits that did not fit. The bit offset returns to 0 after every 4 inputs (3 outputs), where OutReg is simply reloaded from InReg. */ +static void __SIMD_fastpack24_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; +/* mask has the low 24 bits set in each 32-bit lane; higher input bits are dropped before packing */ + const __m128i mask = _mm_set1_epi32((1U << 24) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg,
24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = 
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 
16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); +} +/* __SIMD_fastpack25_32: packs the low 25 bits of 128 uint32_t values into 25 __m128i words. The code reads 32 unaligned 128-bit vectors starting at _in and writes 25 unaligned 128-bit vectors starting at out. Packing is done independently in each 32-bit lane, least-significant bits first: every masked input vector is shifted left to the current bit offset and OR-ed into OutReg, and OutReg is stored once its 32 bits are filled. When a value does not fit in the bits that remain, _mm_srli_epi32(InReg, 25 - r) starts the next output word with the r high bits that did not fit. */ +static void __SIMD_fastpack25_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; +/* mask has the low 25 bits set in each 32-bit lane; higher input bits are dropped before packing */ + const __m128i mask = _mm_set1_epi32((1U << 25) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 18); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 11); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 22); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 15); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg
= _mm_srli_epi32(InReg, 25 - 1); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 19); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 5); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 23); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 9); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 20); + InReg = 
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 13); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 24); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 17); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 3); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 21); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = 
_mm_srli_epi32(InReg, 25 - 7); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack26_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 26) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 20); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 22); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 10); + InReg = 
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 24); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 18); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 20); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + 
OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 22); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 24); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 18); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 6); + InReg = 
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack27_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 27) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 22); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 17); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 7); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 24); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 19); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg 
= _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 9); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 26); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 21); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 11); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 1); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = 
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 23); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 18); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 13); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 3); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 25); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 20); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 15); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 5); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack28_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 28) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = 
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = 
_mm_srli_epi32(InReg, 28 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + 
_mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack29_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 29) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 26); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 23); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 20); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 17); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 11); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 5); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 28); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 25); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 22); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 19); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 13); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, 
OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 7); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 1); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 27); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 24); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 21); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 18); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 15); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 9); + InReg = 
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 3); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack30_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 30) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 28); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 26); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 24); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 22); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 20); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = 
_mm_srli_epi32(InReg, 30 - 18); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + 
_mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 28); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 26); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 24); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 22); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 20); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 18); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + _mm_storeu_si128(out, OutReg); +} + +static void __SIMD_fastpack31_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + const __m128i mask = _mm_set1_epi32((1U << 31) - 1); + + __m128i InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 30); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 29); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 28); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 27); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + 
OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 26); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 25); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 24); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 23); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 22); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 21); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 20); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 19); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 18); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 17); + InReg = _mm_and_si128(_mm_loadu_si128(++in), 
mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 16); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 15); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 14); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 13); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 12); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 11); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 10); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 9); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 8); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 7); + InReg = 
_mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 6); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 5); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 4); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 3); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 2); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + _mm_storeu_si128(out, OutReg); + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 1); + InReg = _mm_and_si128(_mm_loadu_si128(++in), mask); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + _mm_storeu_si128(out, OutReg); +} + +/* Bit width 32: there is nothing to compress, so each 128-bit vector read from _in is stored to out unchanged (no mask, no shift). Loads and stores are the unaligned variants; in and out each advance by one vector per step. */ static void __SIMD_fastpack32_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i InReg = _mm_loadu_si128(in); + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg =
InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = 
InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + ++out; + InReg = _mm_loadu_si128(++in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); +} + +/* Packs 4-bit values. Each of the 4 loop passes masks 8 consecutive input vectors down to their low 4 bits, ORs them together at lane shifts 0,4,...,28 and stores one output vector, so 32 input vectors become 4 output vectors. Bits above the low 4 of every input lane are discarded by the mask. */ static void __SIMD_fastpack4_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, InReg; + const __m128i mask = _mm_set1_epi32((1U << 4) - 1); + uint32_t outer; + + for (outer = 0; outer < 4; ++outer) { + InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + + InReg = _mm_and_si128(_mm_loadu_si128(in + 1), mask); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + + InReg = _mm_and_si128(_mm_loadu_si128(in + 2), mask); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + + InReg = _mm_and_si128(_mm_loadu_si128(in + 3), mask); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + + InReg = _mm_and_si128(_mm_loadu_si128(in + 4), mask); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + + InReg = _mm_and_si128(_mm_loadu_si128(in + 5), mask); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + + InReg = _mm_and_si128(_mm_loadu_si128(in + 6), mask); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + + InReg = _mm_and_si128(_mm_loadu_si128(in + 7), mask); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + ++out; + + in += 8; + } +} + +/* Packs 8-bit values. Each of the 8 loop passes masks 4 consecutive input vectors down to their low 8 bits, combines them at lane shifts 0,8,16,24 and stores one output vector, so 32 input vectors become 8 output vectors. */ static void __SIMD_fastpack8_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, InReg; + const __m128i mask = _mm_set1_epi32((1U << 8) - 1); + uint32_t outer; + + for (outer = 0; outer < 8; ++outer) { + InReg =
_mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + + InReg = _mm_and_si128(_mm_loadu_si128(in + 1), mask); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + + InReg = _mm_and_si128(_mm_loadu_si128(in + 2), mask); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + + InReg = _mm_and_si128(_mm_loadu_si128(in + 3), mask); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + ++out; + + in += 4; + } +} + +/* Packs 16-bit values. Each of the 16 loop passes masks 2 consecutive input vectors down to their low 16 bits, places the second one at lane shift 16 and stores one output vector, so 32 input vectors become 16 output vectors. */ static void __SIMD_fastpack16_32(const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, InReg; + const __m128i mask = _mm_set1_epi32((1U << 16) - 1); + uint32_t outer; + + for (outer = 0; outer < 16; ++outer) { + InReg = _mm_and_si128(_mm_loadu_si128(in), mask); + OutReg = InReg; + + InReg = _mm_and_si128(_mm_loadu_si128(in + 1), mask); + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + ++out; + + in += 2; + } +} + +/* Unpacks 1-bit values. A single input vector is loaded once; the loop runs 8 passes of 4 stores, so 32 output vectors are written and output k holds bit k of each 32-bit input lane. shift is post-incremented on every extraction and therefore walks 0..31. InReg2 is a plain copy of InReg1. */ static void __SIMD_fastunpack1_32(const __m128i *in, uint32_t *_out) { + __m128i *out = (__m128i *)(_out); + __m128i InReg1 = _mm_loadu_si128(in); + __m128i InReg2 = InReg1; + __m128i OutReg1, OutReg2, OutReg3, OutReg4; + const __m128i mask = _mm_set1_epi32(1); + + uint32_t i, shift = 0; + + for (i = 0; i < 8; ++i) { + OutReg1 = _mm_and_si128(_mm_srli_epi32(InReg1, shift++), mask); + OutReg2 = _mm_and_si128(_mm_srli_epi32(InReg2, shift++), mask); + OutReg3 = _mm_and_si128(_mm_srli_epi32(InReg1, shift++), mask); + OutReg4 = _mm_and_si128(_mm_srli_epi32(InReg2, shift++), mask); + _mm_storeu_si128(out++, OutReg1); + _mm_storeu_si128(out++, OutReg2); + _mm_storeu_si128(out++, OutReg3); + _mm_storeu_si128(out++, OutReg4); + } +} + +/* Unpacks 2-bit values from two input vectors. Each input vector yields 16 output vectors, one per 2-bit field at lane shifts 0,2,...,30. The shift-by-30 result is stored without masking because a logical right shift by 30 leaves only that field's 2 bits. */ static void __SIMD_fastunpack2_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 2) - 1); + + OutReg = _mm_and_si128(InReg, mask); +
_mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 18), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 20), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 22), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 24), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 26), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 28), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); 
+ _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 18), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 20), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 22), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 24), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 26), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 28), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + _mm_storeu_si128(out++, OutReg); +} + +/* Unpacks 3-bit values from three input vectors. Fields lying wholly inside a 32-bit lane are extracted with a right shift and mask. A field that straddles two input vectors is rebuilt by taking the leftover high bits of the current vector (right shift by 30 or 31), loading the next vector, and OR-ing in its low bits shifted left past the leftover bits and masked to 3 bits. */ static void __SIMD_fastunpack3_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 3) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 3), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 9), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 15), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 18), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 21), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 24),
mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 27), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 3 - 1), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 1), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 7), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 13), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 19), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 22), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 25), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 28), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 31); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 3 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 5), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 11), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = 
_mm_and_si128(_mm_srli_epi32(InReg, 17), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 20), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 23), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 26), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 29); + _mm_storeu_si128(out++, OutReg); +} + +/* Unpacks 4-bit values from four input vectors. Each input vector yields 8 output vectors, one per 4-bit field at lane shifts 0,4,...,28; 4 divides 32, so no field straddles two input vectors. The shift-by-28 result is stored unmasked because only the field's 4 bits remain after the shift; the next input vector is loaded just before that store. */ static void __SIMD_fastunpack4_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 4) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 20), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 24), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 20), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg =
_mm_and_si128(_mm_srli_epi32(InReg, 24), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 20), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 24), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 20), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 24), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + _mm_storeu_si128(out++, OutReg); +} + +/* Unpacks 5-bit values from five input vectors. In-lane fields are extracted with a right shift and mask. When a field straddles two input vectors, the leftover high bits of the current vector are kept, the next vector is loaded, and its low bits are shifted left past the leftover bits, masked to 5 bits and OR-ed in. */ static void __SIMD_fastunpack5_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 5) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg =
_mm_and_si128(_mm_srli_epi32(InReg, 5), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 15), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 20), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 25), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 3), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 3), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 13), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 18), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 23), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 1), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 1), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 11), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 21), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 26), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 31); + InReg = _mm_loadu_si128(++in); + + OutReg = + 
_mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 9), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 19), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 24), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 29); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 7), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 17), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 22), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 27); + _mm_storeu_si128(out++, OutReg); +} + +/* Unpacks 6-bit values from six input vectors. Straddling fields are rebuilt from the leftover high bits of the current vector OR-ed with the masked, left-shifted low bits of the next one. The layout realigns after every third input vector: the shift-by-26 result is already a complete field, so it is stored without any carry from the next vector. */ static void __SIMD_fastunpack6_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 6) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 18), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 24), mask); +
_mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 22), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 20), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 18), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 24), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = 
_mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 22), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 20), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + _mm_storeu_si128(out++, OutReg); +} + +/* Unpacks 7-bit values from seven input vectors. In-lane fields are extracted with a right shift and mask. Every input vector boundary splits a field: the leftover high bits of the current vector are kept, the next vector is loaded, and its low bits are shifted left past the leftover bits, masked to 7 bits and OR-ed in. */ static void __SIMD_fastunpack7_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 7) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 7), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 21), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 3), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 3), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg);
+ + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 17), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 24), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 31); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 13), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 20), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 27); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 9), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 23), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 5), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 5), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 19), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 1), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 1), mask); + 
_mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 15), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 22), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 29); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 11), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 18), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 25); + _mm_storeu_si128(out++, OutReg); +} + +/* Unpacks 8-bit values from eight input vectors. Each input vector yields 4 output vectors, one per byte of every 32-bit lane (lane shifts 0,8,16,24); 8 divides 32, so no field straddles two input vectors. The shift-by-24 result is stored unmasked because only the top byte remains after the shift; the next input vector is loaded just before that store. */ static void __SIMD_fastunpack8_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 8) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); +
_mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + 
_mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + _mm_storeu_si128(out++, OutReg); +} +/** Expands 9-bit packed fields to 32-bit integers, four lanes at a time. Reads 9 consecutive 128-bit words from in (unaligned loads) and writes 32 consecutive 128-bit words, i.e. 128 uint32_t, starting at _out (unaligned stores). Every 32-bit lane is an independent bit stream holding 32 fields back to back from bit 0 of the first word. A field that straddles two input words is rebuilt by OR-ing the high bits left over in the current word with the low bits of the next one: in _mm_slli_epi32(InReg, 9 - n) the constant n is the number of bits borrowed from the next word, and extraction from that word then resumes at bit n. The 32nd field ends exactly at bit 31 of the last word, so it is produced by the shift alone. */ +static void __SIMD_fastunpack9_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 9) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 9), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 18), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 27); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 13), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 22), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 31); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 17), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 3), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 3), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 21), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + 
InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 7), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 7), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 25); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 11), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 20), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 29); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 15), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 1), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 1), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 19), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 5), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 5), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg 
= _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 23); + _mm_storeu_si128(out++, OutReg); +} +/** Expands 10-bit packed fields to 32-bit integers, four lanes at a time. Reads 10 consecutive 128-bit words from in (unaligned loads) and writes 32 consecutive 128-bit words, i.e. 128 uint32_t, starting at _out (unaligned stores). Every 32-bit lane is an independent bit stream holding 32 fields back to back from bit 0 of the first word. A field that straddles two input words is rebuilt by OR-ing the high bits left over in the current word with the low bits of the next one: in _mm_slli_epi32(InReg, 10 - n) the constant n is the number of bits borrowed from the next word, and extraction from that word then resumes at bit n. After 16 fields (5 input words) the stream is word-aligned again, so the second half repeats the shift sequence of the first half. */ +static void __SIMD_fastunpack10_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 10) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 20), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 18), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 2), mask)); + 
_mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 20), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 18), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = 
_mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + _mm_storeu_si128(out++, OutReg); +} +/** Expands 11-bit packed fields to 32-bit integers, four lanes at a time. Reads 11 consecutive 128-bit words from in (unaligned loads) and writes 32 consecutive 128-bit words, i.e. 128 uint32_t, starting at _out (unaligned stores). Every 32-bit lane is an independent bit stream holding 32 fields back to back from bit 0 of the first word. A field that straddles two input words is rebuilt by OR-ing the high bits left over in the current word with the low bits of the next one: in _mm_slli_epi32(InReg, 11 - n) the constant n is the number of bits borrowed from the next word, and extraction from that word then resumes at bit n. The 32nd field ends exactly at bit 31 of the last word, so it is produced by the shift alone. */ +static void __SIMD_fastunpack11_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 11) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 11), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 1), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 1), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 23); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 13), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 3), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 3), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 25); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 4), mask)); + 
_mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 15), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 5), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 5), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 27); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 17), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 7), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 7), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 18), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 29); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 19), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 9), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = 
_mm_and_si128(_mm_srli_epi32(InReg, 9), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 20), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 31); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 21); + _mm_storeu_si128(out++, OutReg); +} +/** Expands 12-bit packed fields to 32-bit integers, four lanes at a time. Reads 12 consecutive 128-bit words from in (unaligned loads) and writes 32 consecutive 128-bit words, i.e. 128 uint32_t, starting at _out (unaligned stores). Every 32-bit lane is an independent bit stream holding 32 fields back to back from bit 0 of the first word. A field that straddles two input words is rebuilt by OR-ing the high bits left over in the current word with the low bits of the next one: in _mm_slli_epi32(InReg, 12 - n) the constant n is the number of bits borrowed from the next word, and extraction from that word then resumes at bit n. After every 8 fields (3 input words) the stream is word-aligned again, so the same shift sequence appears four times. */ +static void __SIMD_fastunpack12_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 12) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + 
_mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + 
_mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + _mm_storeu_si128(out++, OutReg); +} +/** Expands 13-bit packed fields to 32-bit integers, four lanes at a time. Reads 13 consecutive 128-bit words from in (unaligned loads) and writes 32 consecutive 128-bit words, i.e. 128 uint32_t, starting at _out (unaligned stores). Every 32-bit lane is an independent bit stream holding 32 fields back to back from bit 0 of the first word. A field that straddles two input words is rebuilt by OR-ing the high bits left over in the current word with the low bits of the next one: in _mm_slli_epi32(InReg, 13 - n) the constant n is the number of bits borrowed from the next word, and extraction from that word then resumes at bit n. The 32nd field ends exactly at bit 31 of the last word, so it is produced by the shift alone. */ +static void __SIMD_fastunpack13_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 13) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 13), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 7), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 7), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 1), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 1), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 27); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 
21); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 15), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 9), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 9), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 3), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 3), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 29); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 23); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 17), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 11), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 11), mask); + _mm_storeu_si128(out++, 
OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 5), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 5), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 18), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 31); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 25); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 19); + _mm_storeu_si128(out++, OutReg); +} +/** Expands 14-bit packed fields to 32-bit integers, four lanes at a time. Reads 14 consecutive 128-bit words from in (unaligned loads) and writes 32 consecutive 128-bit words, i.e. 128 uint32_t, starting at _out (unaligned stores). Every 32-bit lane is an independent bit stream holding 32 fields back to back from bit 0 of the first word. A field that straddles two input words is rebuilt by OR-ing the high bits left over in the current word with the low bits of the next one: in _mm_slli_epi32(InReg, 14 - n) the constant n is the number of bits borrowed from the next word, and extraction from that word then resumes at bit n. After 16 fields (7 input words) the stream is word-aligned again, so the second half repeats the shift sequence of the first half. */ +static void __SIMD_fastunpack14_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 14) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 
6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 14 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + _mm_storeu_si128(out++, OutReg); +} +/** Expands 15-bit packed fields to 32-bit integers, four lanes at a time. Reads 15 consecutive 128-bit words from in (unaligned loads) and writes 32 consecutive 128-bit words, i.e. 128 uint32_t, starting at _out (unaligned stores). Every 32-bit lane is an independent bit stream holding 32 fields back to back from bit 0 of the first word. A field that straddles two input words is rebuilt by OR-ing the high bits left over in the current word with the low bits of the next one: in _mm_slli_epi32(InReg, 15 - n) the constant n is the number of bits borrowed from the next word, and extraction from that word then resumes at bit n. The 32nd field ends exactly at bit 31 of the last word, so it is produced by the shift alone. */ +static void __SIMD_fastunpack15_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 15) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 15), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + 
+ OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 13), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 13), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 11), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 11), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 9), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 9), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 7), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 7), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 5), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 5), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 3), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 3), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 1), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 1), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = 
_mm_and_si128(_mm_srli_epi32(InReg, 16), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 31); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 29); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 27); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 25); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 23); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 21); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 19); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 2), 
mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 17); + _mm_storeu_si128(out++, OutReg); +} +/** Expands 16-bit packed fields to 32-bit integers, four lanes at a time. Reads 16 consecutive 128-bit words from in (unaligned loads) and writes 32 consecutive 128-bit words, i.e. 128 uint32_t, starting at _out (unaligned stores). Every 32-bit lane is processed independently and each input word yields exactly two outputs per lane: the low half is isolated with the 16-bit mask and the high half with a logical right shift by 16, which already zero-fills the upper bits. No field straddles two input words, and the next input word is loaded before the high half of the current one is stored. */ +static void __SIMD_fastunpack16_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 16) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, 
OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + _mm_storeu_si128(out++, OutReg); +} + /* Unpacks 17-bit fields into 32-bit integers. Loads 17 consecutive __m128i words starting at `in` and stores 32 __m128i words (128 uint32_t values) starting at `_out`; each 32-bit lane is decoded independently. Since 17 does not divide 32, fields regularly span two input words: the bits left in the current word (logical right shift) are OR-ed with the low bits of the next word, shifted left into position and masked to 17 bits. Only the unaligned load/store intrinsics are used. */ +static void __SIMD_fastunpack17_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 17) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 17); + InReg = _mm_loadu_si128(++in); + /* this field spans two words: 15 bits remain in the previous word, the other 2 are the low bits of the word just loaded */ + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = 
_mm_srli_epi32(InReg, 19); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 21); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 23); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 25); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 27); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 29); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 14), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 31); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = 
_mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 1), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 1), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 3), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 3), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 5), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 5), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 7), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 7), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 9), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 9), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 11), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 11), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 13), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 13), mask); + _mm_storeu_si128(out++, OutReg); + 
+ OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 15), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 15); + _mm_storeu_si128(out++, OutReg); +} + /* Unpacks 18-bit fields into 32-bit integers. Loads 18 consecutive __m128i words starting at `in` and stores 32 __m128i words (128 uint32_t values) starting at `_out`; each 32-bit lane is decoded independently. A field that spans two input words is rebuilt by OR-ing the bits left in the current word (logical right shift) with the low bits of the next word, shifted left into position and masked to 18 bits. After 16 fields (9 input words) the stream is word-aligned again, so the same shift sequence is emitted a second time. Only the unaligned load/store intrinsics are used. */ +static void __SIMD_fastunpack18_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 18) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + /* this field spans two words: 14 bits remain in the previous word, the other 4 are the low bits of the word just loaded */ + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = 
_mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 14); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + 
OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 14); + _mm_storeu_si128(out++, OutReg); +} + /* Unpacks 19-bit fields into 32-bit integers. Loads 19 consecutive __m128i words starting at `in` and stores 32 __m128i words (128 uint32_t values) starting at `_out`; each 32-bit lane is decoded independently. A field that spans two input words is rebuilt by OR-ing the bits left in the current word (logical right shift) with the low bits of the next word, shifted left into position and masked to 19 bits. Only the unaligned load/store intrinsics are used. */ +static void __SIMD_fastunpack19_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 19) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 19); + InReg = _mm_loadu_si128(++in); + /* this field spans two words: 13 bits remain in the previous word, the other 6 are the low bits of the word just loaded */ + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 25); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 12), 
mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 31); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 18), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 5), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 5), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 11), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 11), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 17), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 17); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 23); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 29); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 3), mask)); + _mm_storeu_si128(out++, 
OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 3), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 9), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 9), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 15), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 15); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 21); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 27); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 14); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 1), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 1), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 7), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 7), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); 
+ InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 13), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 13); + _mm_storeu_si128(out++, OutReg); +} + /* Unpacks 20-bit fields into 32-bit integers. Loads 20 consecutive __m128i words starting at `in` and stores 32 __m128i words (128 uint32_t values) starting at `_out`; each 32-bit lane is decoded independently. A field that spans two input words is rebuilt by OR-ing the bits left in the current word (logical right shift) with the low bits of the next word, shifted left into position and masked to 20 bits. After every 8 fields (5 input words) the stream is word-aligned again, so the same shift sequence is emitted four times. Only the unaligned load/store intrinsics are used. */ +static void __SIMD_fastunpack20_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 20) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + /* this field spans two words: 12 bits remain in the previous word, the other 8 are the low bits of the word just loaded */ + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + 
OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, 
mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + _mm_storeu_si128(out++, OutReg); +} + /* Unpacks 21-bit fields into 32-bit integers. Loads 21 consecutive __m128i words starting at `in` and stores 32 __m128i words (128 uint32_t values) starting at `_out`; each 32-bit lane is decoded independently. A field that spans two input words is rebuilt by OR-ing the bits left in the current word (logical right shift) with the low bits of the next word, shifted left into position and masked to 21 bits. Only the unaligned load/store intrinsics are used. */ +static void __SIMD_fastunpack21_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 21) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 21); + InReg = _mm_loadu_si128(++in); + /* this field spans two words: 11 bits remain in the previous word, the other 10 are the low bits of the word just loaded */ + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 10), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 31); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + 
_mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 9), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 9), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 19), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 19); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 29); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 18), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 7), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 7), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 17), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 17); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 27); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 21 - 5), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 5), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 15), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 15); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 25); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 14); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 3), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 3), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 13), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 13); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 23); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 
- 1), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 1), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 11), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 11); + _mm_storeu_si128(out++, OutReg); +} + /* Unpacks 22-bit fields into 32-bit integers. Loads 22 consecutive __m128i words starting at `in` and stores 32 __m128i words (128 uint32_t values) starting at `_out`; each 32-bit lane is decoded independently. A field that spans two input words is rebuilt by OR-ing the bits left in the current word (logical right shift) with the low bits of the next word, shifted left into position and masked to 22 bits. After 16 fields (11 input words) the stream is word-aligned again, so the same shift sequence is emitted a second time. Only the unaligned load/store intrinsics are used. */ +static void __SIMD_fastunpack22_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 22) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + /* this field spans two words: 10 bits remain in the previous word, the other 12 are the low bits of the word just loaded */ + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 14); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + 
_mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 18), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 10); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 
14); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 18), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 10); + _mm_storeu_si128(out++, OutReg); +} + /* Unpacks 23-bit fields into 32-bit integers. Loads 23 consecutive __m128i words starting at `in` and stores 32 __m128i words (128 uint32_t values) starting at `_out`; each 32-bit lane is decoded independently. A field that spans two input words is rebuilt by OR-ing the bits left in the current word (logical right shift) with the low bits of the next word, shifted left into position and masked to 23 bits. Only the unaligned load/store intrinsics are used. */ +static void __SIMD_fastunpack23_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 23) - 1); + + OutReg = _mm_and_si128(InReg, mask); + 
_mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 23); + InReg = _mm_loadu_si128(++in); + /* this field spans two words: 9 bits remain in the previous word, the other 14 are the low bits of the word just loaded */ + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 14); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 5), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 5), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 19), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 19); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 10); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 1), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 1), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 15), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 15); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 29); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + 
_mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 11), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 11); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 25); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 7), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 7), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 21), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 21); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 3), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 3), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 17), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 17); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = 
_mm_and_si128(_mm_srli_epi32(InReg, 8), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 31); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 22), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 13), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 13); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 27); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 18), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 9), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 9); + _mm_storeu_si128(out++, OutReg); +} + +static void __SIMD_fastunpack24_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 24) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = 
_mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + 
_mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + _mm_storeu_si128(out++, OutReg); +} + +static void __SIMD_fastunpack25_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 25) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 25); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 18), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 11), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 11); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 29); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 22), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 15), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 15); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 1), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 1), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = 
_mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 19), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 19); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 5), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 5), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 23), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 23); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 9), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 9); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 27); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 13), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 13); + 
InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 6), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 31); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 24), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 17), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 17); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 10); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 3), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 3), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 21), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 21); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 14); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 7), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 7); + _mm_storeu_si128(out++, OutReg); +} + +static void __SIMD_fastunpack26_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = 
_mm_set1_epi32((1U << 26) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 14); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 22), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 10); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 26 - 24), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 18), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 6); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 14); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 22), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = 
_mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 10); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 24), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 18), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 6); + _mm_storeu_si128(out++, OutReg); +} + +static void __SIMD_fastunpack27_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 27) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 27); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 22), mask)); + _mm_storeu_si128(out++, 
OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 17), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 17); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 7), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 7); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 29); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 24), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 19), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 19); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 14); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 9), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 9); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 4), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = 
_mm_srli_epi32(InReg, 31); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 26), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 21), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 21); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 11), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 11); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 6); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 1), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 1), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 23), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 23); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 18), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 13), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 13); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 27 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 3), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 3), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 25), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 25); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 15), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 15); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 10); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 5), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 5); + _mm_storeu_si128(out++, OutReg); +} + +static void __SIMD_fastunpack28_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 28) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = 
_mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 4); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = 
+ _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 4); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 4); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 4); + _mm_storeu_si128(out++, OutReg); +} + +static void __SIMD_fastunpack29_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 29) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 29); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 26), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 23), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 23); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 
20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 17), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 17); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 14); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 11), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 11); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 5), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 5); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 2), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 31); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 28), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 25), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 25); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 22), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + 
OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 19), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 19); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 13), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 13); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 10); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 7), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 7); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 4); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 1), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(_mm_srli_epi32(InReg, 1), mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 27), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 27); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 24), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 21), mask)); + _mm_storeu_si128(out++, OutReg); + + 
OutReg = _mm_srli_epi32(InReg, 21); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 18), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 15), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 15); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 9), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 9); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 6); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 3), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 3); + _mm_storeu_si128(out++, OutReg); +} + +static void __SIMD_fastunpack30_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 30) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + 
InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 24), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 22), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 18), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 14); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 10); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 6); + InReg = 
_mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 4); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 2); + InReg = _mm_loadu_si128(++in); + + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 24), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 22), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 18), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = 
+ _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 14); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 10); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 6); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 4); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 2); + _mm_storeu_si128(out++, OutReg); +} + +static void __SIMD_fastunpack31_32(const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + const __m128i mask = _mm_set1_epi32((1U << 31) - 1); + + OutReg = _mm_and_si128(InReg, mask); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 31); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 30), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 30); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 31 - 29), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 29); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 28), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 28); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 27), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 27); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 26), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 26); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 25), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 25); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 24), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 24); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 23), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 23); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 22), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 22); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 21), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 21); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 20), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 20); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 31 - 19), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 19); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 18), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 18); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 17), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 17); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 16), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 16); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 15), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 15); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 14), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 14); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 13), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 13); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 12), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 12); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 11), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 11); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 10), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 10); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 31 - 9), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 9); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 8), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 8); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 7), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 7); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 6), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 6); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 5), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 5); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 4), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 4); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 3), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 3); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 2), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 2); + InReg = _mm_loadu_si128(++in); + + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 1), mask)); + _mm_storeu_si128(out++, OutReg); + + OutReg = _mm_srli_epi32(InReg, 1); + _mm_storeu_si128(out++, OutReg); +} + +void __SIMD_fastunpack32_32(const __m128i *in, uint32_t *_out) { + __m128i *out = (__m128i *)(_out); + uint32_t outer; + + for (outer = 0; outer < 32; ++outer) { + _mm_storeu_si128(out++, _mm_loadu_si128(in++)); + } +} + +void simdunpack(const __m128i *in, 
uint32_t *out, const uint32_t bit) { + switch (bit) { + case 0: + SIMD_nullunpacker32(in, out); + return; + + case 1: + __SIMD_fastunpack1_32(in, out); + return; + + case 2: + __SIMD_fastunpack2_32(in, out); + return; + + case 3: + __SIMD_fastunpack3_32(in, out); + return; + + case 4: + __SIMD_fastunpack4_32(in, out); + return; + + case 5: + __SIMD_fastunpack5_32(in, out); + return; + + case 6: + __SIMD_fastunpack6_32(in, out); + return; + + case 7: + __SIMD_fastunpack7_32(in, out); + return; + + case 8: + __SIMD_fastunpack8_32(in, out); + return; + + case 9: + __SIMD_fastunpack9_32(in, out); + return; + + case 10: + __SIMD_fastunpack10_32(in, out); + return; + + case 11: + __SIMD_fastunpack11_32(in, out); + return; + + case 12: + __SIMD_fastunpack12_32(in, out); + return; + + case 13: + __SIMD_fastunpack13_32(in, out); + return; + + case 14: + __SIMD_fastunpack14_32(in, out); + return; + + case 15: + __SIMD_fastunpack15_32(in, out); + return; + + case 16: + __SIMD_fastunpack16_32(in, out); + return; + + case 17: + __SIMD_fastunpack17_32(in, out); + return; + + case 18: + __SIMD_fastunpack18_32(in, out); + return; + + case 19: + __SIMD_fastunpack19_32(in, out); + return; + + case 20: + __SIMD_fastunpack20_32(in, out); + return; + + case 21: + __SIMD_fastunpack21_32(in, out); + return; + + case 22: + __SIMD_fastunpack22_32(in, out); + return; + + case 23: + __SIMD_fastunpack23_32(in, out); + return; + + case 24: + __SIMD_fastunpack24_32(in, out); + return; + + case 25: + __SIMD_fastunpack25_32(in, out); + return; + + case 26: + __SIMD_fastunpack26_32(in, out); + return; + + case 27: + __SIMD_fastunpack27_32(in, out); + return; + + case 28: + __SIMD_fastunpack28_32(in, out); + return; + + case 29: + __SIMD_fastunpack29_32(in, out); + return; + + case 30: + __SIMD_fastunpack30_32(in, out); + return; + + case 31: + __SIMD_fastunpack31_32(in, out); + return; + + case 32: + __SIMD_fastunpack32_32(in, out); + return; + + default: + break; + } +} + +/*assumes that integers 
fit in the prescribed number of bits*/ +void simdpackwithoutmask(const uint32_t *in, __m128i *out, const uint32_t bit) { + switch (bit) { + case 0: + return; + + case 1: + __SIMD_fastpackwithoutmask1_32(in, out); + return; + + case 2: + __SIMD_fastpackwithoutmask2_32(in, out); + return; + + case 3: + __SIMD_fastpackwithoutmask3_32(in, out); + return; + + case 4: + __SIMD_fastpackwithoutmask4_32(in, out); + return; + + case 5: + __SIMD_fastpackwithoutmask5_32(in, out); + return; + + case 6: + __SIMD_fastpackwithoutmask6_32(in, out); + return; + + case 7: + __SIMD_fastpackwithoutmask7_32(in, out); + return; + + case 8: + __SIMD_fastpackwithoutmask8_32(in, out); + return; + + case 9: + __SIMD_fastpackwithoutmask9_32(in, out); + return; + + case 10: + __SIMD_fastpackwithoutmask10_32(in, out); + return; + + case 11: + __SIMD_fastpackwithoutmask11_32(in, out); + return; + + case 12: + __SIMD_fastpackwithoutmask12_32(in, out); + return; + + case 13: + __SIMD_fastpackwithoutmask13_32(in, out); + return; + + case 14: + __SIMD_fastpackwithoutmask14_32(in, out); + return; + + case 15: + __SIMD_fastpackwithoutmask15_32(in, out); + return; + + case 16: + __SIMD_fastpackwithoutmask16_32(in, out); + return; + + case 17: + __SIMD_fastpackwithoutmask17_32(in, out); + return; + + case 18: + __SIMD_fastpackwithoutmask18_32(in, out); + return; + + case 19: + __SIMD_fastpackwithoutmask19_32(in, out); + return; + + case 20: + __SIMD_fastpackwithoutmask20_32(in, out); + return; + + case 21: + __SIMD_fastpackwithoutmask21_32(in, out); + return; + + case 22: + __SIMD_fastpackwithoutmask22_32(in, out); + return; + + case 23: + __SIMD_fastpackwithoutmask23_32(in, out); + return; + + case 24: + __SIMD_fastpackwithoutmask24_32(in, out); + return; + + case 25: + __SIMD_fastpackwithoutmask25_32(in, out); + return; + + case 26: + __SIMD_fastpackwithoutmask26_32(in, out); + return; + + case 27: + __SIMD_fastpackwithoutmask27_32(in, out); + return; + + case 28: + __SIMD_fastpackwithoutmask28_32(in, 
out); + return; + + case 29: + __SIMD_fastpackwithoutmask29_32(in, out); + return; + + case 30: + __SIMD_fastpackwithoutmask30_32(in, out); + return; + + case 31: + __SIMD_fastpackwithoutmask31_32(in, out); + return; + + case 32: + __SIMD_fastpackwithoutmask32_32(in, out); + return; + + default: + break; + } +} + +/*assumes that integers fit in the prescribed number of bits*/ +void simdpack(const uint32_t *in, __m128i *out, const uint32_t bit) { + switch (bit) { + case 0: + return; + + case 1: + __SIMD_fastpack1_32(in, out); + return; + + case 2: + __SIMD_fastpack2_32(in, out); + return; + + case 3: + __SIMD_fastpack3_32(in, out); + return; + + case 4: + __SIMD_fastpack4_32(in, out); + return; + + case 5: + __SIMD_fastpack5_32(in, out); + return; + + case 6: + __SIMD_fastpack6_32(in, out); + return; + + case 7: + __SIMD_fastpack7_32(in, out); + return; + + case 8: + __SIMD_fastpack8_32(in, out); + return; + + case 9: + __SIMD_fastpack9_32(in, out); + return; + + case 10: + __SIMD_fastpack10_32(in, out); + return; + + case 11: + __SIMD_fastpack11_32(in, out); + return; + + case 12: + __SIMD_fastpack12_32(in, out); + return; + + case 13: + __SIMD_fastpack13_32(in, out); + return; + + case 14: + __SIMD_fastpack14_32(in, out); + return; + + case 15: + __SIMD_fastpack15_32(in, out); + return; + + case 16: + __SIMD_fastpack16_32(in, out); + return; + + case 17: + __SIMD_fastpack17_32(in, out); + return; + + case 18: + __SIMD_fastpack18_32(in, out); + return; + + case 19: + __SIMD_fastpack19_32(in, out); + return; + + case 20: + __SIMD_fastpack20_32(in, out); + return; + + case 21: + __SIMD_fastpack21_32(in, out); + return; + + case 22: + __SIMD_fastpack22_32(in, out); + return; + + case 23: + __SIMD_fastpack23_32(in, out); + return; + + case 24: + __SIMD_fastpack24_32(in, out); + return; + + case 25: + __SIMD_fastpack25_32(in, out); + return; + + case 26: + __SIMD_fastpack26_32(in, out); + return; + + case 27: + __SIMD_fastpack27_32(in, out); + return; + + case 28: + 
__SIMD_fastpack28_32(in, out); + return; + + case 29: + __SIMD_fastpack29_32(in, out); + return; + + case 30: + __SIMD_fastpack30_32(in, out); + return; + + case 31: + __SIMD_fastpack31_32(in, out); + return; + + case 32: + __SIMD_fastpack32_32(in, out); + return; + + default: + break; + } +} + +__m128i *simdpack_shortlength(const uint32_t *in, int length, __m128i *out, + const uint32_t bit) { + int k; + int inwordpointer; + __m128i P; + uint32_t firstpass; + if (bit == 0) + return out; /* nothing to do */ + if (bit == 32) { + memcpy(out, in, length * sizeof(uint32_t)); + return (__m128i *)((uint32_t *)out + length); + } + inwordpointer = 0; + P = _mm_setzero_si128(); + for (k = 0; k < length / 4; ++k) { + __m128i value = _mm_loadu_si128(((const __m128i *)in + k)); + P = _mm_or_si128(P, _mm_slli_epi32(value, inwordpointer)); + firstpass = sizeof(uint32_t) * 8 - inwordpointer; + if (bit < firstpass) { + inwordpointer += bit; + } else { + _mm_storeu_si128(out++, P); + P = _mm_srli_epi32(value, firstpass); + inwordpointer = bit - firstpass; + } + } + if (length % 4 != 0) { + uint32_t buffer[4]; + __m128i value; + for (k = 0; k < (length % 4); ++k) { + buffer[k] = in[length / 4 * 4 + k]; + } + for (k = (length % 4); k < 4; ++k) { + buffer[k] = 0; + } + value = _mm_loadu_si128((__m128i *)buffer); + P = _mm_or_si128(P, _mm_slli_epi32(value, inwordpointer)); + firstpass = sizeof(uint32_t) * 8 - inwordpointer; + if (bit < firstpass) { + inwordpointer += bit; + } else { + _mm_storeu_si128(out++, P); + P = _mm_srli_epi32(value, firstpass); + inwordpointer = bit - firstpass; + } + } + if (inwordpointer != 0) { + _mm_storeu_si128(out++, P); + } + return out; +} + +const __m128i *simdunpack_shortlength(const __m128i *in, int length, + uint32_t *out, const uint32_t bit) { + int k; + __m128i maskbits; + int inwordpointer; + __m128i P; + if (length == 0) + return in; + if (bit == 0) { + for (k = 0; k < length; ++k) { + out[k] = 0; + } + return in; + } + if (bit == 32) { + 
memcpy(out, in, length * sizeof(uint32_t)); + return (const __m128i *)((uint32_t *)in + length); + } + maskbits = _mm_set1_epi32((1U << bit) - 1); + inwordpointer = 0; + P = _mm_loadu_si128((__m128i *)in); + ++in; + if (length % 4 == 0) { + + for (k = 0; k + 1 < length / 4; ++k) { + __m128i answer = _mm_srli_epi32(P, inwordpointer); + const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer; + if (bit < firstpass) { + inwordpointer += bit; + } else { + P = _mm_loadu_si128((__m128i *)in); + ++in; + answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer); + inwordpointer = bit - firstpass; + } + answer = _mm_and_si128(maskbits, answer); + _mm_storeu_si128((__m128i *)out, answer); + out += 4; + } + if (k < length / 4) { + __m128i answer = _mm_srli_epi32(P, inwordpointer); + const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer; + if (bit < firstpass) { + inwordpointer += bit; + } else if (bit == firstpass) { + inwordpointer = 0; + } else { + P = _mm_loadu_si128((__m128i *)in); + ++in; + answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer); + inwordpointer = bit - firstpass; + } + answer = _mm_and_si128(maskbits, answer); + _mm_storeu_si128((__m128i *)out, answer); + out += 4; + } + + } else { + for (k = 0; k < length / 4; ++k) { + __m128i answer = _mm_srli_epi32(P, inwordpointer); + const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer; + if (bit < firstpass) { + inwordpointer += bit; + } else { + P = _mm_loadu_si128((__m128i *)in); + ++in; + answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer); + inwordpointer = bit - firstpass; + } + answer = _mm_and_si128(maskbits, answer); + _mm_storeu_si128((__m128i *)out, answer); + out += 4; + } + uint32_t buffer[4]; + __m128i answer = _mm_srli_epi32(P, inwordpointer); + const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer; + if (bit < firstpass) { + inwordpointer += bit; + } else if (bit == firstpass) { + inwordpointer = 0; + } else { + P = _mm_loadu_si128((__m128i *)in); 
+ ++in; + answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer); + inwordpointer = bit - firstpass; + } + answer = _mm_and_si128(maskbits, answer); + _mm_storeu_si128((__m128i *)buffer, answer); + for (k = 0; k < (length % 4); ++k) { + *out = buffer[k]; + ++out; + } + } + return in; +} + +void simdfastset(__m128i *in128, uint32_t b, uint32_t value, size_t index) { + uint32_t *in = (uint32_t *)in128; + const int lane = index % 4; /* we have 4 interleaved lanes */ + const int bitsinlane = (index / 4) * b; /* how many bits in lane */ + const int firstwordinlane = bitsinlane / 32; + const int secondwordinlane = (bitsinlane + b - 1) / 32; + const uint32_t mask = (1U << b) - 1; + if (b == 0) + return; + /* we zero */ + if (b == 32) + in[4 * firstwordinlane + lane] = 0; + else + in[4 * firstwordinlane + lane] &= ~(mask << (bitsinlane % 32)); + + /* we write */ + in[4 * firstwordinlane + lane] |= (value << (bitsinlane % 32)); + + if (firstwordinlane == secondwordinlane) { + /* easy common case*/ + return; + } else { + /* harder case where we need to combine two words */ + const int firstbits = 32 - (bitsinlane % 32); + const int usablebits = b - firstbits; + const uint32_t mask2 = (1U << usablebits) - 1; + in[4 * firstwordinlane + 4 + lane] &= ~mask2; /* we zero */ + in[4 * firstwordinlane + 4 + lane] |= value >> firstbits; /* we write */ + return; + } +} + +int simdpack_compressedbytes(int length, const uint32_t bit) { + if (bit == 0) + return 0; /* nothing to do */ + if (bit == 32) { + return length * sizeof(uint32_t); + } + return (((length + 3) / 4) * bit + 31) / 32 * sizeof(__m128i); +} + +__m128i *simdpack_length(const uint32_t *in, size_t length, __m128i *out, + const uint32_t bit) { + size_t k; + for (k = 0; k < length / SIMDBlockSize; ++k) { + simdpack(in, out, bit); + in += SIMDBlockSize; + out += bit; + } + return simdpack_shortlength(in, length % SIMDBlockSize, out, bit); +} + +const __m128i *simdunpack_length(const __m128i *in, size_t length, + uint32_t 
*out, const uint32_t bit) { + size_t k; + for (k = 0; k < length / SIMDBlockSize; ++k) { + simdunpack(in, out, bit); + out += SIMDBlockSize; + in += bit; + } + return simdunpack_shortlength(in, length % SIMDBlockSize, out, bit); +} diff --git a/cpp-projects/base/data/simdcomp/simdbitpacking.h b/cpp-projects/base/data/simdcomp/simdbitpacking.h new file mode 100644 index 0000000..52f04de --- /dev/null +++ b/cpp-projects/base/data/simdcomp/simdbitpacking.h @@ -0,0 +1,75 @@ +/** + * This code is released under a BSD License. + */ +#ifndef SIMDBITPACKING_H_ +#define SIMDBITPACKING_H_ + +#include "portability.h" + +/* SSE2 is required */ +#include +/* for memset */ +#include + +#include "simdcomputil.h" + +/*** + * Please see example.c for various examples on how to make good use + * of these functions. + */ + +/* reads 128 values from "in", writes "bit" 128-bit vectors to "out". + * The input values are masked so that only the least significant "bit" bits are + * used. */ +void simdpack(const uint32_t *in, __m128i *out, const uint32_t bit); + +/* reads 128 values from "in", writes "bit" 128-bit vectors to "out". + * The input values are assumed to be less than 1< +#endif +#include + +#define Delta(curr, prev) \ + _mm_sub_epi32( \ + curr, _mm_or_si128(_mm_slli_si128(curr, 4), _mm_srli_si128(prev, 12))) + +/* returns the integer logarithm of v (bit width) */ +uint32_t bits(const uint32_t v) { +#ifdef _MSC_VER + unsigned long answer; + if (v == 0) { + return 0; + } + _BitScanReverse(&answer, v); + return answer + 1; +#else + return v == 0 ? 
0 + : 32 - __builtin_clz( + v); /* assume GCC-like compiler if not microsoft */ +#endif +} + +static uint32_t maxbitas32int(const __m128i accumulator) { + const __m128i _tmp1 = _mm_or_si128( + _mm_srli_si128(accumulator, 8), + accumulator); /* (A,B,C,D) xor (0,0,A,B) = (A,B,C xor A,D xor B)*/ + const __m128i _tmp2 = + _mm_or_si128(_mm_srli_si128(_tmp1, 4), + _tmp1); /* (A,B,C xor A,D xor B) xor (0,0,0,C xor A)*/ + uint32_t ans = _mm_cvtsi128_si32(_tmp2); + return bits(ans); +} + +SIMDCOMP_PURE uint32_t maxbits(const uint32_t *begin) { + const __m128i *pin = (const __m128i *)(begin); + __m128i accumulator = _mm_loadu_si128(pin); + uint32_t k = 1; + for (; 4 * k < SIMDBlockSize; ++k) { + __m128i newvec = _mm_loadu_si128(pin + k); + accumulator = _mm_or_si128(accumulator, newvec); + } + return maxbitas32int(accumulator); +} +static uint32_t orasint(const __m128i accumulator) { + const __m128i _tmp1 = _mm_or_si128( + _mm_srli_si128(accumulator, 8), + accumulator); /* (A,B,C,D) xor (0,0,A,B) = (A,B,C xor A,D xor B)*/ + const __m128i _tmp2 = + _mm_or_si128(_mm_srli_si128(_tmp1, 4), + _tmp1); /* (A,B,C xor A,D xor B) xor (0,0,0,C xor A)*/ + return _mm_cvtsi128_si32(_tmp2); +} + +#ifdef __SSE4_1__ + +static uint32_t minasint(const __m128i accumulator) { + const __m128i _tmp1 = _mm_min_epu32( + _mm_srli_si128(accumulator, 8), + accumulator); /* (A,B,C,D) xor (0,0,A,B) = (A,B,C xor A,D xor B)*/ + const __m128i _tmp2 = + _mm_min_epu32(_mm_srli_si128(_tmp1, 4), + _tmp1); /* (A,B,C xor A,D xor B) xor (0,0,0,C xor A)*/ + return _mm_cvtsi128_si32(_tmp2); +} + +static uint32_t maxasint(const __m128i accumulator) { + const __m128i _tmp1 = _mm_max_epu32( + _mm_srli_si128(accumulator, 8), + accumulator); /* (A,B,C,D) xor (0,0,A,B) = (A,B,C xor A,D xor B)*/ + const __m128i _tmp2 = + _mm_max_epu32(_mm_srli_si128(_tmp1, 4), + _tmp1); /* (A,B,C xor A,D xor B) xor (0,0,0,C xor A)*/ + return _mm_cvtsi128_si32(_tmp2); +} + +uint32_t simdmin(const uint32_t *in) { + const __m128i *pin = 
(const __m128i *)(in); + __m128i accumulator = _mm_loadu_si128(pin); + uint32_t k = 1; + for (; 4 * k < SIMDBlockSize; ++k) { + __m128i newvec = _mm_loadu_si128(pin + k); + accumulator = _mm_min_epu32(accumulator, newvec); + } + return minasint(accumulator); +} + +void simdmaxmin(const uint32_t *in, uint32_t *getmin, uint32_t *getmax) { + const __m128i *pin = (const __m128i *)(in); + __m128i minaccumulator = _mm_loadu_si128(pin); + __m128i maxaccumulator = minaccumulator; + uint32_t k = 1; + for (; 4 * k < SIMDBlockSize; ++k) { + __m128i newvec = _mm_loadu_si128(pin + k); + minaccumulator = _mm_min_epu32(minaccumulator, newvec); + maxaccumulator = _mm_max_epu32(maxaccumulator, newvec); + } + *getmin = minasint(minaccumulator); + *getmax = maxasint(maxaccumulator); +} + +uint32_t simdmin_length(const uint32_t *in, uint32_t length) { + uint32_t currentmin = 0xFFFFFFFF; + uint32_t lengthdividedby4 = length / 4; + uint32_t offset = lengthdividedby4 * 4; + uint32_t k; + if (lengthdividedby4 > 0) { + const __m128i *pin = (const __m128i *)(in); + __m128i accumulator = _mm_loadu_si128(pin); + k = 1; + for (; 4 * k < lengthdividedby4 * 4; ++k) { + __m128i newvec = _mm_loadu_si128(pin + k); + accumulator = _mm_min_epu32(accumulator, newvec); + } + currentmin = minasint(accumulator); + } + for (k = offset; k < length; ++k) + if (in[k] < currentmin) + currentmin = in[k]; + return currentmin; +} + +void simdmaxmin_length(const uint32_t *in, uint32_t length, uint32_t *getmin, + uint32_t *getmax) { + uint32_t lengthdividedby4 = length / 4; + uint32_t offset = lengthdividedby4 * 4; + uint32_t k; + *getmin = 0xFFFFFFFF; + *getmax = 0; + if (lengthdividedby4 > 0) { + const __m128i *pin = (const __m128i *)(in); + __m128i minaccumulator = _mm_loadu_si128(pin); + __m128i maxaccumulator = minaccumulator; + k = 1; + for (; 4 * k < lengthdividedby4 * 4; ++k) { + __m128i newvec = _mm_loadu_si128(pin + k); + minaccumulator = _mm_min_epu32(minaccumulator, newvec); + maxaccumulator = 
_mm_max_epu32(maxaccumulator, newvec); + } + *getmin = minasint(minaccumulator); + *getmax = maxasint(maxaccumulator); + } + for (k = offset; k < length; ++k) { + if (in[k] < *getmin) + *getmin = in[k]; + if (in[k] > *getmax) + *getmax = in[k]; + } +} + +#endif + +SIMDCOMP_PURE uint32_t maxbits_length(const uint32_t *in, uint32_t length) { + uint32_t k; + uint32_t lengthdividedby4 = length / 4; + uint32_t offset = lengthdividedby4 * 4; + uint32_t bigxor = 0; + if (lengthdividedby4 > 0) { + const __m128i *pin = (const __m128i *)(in); + __m128i accumulator = _mm_loadu_si128(pin); + k = 1; + for (; 4 * k < 4 * lengthdividedby4; ++k) { + __m128i newvec = _mm_loadu_si128(pin + k); + accumulator = _mm_or_si128(accumulator, newvec); + } + bigxor = orasint(accumulator); + } + for (k = offset; k < length; ++k) + bigxor |= in[k]; + return bits(bigxor); +} + +/* maxbit over 128 integers (SIMDBlockSize) with provided initial value */ +uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t *in) { + __m128i initoffset = _mm_set1_epi32(initvalue); + const __m128i *pin = (const __m128i *)(in); + __m128i newvec = _mm_loadu_si128(pin); + __m128i accumulator = Delta(newvec, initoffset); + __m128i oldvec = newvec; + uint32_t k = 1; + for (; 4 * k < SIMDBlockSize; ++k) { + newvec = _mm_loadu_si128(pin + k); + accumulator = _mm_or_si128(accumulator, Delta(newvec, oldvec)); + oldvec = newvec; + } + initoffset = oldvec; + return maxbitas32int(accumulator); +} + +/* maxbit over |length| integers with provided initial value */ +uint32_t simdmaxbitsd1_length(uint32_t initvalue, const uint32_t *in, + uint32_t length) { + __m128i newvec; + __m128i oldvec; + __m128i initoffset; + __m128i accumulator; + const __m128i *pin; + uint32_t tmparray[4]; + uint32_t k = 1; + uint32_t acc; + + assert(length > 0); + + pin = (const __m128i *)(in); + initoffset = _mm_set1_epi32(initvalue); + switch (length) { + case 1: + newvec = _mm_set1_epi32(in[0]); + break; + case 2: + newvec = _mm_setr_epi32(in[0], 
in[1], in[1], in[1]); + break; + case 3: + newvec = _mm_setr_epi32(in[0], in[1], in[2], in[2]); + break; + default: + newvec = _mm_loadu_si128(pin); + break; + } + accumulator = Delta(newvec, initoffset); + oldvec = newvec; + + /* process 4 integers and build an accumulator */ + while (k * 4 + 4 <= length) { + newvec = _mm_loadu_si128(pin + k); + accumulator = _mm_or_si128(accumulator, Delta(newvec, oldvec)); + oldvec = newvec; + k++; + } + + /* extract the accumulator as an integer */ + _mm_storeu_si128((__m128i *)(tmparray), accumulator); + acc = tmparray[0] | tmparray[1] | tmparray[2] | tmparray[3]; + + /* now process the remaining integers */ + for (k *= 4; k < length; k++) + acc |= in[k] - (k == 0 ? initvalue : in[k - 1]); + + /* return the number of bits */ + return bits(acc); +} diff --git a/cpp-projects/base/data/simdcomp/simdcomputil.h b/cpp-projects/base/data/simdcomp/simdcomputil.h new file mode 100644 index 0000000..651e9fc --- /dev/null +++ b/cpp-projects/base/data/simdcomp/simdcomputil.h @@ -0,0 +1,50 @@ +/** + * This code is released under a BSD License. 
 + */ + +#ifndef SIMDCOMPUTIL_H_ +#define SIMDCOMPUTIL_H_ + +#include "portability.h" + +/* SSE2 is required */ +#include <emmintrin.h> + +/* returns the integer logarithm of v (bit width) */ +uint32_t bits(const uint32_t v); + +/* max integer logarithm over a range of SIMDBlockSize integers (128 integer) */ +uint32_t maxbits(const uint32_t *begin); + +/* same as maxbits, but we specify the number of integers */ +uint32_t maxbits_length(const uint32_t *in, uint32_t length); + +enum { SIMDBlockSize = 128 }; + +/* computes (quickly) the minimal value of 128 values */ +uint32_t simdmin(const uint32_t *in); + +/* computes (quickly) the minimal value of the specified number of values */ +uint32_t simdmin_length(const uint32_t *in, uint32_t length); + +#ifdef __SSE4_1__ +/* computes (quickly) the minimal and maximal value of the specified number of + * values */ +void simdmaxmin_length(const uint32_t *in, uint32_t length, uint32_t *getmin, + uint32_t *getmax); + +/* computes (quickly) the minimal and maximal value of the 128 values */ +void simdmaxmin(const uint32_t *in, uint32_t *getmin, uint32_t *getmax); + +#endif + +/* like maxbit over 128 integers (SIMDBlockSize) with provided initial value + and using differential coding */ +uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t *in); + +/* like simdmaxbitsd1, but calculates maxbits over |length| integers + with provided initial value. |length| can be any arbitrary value. */ +uint32_t simdmaxbitsd1_length(uint32_t initvalue, const uint32_t *in, + uint32_t length); + +#endif /* SIMDCOMPUTIL_H_ */ diff --git a/cpp-projects/base/data/simdcomp/simdfor.c b/cpp-projects/base/data/simdcomp/simdfor.c new file mode 100644 index 0000000..abd0955 --- /dev/null +++ b/cpp-projects/base/data/simdcomp/simdfor.c @@ -0,0 +1,15200 @@ +/** + * This code is released under a BSD License. 
+ */ + +#include "simdfor.h" + +static __m128i iunpackFOR0(__m128i initOffset, const __m128i *_in, + uint32_t *_out) { + __m128i *out = (__m128i *)(_out); + int i; + (void)_in; + for (i = 0; i < 8; ++i) { + _mm_store_si128(out++, initOffset); + _mm_store_si128(out++, initOffset); + _mm_store_si128(out++, initOffset); + _mm_store_si128(out++, initOffset); + } + + return initOffset; +} + +static void ipackFOR0(__m128i initOffset, const uint32_t *_in, __m128i *out) { + (void)initOffset; + (void)_in; + (void)out; +} + +static void ipackFOR1(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = 
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + 
++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR2(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = 
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + ++in; 
+ CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR3(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 3 - 1); + ++in; + CurrIn = 
_mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 3 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_load_si128(in); + 
InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR4(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 
16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); +} + +static void 
ipackFOR5(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 3); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = 
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 1); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR6(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = 
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); +} + +/* ipackFOR7: offset-subtracting bit packer, 7 bits per value. Loads 32 consecutive 128-bit vectors from _in, subtracts initOffset from every 32-bit lane, and ORs the differences at successive 7-bit positions into 7 output vectors written to out. Uses the aligned load/store intrinsics, so _in and out must be 16-byte aligned. No mask is applied before shifting; presumably callers guarantee each difference fits in 7 bits (TODO confirm). */ static void ipackFOR7(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + /* only the low 4 bits of the last value fit above bit 28; start the next word with its remaining high 3 bits */ OutReg = _mm_srli_epi32(InReg, 7 - 3); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 5); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 1); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_store_si128(out, OutReg); +} + +/* ipackFOR8: offset-subtracting bit packer, 8 bits per value. Loads 32 consecutive 128-bit vectors from _in, subtracts initOffset from every 32-bit lane, and packs four differences per output word at shifts 0, 8, 16 and 24, writing 8 output vectors to out. Since 8 divides 32, no value straddles two output words. Uses the aligned load/store intrinsics, so _in and out must be 16-byte aligned. No mask is applied before shifting; presumably callers guarantee each difference fits in 8 bits (TODO confirm). */ static void ipackFOR8(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = 
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + 
++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = 
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); +} + +/* ipackFOR9: offset-subtracting bit packer, 9 bits per value. Loads 32 consecutive 128-bit vectors from _in, subtracts initOffset from every 32-bit lane, and ORs the differences at successive 9-bit positions into 9 output vectors written to out. Uses the aligned load/store intrinsics, so _in and out must be 16-byte aligned. No mask is applied before shifting; presumably callers guarantee each difference fits in 9 bits (TODO confirm). */ static void ipackFOR9(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_store_si128(out, OutReg); + + ++out; + /* only the low 5 bits of the last value fit above bit 27; start the next word with its remaining high 4 bits */ OutReg = _mm_srli_epi32(InReg, 9 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 8); + ++in; + CurrIn = 
_mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 3); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 7); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = 
_mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 1); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 5); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + 
_mm_store_si128(out, OutReg); +} + +/* ipackFOR10: offset-subtracting bit packer, 10 bits per value. Loads 32 consecutive 128-bit vectors from _in, subtracts initOffset from every 32-bit lane, and ORs the differences at successive 10-bit positions into 10 output vectors written to out. The shift pattern repeats after 16 inputs (5 output words). Uses the aligned load/store intrinsics, so _in and out must be 16-byte aligned. No mask is applied before shifting; presumably callers guarantee each difference fits in 10 bits (TODO confirm). */ static void ipackFOR10(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + /* only the low 2 bits of the last value fit above bit 30; start the next word with its remaining high 8 bits */ OutReg = _mm_srli_epi32(InReg, 10 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 
4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = 
_mm_srli_epi32(InReg, 10 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); +} + +/* ipackFOR11: offset-subtracting bit packer, 11 bits per value. Loads 32 consecutive 128-bit vectors from _in, subtracts initOffset from every 32-bit lane, and ORs the differences at successive 11-bit positions into 11 output vectors written to out. Uses the aligned load/store intrinsics, so _in and out must be 16-byte aligned. No mask is applied before shifting; presumably callers guarantee each difference fits in 11 bits (TODO confirm). */ static void ipackFOR11(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_load_si128(in); + InReg 
= _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + /* only the low 10 bits of the last value fit above bit 22; start the next word with its remaining high bit */ OutReg = _mm_srli_epi32(InReg, 11 - 1); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 3); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 5); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 7); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; 
+ CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 9); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_store_si128(out, OutReg); +} + +/* ipackFOR12: offset-subtracting bit packer, 12 bits per value. Loads 32 consecutive 128-bit vectors from _in, subtracts initOffset from every 32-bit lane, and ORs the differences at successive 12-bit positions into 12 output vectors written to out. The shift pattern repeats after 8 inputs (3 output words). Uses the aligned load/store intrinsics, so _in and out must be 16-byte aligned. No mask is applied before shifting; presumably callers guarantee each difference fits in 12 bits (TODO confirm). */ static void ipackFOR12(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + /* only the low 8 bits of the last value fit above bit 24; start the next word with its remaining high 4 bits */ OutReg = _mm_srli_epi32(InReg, 12 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); 
+ ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 
20)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 12 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); +} + /* ipackFOR13: bit-packs 32 input vectors at 13 bits per 32-bit lane ("FOR" is presumably frame-of-reference - confirm). Each of the 32 consecutive __m128i vectors loaded from _in has initOffset subtracted lane-wise; the differences are shifted and OR-ed into 13 consecutive __m128i vectors stored to out. No masking is applied, so every difference is assumed to fit in 13 bits - NOTE(review): confirm callers guarantee this. The aligned _mm_load_si128/_mm_store_si128 forms are used, so _in and out must be 16-byte aligned. */+static void ipackFOR13(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 7); /* the value shifted to bit 26 only had room for its low 6 bits; its remaining 7 high bits start the next output vector */+ ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 1); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg =
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 9); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 3); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); +
_mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 11); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 5); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg =
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 13 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_store_si128(out, OutReg); +} + /* ipackFOR14: bit-packs 32 input vectors at 14 bits per 32-bit lane ("FOR" is presumably frame-of-reference - confirm). Each of the 32 consecutive __m128i vectors loaded from _in has initOffset subtracted lane-wise; the differences are shifted and OR-ed into 14 consecutive __m128i vectors stored to out, as two identical 16-value / 7-vector halves. No masking is applied, so every difference is assumed to fit in 14 bits - NOTE(review): confirm callers guarantee this. The aligned _mm_load_si128/_mm_store_si128 forms are used, so _in and out must be 16-byte aligned. */+static void ipackFOR14(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 10); /* the value shifted to bit 28 only had room for its low 4 bits; its remaining 10 high bits start the next output vector */+ ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 2); +
++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; /* 16 values x 14 bits filled exactly 7 output vectors, so this value starts a fresh vector and the shift pattern above repeats */+ ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg,
28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg =
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); +} + /* ipackFOR15: bit-packs 32 input vectors at 15 bits per 32-bit lane ("FOR" is presumably frame-of-reference - confirm). Each of the 32 consecutive __m128i vectors loaded from _in has initOffset subtracted lane-wise; the differences are shifted and OR-ed into 15 consecutive __m128i vectors stored to out. No masking is applied, so every difference is assumed to fit in 15 bits - NOTE(review): confirm callers guarantee this. The aligned _mm_load_si128/_mm_store_si128 forms are used, so _in and out must be 16-byte aligned. */+static void ipackFOR15(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 13); /* the value shifted to bit 30 only had room for its low 2 bits; its remaining 13 high bits start the next output vector */+ ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 11); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 9); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); +
++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 7); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 5); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 3); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 1); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg,
_mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + +
OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_store_si128(out, OutReg); +} + /* ipackFOR16: bit-packs 32 input vectors at 16 bits per 32-bit lane ("FOR" is presumably frame-of-reference - confirm). Each of the 32 consecutive __m128i vectors loaded from _in has initOffset subtracted lane-wise; every output vector holds two differences per lane, the first unshifted in bits 0-15 and the second shifted left by 16, giving 16 consecutive __m128i vectors stored to out. No value straddles a 32-bit boundary at this width. No masking is applied, so every difference is assumed to fit in 16 bits - NOTE(review): confirm callers guarantee this. The aligned _mm_load_si128/_mm_store_si128 forms are used, so _in and out must be 16-byte aligned. */+static void ipackFOR16(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg =
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); +
InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); +} + /* ipackFOR17: bit-packs 32 input vectors at 17 bits per 32-bit lane ("FOR" is presumably frame-of-reference - confirm). Each of the 32 consecutive __m128i vectors loaded from _in has initOffset subtracted lane-wise; the differences are shifted and OR-ed into 17 consecutive __m128i vectors stored to out. No masking is applied, so every difference is assumed to fit in 17 bits - NOTE(review): confirm callers guarantee this. The aligned _mm_load_si128/_mm_store_si128 forms are used, so _in and out must be 16-byte aligned. */+static void ipackFOR17(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 2); /* the value shifted to bit 17 only had room for its low 15 bits; its remaining 2 high bits start the next output vector */+ ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg =
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); +
_mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 1); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 3); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 5); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 7); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn =
_mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 9); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 11); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 13); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 15); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_store_si128(out, OutReg); +} + /* ipackFOR18: bit-packs 32 input vectors at 18 bits per 32-bit lane ("FOR" is presumably frame-of-reference - confirm). Each of the 32 consecutive __m128i vectors loaded from _in has initOffset subtracted lane-wise; the differences are shifted and OR-ed into 18 consecutive __m128i vectors stored to out, as two identical 16-value / 9-vector halves. No masking is applied, so every difference is assumed to fit in 18 bits - NOTE(review): confirm callers guarantee this. The aligned _mm_load_si128/_mm_store_si128 forms are used, so _in and out must be 16-byte aligned. */+static void ipackFOR18(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg =
_mm_srli_epi32(InReg, 18 - 4); /* the value shifted to bit 18 only had room for its low 14 bits; its remaining 4 high bits start the next output vector */+ ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); +
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; /* 16 values x 18 bits filled exactly 9 output vectors, so this value starts a fresh vector and the shift pattern above repeats */+ ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg =
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR19(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + 
CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 18); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 5); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 11); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 17); + ++in; + CurrIn = 
_mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 3); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 9); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 15); + ++in; + CurrIn = _mm_load_si128(in); + InReg = 
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 1); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 7); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 13); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, 
initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR20(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); 
+ ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR21(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 9); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 19); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = 
_mm_srli_epi32(InReg, 21 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 18); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 7); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 17); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 5); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); 
+ ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 15); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 3); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 13); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_store_si128(out, 
OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 1); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 11); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR22(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = 
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 18); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 
- 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 6); + ++in; + CurrIn = 
_mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 18); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR23(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 14)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 5); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 19); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 1); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 15); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = 
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 11); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 7); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 21); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 3); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 17); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 22); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 13); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 18); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 9); + ++in; + CurrIn = _mm_load_si128(in); + InReg = 
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR24(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); 
+ + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR25(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 18); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 11); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 22); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 15); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 1); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 19); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 5); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 23); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + 
++out; + OutReg = _mm_srli_epi32(InReg, 25 - 9); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 13); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 24); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 17); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 10); + ++in; + CurrIn = _mm_load_si128(in); 
+ InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 3); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 21); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 7); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR26(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = 
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 22); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 24); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 18); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = 
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 22); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 24); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 18); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = 
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR27(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 22); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 17); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 7); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 24); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, 
initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 19); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 9); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 26); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 21); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 11); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, 
initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 1); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 23); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 18); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 13); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 3); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + 
_mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 25); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 15); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 5); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR28(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + 
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, 
OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + ++in; + CurrIn = 
_mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR29(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 26); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 23); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 17); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 11); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 8); + ++in; + CurrIn = 
_mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 5); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 28); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 25); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 22); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 19); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 13); + ++in; + CurrIn = 
_mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 7); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 1); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 27); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 24); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 21); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 18); + ++in; + CurrIn = 
_mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 15); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 9); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 3); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR30(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 28); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + 
OutReg = _mm_srli_epi32(InReg, 30 - 26); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 24); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 22); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 18); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + _mm_store_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 28); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 26); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 24); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + 
++out; + OutReg = _mm_srli_epi32(InReg, 30 - 22); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 18); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + _mm_store_si128(out, OutReg); +} + +static void ipackFOR31(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_load_si128(in); + __m128i InReg = _mm_sub_epi32(CurrIn, initOffset); + OutReg = InReg; + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 30); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 29); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 28); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 27); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 26); + ++in; + CurrIn = 
_mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 25); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 24); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 23); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 22); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 21); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 20); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 19); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 18); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + 
_mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 17); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 16); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 15); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 14); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 13); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 12); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 11); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 10); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 9); + ++in; + CurrIn = _mm_load_si128(in); + InReg = 
_mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 8); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 7); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 6); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 5); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 4); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 3); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 2); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + _mm_store_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 1); + ++in; + CurrIn = _mm_load_si128(in); + InReg = _mm_sub_epi32(CurrIn, initOffset); + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + _mm_store_si128(out, OutReg); +} + +static void 
ipackFOR32(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i InReg = _mm_load_si128(in); + (void)initOffset; + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = 
_mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_load_si128(in); + + OutReg = InReg; + _mm_store_si128(out, OutReg); +} + +static __m128i iunpackFOR1(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 1) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); 
+ OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + 
OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = _mm_and_si128(tmp, mask); 
+ OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR2(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 2) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, 
OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, 
OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR3(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 3) - 1); + + tmp = InReg; + OutReg = 
_mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 3 - 1), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = 
_mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 3 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + 
_mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR4(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 4) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + 
OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg 
= _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = 
_mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR5(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 5) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 3), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 1), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, 
initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR6(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = 
_mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 6) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = 
_mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR7(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 7) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 3), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + 
_mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 5), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 1), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = 
_mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR8(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 8) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = 
tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = 
_mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = 
_mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR9(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 9) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 
8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 3), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 7), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = 
_mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 1), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 5), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + 
OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR10(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 10) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = 
_mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, 
OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR11(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 11) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 1), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 3), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 5), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 11 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 7), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 9), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR12(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 12) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), 
mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + 
tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR13(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 13) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 7), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 1), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = 
_mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 9), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 3), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + OutReg = 
_mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 11), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 5), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); 
+ _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR14(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 14) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = 
tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = 
_mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR15(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 15) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + 
_mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 13), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 11), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 9), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 7), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 15 - 5), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 3), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 1), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = 
_mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + 
_mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR16(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 16) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = 
tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, 
mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR17(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 17) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 17 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + 
++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 1), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 3), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 17 - 5), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 7), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 9), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 11), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 13), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + 
++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 15), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR18(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 18) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + 
tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = 
_mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR19(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 19) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, 
initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 18), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 5), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 11), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 17), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 4), mask)); + + 
OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 3), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 9), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 15), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + 
tmp = _mm_srli_epi32(InReg, 15); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 1), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 7), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); 
+ OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 13), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + /* iunpackFOR20: unpack 32 vectors of 20-bit fields and add initOffset to each. Every 32-bit lane of the __m128i input is handled as its own bit stream. The body reads 20 consecutive words from `in` (the initial load plus 19 `++in` steps) and issues 32 stores to `_out`, i.e. 128 uint32_t. A field that lies inside one word is `srli` followed by `and mask`. A field that continues into the next word is the zero-filled `srli` remainder OR-ed with the masked `slli` of the next word. The field that ends exactly at bit 31 (shift 12) is stored unmasked because the logical right shift already leaves only 20 bits. The shift pattern 0, 20, 8, 28, 16, 4, 24, 12 repeats four times (8 outputs per 5 input words). `_mm_load_si128` and `_mm_store_si128` are the aligned forms, so `in` and `_out` must be 16-byte aligned. initOffset is never modified and is returned as-is. NOTE(review): "FOR" presumably means frame-of-reference decoding - confirm against the matching pack routine. */+static __m128i iunpackFOR20(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 20) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + OutReg = 
_mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + 
_mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = 
_mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + /* iunpackFOR21: unpack 32 vectors of 21-bit fields and add initOffset to each. Every 32-bit lane of the __m128i input is handled as its own bit stream. The body reads 21 consecutive words from `in` (the initial load plus 20 `++in` steps) and issues 32 stores to `_out`, i.e. 128 uint32_t. A field that lies inside one word is `srli` followed by `and mask`. A field that continues into the next word is the zero-filled `srli` remainder OR-ed with the masked `slli` of the next word, where the left-shift amount `21 - k` pairs with the `srli` by k that extracts the following field. The final field (shift 11) is stored unmasked because the logical right shift already leaves only 21 bits. Since 21 is odd the shift sequence does not repeat within the function. `_mm_load_si128` and `_mm_store_si128` are the aligned forms, so `in` and `_out` must be 16-byte aligned. initOffset is never modified and is returned as-is. NOTE(review): "FOR" presumably means frame-of-reference decoding - confirm against the matching pack routine. */+static __m128i iunpackFOR21(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 21) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, 
OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 9), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 19), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 18), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 7), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + 
_mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 17), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 5), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 15), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = 
_mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 3), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 13), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 1), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = 
_mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 11), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + /* iunpackFOR22: unpack 32 vectors of 22-bit fields and add initOffset to each. Every 32-bit lane of the __m128i input is handled as its own bit stream. The body reads 22 consecutive words from `in` (the initial load plus 21 `++in` steps) and issues 32 stores to `_out`, i.e. 128 uint32_t. A field that lies inside one word is `srli` followed by `and mask`. A field that continues into the next word is the zero-filled `srli` remainder OR-ed with the masked `slli` of the next word. The field that ends exactly at bit 31 (shift 10) is stored unmasked because the logical right shift already leaves only 22 bits. The shift pattern 0, 22, 12, 2, 24, 14, 4, 26, 16, 6, 28, 18, 8, 30, 20, 10 runs twice (16 outputs per 11 input words). `_mm_load_si128` and `_mm_store_si128` are the aligned forms, so `in` and `_out` must be 16-byte aligned. initOffset is never modified and is returned as-is. NOTE(review): "FOR" presumably means frame-of-reference decoding - confirm against the matching pack routine. */+static __m128i iunpackFOR22(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 22) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 18), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = 
+ _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 16), mask)); + + 
OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 18), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + /* iunpackFOR23: unpack 32 vectors of 23-bit fields and add initOffset to each. Every 32-bit lane of the __m128i input is handled as its own bit stream. The body reads 23 consecutive words from `in` (the initial load plus 22 `++in` steps) and issues 32 stores to `_out`, i.e. 128 uint32_t. A field that lies inside one word is `srli` followed by `and mask`. A field that continues into the next word is the zero-filled `srli` remainder OR-ed with the masked `slli` of the next word, where the left-shift amount `23 - k` pairs with the `srli` by k that extracts the following field. The final field (shift 9) is stored unmasked because the logical right shift already leaves only 23 bits. Since 23 is odd the shift sequence does not repeat within the function. `_mm_load_si128` and `_mm_store_si128` are the aligned forms, so `in` and `_out` must be 16-byte aligned. initOffset is never modified and is returned as-is. NOTE(review): "FOR" presumably means frame-of-reference decoding - confirm against the matching pack routine. */+static __m128i iunpackFOR23(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = 
_mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 23) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 5), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 19), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 1), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 23 - 15), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 11), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 7), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, 
OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 21), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 3), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 17), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 22), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + 
_mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 13), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 18), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 9), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR24(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 24) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); 
+ _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + 
++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg 
= + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + OutReg 
= _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + /* iunpackFOR25: unpacks 32 __m128i vectors (128 uint32_t values) of 25-bit fields, each 32-bit lane being unpacked independently, from the 25 consecutive __m128i words starting at in; initOffset is added to every unpacked vector before it is stored to _out. Returns initOffset unchanged. in and _out are accessed with _mm_load_si128 and _mm_store_si128, which require 16-byte-aligned addresses. */ +static __m128i iunpackFOR25(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 25) - 1); /* keeps the low 25 bits of every 32-bit lane */ + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); /* this value straddles two input words: the 7 bits left after the shift are its low bits; the next word, shifted left by 7 and masked, supplies the remaining 18 */ + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 18), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 11), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); /* this value lies entirely inside the current word, so a shift and mask suffice and no new word is loaded */ + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 22), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 15), mask)); + +
OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 1), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 19), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 5), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 23), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp;
+ ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 9), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 13), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 24), mask)); + +
OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 17), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 3), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 21), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 7), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + /* iunpackFOR26: unpacks 32 __m128i vectors (128 uint32_t values) of 26-bit fields, each 32-bit lane being unpacked independently, from the 26 consecutive __m128i words starting at in; initOffset is added to every unpacked vector before it is stored to _out. Returns initOffset unchanged. in and _out are accessed with _mm_load_si128 and _mm_store_si128, which require 16-byte-aligned addresses. */ +static __m128i iunpackFOR26(__m128i
initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 26) - 1); /* keeps the low 26 bits of every 32-bit lane */ + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); /* this value straddles two input words: the 6 bits left after the shift are its low bits; the next word, shifted left by 6 and masked, supplies the remaining 20 */ + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 22), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 16), mask)); + + OutReg =
_mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 24), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 18), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); /* the 26 bits left after the shift are exactly one value, so no mask or merge is needed; the next word is loaded only for the values that follow */ + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg =
_mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 22), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 10),
mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 24), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 18), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + /* iunpackFOR27: unpacks 32 __m128i vectors (128 uint32_t values) of 27-bit fields, each 32-bit lane being unpacked independently, from the 27 consecutive __m128i words starting at in; initOffset is added to every unpacked vector before it is stored to _out. Returns initOffset unchanged. in and _out are accessed with _mm_load_si128 and _mm_store_si128, which require 16-byte-aligned addresses. */ +static __m128i iunpackFOR27(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 27) - 1); /* keeps the low 27 bits of every 32-bit lane */ + + tmp = InReg; + OutReg =
_mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); /* this value straddles two input words: the 5 bits left after the shift are its low bits; the next word, shifted left by 5 and masked, supplies the remaining 22 */ + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 22), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 17), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 7), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); /* this value lies entirely inside the current word, so a shift and mask suffice and no new word is loaded */ + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 24), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 19),
mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 9), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 26), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 21), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 11), mask)); + + OutReg =
_mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 1), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 23), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 18), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 13), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 3), mask)); + + OutReg = _mm_add_epi32(OutReg,
initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 25), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 15), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 5), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + /* iunpackFOR28: unpacks 32 __m128i vectors (128 uint32_t values) of 28-bit fields, each 32-bit lane being unpacked independently, from the 28 consecutive __m128i words starting at in; initOffset is added to every unpacked vector before it is stored to _out. Returns initOffset unchanged. in and _out are accessed with _mm_load_si128 and _mm_store_si128, which require 16-byte-aligned addresses. */ +static __m128i iunpackFOR28(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 28) - 1); /* keeps the low 28 bits of every 32-bit lane */ + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg =
_mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); /* this value straddles two input words: the 4 bits left after the shift are its low bits; the next word, shifted left by 4 and masked, supplies the remaining 24 */ + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); /* the 28 bits left after the shift are exactly one value, so no mask or merge is needed; the next word is loaded only for the values that follow */ + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; +
InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + OutReg =
_mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; +
InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); /* last value: the 28 bits left after the shift are stored as-is and no further input word is loaded */ + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR29(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 29) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 26), mask)); + + OutReg = _mm_add_epi32(OutReg,
initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 23), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 17), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 11), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 5), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 29 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 28), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 25), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 22), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 19), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 13), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 29 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 7), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 1), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 27), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 24), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 21), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 29 - 18), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 15), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 9), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 3), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR30(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 30) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 24), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 22), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 18), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 14), mask)); + + 
OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + 
++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 24), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 22), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 18), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 16), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); 
+ _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} + +static __m128i iunpackFOR31(__m128i initOffset, const __m128i *in, + uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_load_si128(in); + __m128i OutReg; + __m128i tmp; + const __m128i mask = _mm_set1_epi32((1U << 31) 
- 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 30), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 29), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 28), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 27), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 26), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 25), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 24), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 
24); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 23), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 22), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 21), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 20), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 19), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 18), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 17), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 16), mask)); + + OutReg = 
_mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 15), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 14), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 13), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 12), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 11), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 10), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 9), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + 
_mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 8), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 7), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 6), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 5), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 4), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 3), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 2), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = tmp; + ++in; + InReg = _mm_load_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 1), mask)); + + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 1); + OutReg = tmp; + OutReg = _mm_add_epi32(OutReg, initOffset); + _mm_store_si128(out++, OutReg); + + return initOffset; +} +/* iunpackFOR32: 32-bit width, so the input is copied as-is: 128 / 4 = 32 vectors go from in to _out via _mm_load_si128 and _mm_store_si128. initvalue is deliberately unused (no offset is added); the last vector copied is returned. */ +static __m128i iunpackFOR32(__m128i initvalue, const __m128i *in, + uint32_t *_out) { + __m128i *mout = (__m128i *)_out; + __m128i invec; + size_t k; + (void)initvalue; + for (k = 0; k < 128 / 4; ++k) { + invec = _mm_load_si128(in++); + _mm_store_si128(mout++, invec); + } + return invec; +} +/* simdpackFOR: broadcasts initvalue into a vector with _mm_set1_epi32 and dispatches on bit to the matching ipackFOR0 .. ipackFOR32 kernel, forwarding in and out unchanged. Only widths 0 to 32 have a case. */ +void simdpackFOR(uint32_t initvalue, const uint32_t *in, __m128i *out, + const uint32_t bit) { + __m128i initOffset = _mm_set1_epi32(initvalue); + switch (bit) { + case 0: + ipackFOR0(initOffset, in, out); + break; + + case 1: + ipackFOR1(initOffset, in, out); + break; + + case 2: + ipackFOR2(initOffset, in, out); + break; + + case 3: + ipackFOR3(initOffset, in, out); + break; + + case 4: + ipackFOR4(initOffset, in, out); + break; + + case 5: + ipackFOR5(initOffset, in, out); + break; + + case 6: + ipackFOR6(initOffset, in, out); + break; + + case 7: + ipackFOR7(initOffset, in, out); + break; + + case 8: + ipackFOR8(initOffset, in, out); + break; + + case 9: + ipackFOR9(initOffset, in, out); + break; + + case 10: + ipackFOR10(initOffset, in, out); + break; + + case 11: + ipackFOR11(initOffset, in, out); + break; + + case 12: + ipackFOR12(initOffset, in, out); + break; + + case 13: + ipackFOR13(initOffset, in, out); + break; + + case 14: + ipackFOR14(initOffset, in, out); + break; + + case 15: + ipackFOR15(initOffset, in, out); + break; + + case 16: + ipackFOR16(initOffset, in, out); + break; + + case 17: + ipackFOR17(initOffset, in, out); + break; + + case 18: + ipackFOR18(initOffset, in, out); + break; + + case 19: + ipackFOR19(initOffset, in, out); + break; + + case 20: + ipackFOR20(initOffset, in, out); + break; + + case 21: + ipackFOR21(initOffset, in, out); + break; + + case 22: + ipackFOR22(initOffset, in, out); + break; + + case 23: + ipackFOR23(initOffset, in, out); + break; + + case 24: + ipackFOR24(initOffset, in, 
out); + break; + + case 25: + ipackFOR25(initOffset, in, out); + break; + + case 26: + ipackFOR26(initOffset, in, out); + break; + + case 27: + ipackFOR27(initOffset, in, out); + break; + + case 28: + ipackFOR28(initOffset, in, out); + break; + + case 29: + ipackFOR29(initOffset, in, out); + break; + + case 30: + ipackFOR30(initOffset, in, out); + break; + + case 31: + ipackFOR31(initOffset, in, out); + break; + + case 32: + ipackFOR32(initOffset, in, out); + break; +/* bit above 32 is not a valid width: nothing is packed */ + default: + break; + } +} +/* simdunpackFOR: mirror of simdpackFOR. Broadcasts initvalue with _mm_set1_epi32 and calls the iunpackFOR0 .. iunpackFOR32 kernel selected by bit; the vector each kernel returns is discarded. Only widths 0 to 32 have a case. */ +void simdunpackFOR(uint32_t initvalue, const __m128i *in, uint32_t *out, + const uint32_t bit) { + __m128i initOffset = _mm_set1_epi32(initvalue); + switch (bit) { + case 0: + iunpackFOR0(initOffset, in, out); + break; + + case 1: + iunpackFOR1(initOffset, in, out); + break; + + case 2: + iunpackFOR2(initOffset, in, out); + break; + + case 3: + iunpackFOR3(initOffset, in, out); + break; + + case 4: + iunpackFOR4(initOffset, in, out); + break; + + case 5: + iunpackFOR5(initOffset, in, out); + break; + + case 6: + iunpackFOR6(initOffset, in, out); + break; + + case 7: + iunpackFOR7(initOffset, in, out); + break; + + case 8: + iunpackFOR8(initOffset, in, out); + break; + + case 9: + iunpackFOR9(initOffset, in, out); + break; + + case 10: + iunpackFOR10(initOffset, in, out); + break; + + case 11: + iunpackFOR11(initOffset, in, out); + break; + + case 12: + iunpackFOR12(initOffset, in, out); + break; + + case 13: + iunpackFOR13(initOffset, in, out); + break; + + case 14: + iunpackFOR14(initOffset, in, out); + break; + + case 15: + iunpackFOR15(initOffset, in, out); + break; + + case 16: + iunpackFOR16(initOffset, in, out); + break; + + case 17: + iunpackFOR17(initOffset, in, out); + break; + + case 18: + iunpackFOR18(initOffset, in, out); + break; + + case 19: + iunpackFOR19(initOffset, in, out); + break; + + case 20: + iunpackFOR20(initOffset, in, out); + break; + + case 21: + iunpackFOR21(initOffset, in, out); + break; + + case 22: + iunpackFOR22(initOffset, in, out); + 
break; + + case 23: + iunpackFOR23(initOffset, in, out); + break; + + case 24: + iunpackFOR24(initOffset, in, out); + break; + + case 25: + iunpackFOR25(initOffset, in, out); + break; + + case 26: + iunpackFOR26(initOffset, in, out); + break; + + case 27: + iunpackFOR27(initOffset, in, out); + break; + + case 28: + iunpackFOR28(initOffset, in, out); + break; + + case 29: + iunpackFOR29(initOffset, in, out); + break; + + case 30: + iunpackFOR30(initOffset, in, out); + break; + + case 31: + iunpackFOR31(initOffset, in, out); + break; + + case 32: + iunpackFOR32(initOffset, in, out); + break; +/* bit above 32 is not a valid width: out is left untouched */ + default: + break; + } +} +/* simdselectFOR: returns the value stored at position slot of a packed block without unpacking the block. The input is read as 32-bit words; for widths 1 to 31 the value's bits are located inside its lane, masked to bit bits and added to initvalue. bit == 0 returns initvalue itself, and bit == 32 returns the stored word unchanged, i.e. without adding initvalue. */ +uint32_t simdselectFOR(uint32_t initvalue, const __m128i *in, uint32_t bit, + int slot) { + const uint32_t *pin = (const uint32_t *)in; + if (bit == 0) { + return initvalue; + } else if (bit == 32) { + /* silly special case */ + return pin[slot]; + } else { + const int lane = slot % 4; /* we have 4 interleaved lanes */ + const int bitsinlane = (slot / 4) * bit; /* how many bits in lane */ + const int firstwordinlane = bitsinlane / 32; + const int secondwordinlane = (bitsinlane + bit - 1) / 32; + const uint32_t firstpart = + pin[4 * firstwordinlane + lane] >> (bitsinlane % 32); + const uint32_t mask = (1U << bit) - 1; + if (firstwordinlane == secondwordinlane) { + /* easy common case*/ + return initvalue + (firstpart & mask); + } else { + /* harder case where we need to combine two words */ + const uint32_t secondpart = pin[4 * firstwordinlane + 4 + lane]; + const int usablebitsinfirstword = 32 - (bitsinlane % 32); + return initvalue + + ((firstpart | (secondpart << usablebitsinfirstword)) & mask); + } + } +} +/* simdsearchwithlengthFOR: binary search over the first length packed values using simdselectFOR. Finds the first position whose value is not below key, which presumes the values are in ascending order (not checked here). */ +int simdsearchwithlengthFOR(uint32_t initvalue, const __m128i *in, uint32_t bit, + int length, uint32_t key, uint32_t *presult) { + int count = length; + int begin = 0; + uint32_t val; + while (count > 0) { + int step = count / 2; + val = simdselectFOR(initvalue, in, bit, begin + step); + if (val < key) { + begin += step + 1; + count -= step 
+ 1; + } else + count = step; + } +/* begin is now the first position whose value is >= key. NOTE(review): if every value is below key, begin == length and this reads position length, one past the searched range; callers should compare the return value with length before trusting *presult. */ *presult = simdselectFOR(initvalue, in, bit, begin); + return begin; +} +/* simdpackFOR_compressedbytes: size in bytes of the output of simdpackFOR_length for length values at bit bits each. 0 for bit == 0, length * sizeof(uint32_t) for bit == 32, otherwise (length rounded up to groups of four) * bit bits per lane, rounded up to whole 32-bit words, times sizeof(__m128i). */ +int simdpackFOR_compressedbytes(int length, const uint32_t bit) { + if (bit == 0) + return 0; /* nothing to do */ + if (bit == 32) { + return length * sizeof(uint32_t); + } + return (((length + 3) / 4) * bit + 31) / 32 * sizeof(__m128i); +} +/* simdpackFOR_length: packs length values from in at bit bits each after subtracting initvalue, using unaligned loads and stores, and returns the position just past the packed output. bit == 0 writes nothing; bit == 32 is a plain memcpy. Differences are shifted and OR-ed in without masking, so each in[i] - initvalue is expected to fit in bit bits. */ +__m128i *simdpackFOR_length(uint32_t initvalue, const uint32_t *in, int length, + __m128i *out, const uint32_t bit) { + int k; + int inwordpointer; + __m128i P; + uint32_t firstpass; + __m128i offset; + if (bit == 0) + return out; /* nothing to do */ + if (bit == 32) { + memcpy(out, in, length * sizeof(uint32_t)); + return (__m128i *)((uint32_t *)out + length); + } + offset = _mm_set1_epi32(initvalue); + inwordpointer = 0; + P = _mm_setzero_si128(); + for (k = 0; k < length / 4; ++k) { + __m128i value = + _mm_sub_epi32(_mm_loadu_si128(((const __m128i *)in + k)), offset); + P = _mm_or_si128(P, _mm_slli_epi32(value, inwordpointer)); + firstpass = sizeof(uint32_t) * 8 - inwordpointer; + if (bit < firstpass) { + inwordpointer += bit; + } else { + _mm_storeu_si128(out++, P); + P = _mm_srli_epi32(value, firstpass); + inwordpointer = bit - firstpass; + } + } + if (length % 4 != 0) { + uint32_t buffer[4]; + __m128i value; + for (k = 0; k < (length % 4); ++k) { + buffer[k] = in[length / 4 * 4 + k]; + } + for (k = (length % 4); k < 4; ++k) { +/* pad the unused lanes with initvalue so they pack as zero */ buffer[k] = initvalue; + } + value = _mm_sub_epi32(_mm_loadu_si128((__m128i *)buffer), offset); + P = _mm_or_si128(P, _mm_slli_epi32(value, inwordpointer)); + firstpass = sizeof(uint32_t) * 8 - inwordpointer; + if (bit < firstpass) { + inwordpointer += bit; + } else { + _mm_storeu_si128(out++, P); + P = _mm_srli_epi32(value, firstpass); + inwordpointer = bit - firstpass; + } + } + if (inwordpointer != 0) { +/* flush the partially filled last vector */ _mm_storeu_si128(out++, P); + } + return out; +} +/* simdunpackFOR_length: inverse of simdpackFOR_length. Writes length values to out, each being the stored bit-wide field plus initvalue, and returns the input position just past the data it consumed (for bit == 0 the input is not read and in is returned unchanged). */ +const __m128i *simdunpackFOR_length(uint32_t initvalue, const __m128i *in, + int length, uint32_t *out, + const uint32_t bit) { + 
int k; + __m128i maskbits; + int inwordpointer; + __m128i P; + __m128i offset; + if (length == 0) + return in; + if (bit == 0) { + for (k = 0; k < length; ++k) { + out[k] = initvalue; + } + return in; + } + if (bit == 32) { + memcpy(out, in, length * sizeof(uint32_t)); + return (const __m128i *)((const uint32_t *)in + length); + } + offset = _mm_set1_epi32(initvalue); + maskbits = _mm_set1_epi32((1U << bit) - 1); + inwordpointer = 0; + P = _mm_loadu_si128((__m128i *)in); + ++in; + if (length % 4 == 0) { + + for (k = 0; k + 1 < length / 4; ++k) { + __m128i answer = _mm_srli_epi32(P, inwordpointer); + const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer; + if (bit < firstpass) { + inwordpointer += bit; + } else { + P = _mm_loadu_si128((__m128i *)in); + ++in; + answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer); + inwordpointer = bit - firstpass; + } + answer = _mm_and_si128(maskbits, answer); + _mm_storeu_si128((__m128i *)out, _mm_add_epi32(answer, offset)); + out += 4; + } + if (k < length / 4) { + __m128i answer = _mm_srli_epi32(P, inwordpointer); + const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer; + if (bit < firstpass) { + inwordpointer += bit; + } else if (bit == firstpass) { + inwordpointer = 0; + } else { + P = _mm_loadu_si128((__m128i *)in); + ++in; + answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer); + inwordpointer = bit - firstpass; + } + answer = _mm_and_si128(maskbits, answer); + _mm_storeu_si128((__m128i *)out, _mm_add_epi32(answer, offset)); + out += 4; + } + + } else { + for (k = 0; k < length / 4; ++k) { + __m128i answer = _mm_srli_epi32(P, inwordpointer); + const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer; + if (bit < firstpass) { + inwordpointer += bit; + } else { + P = _mm_loadu_si128((__m128i *)in); + ++in; + answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer); + inwordpointer = bit - firstpass; + } + answer = _mm_and_si128(maskbits, answer); + _mm_storeu_si128((__m128i *)out, 
_mm_add_epi32(answer, offset)); + out += 4; + } + uint32_t buffer[4]; + __m128i answer = _mm_srli_epi32(P, inwordpointer); + const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer; + if (bit < firstpass) { + inwordpointer += bit; + } else if (bit == firstpass) { + inwordpointer = 0; + } else { + P = _mm_loadu_si128((__m128i *)in); + ++in; + answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer); + inwordpointer = bit - firstpass; + } + answer = _mm_and_si128(maskbits, answer); + _mm_storeu_si128((__m128i *)buffer, _mm_add_epi32(answer, offset)); + for (k = 0; k < (length % 4); ++k) { + *out = buffer[k]; + ++out; + } + } + return in; +} + +void simdfastsetFOR(uint32_t initvalue, __m128i *in, uint32_t bit, + uint32_t value, size_t index) { + simdfastset(in, bit, value - initvalue, index); +} diff --git a/cpp-projects/base/data/simdcomp/simdfor.h b/cpp-projects/base/data/simdcomp/simdfor.h new file mode 100644 index 0000000..74642f5 --- /dev/null +++ b/cpp-projects/base/data/simdcomp/simdfor.h @@ -0,0 +1,72 @@ +/** + * This code is released under a BSD License. + */ +#ifndef INCLUDE_SIMDFOR_H_ +#define INCLUDE_SIMDFOR_H_ + +#include "portability.h" + +/* SSE2 is required */ +#include <emmintrin.h> + +#include "simdbitpacking.h" +#include "simdcomputil.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* reads 128 values from "in", writes "bit" 128-bit vectors to "out" */ +void simdpackFOR(uint32_t initvalue, const uint32_t *in, __m128i *out, + const uint32_t bit); + +/* reads "bit" 128-bit vectors from "in", writes 128 values to "out" */ +void simdunpackFOR(uint32_t initvalue, const __m128i *in, uint32_t *out, + const uint32_t bit); + +/* how many compressed bytes are needed to compress "length" integers using a +bit width of "bit" with the simdpackFOR_length function. */ +int simdpackFOR_compressedbytes(int length, const uint32_t bit); + +/* like simdpackFOR, but supports an undetermined number of inputs. +This is useful if you need to pack less than 128 integers.
Note that this +function is much slower. Compressed data is stored in the memory location +between the provided (out) pointer and the returned pointer. */ +__m128i *simdpackFOR_length(uint32_t initvalue, const uint32_t *in, int length, + __m128i *out, const uint32_t bit); + +/* like simdunpackFOR, but supports an undetermined number of inputs. +This is useful if you need to unpack less than 128 integers. Note that this +function is much slower. The read compressed data is between the provided (in) +pointer and the returned pointer. */ +const __m128i *simdunpackFOR_length(uint32_t initvalue, const __m128i *in, + int length, uint32_t *out, + const uint32_t bit); + +/* returns the value stored at the specified "slot". + * */ +uint32_t simdselectFOR(uint32_t initvalue, const __m128i *in, uint32_t bit, + int slot); + +/* given a block of 128 packed values, this function sets the value at index + * "index" to "value" */ +void simdfastsetFOR(uint32_t initvalue, __m128i *in, uint32_t bit, + uint32_t value, size_t index); + +/* searches "bit" 128-bit vectors from "in" (= length<=128 encoded integers) for + * the first encoded uint32 value which is >= |key|, and returns its position. + * It is assumed that the values stored are in sorted order. The encoded key is + * stored in "*presult". Only the first "length" decoded integers are searched; + * the others are ignored. If no value is larger than or equal to the key, + * length is returned. Length should be no larger than 128.
+ * + * If no value is larger or equal to the key, + * length is returned */ +int simdsearchwithlengthFOR(uint32_t initvalue, const __m128i *in, uint32_t bit, + int length, uint32_t key, uint32_t *presult); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* INCLUDE_SIMDFOR_H_ */ diff --git a/cpp-projects/base/data/simdcomp/simdintegratedbitpacking.c b/cpp-projects/base/data/simdcomp/simdintegratedbitpacking.c new file mode 100644 index 0000000..4d43936 --- /dev/null +++ b/cpp-projects/base/data/simdcomp/simdintegratedbitpacking.c @@ -0,0 +1,25357 @@ +/** + * This code is released under a BSD License. + */ +#include "simdintegratedbitpacking.h" + +#if defined(__SSSE3__) || defined(__AVX__) +#define Delta(curr, prev) _mm_sub_epi32(curr, _mm_alignr_epi8(curr, prev, 12)) +#else +#define Delta(curr, prev) \ + _mm_sub_epi32( \ + curr, _mm_or_si128(_mm_slli_si128(curr, 4), _mm_srli_si128(prev, 12))) +#endif + +#define PrefixSum(ret, curr, prev) \ + do { \ + const __m128i _tmp1 = _mm_add_epi32(_mm_slli_si128(curr, 8), curr); \ + const __m128i _tmp2 = _mm_add_epi32(_mm_slli_si128(_tmp1, 4), _tmp1); \ + ret = _mm_add_epi32(_tmp2, _mm_shuffle_epi32(prev, 0xff)); \ + } while (0) + +__m128i iunpack0(__m128i initOffset, const __m128i *_in, uint32_t *_out) { + __m128i *out = (__m128i *)(_out); + const __m128i constant = _mm_shuffle_epi32(initOffset, 0xff); + uint32_t i; + (void)_in; + + for (i = 0; i < 8; ++i) { + _mm_storeu_si128(out++, constant); + _mm_storeu_si128(out++, constant); + _mm_storeu_si128(out++, constant); + _mm_storeu_si128(out++, constant); + } + + return initOffset; +} + +void ipackwithoutmask0(__m128i initOffset, const uint32_t *_in, __m128i *out) { + (void)initOffset; + (void)_in; + (void)out; +} + +void ipack0(__m128i initOffset, const uint32_t *_in, __m128i *out) { + (void)initOffset; + (void)_in; + (void)out; +} + +void ipackwithoutmask1(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i 
OutReg; + + __m128i CurrIn = _mm_loadu_si128(in); + __m128i InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset 
= CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); +} + +void ipack1(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(1U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + 
initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), 
mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, 
initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask2(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_loadu_si128(in); + __m128i InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + 
++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + 
CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg 
= _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); +} + +void ipack2(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(3U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); 
+ ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask3(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_loadu_si128(in); + __m128i InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 3 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + 
initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 3 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); +} + +void ipack3(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(7U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
_mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 3 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 3 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask4(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_loadu_si128(in); + __m128i InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + 
InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; 
+ + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); +} + +void ipack4(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(15U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, 
initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset 
= CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); 
+ initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask5(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_loadu_si128(in); + __m128i InReg = Delta(CurrIn, 
initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + 
initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); +} + +void ipack5(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(31U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + 
initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + 
initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 5 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask6(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + 
__m128i CurrIn = _mm_loadu_si128(in); + __m128i InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + 
initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); +} + +/* ipack6: bit-packs 32 input vectors (loaded unaligned from _in, 128 bits each) into 6 output vectors (stored unaligned to out), using 6 bits per 32-bit lane. Every input goes through Delta(current, previous) first; Delta is defined elsewhere (presumably a lane-wise difference -- confirm) and initOffset serves as the previous vector for the first input. Each result is AND-ed with mask (63, i.e. only its low 6 bits are kept) and then OR-ed into OutReg at the next 6-bit offset; when a field straddles a 32-bit boundary, OutReg is stored and the field's remaining high bits are carried into the next word by the right shift. */ void ipack6(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(63U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 4); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 6 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); +} + +/* ipackwithoutmask7: bit-packs 32 input vectors (loaded unaligned from _in, 128 bits each) into 7 output vectors (stored unaligned to out), using 7 bits per 32-bit lane. Every input goes through Delta(current, previous) first; Delta is defined elsewhere (presumably a lane-wise difference -- confirm) and initOffset serves as the previous vector for the first input. Each result is OR-ed into OutReg at the next 7-bit offset; when a field straddles a 32-bit boundary, OutReg is stored and the field's remaining high bits are carried into the next word by the right shift. No mask is applied, so a Delta result wider than 7 bits would spill into neighbouring fields. */ void ipackwithoutmask7(__m128i initOffset, const uint32_t *_in, 
__m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + 
++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); +} + +/* ipack7: bit-packs 32 input vectors (loaded unaligned from _in, 128 bits each) into 7 output vectors (stored unaligned to out), using 7 bits per 32-bit lane. Every input goes through Delta(current, previous) first; Delta is defined elsewhere (presumably a lane-wise difference -- confirm) and initOffset serves as the previous vector for the first input. Each result is AND-ed with mask (127, i.e. only its low 7 bits are kept) and then OR-ed into OutReg at the next 7-bit offset; when a field straddles a 32-bit boundary, OutReg is stored and the field's remaining high bits are carried into the next word by the right shift. */ void ipack7(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(127U); + ; + + CurrIn = 
_mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
_mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
_mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 7 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
_mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); +} + +/* ipackwithoutmask8: bit-packs 32 input vectors (loaded unaligned from _in, 128 bits each) into 8 output vectors (stored unaligned to out), using 8 bits per 32-bit lane. Every input goes through Delta(current, previous) first; Delta is defined elsewhere (presumably a lane-wise difference -- confirm) and initOffset serves as the previous vector for the first input. Four results fill each 32-bit word exactly (shifts 0, 8, 16, 24), so no field straddles a word boundary and OutReg simply restarts from the next result after every store. No mask is applied, so a Delta result wider than 8 bits would spill into neighbouring fields. */ void ipackwithoutmask8(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, 
initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); +} + +/* ipack8: bit-packs 32 input vectors (loaded unaligned from _in, 128 bits each) into 8 output vectors (stored unaligned to out), using 8 bits per 32-bit lane. Every input goes through Delta(current, previous) first; Delta is defined elsewhere (presumably a lane-wise difference -- confirm) and initOffset serves as the previous vector for the first input. Each result is AND-ed with mask (255, i.e. only its low 8 bits are kept) before being OR-ed into OutReg. Four results fill each 32-bit word exactly (shifts 0, 8, 16, 24), so no field straddles a word boundary and OutReg simply restarts from the next result after every store. */ void ipack8(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(255U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); 
+ initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + 
OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + 
initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask9(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, 
initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, 
initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 6); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); +} + +/* ipack9: packs 32 input vectors into 9 output vectors, giving each 32-bit lane value a 9-bit field. Reads 32 consecutive 128-bit vectors from _in with unaligned loads; for each one it computes Delta(CurrIn, initOffset) and ANDs the result with mask (511 in every lane), so only the low 9 bits of each lane are kept. initOffset starts as the caller-supplied value and is then replaced by the vector just loaded. Each masked value is shifted left per lane to the next free bit position and ORed into OutReg; when a field crosses the 32-bit lane boundary, OutReg is stored to out (unaligned store) and the remaining high bits are carried into the next output vector with a per-lane right shift. Delta is defined outside this function. */void ipack9(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = 
_mm_set1_epi32(511U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = 
CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 9 - 5); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); +} + +/* ipackwithoutmask10: packs 32 input vectors into 10 output vectors, giving each 32-bit lane value a 10-bit field. Reads 32 consecutive 128-bit vectors from _in with unaligned loads; for each one it computes Delta(CurrIn, initOffset), where initOffset starts as the caller-supplied value and is then replaced by the vector just loaded. Each result is shifted left per lane to the next free bit position and ORed into OutReg; when a field crosses the 32-bit lane boundary, OutReg is stored to out (unaligned store) and the remaining high bits are carried into the next output vector with a per-lane right shift. After 16 inputs the fields end exactly on a lane boundary, so the second half restarts with OutReg = InReg and repeats the same shift sequence. No mask is applied to the Delta result, so any bits above the low 10 would overlap neighbouring fields - presumably the caller guarantees they are zero (TODO confirm). Delta is defined outside this function. */void ipackwithoutmask10(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 6); + ++in; + CurrIn 
= _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, 
initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, 
OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); +} + +/* ipack10: packs 32 input vectors into 10 output vectors, giving each 32-bit lane value a 10-bit field. Reads 32 consecutive 128-bit vectors from _in with unaligned loads; for each one it computes Delta(CurrIn, initOffset) and ANDs the result with mask (1023 in every lane), so only the low 10 bits of each lane are kept. initOffset starts as the caller-supplied value and is then replaced by the vector just loaded. Each masked value is shifted left per lane to the next free bit position and ORed into OutReg; when a field crosses the 32-bit lane boundary, OutReg is stored to out (unaligned store) and the remaining high bits are carried into the next output vector with a per-lane right shift. After 16 inputs the fields end exactly on a lane boundary, so the second half restarts with OutReg = InReg and repeats the same shift sequence. Delta is defined outside this function. */void ipack10(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(1023U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, 
initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 10 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); +} + +/* ipackwithoutmask11: packs 32 input vectors into 11 output vectors, giving each 32-bit lane value an 11-bit field. Reads 32 consecutive 128-bit vectors from _in with unaligned loads; for each one it computes Delta(CurrIn, initOffset), where initOffset starts as the caller-supplied value and is then replaced by the vector just loaded. Each result is shifted left per lane to the next free bit position and ORed into OutReg; when a field crosses the 32-bit lane boundary, OutReg is stored to out (unaligned store) and the remaining high bits are carried into the next output vector with a per-lane right shift. No mask is applied to the Delta result, so any bits above the low 11 would overlap neighbouring fields - presumably the caller guarantees they are zero (TODO confirm). Delta is defined outside this function. */void ipackwithoutmask11(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + 
OutReg = _mm_srli_epi32(InReg, 11 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + 
initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + 
++out; + OutReg = _mm_srli_epi32(InReg, 11 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 11 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); +} +

/*
 * ipack11: reads 32 consecutive 128-bit vectors from _in, computes
 * Delta(current, previous) for each one (the previous vector starts out as
 * initOffset), keeps the low 11 bits of every 32-bit lane and appends those
 * fields, lane by lane, to a bit stream that is written to out.
 *
 * initOffset  vector handed to Delta() as the predecessor of the first input.
 * _in         source; 32 unaligned 128-bit loads are performed.
 * out         destination; 11 unaligned 128-bit stores are performed.
 *
 * Delta is defined elsewhere in this file.
 * NOTE(review): presumably it yields the difference to the preceding
 * element - confirm against its definition.
 */
void ipack11(__m128i initOffset, const uint32_t *_in, __m128i *out) {
  const int bits = 11;
  const __m128i keep = _mm_set1_epi32(2047U); /* low 11 bits of each lane */
  const __m128i *src = (const __m128i *)(_in);
  __m128i word, cur, delta;
  int filled = 0; /* bits already occupied in each lane of `word` */
  int i;

  for (i = 0; i < 32; ++i) {
    cur = _mm_loadu_si128(src + i);
    delta = _mm_and_si128(Delta(cur, initOffset), keep);
    initOffset = cur; /* predecessor for the next Delta() call */

    /* Place the field at the current bit position of the output word. */
    if (filled == 0) {
      word = delta;
    } else {
      word = _mm_or_si128(word, _mm_slli_epi32(delta, filled));
    }
    filled += bits;

    if (filled >= 32) {
      /* Word is full: flush it, then carry over the bits that did not fit. */
      _mm_storeu_si128(out++, word);
      filled -= 32;
      if (filled != 0) {
        word = _mm_srli_epi32(delta, bits - filled);
      }
    }
  }
}

/*
 * ipackwithoutmask12: same packing scheme as the masked variant, with 12-bit
 * fields, but the Delta() results are used as they are (no AND with a mask).
 * Any bit a result has at position 12 or above is therefore OR-ed into the
 * neighbouring field.
 * NOTE(review): callers presumably guarantee every delta fits in 12 bits -
 * confirm.
 *
 * initOffset  vector handed to Delta() as the predecessor of the first input.
 * _in         source; 32 unaligned 128-bit loads are performed.
 * out         destination; 12 unaligned 128-bit stores are performed.
 */
void ipackwithoutmask12(__m128i initOffset, const uint32_t *_in, __m128i *out) {
  const int bits = 12;
  const __m128i *src = (const __m128i *)(_in);
  __m128i word, cur, delta;
  int filled = 0; /* bits already occupied in each lane of `word` */
  int i;

  for (i = 0; i < 32; ++i) {
    cur = _mm_loadu_si128(src + i);
    delta = Delta(cur, initOffset);
    initOffset = cur; /* predecessor for the next Delta() call */

    /* A fresh word starts from the field itself, not from leftovers. */
    if (filled == 0) {
      word = delta;
    } else {
      word = _mm_or_si128(word, _mm_slli_epi32(delta, filled));
    }
    filled += bits;

    if (filled >= 32) {
      _mm_storeu_si128(out++, word);
      filled -= 32;
      /* Only a field that straddled the word boundary leaves a remainder. */
      if (filled != 0) {
        word = _mm_srli_epi32(delta, bits - filled);
      }
    }
  }
}

/*
 * ipack12: reads 32 consecutive 128-bit vectors from _in, computes
 * Delta(current, previous) for each one (the previous vector starts out as
 * initOffset), keeps the low 12 bits of every 32-bit lane and appends those
 * fields, lane by lane, to a bit stream that is written to out.
 *
 * initOffset  vector handed to Delta() as the predecessor of the first input.
 * _in         source; 32 unaligned 128-bit loads are performed.
 * out         destination; 12 unaligned 128-bit stores are performed.
 */
void ipack12(__m128i initOffset, const uint32_t *_in, __m128i *out) {
  const int bits = 12;
  const __m128i keep = _mm_set1_epi32(4095U); /* low 12 bits of each lane */
  const __m128i *src = (const __m128i *)(_in);
  __m128i word, cur, delta;
  int filled = 0; /* bits already occupied in each lane of `word` */
  int i;

  for (i = 0; i < 32; ++i) {
    cur = _mm_loadu_si128(src + i);
    delta = _mm_and_si128(Delta(cur, initOffset), keep);
    initOffset = cur; /* predecessor for the next Delta() call */

    /* Place the field at the current bit position of the output word. */
    if (filled == 0) {
      word = delta;
    } else {
      word = _mm_or_si128(word, _mm_slli_epi32(delta, filled));
    }
    filled += bits;

    if (filled >= 32) {
      /* Word is full: flush it, then carry over the bits that did not fit. */
      _mm_storeu_si128(out++, word);
      filled -= 32;
      if (filled != 0) {
        word = _mm_srli_epi32(delta, bits - filled);
      }
    }
  }
}

/*
 * ipackwithoutmask13: same packing scheme as the masked variant, with 13-bit
 * fields, but the Delta() results are used as they are (no AND with a mask).
 * Any bit a result has at position 13 or above is therefore OR-ed into the
 * neighbouring field.
 * NOTE(review): callers presumably guarantee every delta fits in 13 bits -
 * confirm.
 *
 * initOffset  vector handed to Delta() as the predecessor of the first input.
 * _in         source; 32 unaligned 128-bit loads are performed.
 * out         destination; 13 unaligned 128-bit stores are performed.
 */
void ipackwithoutmask13(__m128i initOffset, const uint32_t *_in, __m128i *out) {
  const int bits = 13;
  const __m128i *src = (const __m128i *)(_in);
  __m128i word, cur, delta;
  int filled = 0; /* bits already occupied in each lane of `word` */
  int i;

  for (i = 0; i < 32; ++i) {
    cur = _mm_loadu_si128(src + i);
    delta = Delta(cur, initOffset);
    initOffset = cur; /* predecessor for the next Delta() call */

    /* A fresh word starts from the field itself, not from leftovers. */
    if (filled == 0) {
      word = delta;
    } else {
      word = _mm_or_si128(word, _mm_slli_epi32(delta, filled));
    }
    filled += bits;

    if (filled >= 32) {
      _mm_storeu_si128(out++, word);
      filled -= 32;
      /* Only a field that straddled the word boundary leaves a remainder. */
      if (filled != 0) {
        word = _mm_srli_epi32(delta, bits - filled);
      }
    }
  }
}

/*
 * ipack13: reads 32 consecutive 128-bit vectors from _in, computes
 * Delta(current, previous) for each one (the previous vector starts out as
 * initOffset), keeps the low 13 bits of every 32-bit lane and appends those
 * fields, lane by lane, to a bit stream that is written to out.
 *
 * initOffset  vector handed to Delta() as the predecessor of the first input.
 * _in         source; 32 unaligned 128-bit loads are performed.
 * out         destination; 13 unaligned 128-bit stores are performed.
 */
void ipack13(__m128i initOffset, const uint32_t *_in, __m128i *out) {
  const int bits = 13;
  const __m128i keep = _mm_set1_epi32(8191U); /* low 13 bits of each lane */
  const __m128i *src = (const __m128i *)(_in);
  __m128i word, cur, delta;
  int filled = 0; /* bits already occupied in each lane of `word` */
  int i;

  for (i = 0; i < 32; ++i) {
    cur = _mm_loadu_si128(src + i);
    delta = _mm_and_si128(Delta(cur, initOffset), keep);
    initOffset = cur; /* predecessor for the next Delta() call */

    /* Place the field at the current bit position of the output word. */
    if (filled == 0) {
      word = delta;
    } else {
      word = _mm_or_si128(word, _mm_slli_epi32(delta, filled));
    }
    filled += bits;

    if (filled >= 32) {
      /* Word is full: flush it, then carry over the bits that did not fit. */
      _mm_storeu_si128(out++, word);
      filled -= 32;
      if (filled != 0) {
        word = _mm_srli_epi32(delta, bits - filled);
      }
    }
  }
}

+void ipackwithoutmask14(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; +
CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + 
++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + 
initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); +} + +void ipack14(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(16383U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + 
initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + 
initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + 
CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 14 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask15(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg; + + __m128i CurrIn = _mm_loadu_si128(in); + __m128i InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, 
initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 4); + ++in; + 
CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); +} + +void ipack15(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(32767U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 
11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; 
+ CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + 
CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 15 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + 
_mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask16(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = 
CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); +} + +void ipack16(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(65535U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 
16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask17(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + 
OutReg = _mm_srli_epi32(InReg, 17 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 
27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 5); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = 
_mm_srli_epi32(InReg, 17 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); +} + +void ipack17(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(131071U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, 
initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 
16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + 
_mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 17 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask18(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, 
initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + 
_mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg 
= _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); +} + +void ipack18(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(262143U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), 
mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 14); + ++in; + 
CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 16); + ++in; + CurrIn 
= _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 18 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask19(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + 
CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + 
++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 17); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 9); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); +} +/* ipack19: bit-packs 32 input vectors into 19 output vectors, 19 bits per 32-bit lane. Reads 32 unaligned __m128i values from _in, replaces each with Delta(current, previous) (initOffset seeds the first one), keeps the low 19 bits via mask, and ORs the results into out at running bit offsets, storing each 128-bit word once it is full. Delta() is defined outside this chunk; presumably a lane-wise difference -- confirm against its definition. initOffset and out are by-value parameters, so the caller's copies are not advanced. */ +void ipack19(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; +/* 524287 == 2^19 - 1: selects the low 19 bits of every 32-bit lane. */ + const __m128i mask = _mm_set1_epi32(524287U); + ; +/* The first value starts a fresh output word at bit offset 0; every loaded vector becomes the Delta() base for the next one. */ + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); +/* Word flushed. The second value only had room for its low 13 bits, so the next word starts with its remaining 6 high bits (shift right by 19 - 6 = 13); the same fill / flush / carry pattern repeats with different offsets for the rest of the function. */ + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + ++in; +
CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 17); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset =
CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); +
+ ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg =
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 19 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); +} +/* ipackwithoutmask20: bit-packs 32 input vectors into 20 output vectors at 20 bits per 32-bit lane, without masking. Reads 32 unaligned __m128i values from _in, replaces each with Delta(current, previous) (initOffset seeds the first one), and ORs the results into out at running bit offsets, storing each 128-bit word once it is full. NOTE(review): no mask is applied, so any Delta() lane that needs more than 20 bits would overlap neighbouring packed values -- presumably callers guarantee the range; confirm. Delta() is defined outside this chunk. */ +void ipackwithoutmask20(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; +/* The first value starts a fresh output word at bit offset 0; every loaded vector becomes the Delta() base for the next one. */ + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); +/* Word flushed. Only the low 12 bits of the second value fit, so the next word starts with that value shifted right by 20 - 8 = 12. */ + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg =
Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); +/* 8 values * 20 bits == 5 full words, so nothing is carried here: the next word starts from the next value alone. This 8-load / 5-store group runs four times in total. */ + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); +
initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn =
_mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); +} +/* ipack20: bit-packs 32 input vectors into 20 output vectors, 20 bits per 32-bit lane. Reads 32 unaligned __m128i values from _in, replaces each with Delta(current, previous) (initOffset seeds the first one), keeps the low 20 bits via mask, and ORs the results into out at running bit offsets, storing each 128-bit word once it is full. Delta() is defined outside this chunk; presumably a lane-wise difference -- confirm against its definition. initOffset and out are by-value parameters, so the caller's copies are not advanced. */ +void ipack20(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; +/* 1048575 == 2^20 - 1: selects the low 20 bits of every 32-bit lane. */ + const __m128i mask = _mm_set1_epi32(1048575U); + ; +/* The first value starts a fresh output word at bit offset 0; every loaded vector becomes the Delta() base for the next one. */ + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); +/* Word flushed. Only the low 12 bits of the second value fit, so the next word starts with its remaining 8 high bits (shift right by 20 - 8 = 12). */ + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; +
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); +/* 8 values * 20 bits == 5 full words, so nothing is carried here: the next word starts from the next value alone. This 8-load / 5-store group runs four times in total. */ + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); +
InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; +
OutReg = _mm_srli_epi32(InReg, 20 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg =
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 20 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); +} +/* ipackwithoutmask21: bit-packs 32 input vectors into 21 output vectors at 21 bits per 32-bit lane, without masking. Reads 32 unaligned __m128i values from _in, replaces each with Delta(current, previous) (initOffset seeds the first one), and ORs the results into out at running bit offsets, storing each 128-bit word once it is full. NOTE(review): no mask is applied, so any Delta() lane that needs more than 21 bits would overlap neighbouring packed values -- presumably callers guarantee the range; confirm. Delta() is defined outside this chunk. */ +void ipackwithoutmask21(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; +/* The first value starts a fresh output word at bit offset 0; every loaded vector becomes the Delta() base for the next one. */ + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); +/* Word flushed. Only the low 11 bits of the second value fit, so the next word starts with that value shifted right by 21 - 10 = 11; the same fill / flush / carry pattern repeats with different offsets for the rest of the function. */ + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg =
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 19); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 17); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg,
_mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg,
24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); +} + +void ipack21(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(2097151U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
_mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 19); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 17); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 
21 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 21 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); +} +/* ipackwithoutmask22: delta-encodes and bit-packs 32 SSE vectors (128 uint32_t values, read from _in with unaligned loads) into 22 vectors written to out with unaligned stores, using 22 bits per value in each 32-bit lane. Every loaded vector is replaced by Delta(current, previous); initOffset serves as the "previous" vector for the first load and is then overwritten locally with each loaded vector. Fields that do not fit in the current output word are continued in the next one via the right shift that follows each store. No mask is applied, so delta bits above bit 21 are OR-ed into the neighbouring field. NOTE(review): Delta is defined elsewhere (presumably a lane-wise difference) and callers presumably guarantee each delta fits in 22 bits -- confirm. The in/out pointers are advanced only as local copies. */ +void ipackwithoutmask22(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, 
initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset 
= CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + 
++out; + OutReg = _mm_srli_epi32(InReg, 22 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = 
_mm_srli_epi32(InReg, 22 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); +} +/* ipack22: delta-encodes and bit-packs 32 SSE vectors (128 uint32_t values, read from _in with unaligned loads) into 22 vectors written to out with unaligned stores, using 22 bits per value in each 32-bit lane. Every loaded vector is replaced by Delta(current, previous) AND-ed with 4194303 (2^22 - 1), so only the low 22 bits of each delta are kept; initOffset serves as the "previous" vector for the first load and is then overwritten locally with each loaded vector. Fields that do not fit in the current output word are continued in the next one via the right shift that follows each store. NOTE(review): Delta is defined elsewhere (presumably a lane-wise difference) -- confirm. The in/out pointers are advanced only as local copies. */ +void ipack22(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(4194303U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + 
_mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 20); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; 
+ + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 22 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; 
+ OutReg = _mm_srli_epi32(InReg, 22 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); +} +/* ipackwithoutmask23: delta-encodes and bit-packs 32 SSE vectors (128 uint32_t values, read from _in with unaligned loads) into 23 vectors written to out with unaligned stores, using 23 bits per value in each 32-bit lane. Every loaded vector is replaced by Delta(current, previous); initOffset serves as the "previous" vector for the first load and is then overwritten locally with each loaded vector. Fields that do not fit in the current output word are continued in the next one via the right shift that follows each store. No mask is applied, so delta bits above bit 22 are OR-ed into the neighbouring field. NOTE(review): Delta is defined elsewhere (presumably a lane-wise difference) and callers presumably guarantee each delta fits in 23 bits -- confirm. The in/out pointers are advanced only as local copies. */ +void ipackwithoutmask23(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 19); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, 
initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 21); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 17); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset 
= CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 22); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); +} +/* ipack23: delta-encodes and bit-packs 32 SSE vectors (128 uint32_t values, read from _in with unaligned loads) into 23 vectors written to out with unaligned stores, using 23 bits per value in each 32-bit lane. Every loaded vector is replaced by Delta(current, previous) AND-ed with 8388607 (2^23 - 1), so only the low 23 bits of each delta are kept; initOffset serves as the "previous" vector for the first load and is then overwritten locally with each loaded vector. Fields that do not fit in the current output word are continued in the next one via the right shift that follows each store. NOTE(review): Delta is defined elsewhere (presumably a lane-wise difference) -- confirm. The in/out pointers are advanced only as local copies. */ +void ipack23(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(8388607U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 19); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 21); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 17); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = 
_mm_srli_epi32(InReg, 23 - 22); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 23 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); +} +/* ipackwithoutmask24: reads 32 consecutive 128-bit vectors (128 uint32 values) from _in with unaligned loads and writes 24 packed 128-bit vectors to out, giving every value 24 bits inside its 32-bit lane. Each loaded vector is first combined with the previously loaded one through Delta(); for the very first vector the reference is initOffset. Delta is defined outside this function - presumably a lane-wise difference, TODO confirm at its definition. No mask is applied here, so every Delta result has to fit in 24 bits already; any higher bits are OR-ed into the neighbouring packed field. The layout repeats every 4 inputs / 3 outputs (left shifts 0, 24, 16, 8). initOffset is taken by value and nothing is returned, so the caller's copy is not updated. */ +void ipackwithoutmask24(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; /* the vector just loaded becomes the reference for the next Delta call */ + OutReg = InReg; /* first value of a group starts at bit 0 of the output word */ + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg 
= _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, 
initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = 
CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + 
++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); +} +/* ipack24: reads 32 consecutive 128-bit vectors (128 uint32 values) from _in with unaligned loads and writes 24 packed 128-bit vectors to out, giving every value 24 bits inside its 32-bit lane. Each loaded vector is first combined with the previously loaded one through Delta(); for the very first vector the reference is initOffset. Delta is defined outside this function - presumably a lane-wise difference, TODO confirm at its definition. Every Delta result is AND-ed with mask before it is merged, so bits above bit 23 are dropped instead of leaking into the neighbouring packed field. The layout repeats every 4 inputs / 3 outputs (left shifts 0, 24, 16, 8). initOffset is taken by value and nothing is returned, so the caller's copy is not updated. */ +void ipack24(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(16777215U); /* 16777215 = 2^24 - 1 = 0x00FFFFFF: keeps the low 24 bits of every 32-bit lane */ + ; /* stray empty statement, has no effect */ + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; /* the vector just loaded becomes the reference for the next Delta call */ + OutReg = InReg; /* first value of a group starts at bit 0 of the output word */ + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); /* only 8 bits of this value fitted in the word just stored; its upper 16 bits open the next word */ + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg 
= _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = 
CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 24 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); +} +/* ipackwithoutmask25: reads 32 consecutive 128-bit vectors (128 uint32 values) from _in with unaligned loads and writes 25 packed 128-bit vectors to out, giving every value 25 bits inside its 32-bit lane. Each loaded vector is first combined with the previously loaded one through Delta(); for the very first vector the reference is initOffset. Delta is defined outside this function - presumably a lane-wise difference, TODO confirm at its definition. No mask is applied here, so every Delta result has to fit in 25 bits already; any higher bits are OR-ed into the neighbouring packed field. A value that straddles two output words is split: its low part is shifted left into the current word, its remaining high part (shift right by 25 minus the bits still owed) opens the next word. initOffset is taken by value and nothing is returned, so the caller's copy is not updated. */ +void ipackwithoutmask25(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; /* the vector just loaded becomes the reference for the next Delta call */ + OutReg = InReg; /* first value starts at bit 0 of the first output word */ + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 22); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 19); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 23); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 17); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = 
_mm_srli_epi32(InReg, 25 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 21); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_storeu_si128(out, OutReg); +} +/* ipack25: reads 32 consecutive 128-bit vectors (128 uint32 values) from _in with unaligned loads and writes 25 packed 128-bit vectors to out, giving every value 25 bits inside its 32-bit lane. Each loaded vector is first combined with the previously loaded one through Delta(); for the very first vector the reference is initOffset. Delta is defined outside this function - presumably a lane-wise difference, TODO confirm at its definition. Every Delta result is AND-ed with mask before it is merged, so bits above bit 24 are dropped instead of leaking into the neighbouring packed field. initOffset is taken by value and nothing is returned, so the caller's copy is not updated. */ +void ipack25(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(33554431U); /* 33554431 = 2^25 - 1 = 0x01FFFFFF: keeps the low 25 bits of every 32-bit lane */ + ; /* stray empty statement, has no effect */ + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; /* the vector just loaded becomes the reference for the next Delta call */ + OutReg = InReg; /* first value starts at bit 0 of the first output word */ + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 18); /* only 7 bits of this value fitted in the word just stored; its upper 18 bits open the next word */ + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = 
_mm_srli_epi32(InReg, 25 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 22); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = 
_mm_srli_epi32(InReg, 25 - 19); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 23); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 17); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 21); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 25 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_storeu_si128(out, OutReg); +} + /* ipackwithoutmask26: reads 32 consecutive 128-bit vectors from _in (unaligned loads), replaces each with Delta(CurrIn, initOffset), and packs the results back to back at 26 bits per value inside each 32-bit lane, writing 26 vectors to out (unaligned stores). initOffset is the caller-supplied seed for the first vector and is then overwritten with each loaded vector so that it is the second operand of the next Delta call. Delta is defined outside this block; presumably a lane-wise difference against the preceding values -- confirm. No mask is applied, so any Delta bit above bit 25 would be OR-ed into the neighbouring packed field; NOTE(review): callers presumably guarantee every delta fits in 26 bits. */ +void ipackwithoutmask26(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 20); /* only the low 6 bits of this value fit in the word just stored; the next word starts with its remaining high 20 bits */ + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 22); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, 
initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 
8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 22); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + 
_mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); +} + /* ipack26: reads 32 consecutive 128-bit vectors from _in (unaligned loads), computes Delta(CurrIn, initOffset) for each, ANDs the result with mask so that only the low 26 bits of every 32-bit lane are kept, and packs those 26-bit fields back to back inside each lane, writing 26 vectors to out (unaligned stores). initOffset is the caller-supplied seed for the first vector and is then overwritten with each loaded vector so that it is the second operand of the next Delta call. Delta is defined outside this block; presumably a lane-wise difference against the preceding values -- confirm. */ +void ipack26(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(67108863U); /* 67108863 = 2^26 - 1: selects the low 26 bits of each lane */ + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 20); /* only the low 6 bits of this value fit in the word just stored; the next word starts with its remaining high 20 bits */ + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
_mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 22); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
_mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
_mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 22); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
_mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 26 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); +} + /* ipackwithoutmask27: reads 32 consecutive 128-bit vectors from _in (unaligned loads), replaces each with Delta(CurrIn, initOffset), and packs the results back to back at 27 bits per value inside each 32-bit lane, writing 27 vectors to out (unaligned stores). initOffset is the caller-supplied seed for the first vector and is then overwritten with each loaded vector so that it is the second operand of the next Delta call. Delta is defined outside this block; presumably a lane-wise difference against the preceding values -- confirm. No mask is applied, so any Delta bit above bit 26 would be OR-ed into the neighbouring packed field; NOTE(review): callers presumably guarantee every delta fits in 27 bits. */ +void ipackwithoutmask27(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 22); /* only the low 5 bits of this value fit in the word just stored; the next word starts with its remaining high 22 bits */ + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 17); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 12); 
+ ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 19); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + 
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 26); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 21); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 23); + ++in; + CurrIn = _mm_loadu_si128(in); 
+ InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 25); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + _mm_storeu_si128(out, OutReg); +} + /* ipack27: reads 32 consecutive 128-bit vectors from _in (unaligned loads), computes Delta(CurrIn, initOffset) for each, ANDs the result with mask so that only the low 27 bits of every 32-bit lane are kept, and packs those 27-bit fields back to back inside each lane, writing 27 vectors to out (unaligned stores). initOffset is the caller-supplied seed for the first vector and is then overwritten with each loaded vector so that it is the second operand of the next Delta call. Delta is defined outside this block; presumably a lane-wise difference against the preceding values -- confirm. */ +void ipack27(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(134217727U); /* 134217727 = 2^27 - 1: selects the low 27 bits of each lane */ + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 22); /* only the low 5 bits of this value fit in the word just stored; the next word starts with its remaining high 22 bits */ + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 17); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 7); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 19); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + ++in; + CurrIn = _mm_loadu_si128(in); 
+ InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 26); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 21); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = 
_mm_srli_epi32(InReg, 27 - 23); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 25); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 27 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + _mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask28(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + 
OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + 
++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, 
_mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); +} + +void ipack28(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(268435455U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 
8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + ++in; + 
CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 8); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 28 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask29(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 26); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 23); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 17); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 14); + ++in; + 
CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 28); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 25); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 22); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg 
= _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 19); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + 
OutReg = _mm_srli_epi32(InReg, 29 - 27); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 21); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = 
_mm_srli_epi32(InReg, 29 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + _mm_storeu_si128(out, OutReg); +} + +void ipack29(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(536870911U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 26); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 23); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 17); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, 
initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 28); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 25); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 22); + ++in; + 
CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 19); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + 
OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 27); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 21); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = 
_mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 29 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + _mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask30(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 28); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 26); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 22); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 6); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 28); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 26); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 22); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + 
++out; + OutReg = _mm_srli_epi32(InReg, 30 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + + ++out; + 
OutReg = _mm_srli_epi32(InReg, 30 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + _mm_storeu_si128(out, OutReg); +} + +void ipack30(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(1073741823U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 28); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 26); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 22); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), 
mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + 
_mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 28); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 26); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + 
_mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 22); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 8); + ++in; + CurrIn = 
_mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 30 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + _mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask31(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 30); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 29); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = 
_mm_srli_epi32(InReg, 31 - 28); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 27); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 26); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 25); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 23); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 22); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 21); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = 
_mm_srli_epi32(InReg, 31 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 19); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 17); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = 
_mm_srli_epi32(InReg, 31 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = 
_mm_srli_epi32(InReg, 31 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = Delta(CurrIn, initOffset); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + _mm_storeu_si128(out, OutReg); +} + +void ipack31(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, CurrIn, InReg; + + const __m128i mask = _mm_set1_epi32(2147483647U); + ; + + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + OutReg = InReg; + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 31)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 30); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 30)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 29); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = 
_mm_or_si128(OutReg, _mm_slli_epi32(InReg, 29)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 28); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 28)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 27); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 27)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 26); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 26)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 25); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 25)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 24); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 24)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 23); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 23)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 22); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 22)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = 
_mm_srli_epi32(InReg, 31 - 21); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 21)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 20); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 20)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 19); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 19)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 18); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 18)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 17); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 17)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 16); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 16)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 15); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 15)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 14); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, 
initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 14)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 13); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 13)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 12); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 12)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 11); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 11)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 10); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 10)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 9); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 9)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 8); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 8)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 7); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 7)); + 
_mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 6); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 6)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 5); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 5)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 4); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 4)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 3); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 3)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 2); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 2)); + _mm_storeu_si128(out, OutReg); + + ++out; + OutReg = _mm_srli_epi32(InReg, 31 - 1); + ++in; + CurrIn = _mm_loadu_si128(in); + InReg = _mm_and_si128(Delta(CurrIn, initOffset), mask); + initOffset = CurrIn; + + OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, 1)); + _mm_storeu_si128(out, OutReg); +} + +void ipackwithoutmask32(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, InReg; + (void)initOffset; + + InReg = _mm_loadu_si128(in); + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, 
OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = 
_mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); +} + +void ipack32(__m128i initOffset, const uint32_t *_in, __m128i *out) { + const __m128i *in = (const __m128i *)(_in); + __m128i OutReg, InReg; + (void)initOffset; + + InReg = _mm_loadu_si128(in); + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg 
= _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; 
+ _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); + + ++out; + ++in; + InReg = _mm_loadu_si128(in); + + OutReg = InReg; + _mm_storeu_si128(out, OutReg); +} + +__m128i iunpack1(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 1) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, 
initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 30); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack2(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 2) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = 
_mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + 
_mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + 
PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack3(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 3) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, 
initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 3 - 1), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, 
OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 3 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + PrefixSum(OutReg, OutReg, 
initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack4(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 4) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = 
OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = 
_mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack5(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = 
_mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 5) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 3), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + 
PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 1), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = 
OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + 
_mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack6(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 6) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = 
_mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + 
OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, 
OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +/* iunpack7: unpacks 7-bit fields. Loads 7 consecutive __m128i words from `in` (one initial load plus six `++in` reloads) and produces 32 output vectors: each one isolates a 7-bit field in every 32-bit lane (mask (1U << 7) - 1), is passed through PrefixSum together with the previously produced vector (initially `initOffset`), and is written to `_out` with an unaligned 128-bit store. The final field (shift by 25) reaches bit 31 and is therefore taken without the mask. Returns the last vector stored. NOTE(review): PrefixSum is defined outside this chunk; presumably it turns the unpacked deltas into running sums; confirm against its definition. */__m128i iunpack7(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 7) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + +/* this field straddles two input words: the shift by 28 leaves the 4 high bits of the current word, then the next word is loaded and its low 3 bits are moved up by 7 - 3 = 4 and OR-ed in; the same pattern repeats at every later `++in` */ tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 3), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + 
OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + 
OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 5), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 1), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = 
_mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +/* iunpack8: unpacks 8-bit fields. Loads 8 consecutive __m128i words from `in` (one initial load plus seven `++in` reloads); every word yields four fields per 32-bit lane at shifts 0, 8, 16 and 24, so no field crosses a word boundary. Each of the 32 extracted vectors is passed through PrefixSum together with the previously produced vector (initially `initOffset`) and written to `_out` with an unaligned 128-bit store. Returns the last vector stored. NOTE(review): PrefixSum is defined outside this chunk; presumably it turns the unpacked deltas into running sums; confirm against its definition. */__m128i iunpack8(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 8) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + +/* top byte: after the shift by 24 only 8 bits remain, so no mask is applied; the next input word is loaded here for the following four fields */ tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, 
initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 
16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; 
+ _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +/* iunpack9: unpacks 9-bit fields. Loads 9 consecutive __m128i words from `in` (one initial load plus eight `++in` reloads) and produces 32 output vectors: each one isolates a 9-bit field in every 32-bit lane (mask (1U << 9) - 1), is passed through PrefixSum together with the previously produced vector (initially `initOffset`), and is written to `_out` with an unaligned 128-bit store. The final field (shift by 23) reaches bit 31 and is therefore taken without the mask. Returns the last vector stored. NOTE(review): PrefixSum is defined outside this chunk; presumably it turns the unpacked deltas into running sums; confirm against its definition. */__m128i iunpack9(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 9) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + +/* this field straddles two input words: the shift by 27 leaves the 5 high bits of the current word, then the next word is loaded and its low 4 bits are moved up by 9 - 4 = 5 and OR-ed in; the same pattern repeats at every later `++in` */ tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 3), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 7), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 1), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; 
+ _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 5), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +/* iunpack10: unpacks 10-bit fields. Loads 10 consecutive __m128i words from `in` (one initial load plus nine `++in` reloads) and produces 32 output vectors: each one isolates a 10-bit field in every 32-bit lane (mask (1U << 10) - 1), is passed through PrefixSum together with the previously produced vector (initially `initOffset`), and is written to `_out` with an unaligned 128-bit store. The bit layout repeats after five input words (16 fields); the fields at shift 22 reach bit 31 and are therefore taken without the mask. Returns the last vector stored. NOTE(review): PrefixSum is defined outside this chunk; presumably it turns the unpacked deltas into running sums; confirm against its definition. */__m128i iunpack10(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 10) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + +/* this field straddles two input words: the shift by 30 leaves the 2 high bits of the current word, then the next word is loaded and its low 8 bits are moved up by 10 - 8 = 2 and OR-ed in; the same pattern repeats at every later `++in` that is followed by an OR */ tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + 
_mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + 
PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = 
OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +/* iunpack11: unpacks 11-bit fields. Loads 11 consecutive __m128i words from `in` (one initial load plus ten `++in` reloads) and produces 32 output vectors: each one isolates an 11-bit field in every 32-bit lane (mask (1U << 11) - 1), is passed through PrefixSum together with the previously produced vector (initially `initOffset`), and is written to `_out` with an unaligned 128-bit store. Every third field crosses a word boundary. Returns the last vector stored. NOTE(review): PrefixSum is defined outside this chunk; presumably it turns the unpacked deltas into running sums; confirm against its definition. */__m128i iunpack11(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 11) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; 
+ _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + +/* this field straddles two input words: the shift by 22 leaves the 10 high bits of the current word, then the next word is loaded and its lowest bit is moved up by 11 - 1 = 10 and OR-ed in; each later straddle takes one bit fewer from the old word and one more from the new word */ tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 1), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 3), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + 
PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 5), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 7), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 9), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 11 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + +/* final field of iunpack11: the shift by 21 leaves exactly the top 11 bits, so no mask is applied */ tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack12(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 12) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, 
mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} +/* iunpack13: decode 32 vectors of 13-bit fields. Each 32-bit lane of the input is its own bit stream, lowest bits first. Thirteen 128-bit words are read from in with unaligned loads; every field is isolated with a logical right shift and the 13-bit mask, and a field that straddles a word boundary is completed with the low bits of the next word. Each decoded vector is passed through PrefixSum(OutReg, OutReg, initOffset), becomes the new initOffset, and is stored unaligned through _out (32 stores, i.e. 128 uint32_t). Returns the last vector stored. NOTE(review): PrefixSum is defined outside this chunk; presumably it adds the previous vector so the output is a running sum of the packed deltas - confirm. */ +__m128i iunpack13(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 13) - 1); +/* mask selects the low 13 bits of each 32-bit lane; the first field needs no shift */ + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); +/* third field straddles the word boundary: 6 bits remain in this word (the logical shift zero-fills, so no mask is needed) and the low 7 bits of the next word are shifted up by 6 and ORed in */ + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 7), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp =
_mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 1), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg,
_mm_and_si128(_mm_slli_epi32(InReg, 13 - 9), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 3), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; +
_mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 11), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 5), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; +
_mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} +/* iunpack14: decode 32 vectors of 14-bit fields. Each 32-bit lane of the input is its own bit stream, lowest bits first. Fourteen 128-bit words are read from in with unaligned loads; every field is isolated with a logical right shift and the 14-bit mask, and a field that straddles a word boundary is completed with the low bits of the next word. Each decoded vector is passed through PrefixSum(OutReg, OutReg, initOffset), becomes the new initOffset, and is stored unaligned through _out (32 stores, i.e. 128 uint32_t). Returns the last vector stored. NOTE(review): PrefixSum is defined outside this chunk; presumably it adds the previous vector so the output is a running sum of the packed deltas - confirm. */ +__m128i iunpack14(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 14) - 1); +/* mask selects the low 14 bits of each 32-bit lane; the first field needs no shift */ + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); +/* third field straddles the word boundary: 4 bits remain in this word (the logical shift zero-fills, so no mask is needed) and the low 10 bits of the next word are shifted up by 4 and ORed in */ + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); +
OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); +/* bits 18..31 are exactly one field: no mask and no merge, just advance to the next input word */ + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++,
OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + +
tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} +/* iunpack15: decode 32 vectors of 15-bit fields. Each 32-bit lane of the input is its own bit stream, lowest bits first. Fifteen 128-bit words are read from in with unaligned loads; every field is isolated with a logical right shift and the 15-bit mask, and a field that straddles a word boundary is completed with the low bits of the next word. Each decoded vector is passed through PrefixSum(OutReg, OutReg, initOffset), becomes the new initOffset, and is stored unaligned through _out (32 stores, i.e. 128 uint32_t). Returns the last vector stored. NOTE(review): PrefixSum is defined outside this chunk; presumably it adds the previous vector so the output is a running sum of the packed deltas - confirm. */ +__m128i iunpack15(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 15) - 1); +/* mask selects the low 15 bits of each 32-bit lane; the first field needs no shift */ + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); +/* third field straddles the word boundary: 2 bits remain in this word (the logical shift zero-fills, so no mask is needed) and the low 13 bits of the next word are shifted up by 2 and ORed in */ + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 13), mask)); + +
PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 11), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 9), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 7), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 5), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); +
PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 3), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 1), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); +
PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in);
+ OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} +/* iunpack16: decode 32 vectors of 16-bit fields. Each 32-bit lane of an input word holds exactly two fields: the low half is taken with the 16-bit mask and the high half with a logical right shift by 16, after which the next word is loaded, so no field straddles a word boundary. Sixteen 128-bit words are read from in with unaligned loads. Each decoded vector is passed through PrefixSum(OutReg, OutReg, initOffset), becomes the new initOffset, and is stored unaligned through _out (32 stores, i.e. 128 uint32_t). Returns the last vector stored. NOTE(review): PrefixSum is defined outside this chunk; presumably it adds the previous vector so the output is a running sum of the packed deltas - confirm. */ +__m128i iunpack16(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 16) - 1); +/* mask selects the low 16 bits of each 32-bit lane; the first field needs no shift */ + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); +/* high half: the logical shift zero-fills, so no mask is needed; then advance to the next input word */ + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); +
OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; +
_mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); +
PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack17(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 17) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; +
_mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 17 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 1), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 3), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 5), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 7), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); 
+ OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 9), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 11), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 13), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 15), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack18(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = 
_mm_set1_epi32((1U << 18) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 2), 
mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 4), mask)); + + 
PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); 
+ OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack19(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 19) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = 
_mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 18), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 5), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 11), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 17), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + 
initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 3), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 9), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 15), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 1), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + 
PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 7), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 13), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack20(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 20) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 
16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); 
+ initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = 
OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack21(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 21) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 9), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, 
OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 19), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 18), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 7), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 17), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 6), 
mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 5), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 15), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + 
_mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 3), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 13), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 1), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = 
tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 11), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + /* iunpack22: decodes 128 values packed 22 bits apiece. Reads 22 consecutive 128-bit vectors starting at in (unaligned loads) and, in each of 32 steps, assembles one vector holding a 22-bit field in each of its four 32-bit lanes. Every assembled vector is run through PrefixSum(OutReg, OutReg, initOffset), becomes the new initOffset, and is written to the next 16 bytes of _out (unaligned store), so 128 uint32_t are written in total. Returns the last vector written. NOTE(review): PrefixSum is defined outside this chunk; presumably it converts the unpacked deltas into running totals seeded by initOffset -- confirm against its definition. */ +__m128i iunpack22(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 22) - 1); /* low 22 bits set in each 32-bit lane */ + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; /* only the top 10 bits of this word remain; the other 12 bits are OR-ed in from the next input vector below */ + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask)); + + 
PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 18), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + 
_mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = 
_mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 18), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 22 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + /* iunpack23: decodes 128 values packed 23 bits apiece. Reads 23 consecutive 128-bit vectors starting at in (unaligned loads) and, in each of 32 steps, assembles one vector holding a 23-bit field in each of its four 32-bit lanes. Every assembled vector is run through PrefixSum(OutReg, OutReg, initOffset), becomes the new initOffset, and is written to the next 16 bytes of _out (unaligned store), so 128 uint32_t are written in total. Returns the last vector written. NOTE(review): PrefixSum is defined outside this chunk; presumably it converts the unpacked deltas into running totals seeded by initOffset -- confirm against its definition. */ +__m128i iunpack23(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 23) - 1); /* low 23 bits set in each 32-bit lane */ + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; /* only the top 9 bits of this word remain; the other 14 bits are OR-ed in from the next input vector below */ + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 5), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 19), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + 
_mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 1), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 15), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 11), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 2), mask)); + + PrefixSum(OutReg, OutReg, 
initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 7), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 21), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 3), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 17), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 22), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 13), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 18), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + 
_mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 9), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + /* iunpack24: decodes 128 values packed 24 bits apiece. Reads 24 consecutive 128-bit vectors starting at in (unaligned loads); the same four-step schedule (shifts 0, 24, 16, 8) consumes three input vectors and is repeated eight times, giving 32 assembled vectors with a 24-bit field in each of their four 32-bit lanes. Every assembled vector is run through PrefixSum(OutReg, OutReg, initOffset), becomes the new initOffset, and is written to the next 16 bytes of _out (unaligned store), so 128 uint32_t are written in total. Returns the last vector written. NOTE(review): PrefixSum is defined outside this chunk; presumably it converts the unpacked deltas into running totals seeded by initOffset -- confirm against its definition. */ +__m128i iunpack24(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 24) - 1); /* low 24 bits set in each 32-bit lane */ + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; /* only the top 8 bits of this word remain; the other 16 bits are OR-ed in from the next input vector below */ + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; /* a right shift by 8 leaves exactly 24 bits, so neither a mask nor a second word is needed; the load below only fetches the vector for the next value */ + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(OutReg, OutReg, 
initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + 
_mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + /* iunpack25: decodes 128 values packed 25 bits apiece. Reads 25 consecutive 128-bit vectors starting at in (unaligned loads) and, in each of 32 steps, assembles one vector holding a 25-bit field in each of its four 32-bit lanes. Every assembled vector is run through PrefixSum(OutReg, OutReg, initOffset), becomes the new initOffset, and is written to the next 16 bytes of _out (unaligned store), so 128 uint32_t are written in total. Returns the last vector written. NOTE(review): PrefixSum is defined outside this chunk; presumably it converts the unpacked deltas into running totals seeded by initOffset -- confirm against its definition. */ +__m128i 
iunpack25(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 25) - 1); /* low 25 bits set in each 32-bit lane */ + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; /* only the top 7 bits of this word remain; the other 18 bits are OR-ed in from the next input vector below */ + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 18), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 11), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 22), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 15), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + 
OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 1), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 19), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 5), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 23), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 9), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 13), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + 
_mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 24), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 17), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 3), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 21), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + 
_mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 7), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack26(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 26) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; 
+ _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 22), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 24), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 18), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + 
_mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + 
PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 22), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 24), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 18), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg 
= tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack27(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 27) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 22), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 17), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 7), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 24), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 19), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 9), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 4), mask)); + + PrefixSum(OutReg, OutReg, 
initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 26), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 21), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 11), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 1), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); 
+ PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 23), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 18), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 13), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 3), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 25), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg 
= tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 15), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 5), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack28(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 28) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + 
OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + 
_mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack29(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 29) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, 
OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 26), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 23), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 17), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 11), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 5), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 28), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 25), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 22), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 19), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 29 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 13), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 7), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 1), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 27), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, 
OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 24), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 21), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 18), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 15), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 9), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = 
_mm_srli_epi32(InReg, 6); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 3), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack30(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 30) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 24), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 22), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + 
OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 18), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 24), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg 
= tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 22), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 18), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack31(__m128i initOffset, const __m128i *in, uint32_t *_out) { + + __m128i *out = (__m128i *)(_out); + __m128i InReg = _mm_loadu_si128(in); + __m128i OutReg; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 31) - 1); + + tmp = InReg; + OutReg = _mm_and_si128(tmp, mask); + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 31); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 30), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 30); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + 
_mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 29), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 29); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 28), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 28); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 27), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 27); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 26), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 26); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 25), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 25); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 24), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 24); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 23), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 23); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 31 - 22), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 22); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 21), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 21); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 20), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 20); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 19), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 19); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 18), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 18); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 17), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 17); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 16), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 16); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 31 - 15), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 15); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 14), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 14); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 13), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 13); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 12), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 12); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 11), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 11); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 10), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 10); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 9), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 9); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, 
_mm_and_si128(_mm_slli_epi32(InReg, 31 - 8), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 8); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 7), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 7); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 6), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 6); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 5), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 5); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 4), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 4); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 3), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 3); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 2), mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 2); + OutReg = tmp; + ++in; + InReg = _mm_loadu_si128(in); + OutReg = + _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 1), 
mask)); + + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + tmp = _mm_srli_epi32(InReg, 1); + OutReg = tmp; + PrefixSum(OutReg, OutReg, initOffset); + initOffset = OutReg; + _mm_storeu_si128(out++, OutReg); + + return initOffset; +} + +__m128i iunpack32(__m128i initOffset, const __m128i *in, uint32_t *_out) { + __m128i *mout = (__m128i *)(_out); + __m128i invec; + size_t k; + (void)initOffset; + for (k = 0; k < 128 / 4; ++k) { + invec = _mm_loadu_si128(in++); + _mm_storeu_si128(mout++, invec); + } + return invec; +} + +void simdunpackd1(uint32_t initvalue, const __m128i *in, uint32_t *out, + const uint32_t bit) { + __m128i initOffset = _mm_set1_epi32(initvalue); + switch (bit) { + case 0: + iunpack0(initOffset, in, out); + break; + + case 1: + iunpack1(initOffset, in, out); + break; + + case 2: + iunpack2(initOffset, in, out); + break; + + case 3: + iunpack3(initOffset, in, out); + break; + + case 4: + iunpack4(initOffset, in, out); + break; + + case 5: + iunpack5(initOffset, in, out); + break; + + case 6: + iunpack6(initOffset, in, out); + break; + + case 7: + iunpack7(initOffset, in, out); + break; + + case 8: + iunpack8(initOffset, in, out); + break; + + case 9: + iunpack9(initOffset, in, out); + break; + + case 10: + iunpack10(initOffset, in, out); + break; + + case 11: + iunpack11(initOffset, in, out); + break; + + case 12: + iunpack12(initOffset, in, out); + break; + + case 13: + iunpack13(initOffset, in, out); + break; + + case 14: + iunpack14(initOffset, in, out); + break; + + case 15: + iunpack15(initOffset, in, out); + break; + + case 16: + iunpack16(initOffset, in, out); + break; + + case 17: + iunpack17(initOffset, in, out); + break; + + case 18: + iunpack18(initOffset, in, out); + break; + + case 19: + iunpack19(initOffset, in, out); + break; + + case 20: + iunpack20(initOffset, in, out); + break; + + case 21: + iunpack21(initOffset, in, out); + break; + + case 22: + iunpack22(initOffset, in, out); + 
break; + + case 23: + iunpack23(initOffset, in, out); + break; + + case 24: + iunpack24(initOffset, in, out); + break; + + case 25: + iunpack25(initOffset, in, out); + break; + + case 26: + iunpack26(initOffset, in, out); + break; + + case 27: + iunpack27(initOffset, in, out); + break; + + case 28: + iunpack28(initOffset, in, out); + break; + + case 29: + iunpack29(initOffset, in, out); + break; + + case 30: + iunpack30(initOffset, in, out); + break; + + case 31: + iunpack31(initOffset, in, out); + break; + + case 32: + iunpack32(initOffset, in, out); + break; + + default: + break; + } +} + +/*assumes that integers fit in the prescribed number of bits*/ + +void simdpackwithoutmaskd1(uint32_t initvalue, const uint32_t *in, __m128i *out, + const uint32_t bit) { + __m128i initOffset = _mm_set1_epi32(initvalue); + switch (bit) { + case 0: + break; + + case 1: + ipackwithoutmask1(initOffset, in, out); + break; + + case 2: + ipackwithoutmask2(initOffset, in, out); + break; + + case 3: + ipackwithoutmask3(initOffset, in, out); + break; + + case 4: + ipackwithoutmask4(initOffset, in, out); + break; + + case 5: + ipackwithoutmask5(initOffset, in, out); + break; + + case 6: + ipackwithoutmask6(initOffset, in, out); + break; + + case 7: + ipackwithoutmask7(initOffset, in, out); + break; + + case 8: + ipackwithoutmask8(initOffset, in, out); + break; + + case 9: + ipackwithoutmask9(initOffset, in, out); + break; + + case 10: + ipackwithoutmask10(initOffset, in, out); + break; + + case 11: + ipackwithoutmask11(initOffset, in, out); + break; + + case 12: + ipackwithoutmask12(initOffset, in, out); + break; + + case 13: + ipackwithoutmask13(initOffset, in, out); + break; + + case 14: + ipackwithoutmask14(initOffset, in, out); + break; + + case 15: + ipackwithoutmask15(initOffset, in, out); + break; + + case 16: + ipackwithoutmask16(initOffset, in, out); + break; + + case 17: + ipackwithoutmask17(initOffset, in, out); + break; + + case 18: + ipackwithoutmask18(initOffset, in, out); 
+ break; + + case 19: + ipackwithoutmask19(initOffset, in, out); + break; + + case 20: + ipackwithoutmask20(initOffset, in, out); + break; + + case 21: + ipackwithoutmask21(initOffset, in, out); + break; + + case 22: + ipackwithoutmask22(initOffset, in, out); + break; + + case 23: + ipackwithoutmask23(initOffset, in, out); + break; + + case 24: + ipackwithoutmask24(initOffset, in, out); + break; + + case 25: + ipackwithoutmask25(initOffset, in, out); + break; + + case 26: + ipackwithoutmask26(initOffset, in, out); + break; + + case 27: + ipackwithoutmask27(initOffset, in, out); + break; + + case 28: + ipackwithoutmask28(initOffset, in, out); + break; + + case 29: + ipackwithoutmask29(initOffset, in, out); + break; + + case 30: + ipackwithoutmask30(initOffset, in, out); + break; + + case 31: + ipackwithoutmask31(initOffset, in, out); + break; + + case 32: + ipackwithoutmask32(initOffset, in, out); + break; + + default: + break; + } +} + +void simdpackd1(uint32_t initvalue, const uint32_t *in, __m128i *out, + const uint32_t bit) { + __m128i initOffset = _mm_set1_epi32(initvalue); + switch (bit) { + case 0: + break; + ; + + case 1: + ipack1(initOffset, in, out); + break; + + case 2: + ipack2(initOffset, in, out); + break; + + case 3: + ipack3(initOffset, in, out); + break; + + case 4: + ipack4(initOffset, in, out); + break; + + case 5: + ipack5(initOffset, in, out); + break; + + case 6: + ipack6(initOffset, in, out); + break; + + case 7: + ipack7(initOffset, in, out); + break; + + case 8: + ipack8(initOffset, in, out); + break; + + case 9: + ipack9(initOffset, in, out); + break; + + case 10: + ipack10(initOffset, in, out); + break; + + case 11: + ipack11(initOffset, in, out); + break; + + case 12: + ipack12(initOffset, in, out); + break; + + case 13: + ipack13(initOffset, in, out); + break; + + case 14: + ipack14(initOffset, in, out); + break; + + case 15: + ipack15(initOffset, in, out); + break; + + case 16: + ipack16(initOffset, in, out); + break; + + case 17: + 
ipack17(initOffset, in, out); + break; + + case 18: + ipack18(initOffset, in, out); + break; + + case 19: + ipack19(initOffset, in, out); + break; + + case 20: + ipack20(initOffset, in, out); + break; + + case 21: + ipack21(initOffset, in, out); + break; + + case 22: + ipack22(initOffset, in, out); + break; + + case 23: + ipack23(initOffset, in, out); + break; + + case 24: + ipack24(initOffset, in, out); + break; + + case 25: + ipack25(initOffset, in, out); + break; + + case 26: + ipack26(initOffset, in, out); + break; + + case 27: + ipack27(initOffset, in, out); + break; + + case 28: + ipack28(initOffset, in, out); + break; + + case 29: + ipack29(initOffset, in, out); + break; + + case 30: + ipack30(initOffset, in, out); + break; + + case 31: + ipack31(initOffset, in, out); + break; + + case 32: + ipack32(initOffset, in, out); + break; + + default: + break; + } +} + +void simdfastsetd1fromprevious(__m128i *in, uint32_t bit, + uint32_t previousvalue, uint32_t value, + size_t index) { + simdfastset(in, bit, value - previousvalue, index); +} + +#ifdef __SSE4_1__ + +void simdfastsetd1(uint32_t initvalue, __m128i *in, uint32_t bit, + uint32_t value, size_t index) { + if (index == 0) { + simdfastset(in, bit, value - initvalue, index); + } else { + uint32_t prev = simdselectd1(initvalue, in, bit, index - 1); + simdfastset(in, bit, value - prev, index); + } +} + +#endif diff --git a/cpp-projects/base/data/simdcomp/simdintegratedbitpacking.h b/cpp-projects/base/data/simdcomp/simdintegratedbitpacking.h new file mode 100644 index 0000000..d8f3f2f --- /dev/null +++ b/cpp-projects/base/data/simdcomp/simdintegratedbitpacking.h @@ -0,0 +1,96 @@ +/** + * This code is released under a BSD License. 
+ */ + +#ifndef SIMD_INTEGRATED_BITPACKING_H +#define SIMD_INTEGRATED_BITPACKING_H + +#include "portability.h" + +/* SSE2 is required */ +#include <emmintrin.h> + +#include "simdbitpacking.h" +#include "simdcomputil.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* reads 128 values from "in", writes "bit" 128-bit vectors to "out" + integer values should be in sorted order (for best results). + The differences are masked so that only the least significant "bit" bits are + used. */ +void simdpackd1(uint32_t initvalue, const uint32_t *in, __m128i *out, + const uint32_t bit); + +/* reads 128 values from "in", writes "bit" 128-bit vectors to "out" + integer values should be in sorted order (for best results). + The difference values are assumed to be less than 1<<bit. */ +void simdpackwithoutmaskd1(uint32_t initvalue, const uint32_t *in, __m128i *out, + const uint32_t bit); + +/* reads "bit" 128-bit vectors from "in", writes 128 values to "out" */ +void simdunpackd1(uint32_t initvalue, const __m128i *in, uint32_t *out, + const uint32_t bit); + +/* searches "bit" 128-bit vectors from "in" (= 128 encoded integers) for the + *first encoded uint32 value which is >= |key|, and returns its position. It is + *assumed that the values stored are in sorted order. The encoded key is stored + *in "*presult". If no value is larger or equal to the key, 128 is returned. The + *pointer initOffset is a pointer to the last four values decoded (when starting + *out, this can be a zero vector or initialized with _mm_set1_epi32(init)), and + *the vector gets updated. + **/ +int simdsearchd1(__m128i *initOffset, const __m128i *in, uint32_t bit, + uint32_t key, uint32_t *presult); + +/* searches "bit" 128-bit vectors from "in" (= length<=128 encoded integers) for + * the first encoded uint32 value which is >= |key|, and returns its position. + * It is assumed that the values stored are in sorted order. The encoded key is + * stored in "*presult". Only the first length decoded integers are considered, + * ignoring others. If + * no value is larger or equal to the key, length is returned. Length should be + * no larger than 128. + * + * If no value is larger or equal to the key, + * length is returned */ +int simdsearchwithlengthd1(uint32_t initvalue, const __m128i *in, uint32_t bit, + int length, uint32_t key, uint32_t *presult); + +/* returns the value stored at the specified "slot". 
+ * */ +uint32_t simdselectd1(uint32_t initvalue, const __m128i *in, uint32_t bit, + int slot); + +/* given a block of 128 packed values, this function sets the value at index + * "index" to "value"; you must somehow know the previous value. Because of + * differential coding, all following values are incremented by the offset + * between this new value and the old value... This function is useful if you + * want to modify the last value. + */ +void simdfastsetd1fromprevious(__m128i *in, uint32_t bit, + uint32_t previousvalue, uint32_t value, + size_t index); + +/* given a block of 128 packed values, this function sets the value at index + * "index" to "value". This function computes the previous value if needed. + * Because of differential coding, all following values are incremented by the + * offset between this new value and the old value... This function is useful + * if you want to modify the last value. The definition is only compiled when + * __SSE4_1__ is defined. + */ +void simdfastsetd1(uint32_t initvalue, __m128i *in, uint32_t bit, + uint32_t value, size_t index); + +/*Simply scan the data + * The pointer initOffset is a pointer to the last four values decoded + * (when starting out, this can be a zero vector or initialized with + * _mm_set1_epi32(init);), and the vector gets updated. + * */ + +void simdscand1(__m128i *initOffset, const __m128i *in, uint32_t bit); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/cpp-projects/base/data/simdcomp/simdpackedsearch.c b/cpp-projects/base/data/simdcomp/simdpackedsearch.c new file mode 100644 index 0000000..c4aaf53 --- /dev/null +++ b/cpp-projects/base/data/simdcomp/simdpackedsearch.c @@ -0,0 +1,16691 @@ +/** + * This code is released under a BSD License. 
+ */ +#ifdef __SSE4_1__ + +#include "simdintegratedbitpacking.h" +#include <smmintrin.h> + +SIMDCOMP_ALIGNED(16) +static int8_t shuffle_mask_bytes[256] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4, 5, 6, 7, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, + 15, 4, 5, 6, 7, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 12, 13, 14, 15, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 12, 13, 14, 15, + 8, 9, 10, 11, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15, 8, 9, 10, + 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 12, 13, + 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 0, + 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, 12, 13, 14, 15, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 14, 15, +}; +static const __m128i *shuffle_mask = (__m128i *)shuffle_mask_bytes; + +/* should emulate std::lower_bound*/ +static int lower_bound(uint32_t *A, uint32_t key, int imin, int imax) { + int imid; + imax--; + while (imin + 1 < imax) { + imid = imin + ((imax - imin) / 2); + + if (A[imid] >= key) { + imax = imid; + } else if (A[imid] < key) { + imin = imid; + } + } + if (A[imin] >= key) + return imin; + return imax; +} + +#define PrefixSum(ret, curr, prev) \ + do { \ + const __m128i _tmp1 = _mm_add_epi32(_mm_slli_si128(curr, 8), curr); \ + const __m128i _tmp2 = _mm_add_epi32(_mm_slli_si128(_tmp1, 4), _tmp1); \ + ret = _mm_add_epi32(_tmp2, _mm_shuffle_epi32(prev, 0xff)); \ + } while (0) + +/* perform a lower-bound search for |key| in |out|; the resulting uint32 + * is stored in |*presult|.*/ +#define CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult) \ + do { \ + __m128i tmpout = _mm_sub_epi32(out, 
conversion); \ + uint32_t mmask = \ + _mm_movemask_ps(_mm_castsi128_ps(_mm_cmplt_epi32(tmpout, key4))); \ + if (mmask != 15) { \ + const __m128i p = _mm_shuffle_epi8(out, shuffle_mask[mmask ^ 15]); \ + int offset; \ + int remaining = length - i; \ + SIMDCOMP_CTZ(offset, mmask ^ 15); \ + *presult = _mm_cvtsi128_si32(p); \ + if (offset < remaining) \ + return (i + offset); \ + } \ + i += 4; \ + if (i >= length) { /* reached end of array? */ \ + *presult = key + 1; \ + return (length); \ + } \ + } while (0) + +static int iunpacksearchwithlength0(__m128i initOffset, const __m128i *_in, + int length, uint32_t key, + uint32_t *presult) { + if (length > 0) { + uint32_t repeatedvalue = (uint32_t)_mm_extract_epi32(initOffset, 3); + if (repeatedvalue >= key) { + *presult = repeatedvalue; + return 0; + } + } + (void)_in; + *presult = key + 1; + return (length); +} + +static int iunpacksearchwithlength1(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 1) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = 
_mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); 
+ initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + 
tmp = _mm_srli_epi32(InReg, 25); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength2(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 2) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + 
initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = 
_mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); 
+ initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength3(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = 
_mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 3) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, 
initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 3 - 1), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, 
out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 3 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength4(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 4) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, 
presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, 
length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength5(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 5) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 3), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 1), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + 
CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = 
out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength6(__m128i initOffset, const __m128i *in, + int length, 
uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 6) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = 
out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + 
initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + 
tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength7(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 7) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = 
_mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 3), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 2), mask)); + + 
PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 5), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 1), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + 
CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength8(__m128i initOffset, const __m128i *in, + int length, uint32_t 
key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 8) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = 
out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength9(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 9) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 3), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 7), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 
7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 9 - 1), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 5), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength10(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 10) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + 
tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = 
_mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, 
presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, 
length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength11(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 11) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 1), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 3), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 5), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 7), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = 
_mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 9), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, 
presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength12(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 12) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, 
out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, 
key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength13(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 13) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i 
key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 7), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 1), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + 
CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 9), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 3), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 13 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 11), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 5), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); 
+ + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength14(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 14) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, 
initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, 
length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + 
CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = 
_mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength15(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 15) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 13), mask)); + + PrefixSum(out, out, 
initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 11), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 9), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 7), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 5), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, 
length, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 3), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 1), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 14), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 
- 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + 
CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength16(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 16) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset 
= out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + 
CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength17(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 17) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = 
tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 17 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 14), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 1), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, 
out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 3), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 5), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 7), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 9), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, 
length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 11), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 13), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 15), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength18(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 18) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, 
out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + 
CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 14), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = 
_mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 14), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength19(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 19) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); 
+ PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 18), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 5), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 19 - 11), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 17), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); 
+ out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 3), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 9), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 15), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, 
mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 14), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 1), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 7), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 13), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength20(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t 
*presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 20) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + 
out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, 
initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, 
length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +/* iunpacksearchwithlength21: steps through 128 integers packed at 21 bits each, four 32-bit lanes at a time. It does unaligned loads of 21 consecutive __m128i vectors starting at in; each step shifts and masks out one 21-bit field per lane, ORing in the low bits of the next vector when a field straddles two vectors. Every group of four is passed to PrefixSum(out, out, initOffset), the result becomes the next initOffset, and the group is then handed to CHECK_AND_INCREMENT_WITH_LENGTH with i, length, key and presult. If all 32 groups go by without that macro leaving the function, key+1 is stored through presult and 128 is returned. NOTE(review): PrefixSum and CHECK_AND_INCREMENT_WITH_LENGTH are defined outside this chunk; presumably the macro does the comparison against key, enforces length, advances i, returns early on a match, and is what reads the otherwise unreferenced conversion and key4 locals (0x80000000 and key - 0x80000000 broadcast to all lanes) - confirm against its definition. */ static int iunpacksearchwithlength21(__m128i
initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 21) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 9), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 19), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + 
CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 18), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 7), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 17), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); 
+ initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 5), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 15), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 14), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 3), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 13), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 1), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = 
tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 11), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +/* iunpacksearchwithlength22: steps through 128 integers packed at 22 bits each, four 32-bit lanes at a time. It does unaligned loads of 22 consecutive __m128i vectors starting at in; each step shifts and masks out one 22-bit field per lane, ORing in the low bits of the next vector when a field straddles two vectors. Every group of four is passed to PrefixSum(out, out, initOffset), the result becomes the next initOffset, and the group is then handed to CHECK_AND_INCREMENT_WITH_LENGTH with i, length, key and presult. If all 32 groups go by without that macro leaving the function, key+1 is stored through presult and 128 is returned. NOTE(review): PrefixSum and CHECK_AND_INCREMENT_WITH_LENGTH are defined outside this chunk; presumably the macro does the comparison against key, enforces length, advances i, returns early on a match, and is what reads the otherwise unreferenced conversion and key4 locals (0x80000000 and key - 0x80000000 broadcast to all lanes) - confirm against its definition. */ static int iunpacksearchwithlength22(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 22) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; +
CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 18), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); 
+ initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask)); + + 
PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 18), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +/* iunpacksearchwithlength23: steps through 128 integers packed at 23 bits each, four 32-bit lanes at a time. It does unaligned loads of 23 consecutive __m128i vectors starting at in; each step shifts and masks out one 23-bit field per lane, ORing in the low bits of the next vector when a field straddles two vectors. Every group of four is passed to PrefixSum(out, out, initOffset), the result becomes the next initOffset, and the group is then handed to CHECK_AND_INCREMENT_WITH_LENGTH with i, length, key and presult. If all 32 groups go by without that macro leaving the function, key+1 is stored through presult and 128 is returned. NOTE(review): PrefixSum and CHECK_AND_INCREMENT_WITH_LENGTH are defined outside this chunk; presumably the macro does the comparison against key, enforces length, advances i, returns early on a match, and is what reads the otherwise unreferenced conversion and key4 locals (0x80000000 and key - 0x80000000 broadcast to all lanes) - confirm against its definition. */ static int iunpacksearchwithlength23(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 23) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 14), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 5), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; +
CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 19), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 1), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 15), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, 
initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 11), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 7), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 21), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 3), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 17), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 22), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + 
CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 13), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 18), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 9), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + /* iunpacksearchwithlength24: scans one block stored at 24 bits per value. It reads 24 consecutive 128-bit words from in (unaligned loads) and rebuilds 32 vectors of four 24-bit fields, one field per 32-bit lane. Each vector is passed through PrefixSum together with initOffset, the result becomes the next initOffset, and the vector is then handed to CHECK_AND_INCREMENT_WITH_LENGTH along with i, length, key and presult. PrefixSum and that macro are defined outside this chunk; presumably the macro returns from this function once a value satisfies the search against key and otherwise advances i (TODO confirm). conversion and key4 are never named in the statements below, so they are presumably consumed inside the macro. When control falls through all 32 checks, *presult is set to key + 1 and 128 is returned. */ +static int iunpacksearchwithlength24(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 24) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp 
= InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + /* this field straddles two words: the top 8 bits of the current word become its low bits, then the low 16 bits of the next word are shifted up by 8 and masked to 24 bits */ tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + /* the top 24 bits of the word are exactly one field, so no mask or merge is needed; the next word is loaded here for the following step */ tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + /* the same four steps (shift 0, 24, 16, 8) now repeat; every group of three input words yields four vectors */ tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + 
initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, 
presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + /* control fell through every check of the 24-bit routine: report key + 1 through presult and return 128 */ *presult = key + 1; + return (128); +} + /* iunpacksearchwithlength25: scans one block stored at 25 bits per value. It reads 25 consecutive 128-bit words from in (unaligned loads) and rebuilds 32 vectors of four 25-bit fields, one field per 32-bit lane. Each vector is passed through PrefixSum together with initOffset, the result becomes the next initOffset, and the vector is then handed to CHECK_AND_INCREMENT_WITH_LENGTH along with i, length, key and presult. PrefixSum and that macro are defined outside this chunk; presumably the macro returns from this function once a value satisfies the search against key and otherwise advances i (TODO confirm). conversion and key4 are never named in the statements below, so they are presumably consumed inside the macro. When control falls through all 32 checks, *presult is set to key + 1 and 128 is returned. */ +static int iunpacksearchwithlength25(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 25) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + 
__m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + /* this field straddles two words: the top 7 bits of the current word become its low bits, then the low 18 bits of the next word are shifted up by 7 and masked to 25 bits */ tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 18), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 11), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + /* bits 4..28 of the current word hold a whole field: shift and mask only, no new word is loaded */ tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 22), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 15), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 25 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 1), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 19), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 5), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 23), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, 
presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 9), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 13), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 24), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 17), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 3), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 21), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 14), mask)); + + PrefixSum(out, out, initOffset); + 
initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 7), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + /* control fell through every check of the 25-bit routine: report key + 1 through presult and return 128 */ *presult = key + 1; + return (128); +} + /* iunpacksearchwithlength26: scans one block stored at 26 bits per value. It reads 26 consecutive 128-bit words from in (unaligned loads) and rebuilds 32 vectors of four 26-bit fields, one field per 32-bit lane. Each vector is passed through PrefixSum together with initOffset, the result becomes the next initOffset, and the vector is then handed to CHECK_AND_INCREMENT_WITH_LENGTH along with i, length, key and presult. PrefixSum and that macro are defined outside this chunk; presumably the macro returns from this function once a value satisfies the search against key and otherwise advances i (TODO confirm). conversion and key4 are never named in the statements below, so they are presumably consumed inside the macro. When control falls through all 32 checks, *presult is set to key + 1 and 128 is returned. */ +static int iunpacksearchwithlength26(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 26) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + /* this field straddles two words: the top 6 bits of the current word become its low bits, then the low 20 bits of the next word are shifted up by 6 and masked to 26 bits */ tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + 
tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 22), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 24), mask)); + 
+ PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 18), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + /* the top 26 bits of the word are exactly one field, so no mask or merge is needed; the next word is loaded here for the following step */ tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + /* second half: the 16-step shift sequence of the first half repeats; only the very last step differs, as it loads no further word */ tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = 
tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 22), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + 
initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 24), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 18), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearchwithlength27(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 27) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + 
tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 22), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 17), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 7), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 24), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 19), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 14), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 9), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 26), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 21), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 27 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 11), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 1), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 23), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 18), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 13), mask)); + + 
PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 3), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 25), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 15), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + 
CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 5), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + /* iunpacksearchwithlength28: walks a block packed at 28 bits per value. Each step extracts one 28-bit field from every 32-bit lane of the current 128-bit word (mask = 2^28 - 1), OR-ing in the low bits of the next word when a field straddles two words, then passes the four lanes to PrefixSum(out, out, initOffset) and, together with key, to CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult). Both helpers are defined outside this chunk. NOTE(review): CHECK_AND_INCREMENT_WITH_LENGTH presumably returns from this function on a hit or once i reaches length, and presumably reads the locals conversion and key4 (nothing else in this body references them by name) - confirm against its definition before renaming or removing them. If every step falls through, *presult is set to key + 1 and 128 is returned. */ +static int iunpacksearchwithlength28(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 28) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = 
_mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + 
PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; 
+ ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset 
= out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + /* iunpacksearchwithlength29: walks a block packed at 29 bits per value. Each step extracts one 29-bit field from every 32-bit lane of the current 128-bit word (mask = 2^29 - 1), OR-ing in the low bits of the next word when a field straddles two words, then passes the four lanes to PrefixSum(out, out, initOffset) and, together with key, to CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult). Both helpers are defined outside this chunk. NOTE(review): CHECK_AND_INCREMENT_WITH_LENGTH presumably returns from this function on a hit or once i reaches length, and presumably reads the locals conversion and key4 (nothing else in this body references them by name) - confirm against its definition before renaming or removing them. If every step falls through, *presult is set to key + 1 and 128 is returned. */ +static int iunpacksearchwithlength29(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 29) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = 
_mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 26), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 23), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 17), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 14), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 11), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, 
out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 5), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 28), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 25), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 22), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 19), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + 
out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 13), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 7), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 1), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = 
_mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 27), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 24), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 21), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 18), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 15), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 9), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + ++in; + 
InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 3), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + return (128); +} + /* iunpacksearchwithlength30: walks a block packed at 30 bits per value. Each step extracts one 30-bit field from every 32-bit lane of the current 128-bit word (mask = 2^30 - 1), OR-ing in the low bits of the next word when a field straddles two words, then passes the four lanes to PrefixSum(out, out, initOffset) and, together with key, to CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult). Both helpers are defined outside this chunk. NOTE(review): CHECK_AND_INCREMENT_WITH_LENGTH presumably returns from this function on a hit or once i reaches length, and presumably reads the locals conversion and key4 (nothing else in this body references them by name) - confirm against its definition before renaming or removing them. If every step falls through, *presult is set to key + 1 and 128 is returned. */ +static int iunpacksearchwithlength30(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 30) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 
30 - 24), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 22), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 18), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 14), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = 
_mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, 
length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 24), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 22), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 18), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 14), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + 
CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 12), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 4), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 2), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = tmp; + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + *presult = key + 1; + 
return (128); +} + /* iunpacksearchwithlength31: walks a block packed at 31 bits per value. Each step extracts one 31-bit field from every 32-bit lane of the current 128-bit word (mask = 2^31 - 1), OR-ing in the low bits of the next word when a field straddles two words, then passes the four lanes to PrefixSum(out, out, initOffset) and, together with key, to CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult). Both helpers are defined outside this chunk. NOTE(review): CHECK_AND_INCREMENT_WITH_LENGTH presumably returns from this function on a hit or once i reaches length, and presumably reads the locals conversion and key4 (nothing else in this body references them by name) - confirm against its definition before renaming or removing them. If every step falls through, *presult is set to key + 1 and 128 is returned. */ +static int iunpacksearchwithlength31(__m128i initOffset, const __m128i *in, + int length, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 31) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 30), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 29), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 28), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 27), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 26), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, 
presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 25), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 24), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 23), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 22), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 21), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 20), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 19), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + 
CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 18), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 17), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 16), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 15), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 14), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 13), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 12), mask)); + + PrefixSum(out, 
out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 11), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 10), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 9), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 8), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 7), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 6), mask)); + + PrefixSum(out, out, initOffset); + initOffset = out; + CHECK_AND_INCREMENT_WITH_LENGTH(i, out, length, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 
/**
 * Lower-bound search over the first |length| values of a block stored with
 * 32 bits per value.
 *
 * At this width the block is read directly as an array of uint32_t: nothing
 * is unpacked and no prefix sum is applied, so |initOffset| is ignored.
 *
 * initOffset - unused at this bit width
 * in         - block, reinterpreted as uint32_t[]
 * length     - number of leading entries to search
 * key        - value to search for
 * presult    - receives the entry at the returned index, or key + 1 when no
 *              entry qualifies
 *
 * Returns the index picked by lower_bound() when that entry is >= key;
 * otherwise returns |length| ("not found").
 *
 * NOTE(review): lower_bound() is defined elsewhere in this file; it
 * presumably expects the entries in ascending order -- confirm.
 */
static int iunpacksearchwithlength32(__m128i initOffset, const __m128i *in,
                                     int length, uint32_t key,
                                     uint32_t *presult) {
  uint32_t *in32 = (uint32_t *)in;
  int answer;
  (void)initOffset;
  /* An empty (or negative) range has no valid index: indexing in32[] with
   * whatever lower_bound() returns would read outside the searched range,
   * so report "not found" without touching the input. */
  if (length <= 0) {
    *presult = key + 1;
    return (length);
  }
  answer = lower_bound(in32, key, 0, length);
  if (in32[answer] < key) {
    *presult = key + 1;
    return (length);
  }
  *presult = in32[answer];
  return answer;
}
initOffset = _mm_set1_epi32(initvalue); + switch (bit) { + case 0: + return iunpacksearchwithlength0(initOffset, in, length, key, presult); + + case 1: + return iunpacksearchwithlength1(initOffset, in, length, key, presult); + + case 2: + return iunpacksearchwithlength2(initOffset, in, length, key, presult); + + case 3: + return iunpacksearchwithlength3(initOffset, in, length, key, presult); + + case 4: + return iunpacksearchwithlength4(initOffset, in, length, key, presult); + + case 5: + return iunpacksearchwithlength5(initOffset, in, length, key, presult); + + case 6: + return iunpacksearchwithlength6(initOffset, in, length, key, presult); + + case 7: + return iunpacksearchwithlength7(initOffset, in, length, key, presult); + + case 8: + return iunpacksearchwithlength8(initOffset, in, length, key, presult); + + case 9: + return iunpacksearchwithlength9(initOffset, in, length, key, presult); + + case 10: + return iunpacksearchwithlength10(initOffset, in, length, key, presult); + + case 11: + return iunpacksearchwithlength11(initOffset, in, length, key, presult); + + case 12: + return iunpacksearchwithlength12(initOffset, in, length, key, presult); + + case 13: + return iunpacksearchwithlength13(initOffset, in, length, key, presult); + + case 14: + return iunpacksearchwithlength14(initOffset, in, length, key, presult); + + case 15: + return iunpacksearchwithlength15(initOffset, in, length, key, presult); + + case 16: + return iunpacksearchwithlength16(initOffset, in, length, key, presult); + + case 17: + return iunpacksearchwithlength17(initOffset, in, length, key, presult); + + case 18: + return iunpacksearchwithlength18(initOffset, in, length, key, presult); + + case 19: + return iunpacksearchwithlength19(initOffset, in, length, key, presult); + + case 20: + return iunpacksearchwithlength20(initOffset, in, length, key, presult); + + case 21: + return iunpacksearchwithlength21(initOffset, in, length, key, presult); + + case 22: + return 
iunpacksearchwithlength22(initOffset, in, length, key, presult); + + case 23: + return iunpacksearchwithlength23(initOffset, in, length, key, presult); + + case 24: + return iunpacksearchwithlength24(initOffset, in, length, key, presult); + + case 25: + return iunpacksearchwithlength25(initOffset, in, length, key, presult); + + case 26: + return iunpacksearchwithlength26(initOffset, in, length, key, presult); + + case 27: + return iunpacksearchwithlength27(initOffset, in, length, key, presult); + + case 28: + return iunpacksearchwithlength28(initOffset, in, length, key, presult); + + case 29: + return iunpacksearchwithlength29(initOffset, in, length, key, presult); + + case 30: + return iunpacksearchwithlength30(initOffset, in, length, key, presult); + + case 31: + return iunpacksearchwithlength31(initOffset, in, length, key, presult); + + case 32: + return iunpacksearchwithlength32(initOffset, in, length, key, presult); + + default: + break; + } + return (-1); +} + +/* perform a lower-bound search for |key| in |out|; the resulting uint32 + * is stored in |*presult|.*/ +#define CHECK_AND_INCREMENT(i, out, key, presult) \ + do { \ + __m128i tmpout = _mm_sub_epi32(out, conversion); \ + uint32_t mmask = \ + _mm_movemask_ps(_mm_castsi128_ps(_mm_cmplt_epi32(tmpout, key4))); \ + if (mmask != 15) { \ + __m128i p = _mm_shuffle_epi8(out, shuffle_mask[mmask ^ 15]); \ + int offset; \ + SIMDCOMP_CTZ(offset, mmask ^ 15); \ + *presult = _mm_cvtsi128_si32(p); \ + return (i + offset); \ + } \ + i += 4; \ + } while (0) + +static int iunpacksearch0(__m128i *initOffset, const __m128i *_in, uint32_t key, + uint32_t *presult) { + uint32_t repeatedvalue = (uint32_t)_mm_extract_epi32(*initOffset, 3); + if (repeatedvalue >= key) { + *presult = repeatedvalue; + return 0; + } + *presult = key + 1; + (void)_in; + return (128); +} + +static int iunpacksearch1(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + 
/* Shared tail of every step: run PrefixSum seeded from *initOffset, publish
 * the group back into *initOffset, then test it. CHECK_AND_INCREMENT may
 * return from iunpacksearch1. */
#define IUNPACKSEARCH1_EMIT()                                                  \
  do {                                                                         \
    PrefixSum(lanes, lanes, *initOffset);                                      \
    *initOffset = lanes;                                                       \
    CHECK_AND_INCREMENT(pos, lanes, key, presult);                             \
  } while (0)

/* One field that lies fully inside the current word. */
#define IUNPACKSEARCH1_FIELD(shift)                                            \
  do {                                                                         \
    lanes = _mm_and_si128(_mm_srli_epi32(word, (shift)), lowbit);              \
    IUNPACKSEARCH1_EMIT();                                                     \
  } while (0)

/**
 * Search a block packed with 1 bit per value: one 128-bit word, 32 groups
 * of four lanes.
 *
 * Each group is extracted, passed through PrefixSum with *initOffset as the
 * seed, written back to *initOffset, and tested with CHECK_AND_INCREMENT,
 * which returns from this function as soon as a group contains a value that
 * is not below |key| (storing that value in |*presult|).
 *
 * If all 32 groups are below the key, stores key + 1 in |*presult| and
 * returns 128. |*initOffset| is left holding the last group processed.
 */
static int iunpacksearch1(__m128i *initOffset, const __m128i *in, uint32_t key,
                          uint32_t *presult) {
  int pos = 0;
  const __m128i word = _mm_loadu_si128(in);
  const __m128i lowbit = _mm_set1_epi32((1U << 1) - 1);
  /* These two names are read from inside CHECK_AND_INCREMENT; do not rename. */
  const __m128i conversion = _mm_set1_epi32(2147483648U);
  const __m128i key4 = _mm_set1_epi32(key - 2147483648U);
  __m128i lanes;

  /* field at bit 0: no shift needed */
  lanes = _mm_and_si128(word, lowbit);
  IUNPACKSEARCH1_EMIT();

  IUNPACKSEARCH1_FIELD(1);
  IUNPACKSEARCH1_FIELD(2);
  IUNPACKSEARCH1_FIELD(3);
  IUNPACKSEARCH1_FIELD(4);
  IUNPACKSEARCH1_FIELD(5);
  IUNPACKSEARCH1_FIELD(6);
  IUNPACKSEARCH1_FIELD(7);
  IUNPACKSEARCH1_FIELD(8);
  IUNPACKSEARCH1_FIELD(9);
  IUNPACKSEARCH1_FIELD(10);
  IUNPACKSEARCH1_FIELD(11);
  IUNPACKSEARCH1_FIELD(12);
  IUNPACKSEARCH1_FIELD(13);
  IUNPACKSEARCH1_FIELD(14);
  IUNPACKSEARCH1_FIELD(15);
  IUNPACKSEARCH1_FIELD(16);
  IUNPACKSEARCH1_FIELD(17);
  IUNPACKSEARCH1_FIELD(18);
  IUNPACKSEARCH1_FIELD(19);
  IUNPACKSEARCH1_FIELD(20);
  IUNPACKSEARCH1_FIELD(21);
  IUNPACKSEARCH1_FIELD(22);
  IUNPACKSEARCH1_FIELD(23);
  IUNPACKSEARCH1_FIELD(24);
  IUNPACKSEARCH1_FIELD(25);
  IUNPACKSEARCH1_FIELD(26);
  IUNPACKSEARCH1_FIELD(27);
  IUNPACKSEARCH1_FIELD(28);
  IUNPACKSEARCH1_FIELD(29);
  IUNPACKSEARCH1_FIELD(30);

  /* topmost field: the shift already clears every other bit, no mask */
  lanes = _mm_srli_epi32(word, 31);
  IUNPACKSEARCH1_EMIT();

  *presult = key + 1;
  return (128);
}

#undef IUNPACKSEARCH1_FIELD
#undef IUNPACKSEARCH1_EMIT
/* Shared tail of every step: run PrefixSum seeded from *initOffset, publish
 * the group back into *initOffset, then test it. CHECK_AND_INCREMENT may
 * return from iunpacksearch2. */
#define IUNPACKSEARCH2_EMIT()                                                  \
  do {                                                                         \
    PrefixSum(lanes, lanes, *initOffset);                                      \
    *initOffset = lanes;                                                       \
    CHECK_AND_INCREMENT(pos, lanes, key, presult);                             \
  } while (0)

/* One field that lies fully inside the current word. */
#define IUNPACKSEARCH2_FIELD(shift)                                            \
  do {                                                                         \
    lanes = _mm_and_si128(_mm_srli_epi32(word, (shift)), lowbits);             \
    IUNPACKSEARCH2_EMIT();                                                     \
  } while (0)

/* The 14 masked fields at bit offsets 2..28 of the current word. */
#define IUNPACKSEARCH2_MIDDLE()                                                \
  do {                                                                         \
    IUNPACKSEARCH2_FIELD(2);                                                   \
    IUNPACKSEARCH2_FIELD(4);                                                   \
    IUNPACKSEARCH2_FIELD(6);                                                   \
    IUNPACKSEARCH2_FIELD(8);                                                   \
    IUNPACKSEARCH2_FIELD(10);                                                  \
    IUNPACKSEARCH2_FIELD(12);                                                  \
    IUNPACKSEARCH2_FIELD(14);                                                  \
    IUNPACKSEARCH2_FIELD(16);                                                  \
    IUNPACKSEARCH2_FIELD(18);                                                  \
    IUNPACKSEARCH2_FIELD(20);                                                  \
    IUNPACKSEARCH2_FIELD(22);                                                  \
    IUNPACKSEARCH2_FIELD(24);                                                  \
    IUNPACKSEARCH2_FIELD(26);                                                  \
    IUNPACKSEARCH2_FIELD(28);                                                  \
  } while (0)

/**
 * Search a block packed with 2 bits per value: two 128-bit words, 16 groups
 * of four lanes per word.
 *
 * Each group is extracted, passed through PrefixSum with *initOffset as the
 * seed, written back to *initOffset, and tested with CHECK_AND_INCREMENT,
 * which returns from this function as soon as a group contains a value that
 * is not below |key| (storing that value in |*presult|).
 *
 * If all 32 groups are below the key, stores key + 1 in |*presult| and
 * returns 128. |*initOffset| is left holding the last group processed.
 */
static int iunpacksearch2(__m128i *initOffset, const __m128i *in, uint32_t key,
                          uint32_t *presult) {
  int pos = 0;
  __m128i word = _mm_loadu_si128(in);
  const __m128i lowbits = _mm_set1_epi32((1U << 2) - 1);
  /* These two names are read from inside CHECK_AND_INCREMENT; do not rename. */
  const __m128i conversion = _mm_set1_epi32(2147483648U);
  const __m128i key4 = _mm_set1_epi32(key - 2147483648U);
  __m128i lanes;

  /* ---- word 0 ---- */
  lanes = _mm_and_si128(word, lowbits);
  IUNPACKSEARCH2_EMIT();
  IUNPACKSEARCH2_MIDDLE();
  /* top field of word 0; word 1 is fetched before the group is tested */
  lanes = _mm_srli_epi32(word, 30);
  word = _mm_loadu_si128(in + 1);
  IUNPACKSEARCH2_EMIT();

  /* ---- word 1 ---- */
  lanes = _mm_and_si128(word, lowbits);
  IUNPACKSEARCH2_EMIT();
  IUNPACKSEARCH2_MIDDLE();
  lanes = _mm_srli_epi32(word, 30);
  IUNPACKSEARCH2_EMIT();

  *presult = key + 1;
  return (128);
}

#undef IUNPACKSEARCH2_MIDDLE
#undef IUNPACKSEARCH2_FIELD
#undef IUNPACKSEARCH2_EMIT
/* Shared tail of every step: run PrefixSum seeded from *initOffset, publish
 * the group back into *initOffset, then test it. CHECK_AND_INCREMENT may
 * return from iunpacksearch3. */
#define IUNPACKSEARCH3_EMIT()                                                  \
  do {                                                                         \
    PrefixSum(lanes, lanes, *initOffset);                                      \
    *initOffset = lanes;                                                       \
    CHECK_AND_INCREMENT(pos, lanes, key, presult);                             \
  } while (0)

/* One field that lies fully inside the current word. */
#define IUNPACKSEARCH3_FIELD(shift)                                            \
  do {                                                                         \
    lanes = _mm_and_si128(_mm_srli_epi32(word, (shift)), lowbits);             \
    IUNPACKSEARCH3_EMIT();                                                     \
  } while (0)

/* One field split across two words: low part from the top of the current
 * word, high part from the bottom of the next word (which becomes the
 * current word). The next word is loaded before the group is tested. */
#define IUNPACKSEARCH3_STRADDLE(rshift, lshift, nextword)                      \
  do {                                                                         \
    lanes = _mm_srli_epi32(word, (rshift));                                    \
    word = _mm_loadu_si128(nextword);                                          \
    lanes = _mm_or_si128(                                                      \
        lanes, _mm_and_si128(_mm_slli_epi32(word, (lshift)), lowbits));        \
    IUNPACKSEARCH3_EMIT();                                                     \
  } while (0)

/**
 * Search a block packed with 3 bits per value: three 128-bit words, 32
 * groups of four lanes in total.
 *
 * Each group is extracted, passed through PrefixSum with *initOffset as the
 * seed, written back to *initOffset, and tested with CHECK_AND_INCREMENT,
 * which returns from this function as soon as a group contains a value that
 * is not below |key| (storing that value in |*presult|).
 *
 * If all 32 groups are below the key, stores key + 1 in |*presult| and
 * returns 128. |*initOffset| is left holding the last group processed.
 */
static int iunpacksearch3(__m128i *initOffset, const __m128i *in, uint32_t key,
                          uint32_t *presult) {
  int pos = 0;
  __m128i word = _mm_loadu_si128(in);
  const __m128i lowbits = _mm_set1_epi32((1U << 3) - 1);
  /* These two names are read from inside CHECK_AND_INCREMENT; do not rename. */
  const __m128i conversion = _mm_set1_epi32(2147483648U);
  const __m128i key4 = _mm_set1_epi32(key - 2147483648U);
  __m128i lanes;

  /* ---- word 0: fields at 0, 3, ..., 27 ---- */
  lanes = _mm_and_si128(word, lowbits);
  IUNPACKSEARCH3_EMIT();
  IUNPACKSEARCH3_FIELD(3);
  IUNPACKSEARCH3_FIELD(6);
  IUNPACKSEARCH3_FIELD(9);
  IUNPACKSEARCH3_FIELD(12);
  IUNPACKSEARCH3_FIELD(15);
  IUNPACKSEARCH3_FIELD(18);
  IUNPACKSEARCH3_FIELD(21);
  IUNPACKSEARCH3_FIELD(24);
  IUNPACKSEARCH3_FIELD(27);
  IUNPACKSEARCH3_STRADDLE(30, 3 - 1, in + 1);

  /* ---- word 1: fields at 1, 4, ..., 28 ---- */
  IUNPACKSEARCH3_FIELD(1);
  IUNPACKSEARCH3_FIELD(4);
  IUNPACKSEARCH3_FIELD(7);
  IUNPACKSEARCH3_FIELD(10);
  IUNPACKSEARCH3_FIELD(13);
  IUNPACKSEARCH3_FIELD(16);
  IUNPACKSEARCH3_FIELD(19);
  IUNPACKSEARCH3_FIELD(22);
  IUNPACKSEARCH3_FIELD(25);
  IUNPACKSEARCH3_FIELD(28);
  IUNPACKSEARCH3_STRADDLE(31, 3 - 2, in + 2);

  /* ---- word 2: fields at 2, 5, ..., 26, then the top field at 29 ---- */
  IUNPACKSEARCH3_FIELD(2);
  IUNPACKSEARCH3_FIELD(5);
  IUNPACKSEARCH3_FIELD(8);
  IUNPACKSEARCH3_FIELD(11);
  IUNPACKSEARCH3_FIELD(14);
  IUNPACKSEARCH3_FIELD(17);
  IUNPACKSEARCH3_FIELD(20);
  IUNPACKSEARCH3_FIELD(23);
  IUNPACKSEARCH3_FIELD(26);
  lanes = _mm_srli_epi32(word, 29);
  IUNPACKSEARCH3_EMIT();

  *presult = key + 1;
  return (128);
}

#undef IUNPACKSEARCH3_STRADDLE
#undef IUNPACKSEARCH3_FIELD
#undef IUNPACKSEARCH3_EMIT
/* Shared tail of every step: run PrefixSum seeded from *initOffset, publish
 * the group back into *initOffset, then test it. CHECK_AND_INCREMENT may
 * return from iunpacksearch4. */
#define IUNPACKSEARCH4_EMIT()                                                  \
  do {                                                                         \
    PrefixSum(lanes, lanes, *initOffset);                                      \
    *initOffset = lanes;                                                       \
    CHECK_AND_INCREMENT(pos, lanes, key, presult);                             \
  } while (0)

/* One field that lies fully inside the current word. */
#define IUNPACKSEARCH4_FIELD(shift)                                            \
  do {                                                                         \
    lanes = _mm_and_si128(_mm_srli_epi32(word, (shift)), lowbits);             \
    IUNPACKSEARCH4_EMIT();                                                     \
  } while (0)

/* The seven masked fields at bit offsets 0..24 of the current word. */
#define IUNPACKSEARCH4_LOWER()                                                 \
  do {                                                                         \
    lanes = _mm_and_si128(word, lowbits);                                      \
    IUNPACKSEARCH4_EMIT();                                                     \
    IUNPACKSEARCH4_FIELD(4);                                                   \
    IUNPACKSEARCH4_FIELD(8);                                                   \
    IUNPACKSEARCH4_FIELD(12);                                                  \
    IUNPACKSEARCH4_FIELD(16);                                                  \
    IUNPACKSEARCH4_FIELD(20);                                                  \
    IUNPACKSEARCH4_FIELD(24);                                                  \
  } while (0)

/* Top field (bit offset 28) of the current word; the following word is
 * loaded before the group is tested. */
#define IUNPACKSEARCH4_TOP_THEN_LOAD(nextword)                                 \
  do {                                                                         \
    lanes = _mm_srli_epi32(word, 28);                                          \
    word = _mm_loadu_si128(nextword);                                          \
    IUNPACKSEARCH4_EMIT();                                                     \
  } while (0)

/**
 * Search a block packed with 4 bits per value: four 128-bit words, 8 groups
 * of four lanes per word.
 *
 * Each group is extracted, passed through PrefixSum with *initOffset as the
 * seed, written back to *initOffset, and tested with CHECK_AND_INCREMENT,
 * which returns from this function as soon as a group contains a value that
 * is not below |key| (storing that value in |*presult|).
 *
 * If all 32 groups are below the key, stores key + 1 in |*presult| and
 * returns 128. |*initOffset| is left holding the last group processed.
 */
static int iunpacksearch4(__m128i *initOffset, const __m128i *in, uint32_t key,
                          uint32_t *presult) {
  int pos = 0;
  __m128i word = _mm_loadu_si128(in);
  const __m128i lowbits = _mm_set1_epi32((1U << 4) - 1);
  /* These two names are read from inside CHECK_AND_INCREMENT; do not rename. */
  const __m128i conversion = _mm_set1_epi32(2147483648U);
  const __m128i key4 = _mm_set1_epi32(key - 2147483648U);
  __m128i lanes;

  /* ---- word 0 ---- */
  IUNPACKSEARCH4_LOWER();
  IUNPACKSEARCH4_TOP_THEN_LOAD(in + 1);

  /* ---- word 1 ---- */
  IUNPACKSEARCH4_LOWER();
  IUNPACKSEARCH4_TOP_THEN_LOAD(in + 2);

  /* ---- word 2 ---- */
  IUNPACKSEARCH4_LOWER();
  IUNPACKSEARCH4_TOP_THEN_LOAD(in + 3);

  /* ---- word 3: last word, nothing further to load ---- */
  IUNPACKSEARCH4_LOWER();
  lanes = _mm_srli_epi32(word, 28);
  IUNPACKSEARCH4_EMIT();

  *presult = key + 1;
  return (128);
}

#undef IUNPACKSEARCH4_TOP_THEN_LOAD
#undef IUNPACKSEARCH4_LOWER
#undef IUNPACKSEARCH4_FIELD
#undef IUNPACKSEARCH4_EMIT
/* Shared tail of every step: run PrefixSum seeded from *initOffset, publish
 * the group back into *initOffset, then test it. CHECK_AND_INCREMENT may
 * return from iunpacksearch5. */
#define IUNPACKSEARCH5_EMIT()                                                  \
  do {                                                                         \
    PrefixSum(lanes, lanes, *initOffset);                                      \
    *initOffset = lanes;                                                       \
    CHECK_AND_INCREMENT(pos, lanes, key, presult);                             \
  } while (0)

/* One field that lies fully inside the current word. */
#define IUNPACKSEARCH5_FIELD(shift)                                            \
  do {                                                                         \
    lanes = _mm_and_si128(_mm_srli_epi32(word, (shift)), lowbits);             \
    IUNPACKSEARCH5_EMIT();                                                     \
  } while (0)

/* One field split across two words: low part from the top of the current
 * word, high part from the bottom of the next word (which becomes the
 * current word). The next word is loaded before the group is tested. */
#define IUNPACKSEARCH5_STRADDLE(rshift, lshift, nextword)                      \
  do {                                                                         \
    lanes = _mm_srli_epi32(word, (rshift));                                    \
    word = _mm_loadu_si128(nextword);                                          \
    lanes = _mm_or_si128(                                                      \
        lanes, _mm_and_si128(_mm_slli_epi32(word, (lshift)), lowbits));        \
    IUNPACKSEARCH5_EMIT();                                                     \
  } while (0)

/**
 * Search a block packed with 5 bits per value: five 128-bit words, 32
 * groups of four lanes in total.
 *
 * Each group is extracted, passed through PrefixSum with *initOffset as the
 * seed, written back to *initOffset, and tested with CHECK_AND_INCREMENT,
 * which returns from this function as soon as a group contains a value that
 * is not below |key| (storing that value in |*presult|).
 *
 * If all 32 groups are below the key, stores key + 1 in |*presult| and
 * returns 128. |*initOffset| is left holding the last group processed.
 */
static int iunpacksearch5(__m128i *initOffset, const __m128i *in, uint32_t key,
                          uint32_t *presult) {
  int pos = 0;
  __m128i word = _mm_loadu_si128(in);
  const __m128i lowbits = _mm_set1_epi32((1U << 5) - 1);
  /* These two names are read from inside CHECK_AND_INCREMENT; do not rename. */
  const __m128i conversion = _mm_set1_epi32(2147483648U);
  const __m128i key4 = _mm_set1_epi32(key - 2147483648U);
  __m128i lanes;

  /* ---- word 0: fields at 0, 5, ..., 25 ---- */
  lanes = _mm_and_si128(word, lowbits);
  IUNPACKSEARCH5_EMIT();
  IUNPACKSEARCH5_FIELD(5);
  IUNPACKSEARCH5_FIELD(10);
  IUNPACKSEARCH5_FIELD(15);
  IUNPACKSEARCH5_FIELD(20);
  IUNPACKSEARCH5_FIELD(25);
  IUNPACKSEARCH5_STRADDLE(30, 5 - 3, in + 1);

  /* ---- word 1: fields at 3, 8, ..., 23 ---- */
  IUNPACKSEARCH5_FIELD(3);
  IUNPACKSEARCH5_FIELD(8);
  IUNPACKSEARCH5_FIELD(13);
  IUNPACKSEARCH5_FIELD(18);
  IUNPACKSEARCH5_FIELD(23);
  IUNPACKSEARCH5_STRADDLE(28, 5 - 1, in + 2);

  /* ---- word 2: fields at 1, 6, ..., 26 ---- */
  IUNPACKSEARCH5_FIELD(1);
  IUNPACKSEARCH5_FIELD(6);
  IUNPACKSEARCH5_FIELD(11);
  IUNPACKSEARCH5_FIELD(16);
  IUNPACKSEARCH5_FIELD(21);
  IUNPACKSEARCH5_FIELD(26);
  IUNPACKSEARCH5_STRADDLE(31, 5 - 4, in + 3);

  /* ---- word 3: fields at 4, 9, ..., 24 ---- */
  IUNPACKSEARCH5_FIELD(4);
  IUNPACKSEARCH5_FIELD(9);
  IUNPACKSEARCH5_FIELD(14);
  IUNPACKSEARCH5_FIELD(19);
  IUNPACKSEARCH5_FIELD(24);
  IUNPACKSEARCH5_STRADDLE(29, 5 - 2, in + 4);

  /* ---- word 4: fields at 2, 7, ..., 22, then the top field at 27 ---- */
  IUNPACKSEARCH5_FIELD(2);
  IUNPACKSEARCH5_FIELD(7);
  IUNPACKSEARCH5_FIELD(12);
  IUNPACKSEARCH5_FIELD(17);
  IUNPACKSEARCH5_FIELD(22);
  lanes = _mm_srli_epi32(word, 27);
  IUNPACKSEARCH5_EMIT();

  *presult = key + 1;
  return (128);
}

#undef IUNPACKSEARCH5_STRADDLE
#undef IUNPACKSEARCH5_FIELD
#undef IUNPACKSEARCH5_EMIT
_mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 6) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, 
presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, 
presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + PrefixSum(out, 
out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearch7(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 7) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); 
+ PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 7 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearch8(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 8) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = 
_mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + 
out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearch9(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 9) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out 
= _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = 
_mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, 
key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearch10(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 10) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, 
key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 10 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask)); + + PrefixSum(out, 
out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearch11(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = 
_mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 11) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); 
+ *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearch12(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 12) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); 
+ *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = 
_mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearch13(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 13) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = 
out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, 
presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 13 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearch14(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 14) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, 
out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearch15(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 15) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, 
mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, 
key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 4), mask)); + + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} +/* iunpacksearch16: reads consecutive 128-bit vectors from in (unaligned loads) and, step by step, extracts one 16-bit field from each 32-bit lane. Every extracted vector goes through PrefixSum with *initOffset, is stored back to *initOffset, and is passed to CHECK_AND_INCREMENT together with i, key and presult. When control falls off the last step, *presult is set to key + 1 and 128 is returned. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this view; CHECK_AND_INCREMENT presumably advances i, consumes the locals conversion and key4 (not referenced by name anywhere else here) and returns early once a lane reaches key - confirm against its definition. */ +static int iunpacksearch16(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 16) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); +/* low half: keep bits 0-15 of every lane */ + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); +/* high half: bits 16-31 of every lane; the next input vector is loaded here for the step that follows */ + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp 
= InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} +/* iunpacksearch17: reads consecutive 128-bit vectors from in (unaligned loads) and, step by step, extracts one 17-bit field per 32-bit lane; a field that does not fit in what is left of the current lane is completed with the low bits of the same lane of the next vector. Every extracted vector goes through PrefixSum with *initOffset, is stored back to *initOffset, and is passed to CHECK_AND_INCREMENT together with i, key and presult. When control falls off the last step, *presult is set to key + 1 and 128 is returned. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this view; CHECK_AND_INCREMENT presumably advances i, consumes the locals conversion and key4 (not referenced by name anywhere else here) and returns early once a lane reaches key - confirm against its definition. */ +static int iunpacksearch17(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 17) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); +/* first field: bits 0-16 of every lane */ + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); +/* this field straddles two input vectors: its 15 low bits are the top of the current lane, its 2 high bits the bottom of the next vector's lane */ + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 
4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = 
_mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} +/* iunpacksearch18: reads consecutive 128-bit vectors from in (unaligned loads) and, step by step, extracts one 18-bit field per 32-bit lane; a field that does not fit in what is left of the current lane is completed with the low bits of the same lane of the next vector. Every extracted vector goes through PrefixSum with *initOffset, is stored back to *initOffset, and is passed to CHECK_AND_INCREMENT together with i, key and presult. When control falls off the last step, *presult is set to key + 1 and 128 is returned. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this view; CHECK_AND_INCREMENT presumably advances i, consumes the locals conversion and key4 (not referenced by name anywhere else here) and returns early once a lane reaches key - confirm against its definition. */ +static int iunpacksearch18(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 18) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); +/* first field: bits 0-17 of every lane */ + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); +/* this field straddles two input vectors: its 14 low bits are the top of the current lane, its 4 high bits the bottom of the next vector's lane */ + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + 
out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); +/* the preceding field occupied bits 14-31, i.e. it ended exactly on the lane boundary, so extraction restarts at bit 0 of the vector that was just loaded */ + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} +/* iunpacksearch19: reads consecutive 128-bit vectors from in (unaligned loads) and, step by step, extracts one 19-bit field per 32-bit lane; a field that does not fit in what is left of the current lane is completed with the low bits of the same lane of the next vector. Every extracted vector goes through PrefixSum with *initOffset, is stored back to *initOffset, and is passed to CHECK_AND_INCREMENT together with i, key and presult. When control falls off the last step, *presult is set to key + 1 and 128 is returned. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this view; CHECK_AND_INCREMENT presumably advances i, consumes the locals conversion and key4 (not referenced by name anywhere else here) and returns early once a lane reaches key - confirm against its definition. */ +static int iunpacksearch19(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 19) - 1); + __m128i conversion = 
_mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); +/* first field: bits 0-18 of every lane */ + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); +/* this field straddles two input vectors: its 13 low bits are the top of the current lane, its 6 high bits the bottom of the next vector's lane */ + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 
11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + 
+ tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = 
out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearch20(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 20) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = 
_mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + 
out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + 
out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} +/* iunpacksearch21: unrolled search over input packed at 21 bits per value in each 32-bit lane. Parameters: initOffset - vector fed to PrefixSum and overwritten with each step's result; in - packed input, read with unaligned 128-bit loads and advanced as the unpacking proceeds; key - value handed to CHECK_AND_INCREMENT; presult - output slot. Each step isolates one 21-bit field per lane (shift and mask, OR-ing in bits from the next loaded vector when a field straddles two), runs PrefixSum(out, out, *initOffset), stores out to *initOffset and invokes CHECK_AND_INCREMENT(i, out, key, presult). NOTE(review): that macro is defined outside this chunk; it presumably performs the comparison and the early return - confirm. When control reaches the end, *presult is set to key + 1 and 128 is returned. */ +static int iunpacksearch21(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 21) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); +/* conversion and key4 are not referenced by name again in this body; presumably CHECK_AND_INCREMENT picks them up (2147483648U is 2^31, which looks like a signed-compare bias) - TODO confirm against the macro. */ + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 20), mask)); + + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = 
_mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = 
_mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 21 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} +/* iunpacksearch22: unrolled search over input packed at 22 bits per value in each 32-bit lane. Parameters: initOffset - vector fed to PrefixSum and overwritten with each step's result; in - packed input, read with unaligned 128-bit loads and advanced as the unpacking proceeds; key - value handed to CHECK_AND_INCREMENT; presult - output slot. Each step isolates one 22-bit field per lane (shift and mask, OR-ing in bits from the next loaded vector when a field straddles two), runs PrefixSum(out, out, *initOffset), stores out to *initOffset and invokes CHECK_AND_INCREMENT(i, out, key, presult). NOTE(review): that macro is defined outside this chunk; it presumably performs the comparison and the early return - confirm. When control reaches the end, *presult is set to key + 1 and 128 is returned. */ +static int iunpacksearch22(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 22) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); +/* conversion and key4 are not referenced by name again in this body; presumably CHECK_AND_INCREMENT picks them up (2147483648U is 2^31, which looks like a signed-compare bias) - TODO confirm against the macro. */ + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, 
presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = 
_mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} +/* iunpacksearch23: unrolled search over input packed at 23 bits per value in each 32-bit lane. Parameters: initOffset - vector fed to PrefixSum and overwritten with each step's result; in - packed input, read with unaligned 128-bit loads and advanced as the unpacking proceeds; key - value handed to CHECK_AND_INCREMENT; presult - output slot. Each step isolates one 23-bit field per lane (shift and mask, OR-ing in bits from the next loaded vector when a field straddles two), runs PrefixSum(out, out, *initOffset), stores out to *initOffset and invokes CHECK_AND_INCREMENT(i, out, key, presult). NOTE(review): that macro is defined outside this chunk; it presumably performs the comparison and the early return - confirm. When control reaches the end, *presult is set to key + 1 and 128 is returned. */ +static int iunpacksearch23(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 23) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); +/* conversion and key4 are not referenced by name again in this body; presumably CHECK_AND_INCREMENT picks them up (2147483648U is 2^31, which looks like a signed-compare bias) - TODO confirm against the macro. */ + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 23 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 23 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 21), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 23 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} +/* iunpacksearch24: unrolled search over input packed at 24 bits per value in each 32-bit lane. Parameters: initOffset - vector fed to PrefixSum and overwritten with each step's result; in - packed input, read with unaligned 128-bit loads and advanced as the unpacking proceeds; key - value handed to CHECK_AND_INCREMENT; presult - output slot. Each step isolates one 24-bit field per lane (shift and mask, OR-ing in bits from the next loaded vector when a field straddles two), runs PrefixSum(out, out, *initOffset), stores out to *initOffset and invokes CHECK_AND_INCREMENT(i, out, key, presult). NOTE(review): that macro is defined outside this chunk; it presumably performs the comparison and the early return - confirm. When control reaches the end, *presult is set to key + 1 and 128 is returned. */ +static int iunpacksearch24(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 24) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); +/* conversion and key4 are not referenced by name again in this body; presumably CHECK_AND_INCREMENT picks them up (2147483648U is 2^31, which looks like a signed-compare bias) - TODO confirm against the macro. */ + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, 
key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + 
++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + 
+ tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = 
out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} +/* iunpacksearch25: walks a block bit-packed at 25 bits per value, four 32-bit lanes at a time. Each of the 32 unrolled steps rebuilds one vector of 25-bit fields from the words loaded from in: a field wholly inside the current word is shifted right and masked; a field that straddles two words takes the remaining high bits of the current word and ORs in the next word shifted left and masked; the last field is the top 25 bits of the final word. Every rebuilt vector goes through PrefixSum with *initOffset, is stored back into *initOffset, and is handed to CHECK_AND_INCREMENT with i, key and presult. If control reaches the end, *presult is set to key + 1 and 128 is returned. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this view; conversion and key4 are never referenced directly below, so they are presumably consumed by CHECK_AND_INCREMENT, which presumably also performs the early return when a match is found - confirm against the macro. */ +static int iunpacksearch25(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 25) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); +/* Field 0: the low 25 bits of the first word in each lane. */ + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); +/* Field 1 straddles two words: the 7 bits left above bit 25 become the low bits, then the next word is loaded and its low 18 bits are shifted up by 7 and masked in. */ + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); +/* This field lies wholly inside the current word (bits 4..28): shift and mask only, no new load. */ + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, 
presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = 
_mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 23), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 6), mask)); + + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 21), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 14), mask)); + + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} +/* iunpacksearch26: walks a block bit-packed at 26 bits per value, four 32-bit lanes at a time. Each of the 32 unrolled steps rebuilds one vector of 26-bit fields from the words loaded from in: a field wholly inside the current word is shifted right and masked; a field that straddles two words takes the remaining high bits of the current word and ORs in the next word shifted left and masked; a field that ends exactly on a word boundary is just the shifted word, after which the next word is loaded. The shift pattern repeats after 16 fields. Every rebuilt vector goes through PrefixSum with *initOffset, is stored back into *initOffset, and is handed to CHECK_AND_INCREMENT with i, key and presult. If control reaches the end, *presult is set to key + 1 and 128 is returned. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this view; conversion and key4 are never referenced directly below, so they are presumably consumed by CHECK_AND_INCREMENT, which presumably also performs the early return when a match is found - confirm against the macro. */ +static int iunpacksearch26(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 26) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); +/* Field 0: the low 26 bits of the first word in each lane. */ + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); +/* Field 1 straddles two words: the 6 bits left above bit 26 become the low bits, then the next word is loaded and its low 20 bits are shifted up by 6 and masked in. */ + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 
2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 18), mask)); + + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); +/* This field ends exactly on the word boundary (bits 6..31): the logical shift already clears the upper bits, so no mask or merge is applied; the next word is loaded for the following field. */ + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 2), mask)); + + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 18), mask)); + + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} +/* iunpacksearch27: walks a block bit-packed at 27 bits per value, four 32-bit lanes at a time. Each of the 32 unrolled steps rebuilds one vector of 27-bit fields from the words loaded from in: a field wholly inside the current word is shifted right and masked; a field that straddles two words takes the remaining high bits of the current word and ORs in the next word shifted left and masked; the last field is the top 27 bits of the final word. Every rebuilt vector goes through PrefixSum with *initOffset, is stored back into *initOffset, and is handed to CHECK_AND_INCREMENT with i, key and presult. If control reaches the end, *presult is set to key + 1 and 128 is returned. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this view; conversion and key4 are never referenced directly below, so they are presumably consumed by CHECK_AND_INCREMENT, which presumably also performs the early return when a match is found - confirm against the macro. */ +static int iunpacksearch27(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 27) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); +/* Field 0: the low 27 bits of the first word in each lane. */ + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); +/* Field 1 straddles two words: the 5 bits left above bit 27 become the low bits, then the next word is loaded and its low 22 bits are shifted up by 5 and masked in. */ + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 
12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); +/* This field lies wholly inside the current word (bits 2..28): shift and mask only, no new load. */ + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 26), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 21), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, 
key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 23), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 25), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + 
tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} +/* iunpacksearch28: walks a block bit-packed at 28 bits per value, four 32-bit lanes at a time. Each of the 32 unrolled steps rebuilds one vector of 28-bit fields from the words loaded from in: the first field of each group is the masked low 28 bits of a word; a field that straddles two words takes the remaining high bits of the current word and ORs in the next word shifted left and masked; a field that ends exactly on a word boundary is just the shifted word, after which the next word is loaded. The shift pattern repeats after 8 fields. Every rebuilt vector goes through PrefixSum with *initOffset, is stored back into *initOffset, and is handed to CHECK_AND_INCREMENT with i, key and presult. If control reaches the end, *presult is set to key + 1 and 128 is returned. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this view; conversion and key4 are never referenced directly below, so they are presumably consumed by CHECK_AND_INCREMENT, which presumably also performs the early return when a match is found - confirm against the macro. */ +static int iunpacksearch28(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 28) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); +/* Field 0: the low 28 bits of the first word in each lane. */ + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); +/* Field 1 straddles two words: the 4 bits left above bit 28 become the low bits, then the next word is loaded and its low 24 bits are shifted up by 4 and masked in. */ + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = 
out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); +/* This field ends exactly on the word boundary (bits 4..31): the logical shift already clears the upper bits, so no mask or merge is applied; the next word is loaded for the following field. */ + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearch29(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 29) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 26), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 23), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, 
presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 28), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 25), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 16), mask)); + + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 27), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out 
= _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 21), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 3), mask)); + + PrefixSum(out, 
out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearch30(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 30) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 30 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 6), mask)); + + PrefixSum(out, out, *initOffset); 
+ *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = 
tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearch31(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 31) - 1); + __m128i conversion = _mm_set1_epi32(2147483648U); + __m128i key4 = _mm_set1_epi32(key - 2147483648U); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 30), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 29), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, 
presult); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 28), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 27), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 26), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 25), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 23), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 21), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 
14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 5); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 3); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 2); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + tmp = _mm_srli_epi32(InReg, 1); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, key, presult); + + *presult = key + 1; + return (128); +} + +static int iunpacksearch32(__m128i *initOffset, const __m128i *in, uint32_t key, + uint32_t *presult) { + uint32_t *in32 = (uint32_t 
*)in; + /* Width-32 case: in32 views the block as 128 plain uint32_t values, so the search is handed to lower_bound over the index range 0..128 instead of unpacking. NOTE(review): lower_bound is defined outside this view; this code assumes it returns an index that is safe to dereference (below 128) - confirm. */ int answer = lower_bound(in32, key, 0, 128); + /* Candidate is still below key: report a miss as key + 1 at position 128, the same fall-through result as the other iunpacksearchN routines. */ if (in32[answer] < key) { + *presult = key + 1; + return (128); + } + *presult = in32[answer]; + /* New offset is the last 128-bit word of the block (in + 31). NOTE(review): this is an aligned load, whereas the sibling routines read in with _mm_loadu_si128 - confirm in is always 16-byte aligned on this path. */ *initOffset = _mm_load_si128(in + 31); + return answer; +} + /* simdsearchd1: entry point for searching one packed block. Forwards initOffset, in, key and presult unchanged to the iunpacksearch routine that matches bit (0 through 32) and returns the result of that routine. Any other value of bit returns -1 without touching initOffset or presult. The routines visible in this file return 128 with *presult = key + 1 when the search falls through; what they report on a hit is decided by the search variant of CHECK_AND_INCREMENT, defined outside this view. */ +int simdsearchd1(__m128i *initOffset, const __m128i *in, uint32_t bit, + uint32_t key, uint32_t *presult) { + switch (bit) { + case 0: + return iunpacksearch0(initOffset, in, key, presult); + + case 1: + return iunpacksearch1(initOffset, in, key, presult); + + case 2: + return iunpacksearch2(initOffset, in, key, presult); + + case 3: + return iunpacksearch3(initOffset, in, key, presult); + + case 4: + return iunpacksearch4(initOffset, in, key, presult); + + case 5: + return iunpacksearch5(initOffset, in, key, presult); + + case 6: + return iunpacksearch6(initOffset, in, key, presult); + + case 7: + return iunpacksearch7(initOffset, in, key, presult); + + case 8: + return iunpacksearch8(initOffset, in, key, presult); + + case 9: + return iunpacksearch9(initOffset, in, key, presult); + + case 10: + return iunpacksearch10(initOffset, in, key, presult); + + case 11: + return iunpacksearch11(initOffset, in, key, presult); + + case 12: + return iunpacksearch12(initOffset, in, key, presult); + + case 13: + return iunpacksearch13(initOffset, in, key, presult); + + case 14: + return iunpacksearch14(initOffset, in, key, presult); + + case 15: + return iunpacksearch15(initOffset, in, key, presult); + + case 16: + return iunpacksearch16(initOffset, in, key, presult); + + case 17: + return iunpacksearch17(initOffset, in, key, presult); + + case 18: + return iunpacksearch18(initOffset, in, key, presult); + + case 19: + return iunpacksearch19(initOffset, in, key, presult); + + case 20: + return iunpacksearch20(initOffset, in, key, presult); + + case 21: + return iunpacksearch21(initOffset, in, key, presult); + + case 22: + return iunpacksearch22(initOffset, in, key, presult); + + case 23: + return iunpacksearch23(initOffset, in, key, presult); + + case 24: + return
iunpacksearch24(initOffset, in, key, presult); + + case 25: + return iunpacksearch25(initOffset, in, key, presult); + + case 26: + return iunpacksearch26(initOffset, in, key, presult); + + case 27: + return iunpacksearch27(initOffset, in, key, presult); + + case 28: + return iunpacksearch28(initOffset, in, key, presult); + + case 29: + return iunpacksearch29(initOffset, in, key, presult); + + case 30: + return iunpacksearch30(initOffset, in, key, presult); + + case 31: + return iunpacksearch31(initOffset, in, key, presult); + + case 32: + return iunpacksearch32(initOffset, in, key, presult); + + default: + break; + } + return (-1); +} + +#endif diff --git a/cpp-projects/base/data/simdcomp/simdpackedselect.c b/cpp-projects/base/data/simdcomp/simdpackedselect.c new file mode 100644 index 0000000..e81f66a --- /dev/null +++ b/cpp-projects/base/data/simdcomp/simdpackedselect.c @@ -0,0 +1,15357 @@ +/** + * This code is released under a BSD License. + */ +#ifdef __SSE4_1__ +#include "simdintegratedbitpacking.h" +#include + /* NOTE(review): the #include directly above has no header name in this copy; presumably an angle-bracket SSE4.1 intrinsics header was lost in extraction - restore it from the upstream file. */ /* shuffle_mask_bytes: control bytes for _mm_shuffle_epi8, consumed 16 at a time through shuffle_mask. Entry k (k = 0..3) begins with bytes 4k..4k+3, so shuffling with it moves 32-bit lane k of the source into lane 0. Only 64 of the 256 bytes have explicit initializers; the remainder is implicitly zero. */ +SIMDCOMP_ALIGNED(16) +int8_t shuffle_mask_bytes[256] = { + 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + /* The byte table reinterpreted as an array of 128-bit shuffle controls; the cast also drops nothing but adds const on the pointee. */ +static const __m128i *shuffle_mask = (__m128i *)shuffle_mask_bytes; + /* branchlessextract: returns 32-bit lane i of out without a branch. shuffle_mask[i] moves that lane to the bottom of the register and _mm_cvtsi128_si32 reads it back; the int result of the intrinsic is returned as uint32_t. Only table entries 0..3 carry lane selectors, so i is expected to be in 0..3. */ +uint32_t branchlessextract(__m128i out, int i) { + return _mm_cvtsi128_si32(_mm_shuffle_epi8(out, shuffle_mask[i])); +} + /* PrefixSum: ret = running (inclusive) sum over the four 32-bit lanes of curr, plus the highest lane of prev broadcast to every lane (shuffle 0xff). Done as two shift-and-add steps, by 8 bytes and then by 4 bytes. Callers in this file pass the same variable as ret and curr. */ +#define PrefixSum(ret, curr, prev) \ + do { \ + const __m128i _tmp1 = _mm_add_epi32(_mm_slli_si128(curr, 8), curr); \ + const __m128i _tmp2 = _mm_add_epi32(_mm_slli_si128(_tmp1, 4), _tmp1); \ + ret = _mm_add_epi32(_tmp2, _mm_shuffle_epi32(prev, 0xff)); \ + } while (0) + /* CHECK_AND_INCREMENT (select variant): advances the element counter i by 4, one decoded group, and if slot lies inside that group returns lane slot - (i - 4) from the ENCLOSING function. NOTE(review): expands to two bare statements with an embedded return rather than a do-while wrapper, so it is only safe as a full statement inside a function returning uint32_t. */ +#define CHECK_AND_INCREMENT(i, out, slot) \ + i += 4; \ + if (i > slot) { \ + return branchlessextract(out, slot - (i - 4)); \ + } + /* iunpackselect1: select for 1-bit packing. Unpacks four values per step from in, prefix-sums each group onto *initOffset (which is overwritten with every decoded group), and returns the value at index slot as soon as its group has been decoded. Falls through to return 0 when slot is 128 or more. */ +static uint32_t iunpackselect1(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0;
+ __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 1) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 23); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 27); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 29); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + +static uint32_t iunpackselect2(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 2) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + +static uint32_t iunpackselect3(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 3) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset 
= out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 27); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 3 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 3 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 23); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + +static uint32_t iunpackselect4(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 4) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + 
return (0); +} + +static uint32_t iunpackselect5(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 5) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + 
out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 23); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = 
_mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + +static uint32_t iunpackselect6(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; 
+ __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 6) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 
2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + /* iunpackselect7: steps through 32 groups of four 7-bit fields held in the 128-bit words in[0]..in[6], read with unaligned loads. For each group it isolates one field per 32-bit lane, applies PrefixSum against *initOffset, writes the result back to *initOffset, and passes it to CHECK_AND_INCREMENT together with the counter i and the requested slot. Both macros are defined outside this function; presumably CHECK_AND_INCREMENT returns the selected value once slot is reached (confirm against its definition). Returns 0 if control reaches the end. */ +static uint32_t iunpackselect7(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 7) - 1); + + 
tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); /* only 4 bits are left in this word: the other 3 are taken from the next word, shifted left by 4 and masked, then OR-ed in */ + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = 
_mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 23); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + 
+ tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + /* iunpackselect8: steps through 32 groups of four 8-bit fields held in the 128-bit words in[0]..in[7], read with unaligned loads. Since 8 divides 32, every field lies inside a single 32-bit lane and each word supplies exactly four groups. For each group it isolates one field per lane, applies PrefixSum against *initOffset, writes the result back to *initOffset, and passes it to CHECK_AND_INCREMENT together with the counter i and the requested slot. Both macros are defined outside this function; presumably CHECK_AND_INCREMENT returns the selected value once slot is reached (confirm against its definition). Returns 0 if control reaches the end. */ +static uint32_t iunpackselect8(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + 
__m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 8) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); /* the top byte needs no mask after a logical right shift by 24; the next word is loaded right away so the following step starts again at bit 0 */ + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, 
slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + /* iunpackselect9: steps through 32 groups of four 9-bit fields held in the 128-bit words in[0]..in[8], read with unaligned loads. For each group it isolates one field per 32-bit lane, applies PrefixSum against *initOffset, writes the result back to *initOffset, and passes it to CHECK_AND_INCREMENT together with the counter i and the requested slot. Both macros are defined outside this function; presumably CHECK_AND_INCREMENT returns the selected value once slot is reached (confirm against its definition). Returns 0 if control reaches the end. */ +static uint32_t iunpackselect9(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 9) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + 
tmp = _mm_srli_epi32(InReg, 27); /* only 5 bits are left in this word: the other 4 are taken from the next word, shifted left by 5 and masked, then OR-ed in */ + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + /* iunpackselect10: steps through 32 groups of four 10-bit fields held in the 128-bit words in[0]..in[9], read with unaligned loads. The shift pattern repeats after 16 groups, which consume exactly five words. For each group it isolates one field per 32-bit lane, applies PrefixSum against *initOffset, writes the result back to *initOffset, and passes it to CHECK_AND_INCREMENT together with the counter i and the requested slot. Both macros are defined outside this function; presumably CHECK_AND_INCREMENT returns the selected value once slot is reached (confirm against its definition). Returns 0 if control reaches the end. */ +static uint32_t iunpackselect10(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 10) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); /* only 2 bits are left in this word: the other 8 are taken from the next word, shifted left by 2 and masked, then OR-ed in */ + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); /* this field ends exactly at bit 31, so nothing is borrowed; the next word is loaded and the same shift pattern starts over at bit 0 */ + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + /* iunpackselect11: steps through 32 groups of four 11-bit fields held in the 128-bit words in[0]..in[10], read with unaligned loads. For each group it isolates one field per 32-bit lane, applies PrefixSum against *initOffset, writes the result back to *initOffset, and passes it to CHECK_AND_INCREMENT together with the counter i and the requested slot. Both macros are defined outside this function; presumably CHECK_AND_INCREMENT returns the selected value once slot is reached (confirm against its definition). Returns 0 if control reaches the end. */ +static uint32_t iunpackselect11(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = 
_mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 11) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); /* only 10 bits are left in this word: the last 1 is taken from the next word, shifted left by 10 and masked, then OR-ed in */ + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = 
_mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + /* iunpackselect12: steps through 32 groups of four 12-bit fields held in the 128-bit words in[0]..in[11], read with unaligned loads. The shift pattern repeats after 8 groups, which consume exactly three words. For each group it isolates one field per 32-bit lane, applies PrefixSum against *initOffset, writes the result back to *initOffset, and passes it to CHECK_AND_INCREMENT together with the counter i and the requested slot. Both macros are defined outside this function; presumably CHECK_AND_INCREMENT returns the selected value once slot is reached (confirm against its definition). Returns 0 if control reaches the end. */ +static uint32_t iunpackselect12(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 12) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); /* only 8 bits are left in this word: the other 4 are taken from the next word, shifted left by 8 and masked, then OR-ed in */ + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); /* this field ends exactly at bit 31, so nothing is borrowed; the next word is loaded and the same shift pattern starts over at bit 0 */ + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + 
InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + /* iunpackselect13: steps through 32 groups of four 13-bit fields (one per 32-bit lane, isolated with mask) packed across 13 consecutive unaligned 128-bit loads from in. Each group is run through PrefixSum with *initOffset, written back to *initOffset, then handed to CHECK_AND_INCREMENT(i, out, slot). PrefixSum and CHECK_AND_INCREMENT are defined outside this view; presumably the latter returns the lane chosen by slot once i passes it (TODO confirm). A field that straddles two loads is rebuilt by OR-ing the right-shifted old vector with a masked left shift of the new one. Falls through to return 0 when no check returns earlier. */ +static uint32_t iunpackselect13(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 13) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, 
out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); + 
out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + /* iunpackselect14: steps through 32 groups of four 14-bit fields (one per 32-bit lane, isolated with mask) packed across 14 consecutive unaligned 128-bit loads from in. Each group is run through PrefixSum with *initOffset, written back to *initOffset, then handed to CHECK_AND_INCREMENT(i, out, slot). PrefixSum and CHECK_AND_INCREMENT are defined outside this view; presumably the latter returns the lane chosen by slot once i passes it (TODO confirm). Fields straddling two loads are rebuilt by OR-ing the right-shifted old vector with a masked left shift of the new one; after the 16th group (shift by 18) the field ends exactly on the vector boundary, so the next load needs no stitching and the shift pattern repeats. Falls through to return 0 when no check returns earlier. */ +static uint32_t iunpackselect14(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 14) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 
10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = 
_mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg 
= _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + /* iunpackselect15: steps through 32 groups of four 15-bit fields (one per 32-bit lane, isolated with mask) packed across 15 consecutive unaligned 128-bit loads from in. Each group is run through PrefixSum with *initOffset, written back to *initOffset, then handed to CHECK_AND_INCREMENT(i, out, slot). PrefixSum and CHECK_AND_INCREMENT are defined outside this view; presumably the latter returns the lane chosen by slot once i passes it (TODO confirm). A field that straddles two loads is rebuilt by OR-ing the right-shifted old vector with a masked left shift of the new one. Falls through to return 0 when no check returns earlier. */ +static uint32_t 
iunpackselect15(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 15) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 7), mask)); + + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 4), 
mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + /* iunpackselect16: steps through 32 groups of four 16-bit fields packed across 16 consecutive unaligned 128-bit loads from in. Every loaded vector yields exactly two groups: the low half of each 32-bit lane (via mask) and the high half (via a right shift by 16), so no field straddles two loads. Each group is run through PrefixSum with *initOffset, written back to *initOffset, then handed to CHECK_AND_INCREMENT(i, out, slot). PrefixSum and CHECK_AND_INCREMENT are defined outside this view; presumably the latter returns the lane chosen by slot once i passes it (TODO confirm). Falls through to return 0 when no check returns earlier. */ +static uint32_t iunpackselect16(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 16) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = 
out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, 
out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + /* iunpackselect17: steps through 32 groups of four 17-bit fields (one per 32-bit lane, isolated with mask) packed across 17 consecutive unaligned 128-bit loads from in. Each group is run through PrefixSum with *initOffset, written back to *initOffset, then handed to CHECK_AND_INCREMENT(i, out, slot). PrefixSum and CHECK_AND_INCREMENT are defined outside this view; presumably the latter returns the lane chosen by slot once i passes it (TODO confirm). A field that straddles two loads is rebuilt by OR-ing the right-shifted old vector with a masked left shift of the new one. Falls through to return 0 when no check returns earlier. */ +static uint32_t iunpackselect17(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 17) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = 
_mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = 
_mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 
9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + +static uint32_t iunpackselect18(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 18) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 4), mask)); + + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, 
out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + /* iunpackselect19: fully unrolled walk over the 19-bit fields packed in the 32-bit lanes of consecutive __m128i words read from `in` with unaligned loads. Every step isolates the next field with a 19-bit mask (a field that crosses a word boundary is OR-ed together from two consecutive loads), runs PrefixSum on it together with *initOffset, writes the result back to *initOffset and then invokes CHECK_AND_INCREMENT(i, out, slot). Returns 0 when control reaches the end of the sequence. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this chunk; presumably the latter returns the value at position `slot` as soon as i has advanced past it -- confirm against its definition. */ +static uint32_t iunpackselect19(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 19) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); /* this field crosses a word boundary: the top 13 bits of the current lane become its low part, and the low 6 bits of the next load are shifted up by 13 and OR-ed in as its high part */ + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + +
tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = 
_mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 8), mask)); + + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + /* iunpackselect20: fully unrolled walk over the 20-bit fields packed in the 32-bit lanes of consecutive __m128i words read from `in` with unaligned loads. Every step isolates the next field with a 20-bit mask (a field that crosses a word boundary is OR-ed together from two consecutive loads), runs PrefixSum on it together with *initOffset, writes the result back to *initOffset and then invokes CHECK_AND_INCREMENT(i, out, slot). Returns 0 when control reaches the end of the sequence. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this chunk; presumably the latter returns the value at position `slot` as soon as i has advanced past it -- confirm against its definition. */ +static uint32_t iunpackselect20(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 20)
- 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + /* iunpackselect21: fully unrolled walk over the 21-bit fields packed in the 32-bit lanes of consecutive __m128i words read from `in` with unaligned loads. Every step isolates the next field with a 21-bit mask (a field that crosses a word boundary is OR-ed together from two consecutive loads), runs PrefixSum on it together with *initOffset, writes the result back to *initOffset and then invokes CHECK_AND_INCREMENT(i, out, slot). Returns 0 when control reaches the end of the sequence. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this chunk; presumably the latter returns the value at position `slot` as soon as i has advanced past it -- confirm against its definition. */ +static uint32_t iunpackselect21(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 21) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); /* this field crosses a word boundary: the top 11 bits of the current lane become its low part, and the low 10 bits of the next load are shifted up by 11 and OR-ed in as its high part */ + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out =
tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 
21 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + /* iunpackselect22: fully unrolled walk over the 22-bit fields packed in the 32-bit lanes of consecutive __m128i words read from `in` with unaligned loads. Every step isolates the next field with a 22-bit mask (a field that crosses a word boundary is OR-ed together from two consecutive loads), runs PrefixSum on it together with *initOffset, writes the result back to *initOffset and then invokes CHECK_AND_INCREMENT(i, out, slot). Returns 0 when control reaches the end of the sequence. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this chunk; presumably the latter returns the value at position `slot` as soon as i has advanced past it -- confirm against its definition. */ +static uint32_t iunpackselect22(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 22) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); /* this field crosses a word boundary: the top 10 bits of the current lane become its low part, and the low 12 bits of the next load are shifted up by 10 and OR-ed in as its high part */ + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out =
_mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; 
+ CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} +/* iunpackselect23: walks one block packed at 23 bits per value. Each of the 32 unrolled steps rebuilds one __m128i holding four 23-bit fields (one per 32-bit lane) from the words read at in, using right/left shifts and the (1U << 23) - 1 mask; a field that straddles two input words is formed by OR-ing the leftover high bits of the old word with the shifted, masked low bits of the next one. After each step the vector goes through PrefixSum(out, out, *initOffset), is stored back into *initOffset, and is handed to CHECK_AND_INCREMENT(i, out, slot). In total 23 vectors are read from in with unaligned loads. Returns 0 if control falls through all 32 steps. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this chunk; presumably the latter returns the value at position slot as soon as it is reached - confirm against their definitions. */ +static uint32_t iunpackselect23(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i 
out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 23) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 23 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 7), mask)); + + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 21), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = 
_mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} +/* iunpackselect24: walks one block packed at 24 bits per value. Each of the 32 unrolled steps rebuilds one __m128i holding four 24-bit fields (one per 32-bit lane) from the words read at in; the shift schedule 0, 24, 16, 8 repeats eight times, and the step that shifts by 8 consumes the rest of the current word, so the next word is loaded without any OR-combining. After each step the vector goes through PrefixSum(out, out, *initOffset), is stored back into *initOffset, and is handed to CHECK_AND_INCREMENT(i, out, slot). In total 24 vectors are read from in with unaligned loads. Returns 0 if control falls through all 32 steps. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this chunk; presumably the latter returns the value at position slot as soon as it is reached - confirm against their definitions. */ +static uint32_t iunpackselect24(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 24) - 1); /* keeps the low 24 bits of every 32-bit lane */ + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + 
out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} +/* iunpackselect25: walks one block packed at 25 bits per value. Each of the 32 unrolled steps rebuilds one __m128i holding four 25-bit fields (one per 32-bit lane) from the words read at in, using right/left shifts and the (1U << 25) - 1 mask; a field that straddles two input words is formed by OR-ing the leftover high bits of the old word with the shifted, masked low bits of the next one. After each step the vector goes through PrefixSum(out, out, *initOffset), is stored back into *initOffset, and is handed to CHECK_AND_INCREMENT(i, out, slot). In total 25 vectors are read from in with unaligned loads. Returns 0 if control falls through all 32 steps. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this chunk; presumably the latter returns the value at position slot as soon as it is reached - confirm against their definitions. */ +static uint32_t iunpackselect25(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 25) - 1); /* keeps the low 25 bits of every 32-bit lane */ + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + 
++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); 
+ out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 23), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 25 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 21), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 14), mask)); + + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} +/* iunpackselect26: walks one block packed at 26 bits per value. Each of the 32 unrolled steps rebuilds one __m128i holding four 26-bit fields (one per 32-bit lane) from the words read at in, using right/left shifts and the (1U << 26) - 1 mask; a field that straddles two input words is formed by OR-ing the leftover high bits of the old word with the shifted, masked low bits of the next one. The 16-step shift schedule is executed twice. After each step the vector goes through PrefixSum(out, out, *initOffset), is stored back into *initOffset, and is handed to CHECK_AND_INCREMENT(i, out, slot). In total 26 vectors are read from in with unaligned loads. Returns 0 if control falls through all 32 steps. NOTE(review): PrefixSum and CHECK_AND_INCREMENT are defined outside this chunk; presumably the latter returns the value at position slot as soon as it is reached - confirm against their definitions. */ +static uint32_t iunpackselect26(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 26) - 1); /* keeps the low 26 bits of every 32-bit lane */ + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 
- 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; 
+ ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + +static uint32_t iunpackselect27(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 27) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 2), mask)); + + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 26), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = 
_mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 21), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 23), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, 
slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 25), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + 
CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + +static uint32_t iunpackselect28(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 28) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = 
_mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, 
slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + +static uint32_t iunpackselect29(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 29) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 26), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 23), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 14), mask)); + + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 28), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 25), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 22), mask)); + + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 27), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 21), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + 
++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + +static uint32_t iunpackselect30(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 30) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 22), mask)); + + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 
22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); 
+ out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + +static uint32_t iunpackselect31(__m128i *initOffset, const __m128i *in, + int slot) { + int i = 0; + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 31) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 30), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 29), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 28), mask)); + + PrefixSum(out, 
out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 27), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 26), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 25), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 23), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 21), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 13), mask)); + + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = 
_mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 5); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 3); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 2); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + tmp = _mm_srli_epi32(InReg, 1); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; + CHECK_AND_INCREMENT(i, out, slot); + + return (0); +} + +static uint32_t iunpackselect32(__m128i *initOffset, const __m128i *in, + int slot) { + uint32_t *begin = (uint32_t *)in; + *initOffset = _mm_load_si128(in + 31); + return begin[slot]; +} + +uint32_t simdselectd1(uint32_t init, const __m128i *in, uint32_t bit, + int slot) { + __m128i vecinitOffset = _mm_set1_epi32(init); + __m128i *initOffset = &vecinitOffset; + slot &= 127; /* to avoid problems */ + + switch (bit) { + case 0: + return _mm_extract_epi32(*initOffset, 3); + break; + + case 1: + return iunpackselect1(initOffset, in, slot); + break; + + case 2: + return iunpackselect2(initOffset, in, slot); + 
break; + + case 3: + return iunpackselect3(initOffset, in, slot); + break; + + case 4: + return iunpackselect4(initOffset, in, slot); + break; + + case 5: + return iunpackselect5(initOffset, in, slot); + break; + + case 6: + return iunpackselect6(initOffset, in, slot); + break; + + case 7: + return iunpackselect7(initOffset, in, slot); + break; + + case 8: + return iunpackselect8(initOffset, in, slot); + break; + + case 9: + return iunpackselect9(initOffset, in, slot); + break; + + case 10: + return iunpackselect10(initOffset, in, slot); + break; + + case 11: + return iunpackselect11(initOffset, in, slot); + break; + + case 12: + return iunpackselect12(initOffset, in, slot); + break; + + case 13: + return iunpackselect13(initOffset, in, slot); + break; + + case 14: + return iunpackselect14(initOffset, in, slot); + break; + + case 15: + return iunpackselect15(initOffset, in, slot); + break; + + case 16: + return iunpackselect16(initOffset, in, slot); + break; + + case 17: + return iunpackselect17(initOffset, in, slot); + break; + + case 18: + return iunpackselect18(initOffset, in, slot); + break; + + case 19: + return iunpackselect19(initOffset, in, slot); + break; + + case 20: + return iunpackselect20(initOffset, in, slot); + break; + + case 21: + return iunpackselect21(initOffset, in, slot); + break; + + case 22: + return iunpackselect22(initOffset, in, slot); + break; + + case 23: + return iunpackselect23(initOffset, in, slot); + break; + + case 24: + return iunpackselect24(initOffset, in, slot); + break; + + case 25: + return iunpackselect25(initOffset, in, slot); + break; + + case 26: + return iunpackselect26(initOffset, in, slot); + break; + + case 27: + return iunpackselect27(initOffset, in, slot); + break; + + case 28: + return iunpackselect28(initOffset, in, slot); + break; + + case 29: + return iunpackselect29(initOffset, in, slot); + break; + + case 30: + return iunpackselect30(initOffset, in, slot); + break; + + case 31: + return 
iunpackselect31(initOffset, in, slot); + break; + + case 32: + return iunpackselect32(initOffset, in, slot); + break; + + default: + break; + } + + return (-1); +} + +static void iunpackscan1(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 1) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = 
_mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 27); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, 
out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan2(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 2) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = 
_mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan3(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 3) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = 
_mm_srli_epi32(InReg, 27); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 3 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 3 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = 
_mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan4(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 4) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); 
+ PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = 
_mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan5(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 5) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); 
+ PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 5 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + PrefixSum(out, out, *initOffset); + 
*initOffset = out; +} + +static void iunpackscan6(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 6) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 2), mask)); + + PrefixSum(out, 
out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan7(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 7) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, 
out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 7 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan8(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 8) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); 
+ out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan9(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 9) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = 
_mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 9 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan10(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 10) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask)); + + PrefixSum(out, 
out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + 
tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan11(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 11) - 1); + 
+ tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); 
+ out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 11 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan12(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 12) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + 
++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = 
_mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 12 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out 
= tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan13(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 13) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, 
out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 17); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 
11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 13 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} +/* iunpackscan14: consumes 14 consecutive 128-bit words starting at `in` (unaligned loads) and, lane by lane, extracts 32 successive 14-bit fields; each extracted vector is run through PrefixSum(out, out, *initOffset) and the result is written back to *initOffset. No decoded vector is stored anywhere else, so the only visible result is the updated *initOffset. The shift pattern repeats after 16 fields (7 words). PrefixSum is defined outside this view -- presumably it adds the previous offset to the deltas in `out`; confirm against its definition. */ +static void iunpackscan14(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 14) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + +/* this field straddles two input words: keep the top bits of the current word, then OR in the low bits of the next word shifted up and masked */ tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 4), 
mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + +/* this field ends exactly at bit 32, so the shifted-down high bits are the whole value; just advance to the next word */ tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + 
tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 14 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} +/* iunpackscan15: consumes 15 consecutive 128-bit words starting at `in` (unaligned loads) and, lane by lane, extracts 32 successive 15-bit fields; each extracted vector is run through PrefixSum(out, out, *initOffset) and the result is written back to *initOffset. No decoded vector is stored anywhere else, so the only visible result is the updated *initOffset. PrefixSum is defined outside this view -- presumably it adds the previous offset to the deltas in `out`; confirm against its definition. */ +static void iunpackscan15(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 15) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + +/* this field straddles two input words: keep the top bits of the current word, then OR in the low bits of the next word shifted up and masked */ tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = 
_mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + 
tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 15 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} +/* iunpackscan16: consumes 16 consecutive 128-bit words starting at `in` (unaligned loads); every word yields two 16-bit fields per lane (low half via the mask, high half via a 16-bit right shift), 32 fields in total. Each extracted vector is run through PrefixSum(out, out, *initOffset) and the result is written back to *initOffset. No decoded vector is stored anywhere else, so the only visible result is the updated *initOffset. PrefixSum is defined outside this view -- presumably it adds the previous offset to the deltas in `out`; confirm against its definition. */ +static void iunpackscan16(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 16) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + +/* the high 16 bits are a complete field, so no mask or merge is needed; the next word is loaded before PrefixSum runs */ tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; 
+ InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} +/* iunpackscan17: consumes 17 consecutive 128-bit words starting at `in` (unaligned loads) and, lane by lane, extracts 32 successive 17-bit fields; each extracted vector is run through PrefixSum(out, out, *initOffset) and the result is written back to *initOffset. No decoded vector is stored anywhere else, so the only visible result is the updated *initOffset. PrefixSum is defined outside this view -- presumably it adds the previous offset to the deltas in `out`; confirm against its definition. */ +static void iunpackscan17(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 17) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + +/* this field straddles two input words: keep the top bits of the current word, then OR in the low bits of the next word shifted up and masked */ tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 6), mask)); + + 
PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 1), mask)); 
+ + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + 
out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 13); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 17 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} +/* iunpackscan18: consumes 18 consecutive 128-bit words starting at `in` (unaligned loads) and, lane by lane, extracts 32 successive 18-bit fields; each extracted vector is run through PrefixSum(out, out, *initOffset) and the result is written back to *initOffset. No decoded vector is stored anywhere else, so the only visible result is the updated *initOffset. The shift pattern repeats after 16 fields (9 words). PrefixSum is defined outside this view -- presumably it adds the previous offset to the deltas in `out`; confirm against its definition. */ +static void iunpackscan18(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 18) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + +/* this field straddles two input words: keep the top bits of the current word, then OR in the low bits of the next word shifted up and masked */ tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + +/* this field ends exactly at bit 32, so the shifted-down high bits are the whole value; just advance to the next word */ tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 4), mask)); + + PrefixSum(out, out, *initOffset); 
+ *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 10), mask)); + + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 18 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan19(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 19) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 
5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = 
_mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 19 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 19 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + /* iunpackscan20: 32 unrolled steps over 20-bit fields held in 32-bit lanes. Reads 20 consecutive 128-bit words starting at in, using unaligned loads. Each step builds one 4-lane vector in out, applies PrefixSum(out, out, *initOffset) and stores out back into *initOffset; the statements here write nothing else. PrefixSum is defined outside this excerpt; presumably a running-sum (delta decoding) step, to be confirmed at its definition. */ +static void iunpackscan20(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 20) - 1); /* keeps the low 20 bits of every 32-bit lane */ + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); /* value spans two words: its low bits are the top bits of the previous word, its high bits the low bits of the word just loaded */ + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, 
out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); /* the shift by 12 already leaves exactly 20 bits, so no mask or second word is needed; this load fetches the word the next step starts from */ + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = 
_mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + /* iunpackscan21: 32 unrolled steps over 21-bit fields held in 32-bit lanes. Reads 21 consecutive 128-bit words starting at in, using unaligned loads. Each step builds one 4-lane vector in out, applies PrefixSum(out, out, *initOffset) and stores out back into *initOffset; the statements here write nothing else. PrefixSum is defined outside this excerpt; presumably a running-sum (delta decoding) step, to be confirmed at its definition. */ +static void iunpackscan21(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 21) - 1); /* keeps the low 21 bits of every 32-bit lane */ + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 10), mask)); /* value spans two words: its low bits are the top bits of the previous word, its high bits the low bits of the word just loaded */ + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = 
_mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = 
out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = 
_mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 21 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + /* iunpackscan22: 32 unrolled steps over 22-bit fields held in 32-bit lanes. Reads 22 consecutive 128-bit words starting at in, using unaligned loads. Each step builds one 4-lane vector in out, applies PrefixSum(out, out, *initOffset) and stores out back into *initOffset; the statements here write nothing else. PrefixSum is defined outside this excerpt; presumably a running-sum (delta decoding) step, to be confirmed at its definition. */ +static void iunpackscan22(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 22) - 1); /* keeps the low 22 bits of every 32-bit lane */ + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask)); /* value spans two words: its low bits are the top bits of the previous word, its high bits the low bits of the word just loaded */ + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask)); + + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); /* the shift by 10 already leaves exactly 22 bits, so no mask or second word is needed; this load fetches the word the next step starts from */ + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); 
+ *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + 
tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + /* iunpackscan23: 32 unrolled steps over 23-bit fields held in 32-bit lanes. Reads 23 consecutive 128-bit words starting at in, using unaligned loads. Each step builds one 4-lane vector in out, applies PrefixSum(out, out, *initOffset) and stores out back into *initOffset; the statements here write nothing else. PrefixSum is defined outside this excerpt; presumably a running-sum (delta decoding) step, to be confirmed at its definition. */ +static void iunpackscan23(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 23) - 1); /* keeps the low 23 bits of every 32-bit lane */ + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 14), mask)); /* value spans two words: its low bits are the top bits of the previous word, its high bits the low bits of the word just loaded */ + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 19), mask)); + + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 21), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + 
tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 23 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + /* iunpackscan24: 32 unrolled steps over 24-bit fields held in 32-bit lanes. Reads 24 consecutive 128-bit words starting at in, using unaligned loads. Each step builds one 4-lane vector in out, applies PrefixSum(out, out, *initOffset) and stores out back into *initOffset; the statements here write nothing else. PrefixSum is defined outside this excerpt; presumably a running-sum (delta decoding) step, to be confirmed at its definition. */ +static void iunpackscan24(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 24) - 1); /* keeps the low 24 bits of every 32-bit lane */ + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); /* value spans two words: its low bits are the top bits of the previous word, its high bits the low bits of the word just loaded */ + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = 
_mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); /* the shift by 8 already leaves exactly 24 bits, so no mask or second word is needed; this load fetches the word the next step starts from */ + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + /* iunpackscan25: 32 unrolled steps over 25-bit fields held in 32-bit lanes. Reads 25 consecutive 128-bit words starting at in, using unaligned loads. Each step builds one 4-lane vector in out, applies PrefixSum(out, out, *initOffset) and stores out back into *initOffset; the statements here write nothing else. PrefixSum is defined outside this excerpt; presumably a running-sum (delta decoding) step, to be confirmed at its definition. */ +static void iunpackscan25(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 25) - 1); /* keeps the low 25 bits of every 32-bit lane */ + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 18), mask)); /* value spans two words: its low bits are the top bits of the previous word, its high bits the low bits of the word just loaded */ + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + 
out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 5), mask)); + + PrefixSum(out, out, 
*initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 5); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 23), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 21), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 25 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; /* the shift by 7 leaves exactly 25 bits, so the last value needs neither a mask nor another word */ + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan26(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + 
__m128i mask = _mm_set1_epi32((1U << 26) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 26 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + 
out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan27(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 27) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + 
out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 26), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 21), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 16), mask)); + + PrefixSum(out, 
out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 23), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = 
_mm_srli_epi32(InReg, 3); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 25), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 27 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 5); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan28(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 28) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = 
_mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = 
_mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), 
mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + PrefixSum(out, out, *initOffset); + 
*initOffset = out; +} + +static void iunpackscan29(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 29) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 26), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 23), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = 
_mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 5); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 28), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 25), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 29 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 27), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 21), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 15), mask)); 
+ + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 29 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 3); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan30(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 30) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 24), mask)); 
+ + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, 
_mm_and_si128(_mm_slli_epi32(InReg, 30 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 24), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out 
= tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = 
out; + + tmp = _mm_srli_epi32(InReg, 2); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan31(__m128i *initOffset, const __m128i *in) { + + __m128i InReg = _mm_loadu_si128(in); + __m128i out; + __m128i tmp; + __m128i mask = _mm_set1_epi32((1U << 31) - 1); + + tmp = InReg; + out = _mm_and_si128(tmp, mask); + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 31); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 30), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 30); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 29), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 29); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 28), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 28); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 27), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 27); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 26), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 26); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 25), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 25); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 24), mask)); + + PrefixSum(out, out, *initOffset); + 
*initOffset = out; + + tmp = _mm_srli_epi32(InReg, 24); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 23), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 23); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 22), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 22); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 21), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 21); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 20), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 20); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 19), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 19); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 18), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 18); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 17), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 17); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 16), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 16); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 
15), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 15); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 14), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 14); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 13), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 13); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 12), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 12); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 11), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 11); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 10), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 10); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 9), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 9); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 8), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 8); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 7), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 7); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = 
_mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 6), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 6); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 5), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 5); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 4), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 4); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 3), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 3); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 2), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 2); + out = tmp; + ++in; + InReg = _mm_loadu_si128(in); + out = _mm_or_si128(out, _mm_and_si128(_mm_slli_epi32(InReg, 31 - 1), mask)); + + PrefixSum(out, out, *initOffset); + *initOffset = out; + + tmp = _mm_srli_epi32(InReg, 1); + out = tmp; + PrefixSum(out, out, *initOffset); + *initOffset = out; +} + +static void iunpackscan32(__m128i *initOffset, const __m128i *in) { + *initOffset = _mm_load_si128(in + 31); +} + +void simdscand1(__m128i *initOffset, const __m128i *in, uint32_t bit) { + switch (bit) { + case 0: + return; + break; + + case 1: + iunpackscan1(initOffset, in); + break; + + case 2: + iunpackscan2(initOffset, in); + break; + + case 3: + iunpackscan3(initOffset, in); + break; + + case 4: + iunpackscan4(initOffset, in); + break; + + case 5: + iunpackscan5(initOffset, in); + break; + + case 6: + iunpackscan6(initOffset, in); + break; + + case 7: + iunpackscan7(initOffset, in); + break; + + case 8: + 
iunpackscan8(initOffset, in); + break; + + case 9: + iunpackscan9(initOffset, in); + break; + + case 10: + iunpackscan10(initOffset, in); + break; + + case 11: + iunpackscan11(initOffset, in); + break; + + case 12: + iunpackscan12(initOffset, in); + break; + + case 13: + iunpackscan13(initOffset, in); + break; + + case 14: + iunpackscan14(initOffset, in); + break; + + case 15: + iunpackscan15(initOffset, in); + break; + + case 16: + iunpackscan16(initOffset, in); + break; + + case 17: + iunpackscan17(initOffset, in); + break; + + case 18: + iunpackscan18(initOffset, in); + break; + + case 19: + iunpackscan19(initOffset, in); + break; + + case 20: + iunpackscan20(initOffset, in); + break; + + case 21: + iunpackscan21(initOffset, in); + break; + + case 22: + iunpackscan22(initOffset, in); + break; + + case 23: + iunpackscan23(initOffset, in); + break; + + case 24: + iunpackscan24(initOffset, in); + break; + + case 25: + iunpackscan25(initOffset, in); + break; + + case 26: + iunpackscan26(initOffset, in); + break; + + case 27: + iunpackscan27(initOffset, in); + break; + + case 28: + iunpackscan28(initOffset, in); + break; + + case 29: + iunpackscan29(initOffset, in); + break; + + case 30: + iunpackscan30(initOffset, in); + break; + + case 31: + iunpackscan31(initOffset, in); + break; + + case 32: + iunpackscan32(initOffset, in); + break; + + default: + break; + } + + return; +} + +#endif diff --git a/cpp-projects/base/exvr/ex_component.cpp b/cpp-projects/base/exvr/ex_component.cpp new file mode 100644 index 0000000..b5591fe --- /dev/null +++ b/cpp-projects/base/exvr/ex_component.cpp @@ -0,0 +1,111 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without 
restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "ex_component.hpp" + +using namespace tool::ex; + +bool ExComponent::is_visible(int cKey){ + if(exp->isVisibleCBP != nullptr){ + return (*exp->isVisibleCBP)(cKey); + }else{ + return false; + } +} + +bool ExComponent::is_updating(int cKey){ + if(exp->isUpdatingCBP != nullptr){ + return (*exp->isUpdatingCBP)(cKey); + }else{ + return false; + } +} + +bool ExComponent::is_closed(int cKey){ + if(exp->isClosedCBP != nullptr){ + return (*exp->isClosedCBP)(cKey); + }else{ + return false; + } +} + +long ExComponent::ellapsed_time_exp_ms(){ + if(exp->ellapsedTimeExpMsCBP != nullptr){ + return (*exp->ellapsedTimeExpMsCBP)(); + }else{ + return 0; + } +} + +long ExComponent::ellapsed_time_routine_ms(){ + if(exp->ellapsedTimeRoutineMsCBP != nullptr){ + return (*exp->ellapsedTimeRoutineMsCBP)(); + }else{ + return 0; + } +} + +void ExComponent::signal_bool(int index, bool value){ + if(exp->signalBoolCBP != nullptr){ + (*exp->signalBoolCBP)(key(), index, 
value ? 1 : 0); + }else{ + + } +} + +void ExComponent::signal_int(int index, int value){ + if(exp->signalIntCBP != nullptr){ + (*exp->signalIntCBP)(key(), index, value); + }else{ + + } +} + +void ExComponent::signal_float(int index, float value){ + if(exp->signalFloatCBP != nullptr){ + (*exp->signalFloatCBP)(key(), index, value); + }else{ + + } +} + +void ExComponent::signal_double(int index, double value){ + if(exp->signalDoubleCBP != nullptr){ + (*exp->signalDoubleCBP)(key(), index, value); + }else{ + + } +} + +void ExComponent::signal_string(int index, std::string value){ + if(exp->signalStringCBP != nullptr){ + (*exp->signalStringCBP)(key(), index, value.c_str()); + }else{ + + } +} + diff --git a/cpp-projects/base/exvr/ex_component.hpp b/cpp-projects/base/exvr/ex_component.hpp new file mode 100644 index 0000000..a9e702e --- /dev/null +++ b/cpp-projects/base/exvr/ex_component.hpp @@ -0,0 +1,94 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "ex_element.hpp" + +namespace tool::ex { + + +class ExComponent : public ExElement{ + +public: + + virtual ~ExComponent(){} + + virtual bool initialize(){return true;} + virtual void clean(){} + + virtual void start_experiment(){} + virtual void stop_experiment(){} + + virtual void set_current_config(const std::string &configName){static_cast(configName);} + virtual void update_from_current_config(){} + virtual void pre_start_routine(){} + virtual void start_routine(){} + virtual void post_start_routine(){} + virtual void stop_routine(){} + + virtual void on_gui(){} + virtual void pre_update(){} + virtual void update(){} + virtual void post_update(){} + + virtual void set_visibility(bool visible){static_cast(visible);} + virtual void set_update_state(bool doUpdate){static_cast(doUpdate);} + virtual void play(){} + virtual void pause(){} + + virtual void update_parameter_from_gui(const std::string &updatedParameter){static_cast(updatedParameter);} + virtual void action_from_gui(bool initConfig, const std::string &action){static_cast(initConfig);static_cast(action);} + + virtual void slot(int index){static_cast(index);} + + int key() override{ + return get(ParametersContainer::Global, "component_key"); + } + + Logger::SenderT sender_type() override{ + return Logger::SenderT::Component; + } + + // callbacks + bool is_visible(int cKey); + bool is_updating(int cKey); + bool is_closed(int cKey); + + long ellapsed_time_exp_ms(); + long ellapsed_time_routine_ms(); + + void signal_bool(int index, bool value); + void signal_int(int index, int value); + void 
signal_float(int index, float value); + void signal_double(int index, double value); + void signal_string(int index, std::string value); + +}; +} + diff --git a/cpp-projects/base/exvr/ex_element.cpp b/cpp-projects/base/exvr/ex_element.cpp new file mode 100644 index 0000000..069c35a --- /dev/null +++ b/cpp-projects/base/exvr/ex_element.cpp @@ -0,0 +1,110 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#include "ex_element.hpp" + +using namespace tool; +using namespace ex; + +auto ExElement::set_exp(ExExperiment *e) -> void{ + exp = e; +} + +auto ExElement::get_array_size(ParametersContainer pc, std::string_view name) const -> int{ + if(contains_array(pc, name)){ + return std::get<1>(arrayContainers.at(pc).at(name)); + } + Logger::get()->error(std::format("get_array_size: no value with name {}", name)); + return 0; +} + +auto ExElement::log_message(std::string message) -> void{ + if(exp->logMessageIdCBP != nullptr){ + (*exp->logMessageIdCBP)(message.c_str(), (int)sender_type(), key()); + }else{ + + } +} + +auto ExElement::log_warning(std::string warningMessage) -> void{ + if(exp->logWarningIdCBP != nullptr){ + (*exp->logWarningIdCBP)(warningMessage.c_str(), (int)sender_type(), key()); + }else{ + + } +} + +auto ExElement::log_error(std::string errorMessage) -> void{ + if(exp->logErrorIdCBP != nullptr){ + (*exp->logErrorIdCBP)(errorMessage.c_str(), (int)sender_type(), key()); + }else{ + + } +} + +auto ExElement::stack_trace_log(std::string stackTraceMessage) -> void{ + if(exp->stackTraceCBP != nullptr){ + (*exp->stackTraceCBP)(stackTraceMessage.c_str()); + }else{ + + } +} + +auto ExElement::next() -> void{ + if(exp->nextCBP != nullptr){ + (*exp->nextCBP)(); + }else{ + + } +} + +auto ExElement::previous() -> void{ + if(exp->previousCBP != nullptr){ + (*exp->previousCBP)(); + }else{ + + } +} + +auto ExElement::pause_editor() -> void{ + if(exp->pauseEditorCBP != nullptr){ + (*exp->pauseEditorCBP)(); + }else{ + + } +} + +auto ExElement::close(int key) -> void{ + if(exp->closeCBP != nullptr){ + (*exp->closeCBP)(key); + }else{ + + } +} + +auto ExElement::component_key(std::string componentName) -> int{ + return (*exp->getCBP)(componentName.c_str()); +} diff --git a/cpp-projects/base/exvr/ex_element.hpp b/cpp-projects/base/exvr/ex_element.hpp new file mode 100644 index 
0000000..2b219e1 --- /dev/null +++ b/cpp-projects/base/exvr/ex_element.hpp @@ -0,0 +1,168 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// base +#include "utility/unordered_map.hpp" +#include "utility/string_unordered_map.hpp" + +// local +#include + +// local +#include "ex_experiment.hpp" + +namespace tool::ex { + +using namespace std::literals::string_view_literals; + +enum class ParametersContainer : int { + InitConfig=0, CurrentConfig=1, Dynamic=2, Global=3,SizeEnum +}; + +using PC = ParametersContainer; +using Name = std::string_view; +using TParametersContainer = std::tuple; + +static constexpr TupleArray parametersContainers ={{ + TParametersContainer + {PC::InitConfig, "init"sv }, + {PC::CurrentConfig, "current"sv }, + {PC::Dynamic, "dynamic"sv }, + {PC::Global, "global"sv }, +}}; + +[[maybe_unused]] constexpr static auto get_name(ParametersContainer pc) -> Name{ + return parametersContainers.at<0,1>(pc); +} + +class ExElement{ +public: + + auto set_exp(tool::ex::ExExperiment *e) -> void; + + // callbacks + auto log_warning(std::string warningMessage) -> void; + auto log_error(std::string errorMessage) -> void; + auto log_message(std::string message) -> void; + auto stack_trace_log(std::string stackTraceMessage) -> void; + auto close(int cKey) -> void; + auto next() -> void; + auto previous() -> void; + auto pause_editor() -> void; + auto component_key(std::string componentName) -> int; + + // containers + auto contains(ParametersContainer pc, std::string_view name) const -> bool{ + if(containers.contains(pc)){ + return containers.at(pc).contains(name); + } + return false; + } + + template + auto get(ParametersContainer pc, std::string_view name) -> T{ + if(contains(pc,name)){ + try{ + return std::any_cast(containers[pc][name]); + }catch (const std::bad_any_cast& e){ + log_error(std::format("get: cast error: {}", e.what())); + } + } + return T{}; + } + + template + auto get_ptr(ParametersContainer pc, std::string_view name) -> T*{ + if(contains(pc,name)){ + try{ + return 
std::any_cast(&containers[pc][name]); + }catch (const std::bad_any_cast& e){ + log_error(std::format("get_ptr: cast error: {}", e.what())); + } + } + return nullptr; + } + + + template + auto set(ParametersContainer pc, std::string_view name, T value) -> void{ + if(!containers.contains(pc)){ + containers[pc] = {}; + } + containers[pc][name] = value; + } + + // array + auto contains_array(ParametersContainer pc, std::string_view name) const -> bool{ + if(arrayContainers.contains(pc)){ + return arrayContainers.at(pc).contains(name); + } + return false; + } + + template + auto set_array(ParametersContainer pc, std::string_view name, std::vector values) -> void{ + if(!containers.contains(pc)){ + arrayContainers[pc] = {}; + } + arrayContainers[pc][name] = std::make_tuple(std::move(values), static_cast(values.size())); + } + + template + auto get_array(ParametersContainer pc, std::string_view name) -> std::vector{ + if(contains_array(pc,name)){ + try{ + return std::any_cast>(std::get<0>(arrayContainers[pc][name])); + }catch (const std::bad_any_cast& e){ + log_error(std::format("get_array: cast error: {}", e.what())); + } + } + return std::vector{}; + } + + auto get_array_size(ParametersContainer pc, std::string_view name) const -> int; + + + virtual auto key() -> int = 0; + virtual auto sender_type() -> Logger::SenderT = 0; + + tool::ex::ExExperiment *exp = nullptr; + tool::Logger *logger = nullptr; + +protected: + + umap> containers = { + {ParametersContainer::Global, {}} + }; + umap>> arrayContainers{ + {ParametersContainer::Global, {}} + }; +}; +} + diff --git a/cpp-projects/base/exvr/ex_experiment.cpp b/cpp-projects/base/exvr/ex_experiment.cpp new file mode 100644 index 0000000..dc6a335 --- /dev/null +++ b/cpp-projects/base/exvr/ex_experiment.cpp @@ -0,0 +1,140 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby 
granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#include "ex_experiment.hpp" + +using namespace tool::ex; + +// Initializes the logger with an output file (Logger::init) and keeps the instance returned by +// Logger::take_ownership() in `logger`. Returns false, leaving `logger` untouched, if Logger::init fails. +bool ExExperiment::generate_logger(std::string_view pathDir, std::string_view fileName){ + if(!Logger::init(pathDir, fileName)){ + return false; + } + logger = Logger::take_ownership(); + return true; +} + +// Same as generate_logger() but through Logger::no_file_init(), which takes no path or file name. +void ExExperiment::generate_logger_no_file(){ + Logger::no_file_init(); + logger = Logger::take_ownership(); +} + +// Stores a copy of every callback received in the matching *CBP member, then connects the six logger +// signals so that each log entry is forwarded to the corresponding callback. +// NOTE(review): `logger` is dereferenced without a check, so generate_logger() or +// generate_logger_no_file() presumably has to be called first -- confirm against the callers. +void ExExperiment::init_callbacks( + LogMessageCB logMessageCB, + LogWarningCB logWarningCB, + LogErrorCB logErrorCB, + LogMessageIdCB logMessageIdCB, + LogWarningIdCB logWarningIdCB, + LogErrorIdCB logErrorIdCB, + StackTraceCB stackTraceCB, + PauseEditorCB pauseEditorCB, + EllapsedTimeExpMsCB ellapsedTimeExpMsCB, + EllapsedTimeRoutineMsCB ellapsedTimeRoutineMsCB, + GetCB getCB, + IsVisibleCB isVisibleCB, + IsUpdatingCB isUpdatingCB, + IsClosedCB isClosedCB, + NextCB nextCB, + PreviousCB previousCB, + CloseCB closeCB, + SignalBoolCB signalBoolCB, + SignalIntCB signalIntCB, + SignalFloatCB signalFloatCB, + SignalDoubleCB signalDoubleCB, + SignalStringCB signalStringCB){ + + logMessageCBP = std::make_unique(logMessageCB); + logWarningCBP = std::make_unique(logWarningCB); + logErrorCBP = std::make_unique(logErrorCB); + + logMessageIdCBP = std::make_unique(logMessageIdCB); + logWarningIdCBP = std::make_unique(logWarningIdCB); + logErrorIdCBP = std::make_unique(logErrorIdCB); + + stackTraceCBP = std::make_unique(stackTraceCB); + pauseEditorCBP = std::make_unique(pauseEditorCB); + + ellapsedTimeExpMsCBP = std::make_unique(ellapsedTimeExpMsCB); + ellapsedTimeRoutineMsCBP = std::make_unique(ellapsedTimeRoutineMsCB); + + getCBP = std::make_unique(getCB); + isVisibleCBP = std::make_unique(isVisibleCB); + isUpdatingCBP = std::make_unique(isUpdatingCB); + isClosedCBP = std::make_unique(isClosedCB); + + nextCBP = std::make_unique(nextCB); + previousCBP = std::make_unique(previousCB); + closeCBP = 
std::make_unique(closeCB); + + signalBoolCBP = std::make_unique(signalBoolCB); + signalIntCBP = std::make_unique(signalIntCB); + signalFloatCBP = std::make_unique(signalFloatCB); + signalDoubleCBP = std::make_unique(signalDoubleCB); + signalStringCBP = std::make_unique(signalStringCB); + + // Each slot is tested twice: the unique_ptr is never null once the assignments above have run, so the + // function pointer it holds is checked as well before being called. Without the second test the + // fallback branches below could not be reached and a null callback would be invoked. + // The lambdas read the members at call time, so this object has to outlive the connections. + logger->message_signal.connect([&](std::string message){ + if(logMessageCBP != nullptr && *logMessageCBP != nullptr){ + (*logMessageCBP)(message.c_str()); + }else{ + Logger::message(std::format("[DLL] {}", message)); + } + }); + logger->warning_signal.connect([&](std::string warning){ + if(logWarningCBP != nullptr && *logWarningCBP != nullptr){ + (*logWarningCBP)(warning.c_str()); + }else{ + Logger::warning(std::format("[DLL] {}", warning)); + } + }); + logger->error_signal.connect([&](std::string error){ + if(logErrorCBP != nullptr && *logErrorCBP != nullptr){ + (*logErrorCBP)(error.c_str()); + }else{ + Logger::error(std::format("[DLL] {}", error)); + } + }); + // The *_id variants have no fallback: the entry is dropped when no callback is available. + logger->message_id_signal.connect([&](std::string message, Logger::SenderT sType, int sKey){ + if(logMessageIdCBP != nullptr && *logMessageIdCBP != nullptr){ + (*logMessageIdCBP)(message.c_str(), static_cast(sType), sKey); + }else{ + + } + }); + logger->warning_id_signal.connect([&](std::string warning, Logger::SenderT sType, int sKey){ + if(logWarningIdCBP != nullptr && *logWarningIdCBP != nullptr){ + (*logWarningIdCBP)(warning.c_str(), static_cast(sType), sKey); + }else{ + + } + }); + logger->error_id_signal.connect([&](std::string error, Logger::SenderT sType, int sKey){ + if(logErrorIdCBP != nullptr && *logErrorIdCBP != nullptr){ + (*logErrorIdCBP)(error.c_str(), static_cast(sType), sKey); + }else{ + + } + }); +} diff --git a/cpp-projects/base/exvr/ex_experiment.hpp b/cpp-projects/base/exvr/ex_experiment.hpp new file mode 100644 index 0000000..47a5d0f --- /dev/null +++ b/cpp-projects/base/exvr/ex_experiment.hpp @@ -0,0 +1,132 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of 
this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// std +#include +#include +#include +#include + +// local +#include "utility/format.hpp" +#include "utility/tuple_array.hpp" +#include "utility/logger.hpp" + + +// Signatures of the callbacks handed to ExExperiment::init_callbacks(); all of them use the +// __stdcall calling convention. +// log callbacks: receive the text of the entry +typedef void (__stdcall * LogMessageCB)(const char*); +typedef void (__stdcall * LogWarningCB)(const char*); +typedef void (__stdcall * LogErrorCB)(const char*); + +// log callbacks with sender: text, sender type cast to int, sender key +typedef void (__stdcall * LogMessageIdCB)(const char*, int, int); +typedef void (__stdcall * LogWarningIdCB)(const char*, int, int); +typedef void (__stdcall * LogErrorIdCB)(const char*, int, int); + +typedef int (__stdcall * GetCB)(const char*); +typedef int (__stdcall * IsVisibleCB)(int); +typedef int (__stdcall * IsUpdatingCB)(int); +typedef int (__stdcall * IsClosedCB)(int); + +// signal callbacks: two ints followed by the value, whose type gives the callback its name +// NOTE(review): the meaning of the two leading ints is not visible here -- confirm with the host side. +typedef void (__stdcall * SignalBoolCB)(int, int,int); +typedef void (__stdcall * SignalIntCB)(int, int,int); +typedef void (__stdcall * SignalFloatCB)(int, int,float); +typedef void (__stdcall * SignalDoubleCB)(int, int,double); +typedef void (__stdcall * SignalStringCB)(int, int,const char*); + +typedef void (__stdcall * NextCB)(); +typedef void (__stdcall * PreviousCB)(); +typedef void (__stdcall * CloseCB)(int); + +typedef long (__stdcall * EllapsedTimeExpMsCB)(); +typedef long (__stdcall * EllapsedTimeRoutineMsCB)(); + +// debug +typedef void (__stdcall * StackTraceCB)(const char*); +typedef void (__stdcall * PauseEditorCB)(); + +namespace tool::ex { + +// Holds the logger and one slot per callback type declared above. +class ExExperiment{ +public: + + // Both functions assign `logger`; only the first one takes a location for a log file. + bool generate_logger(std::string_view pathDir, std::string_view fileName); + void generate_logger_no_file(); + + std::unique_ptr logger = nullptr; + + // Callback slots, all empty until init_callbacks() is called. + std::unique_ptr logMessageCBP = nullptr; + std::unique_ptr logWarningCBP = nullptr; + std::unique_ptr logErrorCBP = nullptr; + std::unique_ptr logMessageIdCBP = nullptr; + std::unique_ptr logWarningIdCBP = nullptr; + std::unique_ptr logErrorIdCBP = nullptr; + std::unique_ptr ellapsedTimeExpMsCBP = nullptr; + 
std::unique_ptr ellapsedTimeRoutineMsCBP = nullptr; + std::unique_ptr getCBP = nullptr; + std::unique_ptr isVisibleCBP= nullptr; + std::unique_ptr isUpdatingCBP= nullptr; + std::unique_ptr isClosedCBP= nullptr; + std::unique_ptr nextCBP = nullptr; + std::unique_ptr previousCBP = nullptr; + std::unique_ptr closeCBP = nullptr; + std::unique_ptr signalBoolCBP= nullptr; + std::unique_ptr signalIntCBP= nullptr; + std::unique_ptr signalFloatCBP= nullptr; + std::unique_ptr signalDoubleCBP= nullptr; + std::unique_ptr signalStringCBP= nullptr; + + std::unique_ptr stackTraceCBP = nullptr; + std::unique_ptr pauseEditorCBP = nullptr; + + // Takes one callback of each type; the parameter order matters to callers and must not change. + void init_callbacks( + LogMessageCB logMessageCB, + LogWarningCB logWarningCB, + LogErrorCB logErrorCB, + LogMessageIdCB logMessageIdCB, + LogWarningIdCB logWarningIdCB, + LogErrorIdCB logErrorIdCB, + StackTraceCB stackTraceCB, + PauseEditorCB pauseEditorCB, + EllapsedTimeExpMsCB ellapsedTimeExpMsCB, + EllapsedTimeRoutineMsCB ellapsedTimeRoutineMsCB, + GetCB getCB, + IsVisibleCB isVisibleCB, + IsUpdatingCB isUpdatingCB, + IsClosedCB isClosedCB, + NextCB nextCB, + PreviousCB previousCB, + CloseCB closeCB, + SignalBoolCB signalBoolCB, + SignalIntCB signalIntCB, + SignalFloatCB signalFloatCB, + SignalDoubleCB signalDoubleCB, + SignalStringCB signalStringCB + ); +}; +} + diff --git a/cpp-projects/base/exvr/ex_resource.cpp b/cpp-projects/base/exvr/ex_resource.cpp new file mode 100644 index 0000000..607a05d --- /dev/null +++ b/cpp-projects/base/exvr/ex_resource.cpp @@ -0,0 +1,32 @@ + + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, 
distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "ex_resource.hpp" + + +using namespace tool::ex; diff --git a/cpp-projects/base/exvr/ex_resource.hpp b/cpp-projects/base/exvr/ex_resource.hpp new file mode 100644 index 0000000..ae49640 --- /dev/null +++ b/cpp-projects/base/exvr/ex_resource.hpp @@ -0,0 +1,52 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the 
Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "ex_element.hpp" + +namespace tool::ex { + +// ExElement specialization for resources: it identifies itself to the logger as a Resource and +// reads its key from the "resource_key" entry of the Global parameters container. +class ExResource : public ExElement{ + +public: + + virtual ~ExResource(){} + + // Default hooks: initialize() reports success and clean() does nothing. + virtual bool initialize(){return true;} + virtual void clean(){} + + // Key of the resource, read from the Global parameters. + int key() override{ + return get(ParametersContainer::Global, "resource_key"); + } + + // Sender type attached to the log entries of this element. + Logger::SenderT sender_type() override{ + return Logger::SenderT::Resource; + } +}; +} diff --git a/cpp-projects/base/files/assimp_loader.cpp b/cpp-projects/base/files/assimp_loader.cpp new file mode 100644 index 0000000..0da011c --- /dev/null +++ b/cpp-projects/base/files/assimp_loader.cpp @@ -0,0 +1,682 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all 
copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "assimp_loader.hpp" + +// std +#include +#include + +// assimp +#include +#include + +// local +#include "utility/logger.hpp" + +using namespace tool::geo; +using namespace tool::files; + +namespace fs = std::filesystem; + +// Loads the model stored at `path` with Assimp (faces are triangulated at import) and converts its +// materials, meshes, animations and node hierarchy. Returns nullptr, after logging an error, when the +// path does not exist or when the importer reports a failure. `verbose` enables progress output. +auto AiLoader::load_model(std::string_view path, bool verbose) -> std::shared_ptr{ + + if(verbose){ + Logger::message(std::format("[AiLoader] Load model with path [{}].\n", path)); + } + + fs::path pathModel = path; + if(!fs::exists(pathModel)){ + Logger::error(std::format("[AiLoader] Path [{}] not found, model cannot be loaded.\n", path)); + return nullptr; + } + + // read from assimp + Assimp::Importer import; + auto importFlag = aiProcess_Triangulate; // aiProcess_EmbedTextures / aiProcess_FlipUVs + // a string_view is not guaranteed to be null-terminated, so a std::string is passed instead of path.data() + const aiScene *scene = import.ReadFile(std::string(path), importFlag); + if(!scene || scene->mFlags & AI_SCENE_FLAGS_INCOMPLETE || !scene->mRootNode){ + Logger::error(std::format("[AiLoader] Assimp importer failure: [{}] for model with path [{}].\n", import.GetErrorString(), path)); + return nullptr; + } + + // create model + auto model = std::make_shared(); + model->directory = pathModel.parent_path().string(); + model->name = scene->mRootNode->mName.C_Str(); + + if(verbose){ + Logger::message(std::format("[AiLoader] Loaded model name [{}].\n", model->name)); + } + + // retrieve global inverse 
transform + model->globalInverseTr = scene->mRootNode->mTransformation; + model->globalInverseTr.Inverse(); + + auto m = scene->mRootNode->mTransformation; + m.Inverse(); + + model->globalInverseTr2 = geo::Mat4f{ + m.a1,m.a2,m.a3,m.a4, + m.b1,m.b2,m.b3,m.b4, + m.c1,m.c2,m.c3,m.c4, + m.d1,m.d2,m.d3,m.d4, + }; + +// aiVector3t aiTr,aiRot,aiSc; +// m_GlobalInverseTransform.Decompose(aiSc,aiRot,aiTr); + + + +// auto r = geo::Pt3f{aiRot.x,aiRot.y,aiRot.z}; +// model->globalInverseTr = geo::Mat4f::transform( +// geo::Pt3f{aiSc.x,aiSc.y,aiSc.z}, +// geo::Pt3f{rad_2_deg(r.x()),rad_2_deg(r.y()),rad_2_deg(r.z())}, +// geo::Pt3f{aiTr.x,aiTr.y,aiTr.z} +// ); +// std::cout << "GLOBAL INVERSE:\n " <HasTextures()){ +// for(size_t ii = 0; ii < scene->mNumTextures; ++ii){ + +// auto aiTexture = scene->mTextures[ii]; +// std::string file = aiTexture->mFilename.C_Str(); +// std::cout << "embedded texture " << ii << " " << file << " " << aiTexture->mWidth << " " << aiTexture->mHeight << "\n"; + +// for(size_t jj = 0; jj < aiTexture->mWidth * aiTexture->mHeight; ++jj){ + +// aiTexture->pcData->r; +// aiTexture->pcData->g; +// aiTexture->pcData->b; +// aiTexture->pcData->a; +// } + +// } +// } + + + // retrieve materials + if(scene->HasMaterials()){ + + if(verbose){ + std::cout << "[ASSIMP_LOADER] Load materials: " << scene->mNumMaterials << "\n"; + } + + for(size_t ii = 0; ii < scene->mNumMaterials; ++ii){ + read_material(model.get(), scene->mMaterials[ii]); + } + } + + + // retrieve meshes + if(scene->HasMeshes()){ + + if(verbose){ + std::cout << "[ASSIMP_LOADER] Load meshes: " << scene->mNumMeshes << "\n"; + } + + for(size_t ii = 0; ii < scene->mNumMeshes; ++ii){ + if(verbose){ + std::cout << "[ASSIMP_LOADER] Mesh: " << scene->mMeshes[ii]->mName.C_Str() << "\n"; + } + read_mesh(model.get(), scene->mMeshes[ii]); + } + } + + // retrieve animations + if(scene->HasAnimations()){ + + if(verbose){ + std::cout << "[ASSIMP_LOADER] Load animations: " << scene->mNumAnimations << "\n"; + } + + 
for(size_t ii = 0; ii < scene->mNumAnimations; ++ii){ + + if(verbose){ + std::cout << "[ASSIMP_LOADER] Animation: " << scene->mAnimations[ii]->mName.C_Str() << "\n"; + } + + auto assimpAnimation = scene->mAnimations[ii]; + + + model->animations.emplace_back(tool::graphics::Animation{assimpAnimation->mName.C_Str(), assimpAnimation->mDuration, assimpAnimation->mTicksPerSecond}); + // only the name is read below and nothing is added to `animations` in between: a reference avoids a copy + const auto &animation = model->animations.back(); + + for(size_t jj = 0; jj < assimpAnimation->mNumChannels; ++jj){ + + auto assimpChannel = assimpAnimation->mChannels[jj]; + const std::string affectedNodeName = assimpChannel->mNodeName.C_Str(); + + tool::graphics::AnimationKeys keys; + keys.positionTimes.resize(assimpChannel->mNumPositionKeys); + keys.positionKeys.resize(assimpChannel->mNumPositionKeys); + for(size_t kk = 0; kk < keys.positionTimes.size(); ++kk){ + auto &key = assimpChannel->mPositionKeys[kk]; + keys.positionTimes[kk] = key.mTime; + keys.positionKeys[kk] = {key.mValue.x,key.mValue.y,key.mValue.z}; + } + + keys.rotationTimes.resize(assimpChannel->mNumRotationKeys); + keys.rotationKeys.resize(assimpChannel->mNumRotationKeys); + for(size_t kk = 0; kk < keys.rotationTimes.size(); ++kk){ + auto &key = assimpChannel->mRotationKeys[kk]; + keys.rotationTimes[kk] = key.mTime; + keys.rotationKeys[kk] = {key.mValue.x,key.mValue.y,key.mValue.z, key.mValue.w}; + } + + keys.scalingTimes.resize(assimpChannel->mNumScalingKeys); + keys.scalingKeys.resize(assimpChannel->mNumScalingKeys); + for(size_t kk = 0; kk < keys.scalingTimes.size(); ++kk){ + auto &key = assimpChannel->mScalingKeys[kk]; + keys.scalingTimes[kk] = key.mTime; + keys.scalingKeys[kk] = {key.mValue.x,key.mValue.y,key.mValue.z}; + } + + model->animationsKeys[animation.name][affectedNodeName] = std::move(keys); + } + } + } + + // bones + read_bones_hierarchy(&model->bonesHierachy, scene->mRootNode); + + return model; +} + + +// Converts one Assimp mesh (vertices, normals, uv channel 0, tangents, color channel 0, triangle +// indices and bone weights) and appends the result to model->gmeshes. Missing normals are generated, +// and so are missing tangents when normals and uv coordinates are available. Indices are only read for +// purely triangular meshes; any other face layout is reported on std::cerr. +void AiLoader::read_mesh(Model *model, aiMesh *aiMesh){ + + bool verbose = false; + + auto gmesh = 
std::make_shared(); + gmesh->name = aiMesh->mName.C_Str(); + gmesh->material = &model->materials[aiMesh->mMaterialIndex]; + + Mesh *mesh = &gmesh->mesh; + + bool hasPoints = aiMesh->mPrimitiveTypes & aiPrimitiveType::aiPrimitiveType_POINT; + bool hasLines = aiMesh->mPrimitiveTypes & aiPrimitiveType::aiPrimitiveType_LINE; + bool hasTriangles = aiMesh->mPrimitiveTypes & aiPrimitiveType::aiPrimitiveType_TRIANGLE; + bool hasPolygons = aiMesh->mPrimitiveTypes & aiPrimitiveType::aiPrimitiveType_POLYGON; + + // process vertex positions, normals and texture coordinates + mesh->vertices.reserve(aiMesh->mNumVertices); + if(aiMesh->HasNormals()){ + mesh->normals.reserve(aiMesh->mNumVertices); + } + + if(aiMesh->HasTextureCoords(0)){ + mesh->tCoords.reserve(aiMesh->mNumVertices); + }else{ + mesh->tCoords.resize(aiMesh->mNumVertices); + std::fill(std::begin(mesh->tCoords), std::end(mesh->tCoords), Pt2f{0.f,0.f}); + } + + if(aiMesh->HasTangentsAndBitangents()){ + // reserve, not resize: the tangents are appended in the vertex loop below, a resize would leave + // mNumVertices zeroed entries in front of them + mesh->tangents.reserve(aiMesh->mNumVertices); + } + + if(aiMesh->HasVertexColors(0)){ + mesh->colors.reserve(aiMesh->mNumVertices); + } + + if(aiMesh->HasBones()){ + mesh->bones.resize(aiMesh->mNumVertices); + } + + for(unsigned int ii = 0; ii < aiMesh->mNumVertices; ii++){ + + // position + mesh->vertices.emplace_back(Pt3f{aiMesh->mVertices[ii].x, aiMesh->mVertices[ii].y, aiMesh->mVertices[ii].z}); + + // normal + if(aiMesh->HasNormals()){ + mesh->normals.emplace_back(Vec3f{aiMesh->mNormals[ii].x, aiMesh->mNormals[ii].y, aiMesh->mNormals[ii].z}); + } + + // uv + // aiMesh->GetNumUVChannels() + if(aiMesh->HasTextureCoords(0)){ + mesh->tCoords.emplace_back(Pt2f{aiMesh->mTextureCoords[0][ii].x, aiMesh->mTextureCoords[0][ii].y}); + } + + // tangents (indexed per vertex, mTangents->x would always read the tangent of vertex 0) + if(aiMesh->HasTangentsAndBitangents()){ + mesh->tangents.emplace_back(Pt4f{aiMesh->mTangents[ii].x,aiMesh->mTangents[ii].y,aiMesh->mTangents[ii].z,1.f}); + } + + // colors + // aiMesh->GetNumColorChannels() + if(aiMesh->HasVertexColors(0)){ + mesh->colors.emplace_back(Pt4f{ + 
aiMesh->mColors[0][ii].r, + aiMesh->mColors[0][ii].g, + aiMesh->mColors[0][ii].b, + aiMesh->mColors[0][ii].a + }); + } + + // aiMesh->mBitangents + // aiMesh->mMethod + // aiMesh->mAnimMeshes + } + + // process indices + if(hasTriangles && !hasPoints && !hasLines && !hasPolygons){ + + mesh->triIds.reserve(aiMesh->mNumFaces); + + for(unsigned int ii = 0; ii < aiMesh->mNumFaces; ii++){ + const aiFace &face = aiMesh->mFaces[ii]; // by reference: copying an aiFace duplicates its index array + mesh->triIds.emplace_back(TriIds{{face.mIndices[0], face.mIndices[1], face.mIndices[2]}}); + } + + }else{ + std::cerr << "[ASSIMP_LOADER] Face format not managed.\n"; + } + + // compute normals if necessary + if(!aiMesh->HasNormals()){ + mesh->generate_normals(); + } + + // generate tangents if necessary + if(!aiMesh->HasTangentsAndBitangents() && (mesh->normals.size() > 0) && aiMesh->HasTextureCoords(0)){ + mesh->generate_tangents(); + } + +// if(mesh->tangents.size() != mesh->vertices.size()){ +// std::cout << "[ASSIMP_LOADER] Invalid tangents. Recomputing.\n"; +// mesh->generate_tangents(); +// } + + // bones + if(aiMesh->HasBones()){ + + +// for (uint i = 0 ; i < pMesh->mNumBones ; i++) { +// uint BoneIndex = 0; +// string BoneName(pMesh->mBones[i]->mName.data); + +// if (m_BoneMapping.find(BoneName) == m_BoneMapping.end()) { +// BoneIndex = m_NumBones; +// m_NumBones++; +// BoneInfo bi; +// m_BoneInfo.push_back(bi); +// } +// else { +// BoneIndex = m_BoneMapping[BoneName]; +// } + +// m_BoneMapping[BoneName] = BoneIndex; +// m_BoneInfo[BoneIndex].BoneOffset = pMesh->mBones[i]->mOffsetMatrix; + +// for (uint j = 0 ; j < pMesh->mBones[i]->mNumWeights ; j++) { +// uint VertexID = m_Entries[MeshIndex].BaseVertex + pMesh->mBones[i]->mWeights[j].mVertexId; +// float Weight = pMesh->mBones[i]->mWeights[j].mWeight; +// Bones[VertexID].AddBoneData(BoneIndex, Weight); +// } +// } + + if(verbose){ + std::cout << "Num bones: " << aiMesh->mNumBones << "\n"; + } + + for(size_t ii = 0; ii < aiMesh->mNumBones; ++ii){ + + unsigned int boneIndex = 0; + auto 
bone = aiMesh->mBones[ii]; + std::string boneName(bone->mName.C_Str()); + + if(verbose){ + std::cout << "Bone: " << boneName << "\n"; + } + + + if(model->bonesMapping.count(boneName) == 0){ + + boneIndex = static_cast(model->bonesMapping.size()); + model->bonesInfo.emplace_back(graphics::BoneInfo{}); +// model->bonesMapping[boneName] = boneIndex; + + if(verbose){ + std::cout << "Add bone in mapping, current index: " << boneIndex << " " << model->bonesMapping.size() << "\n"; + } + }else{ + boneIndex = model->bonesMapping[boneName]; + + + if(verbose){ + std::cout << "Bone already in mapping at index: " << boneIndex << "\n"; + } + } + + model->bonesMapping[boneName] = boneIndex; + + model->bonesInfo[boneIndex].offset = bone->mOffsetMatrix; + +// model->bonesInfo[boneIndex].offset = geo::Mat4f +// { +// m.a1,m.a2,m.a3,m.a4, +// m.b1,m.b2,m.b3,m.b4, +// m.c1,m.c2,m.c3,m.c4, +// m.d1,m.d2,m.d3,m.d4, +// }; + +// // bone offset +// // # decompose +// aiVector3t aiTr,aiRot,aiSc; +// bone->mOffsetMatrix.Decompose(aiSc,aiRot,aiTr); +// // # create offset transform +// auto r = geo::Pt3f{aiRot.x,aiRot.y,aiRot.z}; +// model->bonesInfo[boneIndex].offset = geo::Mat4f::transform( +// geo::Pt3f{aiSc.x,aiSc.y,aiSc.z}, +// geo::Pt3f{rad_2_deg(r.x()),rad_2_deg(r.y()),rad_2_deg(r.z())}, +//// geo::Pt3f{(r.x()),(r.y()),(r.z())}, +// geo::Pt3f{aiTr.x,aiTr.y,aiTr.z} +// ); + + if(verbose){ + std::cout << "Num weights: " << bone->mNumWeights << "\n"; + } + for (size_t jj = 0 ; jj < bone->mNumWeights; jj++) { + unsigned int VertexId = bone->mWeights[jj].mVertexId; + float Weight = bone->mWeights[jj].mWeight; + mesh->bones[VertexId].add_bone_data(boneIndex, Weight); + } + } + } + + model->gmeshes.emplace_back(std::move(gmesh)); +} + +// Reads the properties and the textures of `aiMat` and appends the resulting Material to +// model->materials. A texture is created once per resolved path in model->textures and referenced +// from the material through a TextureInfo. +void AiLoader::read_material(Model *model, aiMaterial *aiMat){ + + using MatP = Material::Property; + + // read properties + Material material; + // # str + material.name = to_string(read_property(MatP::name, aiMat)); + // # int + material.backfaceCulling = 
read_property(MatP::twosided, aiMat).value() != 0; + material.wireframe = read_property(MatP::enable_wireframe, aiMat).value() != 0; + // # float + material.opacity = read_property(MatP::opacity, aiMat).value(); + material.shininess = read_property(MatP::shininess, aiMat).value(); + material.shininessStrength = read_property(MatP::shininess_strength, aiMat).value(); + material.refraction = read_property(MatP::refacti, aiMat).value(); + material.reflectivity = read_property(MatP::reflectivity, aiMat).value(); + // # point3f + material.ambiantColor = to_color(read_property(MatP::color_ambient, aiMat)); + material.diffuseColor = to_color(read_property(MatP::color_diffuse, aiMat)); + material.specularColor = to_color(read_property(MatP::color_specular, aiMat)); + material.emissiveColor = to_color(read_property(MatP::color_emissive, aiMat)); + material.transparentColor = to_color(read_property(MatP::color_transparent, aiMat)); + material.reflectiveColor = to_color(read_property(MatP::color_reflective, aiMat)); + // mat.mNumAllocated; + // mat.mNumProperties; + + // read textures + for(const auto &type : textureTypes.data){ + for(unsigned int ii = 0; ii < aiMat->GetTextureCount(std::get<1>(type)); ++ii){ + + // ai data + aiString path; // receives the path to the texture. If the texture is embedded, receives a '*' followed by the id of the texture + // (for the textures stored in the corresponding scene) which can be converted to an int using a function like atoi. 
NULL is a valid value + aiTextureMapping mapping = aiTextureMapping_UV; // texture mapping, NULL is allowed as value + unsigned int uvIndex = 0; // uv index of the texture (NULL is valid value) + ai_real blend = 0; // blend factor for the texture + aiTextureOp operation = aiTextureOp_Multiply;// texture operation to be performed between this texture and the previous texture + aiTextureMapMode mapMode[3] = {aiTextureMapMode_Wrap, aiTextureMapMode_Wrap, aiTextureMapMode_Wrap};// mapping modes to be used for the texture, the parameter may be NULL but if it is a valid pointer it MUST + // point to an array of 3 aiTextureMapMode's (one for each axis: UVW order (=XYZ)). + // The outputs above are given defaults because GetTexture only writes those whose property is stored + // in the material, and mapMode is read below. A texture that cannot be queried is skipped. + if(aiMat->GetTexture(std::get<1>(type), ii, &path, &mapping, &uvIndex, &blend, &operation, &mapMode[0]) != aiReturn_SUCCESS){ + continue; + } + + if(auto pathTexture = retrieve_texture_path(model, path); pathTexture.has_value()){ + + Texture2D *texture = nullptr; + if(model->textures.count(pathTexture.value()) == 0){ // add texture + model->textures[pathTexture.value()] = Texture2D(pathTexture.value()); + } + texture = &model->textures[pathTexture.value()]; + + TextureInfo textureInfo; + textureInfo.texture = texture; + textureInfo.options.type = std::get<0>(type); + textureInfo.options.mapping = get_texture_mapping(mapping); + textureInfo.options.operation = get_texture_operation(operation); + textureInfo.options.mapMode = Pt3{ + get_texture_map_mode(mapMode[0]), + get_texture_map_mode(mapMode[1]), + get_texture_map_mode(mapMode[2]) + }; + + // others textures info + auto wrapping = read_texture_property(MatP::text_mapping, aiMat, textureInfo.options.type, ii).value(); + auto uvwSource = read_texture_property(MatP::text_uvw_source, aiMat, textureInfo.options.type, ii).value(); + auto mappingModeU = read_texture_property(MatP::text_mapping_mode_u, aiMat, textureInfo.options.type, ii).value(); + auto mappingModeV = read_texture_property(MatP::text_mapping_mode_v, aiMat, textureInfo.options.type, ii).value(); + auto flags = read_texture_property(MatP::text_flags, aiMat, textureInfo.options.type, ii).value(); + auto texmapAxis = read_texture_property(MatP::text_texmap_axis, aiMat, textureInfo.options.type, 
ii).value(); + auto blend = read_texture_property(MatP::text_blend, aiMat, textureInfo.options.type, ii).value(); + + static_cast(wrapping); + static_cast(uvwSource); + static_cast(mappingModeU); + static_cast(mappingModeV); + static_cast(flags); + static_cast(texmapAxis); + static_cast(blend); +// name,twosided,shading_model,enable_wireframe,blend_func,opacity, bumpscaling, shininess, reflectivity, +// shininess_strength, refacti, color_diffuse, color_ambient, color_specular, color_emissive, color_transparent, +// color_reflective, global_background_image, +// text_blend, text_mapping, text_operation, text_uvw_source, +// text_mapping_mode_u, text_mapping_mode_v, +// text_texmap_axis, text_flags, + + // add infos + material.texturesInfo[textureInfo.options.type].emplace_back(std::move(textureInfo)); + } + } + } + + model->materials.emplace_back(std::move(material)); +} + + + +// Resolves the location of a texture referenced by a material. Embedded textures (path starting with +// '*') are not supported and give an empty result. When the path does not exist as given, the file +// name is looked up in the model directory, then in its texture, textures, ../texture and ../textures +// neighbours; an empty result is returned when none of them exists. +std::optional AiLoader::retrieve_texture_path(Model *model, const aiString &aiPath){ + + const std::string path = aiPath.C_Str(); + if(path.length() > 0){ + if(path[0] == '*'){ + std::cout << "[ASSIMP_LOADER] Embedded texture detected, not managed yet\n"; + return {}; + } + } + + namespace fs = std::filesystem; + fs::path texturePath = path; + + std::error_code code; + if(!fs::exists(texturePath,code)){ // check if full path exist + + fs::path dirPath = model->directory; + + std::vector pathsToTest; + pathsToTest.emplace_back(dirPath / texturePath.filename()); + pathsToTest.emplace_back(dirPath / "texture" / texturePath.filename()); + pathsToTest.emplace_back(dirPath / "textures" / texturePath.filename()); + pathsToTest.emplace_back(dirPath / ".." / "texture" / texturePath.filename()); + pathsToTest.emplace_back(dirPath / ".." 
/ "textures" / texturePath.filename()); + + bool found = false; + for(const auto &pathToTest : pathsToTest){ + // non-throwing overload, as for the first check: an unreadable candidate counts as not found + if(fs::exists(pathToTest, code)){ + found = true; + texturePath = pathToTest; + break; + } + } + + if(!found){ + std::cerr << "[ASSIMP_LOADER] Cannot find texture " << texturePath.filename() << "\n"; + return {}; + } + } + + // found path + return texturePath.string(); +} + +// Copies the name and the transformation of `node` into `bones`, then does the same recursively for +// every child of the node, appending the results to bones->children. +void AiLoader::read_bones_hierarchy(tool::graphics::BonesHierarchy *bones, aiNode *node){ + + bones->boneName = node->mName.C_Str(); + + // set transform +// const auto &m = node->mTransformation; +// aiVector3t aiTr,aiRot,aiSc; +// node->mTransformation.Decompose(aiSc,aiRot,aiTr); +// auto r = geo::Pt3f{aiRot.x,aiRot.y,aiRot.z}; +// bones->tr = geo::Mat4f::transform( +// geo::Pt3f{aiSc.x,aiSc.y,aiSc.z}, +// geo::Pt3f{rad_2_deg(r.x()),rad_2_deg(r.y()),rad_2_deg(r.z())}, +//// geo::Pt3f{(r.x()),(r.y()),(r.z())}, +// geo::Pt3f{aiTr.x,aiTr.y,aiTr.z} +// ); + + bones->tr = node->mTransformation; +// const auto &m = node->mTransformation;//.Transpose(); +// bones->tr = geo::Mat4f +// { +// m.a1,m.a2,m.a3,m.a4, +// m.b1,m.b2,m.b3,m.b4, +// m.c1,m.c2,m.c3,m.c4, +// m.d1,m.d2,m.d3,m.d4, +// }; + + +// std::cout << "TR: " << geo::Pt3f{aiSc.x,aiSc.y,aiSc.z} << " " << geo::Pt3f{rad_2_deg(r.x()),rad_2_deg(r.y()),rad_2_deg(r.z())} << " " << geo::Pt3f{aiTr.x,aiTr.y,aiTr.z} << "\n"; + + for(size_t ii = 0; ii < node->mNumChildren; ++ii){ + graphics::BonesHierarchy bh; + read_bones_hierarchy(&bh, node->mChildren[ii]); + bones->children.emplace_back(std::move(bh)); + } +} + + + +//Texture2D *AiLoader::read_texture(Model *model, aiMaterial *mat, aiTextureType type, unsigned int index){ + +// // ai data +// aiString path; // receives the path to the texture. If the texture is embedded, receives a '*' followed by the id of the texture +// // (for the textures stored in the corresponding scene) which can be converted to an int using a function like atoi. 
NULL is a valid value +// aiTextureMapping mapping; // texture mapping, NULL is allowed as value +// unsigned int uvIndex; // uv index of the texture (NULL is valid value) +// ai_real blend; // blend factor for the texture +// aiTextureOp operation; // texture operation to be performed between this texture and the previous texture +// aiTextureMapMode mapMode[3];// mapping modes to be used for the texture, the parameter may be NULL but if it is a valid pointer it MUST +// // point to an array of 3 aiTextureMapMode's (one for each axis: UVW order (=XYZ)). +// mat->GetTexture(type, index, &path, &mapping, &uvIndex, &blend, &operation, &mapMode[0]); + +// // auto wrapping = read_texture_property(MatP::text_mapping, aiMat, texturesPerType.first, ii).value(); +// // auto uvwSource = read_texture_property(MatP::text_uvw_source, aiMat, texturesPerType.first, ii).value(); +// // auto mappingModeU = read_texture_property(MatP::text_mapping_mode_u, aiMat, texturesPerType.first, ii).value(); +// // auto mappingModeV = read_texture_property(MatP::text_mapping_mode_v, aiMat, texturesPerType.first, ii).value(); +// // auto flags = read_texture_property(MatP::text_flags, aiMat, texturesPerType.first, ii).value(); +// // auto texmapAxis = read_texture_property(MatP::text_texmap_axis, aiMat, texturesPerType.first, ii).value(); +// // auto blend = read_texture_property(MatP::text_blend, aiMat, texturesPerType.first, ii).value(); + +// // find texture +// namespace fs = std::filesystem; + + +// const std::string aiPath = path.C_Str(); +// if(aiPath.length() > 0){ +// if(aiPath[0] == '*'){ +// std::cout << "[ASSIMP_LOADER] Embedded texture detected, not managed yet\n"; +// return nullptr; +// } +// } + +// fs::path texturePath = aiPath; +// if(!fs::exists(texturePath)){ // check if full path exist + +// fs::path dirPath = model->directory; + +// std::vector pathsToTest; +// pathsToTest.emplace_back(dirPath / texturePath.filename()); +// pathsToTest.emplace_back(dirPath / "texture" / 
texturePath.filename()); +// pathsToTest.emplace_back(dirPath / "textures" / texturePath.filename()); +// pathsToTest.emplace_back(dirPath / ".." / "texture" / texturePath.filename()); +// pathsToTest.emplace_back(dirPath / ".." / "textures" / texturePath.filename()); + +// bool found = false; +// for(const auto &pathToTest : pathsToTest){ +// if(fs::exists(pathToTest)){ +// found = true; +// texturePath = pathToTest; +// break; +// } +// } + +// if(!found){ +// std::cerr << "[ASSIMP_LOADER] Cannot find texture " << texturePath.filename() << "\n"; +// return nullptr; +// } +// } + + +// std::string foundPath = texturePath.u8string(); + + +// auto textures = &model->m_textures; +// if(textures->count(foundPath) == 0){ + +// // load texture +// Texture2D texture(foundPath); + +// // type +// texture.type = get_texture_type(type); + +// // mapping +// texture.mapping = get_texture_mapping(mapping); + +// // operation +// texture.operation = get_texture_operation(operation); + +// // mapMode +// texture.mapMode = Pt3{get_texture_map_mode(mapMode[0]),get_texture_map_mode(mapMode[1]),get_texture_map_mode(mapMode[2])}; + + +// // add texture to model +// (*textures)[foundPath] = std::move(texture); +// } + +// return &(*textures)[foundPath]; +//} diff --git a/cpp-projects/base/files/assimp_loader.hpp b/cpp-projects/base/files/assimp_loader.hpp new file mode 100644 index 0000000..d4b4998 --- /dev/null +++ b/cpp-projects/base/files/assimp_loader.hpp @@ -0,0 +1,206 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell 
copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// std +#include + +// assimp +#include +#include + +// local +#include "graphics/model.hpp" +#include "utility/tuple_array.hpp" + +namespace tool::files { + +using Texture2D = graphics::Texture2D; +using TextureInfo = graphics::TextureInfo; +using GMesh = graphics::GMesh; +using Model = graphics::Model; +using Mesh = geo::Mesh; + +using namespace std::literals::string_view_literals; + +class AiLoader{ + +public: + static auto load_model(std::string_view path, bool verbose = false) -> std::shared_ptr; + +private: + + using TextureOperation = graphics::TextureOperation; + using TTexOp = std::tuple; + static constexpr TupleArray textureOperations ={{ + TTexOp{TextureOperation::add, aiTextureOp::aiTextureOp_Add}, + TTexOp{TextureOperation::divide, aiTextureOp::aiTextureOp_Divide}, + TTexOp{TextureOperation::multiply, aiTextureOp::aiTextureOp_Multiply}, + TTexOp{TextureOperation::substract, aiTextureOp::aiTextureOp_Subtract}, + TTexOp{TextureOperation::singed_add, aiTextureOp::aiTextureOp_SignedAdd}, + TTexOp{TextureOperation::smooth_add, aiTextureOp::aiTextureOp_SmoothAdd}, 
+ }}; + static TextureOperation get_texture_operation(aiTextureOp t){ + return textureOperations.at<1,0>(t); + } + static aiTextureOp get_assimp_texture_operation(TextureOperation t){ + return textureOperations.at<0,1>(t); + } + + using TextureType = graphics::TextureType; + using TTexType = std::tuple; + static constexpr TupleArray textureTypes ={{ + TTexType{TextureType::diffuse, aiTextureType_DIFFUSE}, + TTexType{TextureType::ambiant, aiTextureType_AMBIENT}, + TTexType{TextureType::specular, aiTextureType_SPECULAR}, + TTexType{TextureType::normal, aiTextureType_NORMALS}, + TTexType{TextureType::height, aiTextureType_HEIGHT}, + TTexType{TextureType::light_map, aiTextureType_LIGHTMAP}, + TTexType{TextureType::opacity, aiTextureType_OPACITY}, + TTexType{TextureType::emissive, aiTextureType_EMISSIVE}, + TTexType{TextureType::shininess, aiTextureType_SHININESS}, + TTexType{TextureType::reflection, aiTextureType_REFLECTION}, + TTexType{TextureType::displacement, aiTextureType_DISPLACEMENT}, + }}; + + static TextureType get_texture_type(aiTextureType t){ + return textureTypes.at<1,0>(t); + } + static aiTextureType get_assimp_texture_type(TextureType t){ + return textureTypes.at<0,1>(t); + } + + using TextureMapping = graphics::TextureMapping; + using TTexMap = std::tuple; + static constexpr TupleArray textureMappings ={{ + TTexMap{TextureMapping::UV, aiTextureMapping::aiTextureMapping_UV}, + TTexMap{TextureMapping::box, aiTextureMapping::aiTextureMapping_BOX}, + TTexMap{TextureMapping::plane, aiTextureMapping::aiTextureMapping_PLANE}, + TTexMap{TextureMapping::sphere, aiTextureMapping::aiTextureMapping_SPHERE}, + TTexMap{TextureMapping::cylinder, aiTextureMapping::aiTextureMapping_CYLINDER}, + TTexMap{TextureMapping::other, aiTextureMapping::aiTextureMapping_OTHER}, + }}; + + static TextureMapping get_texture_mapping(aiTextureMapping t){ + return textureMappings.at<1,0>(t); + } + static aiTextureMapping get_assimp_texture_mapping(TextureMapping t){ + return 
textureMappings.at<0,1>(t); + } + + using TextureMapMode = graphics::TextureMapMode; + using TTexMapMode = std::tuple; + static constexpr TupleArray textureMapModes ={{ + TTexMapMode{TextureMapMode::wrap, aiTextureMapMode::aiTextureMapMode_Wrap}, + TTexMapMode{TextureMapMode::clamp, aiTextureMapMode::aiTextureMapMode_Clamp}, + TTexMapMode{TextureMapMode::decal, aiTextureMapMode::aiTextureMapMode_Decal}, + TTexMapMode{TextureMapMode::mirro, aiTextureMapMode::aiTextureMapMode_Mirror}, + }}; + + static TextureMapMode get_texture_map_mode(aiTextureMapMode t){ + return textureMapModes.at<1,0>(t); + } + static aiTextureMapMode get_assimp_texture_map_mode(TextureMapMode t){ + return textureMapModes.at<0,1>(t); + } + + using Material = graphics::Material; + using TMatProp = std::tuple; + static constexpr TupleArray materialProperties ={{ + TMatProp{Material::Property::name, "?mat.name"sv}, + TMatProp{Material::Property::twosided, "$mat.twosided"sv}, + TMatProp{Material::Property::shading_model, "$mat.shadingm"sv}, + TMatProp{Material::Property::enable_wireframe, "$mat.wireframe"sv}, + TMatProp{Material::Property::blend_func, "$mat.blend"sv}, + TMatProp{Material::Property::opacity, "$mat.opacity"sv}, + TMatProp{Material::Property::bumpscaling, "$mat.bumpscaling"sv}, + TMatProp{Material::Property::shininess, "$mat.shininess"sv}, + TMatProp{Material::Property::reflectivity, "$mat.reflectivity"sv}, + TMatProp{Material::Property::shininess_strength, "$mat.shinpercent"sv}, + TMatProp{Material::Property::refacti, "$mat.refracti"sv}, + TMatProp{Material::Property::color_diffuse, "$mat.diffuse"sv}, + TMatProp{Material::Property::color_ambient, "$mat.ambient"sv}, + TMatProp{Material::Property::color_specular, "$mat.specular"sv}, + TMatProp{Material::Property::color_emissive, "$mat.emissive"sv}, + TMatProp{Material::Property::color_transparent, "$mat.transparent"sv}, + TMatProp{Material::Property::color_reflective, "$mat.reflective"sv}, + 
TMatProp{Material::Property::global_background_image, "$mat.global"sv}, + TMatProp{Material::Property::text_blend, "$tex.blend"sv}, + TMatProp{Material::Property::text_mapping, "$tex.mapping"sv}, + TMatProp{Material::Property::text_operation, "$tex.op"sv}, + TMatProp{Material::Property::text_uvw_source, "$tex.uvwsrc"sv}, + TMatProp{Material::Property::text_mapping_mode_u, "$tex.mapmodeu"sv}, + TMatProp{Material::Property::text_mapping_mode_v, "$tex.mapmodev"sv}, + TMatProp{Material::Property::text_texmap_axis, "$tex.mapaxis"sv}, + TMatProp{Material::Property::text_flags, "$tex.flags"sv}, + // _AI_MATKEY_UVTRANSFORM_BASE + }}; + + static std::string_view get_assimp_str_code(Material::Property p) { + return materialProperties.at<0,1>(p); + } + + template + static std::optional read_property(Material::Property property, aiMaterial *assimpMaterial){ + T value; + if(AI_SUCCESS == assimpMaterial->Get(std::string(get_assimp_str_code(property)).c_str(),0,0, value)){ + return {value}; + } + return {T{}}; + } + + static geo::Col3f to_color(std::optional assimpColor){ + if(assimpColor.has_value()){ + return geo::Col3f{assimpColor.value().r, assimpColor.value().g, assimpColor.value().b}; + } + return {}; + } + + static const char* to_string(std::optional assimpStr){ + if(assimpStr.has_value()){ + return assimpStr.value().C_Str(); + } + return {}; + } + + template + static std::optional read_texture_property(Material::Property property, aiMaterial *assimpMaterial, TextureType type, unsigned int index){ + T value; + if(AI_SUCCESS == assimpMaterial->Get(std::string(get_assimp_str_code(property)).c_str(),get_assimp_texture_type(type),index, value)){ + return {value}; + } + return {T{}}; + } + + static std::optional retrieve_texture_path(Model *model, const aiString &aiPath); + static void read_mesh(Model *model, aiMesh *aiMesh); + static void read_material(Model *model, aiMaterial *aiMat); + static void read_bones_hierarchy(graphics::BonesHierarchy *bones, aiNode *node); +}; +} + 
diff --git a/cpp-projects/base/files/binary_settings.hpp b/cpp-projects/base/files/binary_settings.hpp new file mode 100644 index 0000000..b6dc5d3 --- /dev/null +++ b/cpp-projects/base/files/binary_settings.hpp @@ -0,0 +1,74 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "settings.hpp" +#include "utility/vector.hpp" + +namespace tool::files { + +struct SubBinarySettings{ + virtual auto init_from_data(std::int8_t *data) -> void = 0; + virtual auto convert_to_data(std::int8_t *data) const -> void = 0; + virtual auto total_data_size() const noexcept-> size_t = 0; +}; + +struct BinaryFileSettings : public BaseSettings{ + + auto init_from_file(const std::string &filePath) -> bool{ + return init_from_binary_file(filePath); + } + + auto save_to_file(const std::string &filePath) const -> bool{ + return save_to_binary_file(filePath); + } + + static auto save_to_file(const std::vector &settingsA, const std::string &filePath) -> bool{ + return BaseSettings::save_to_binary_file(convert(settingsA), filePath); + } + + static auto init_from_file(std::vector &settingsA, const std::string &filePath) -> bool{ + auto s = convert(settingsA); + return BaseSettings::init_from_binary_file(s, filePath); + } + +private: + virtual auto init_from_text(const std::string &) -> void override{} + virtual auto convert_to_text() const -> std::string override{return {};} +}; + +struct BinarySettings : public BaseSettings{ +private: + virtual auto init_from_text(const std::string &) -> void override{} + virtual auto convert_to_text() const -> std::string override{return {};} +}; + +} + + + diff --git a/cpp-projects/base/files/cloud_io.cpp b/cpp-projects/base/files/cloud_io.cpp new file mode 100644 index 0000000..92e1570 --- /dev/null +++ b/cpp-projects/base/files/cloud_io.cpp @@ -0,0 +1,104 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal 
in the Software without restriction, including without limitation **
+** the rights to use, copy, modify, merge, publish, distribute, sublicense, **
+** and/or sell copies of the Software, and to permit persons to whom the **
+** Software is furnished to do so, subject to the following conditions: **
+** **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software. **
+** **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER **
+** DEALINGS IN THE SOFTWARE. **
+** **
+********************************************************************************/
+
+#include "cloud_io.hpp"
+
+// std
+#include
+
+// local
+#include "utility/logger.hpp"
+
+using namespace tool::files;
+
+// Opens `file` on `path`. Logs an error and returns false when the stream is not open
+// afterwards, returns true otherwise.
+auto CloudIO::open_file(const std::string &path, std::ofstream &file) -> bool {
+
+    file.open(path);
+    if(file.is_open()){
+        return true;
+    }
+
+    Logger::error(std::format("[CloudIO::save_cloud] Cannot open file from path {}.\n", path));
+    return false;
+}
+
+// Checks the vertices/colors buffer sizes: there must be at least one vertex and, when
+// colors are provided (size != 0), as many colors as vertices. Logs the reason on failure.
+auto CloudIO::check_buffers_sizes(size_t sizeVertices, size_t sizeColors) -> bool {
+
+    if(sizeVertices == 0){
+        Logger::error("[CloudIO::save_cloud] No vertices.\n");
+        return false;
+    }
+
+    const bool colorsMismatch = (sizeColors != 0) && (sizeColors != sizeVertices);
+    if(colorsMismatch){
+        Logger::error("[CloudIO::save_cloud] Not the same number of vertices and colors\n");
+        return false;
+    }
+    return true;
+}
+
+// Validates the raw inputs of save_cloud:
+//  - size must be non-zero, vertices must be present, dimension must be 2 or 3
+//  - when colors are present, nbChannels must be 3 or 4
+// Logs a single error message and returns false when any condition fails.
+auto CloudIO::check_input_values(size_t size, bool hasVertices, bool hasColors, int dimension, int nbChannels) -> bool {
+
+    const bool geometryOk = hasVertices && (size != 0) && (dimension >= 2) && (dimension <= 3);
+    const bool colorsOk   = !hasColors || ((nbChannels >= 3) && (nbChannels <= 4));
+
+    if(geometryOk && colorsOk){
+        return true;
+    }
+
+    Logger::error("[CloudIO::save_cloud] Invalid inputs values.\n");
+    return false;
+}
+
+// write_vertex overloads: format the given values with the matching vLineN pattern
+// (N = number of values) and append the resulting line to `file`.
+auto CloudIO::write_vertex(std::ostream &file, float v1, float v2) -> void {
+    const auto line = std::format(vLine2, v1, v2);
+    file << line;
+}
+
+auto CloudIO::write_vertex(std::ostream &file, float v1, float v2, float v3) -> void {
+    const auto line = std::format(vLine3, v1, v2, v3);
+    file << line;
+}
+
+auto CloudIO::write_vertex(std::ostream &file, float v1, float v2, float v3, float v4, float v5) -> void {
+    const auto line = std::format(vLine5, v1, v2, v3, v4, v5);
+    file << line;
+}
+
+auto CloudIO::write_vertex(std::ostream &file, float v1, float v2, float v3, float v4, float v5, float v6) -> void {
+    const auto line = std::format(vLine6, v1, v2, v3, v4, v5, v6);
+    file << line;
+}
+
+auto CloudIO::write_vertex(std::ostream &file, float v1, float v2, float v3, float v4, float v5, float v6, float v7) -> void {
+    const auto line = std::format(vLine7, v1, v2, v3, v4, v5, v6, v7);
+    file << line;
+}
+
+// write_vertex_normal overloads: same as write_vertex but with the vnLineN patterns.
+auto CloudIO::write_vertex_normal(std::ostream &file, float v1, float v2) -> void {
+    const auto line = std::format(vnLine2, v1, v2);
+    file << line;
+}
+
+auto CloudIO::write_vertex_normal(std::ostream &file, float v1, float v2, float v3) -> void {
+    const auto line = std::format(vnLine3, v1, v2, v3);
+    file << line;
+}
+
diff --git a/cpp-projects/base/files/cloud_io.hpp b/cpp-projects/base/files/cloud_io.hpp
new file mode 100644
index 0000000..9a471f7
--- /dev/null
+++ b/cpp-projects/base/files/cloud_io.hpp
@@ -0,0 +1,207 @@
+
+/*******************************************************************************
+** Toolset-base **
+** MIT License **
+** Copyright (c) [2018] [Florian Lance] **
+** **
+** Permission is hereby granted, free of charge, to any person obtaining a **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation **
+** the rights to use, copy, modify, merge, publish, distribute,
sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// std +#include +#include +#include +#include + +// local +#include "geometry/point2.hpp" +#include "geometry/point4.hpp" +#include "geometry/cloud.hpp" + +namespace tool::files { + +using namespace std::string_view_literals; +class CloudIO{ + +public: + + static auto save_cloud(const std::string &path, const geo::ColoredCloudData &cloud) -> bool{ + if(cloud.is_valid()){ + + return save_cloud( + path, + cloud.size(), + cloud.vertices.get_data(), + cloud.has_colors() ? cloud.colors.get_data() : nullptr, + cloud.has_normals() ? cloud.normals.get_data() : nullptr + ); + } + return false; + } + + template + static auto save_cloud(const std::string &path, size_t size, const geo::Pt2 *vertices, const geo::Pt3 *colors = nullptr, const geo::Pt2 *normals = nullptr) -> bool{ + return save_cloud( + path, + size, + reinterpret_cast(vertices), + colors != nullptr ? reinterpret_cast(colors) : nullptr, + normals != nullptr ? 
reinterpret_cast(normals) : nullptr + ); + } + + template + static auto save_cloud(const std::string &path, size_t size, const geo::Pt2 *vertices, const geo::Pt4 *colors = nullptr, const geo::Pt2 *normals = nullptr) -> bool{ + return save_cloud( + path, + size, + reinterpret_cast(vertices), + colors != nullptr ? reinterpret_cast(colors) : nullptr, + normals != nullptr ? reinterpret_cast(normals) : nullptr + ); + } + + template + static auto save_cloud(const std::string &path, size_t size, const geo::Pt3 *vertices, const geo::Pt3 *colors = nullptr, const geo::Pt3 *normals = nullptr) -> bool{ + return save_cloud( + path, + size, + reinterpret_cast(vertices), + colors != nullptr ? reinterpret_cast(colors) : nullptr, + normals != nullptr ? reinterpret_cast(normals) : nullptr + ); + } + + template + static auto save_cloud(const std::string &path, size_t size, const geo::Pt3 *vertices, const geo::Pt4 *colors = nullptr, const geo::Pt3 *normals = nullptr) -> bool{ + return save_cloud( + path, + size, + reinterpret_cast(vertices), + colors != nullptr ? reinterpret_cast(colors) : nullptr, + normals != nullptr ? 
reinterpret_cast(normals) : nullptr + ); + } + + template + static auto save_cloud(const std::string &path, const std::vector &vertices, const std::vector &colors) -> bool{ + + if(check_buffers_sizes(vertices.size(), colors.size())){ + if(colors.size() == 0){ + return save_cloud(path, vertices.size(), vertices.data(), nullptr, nullptr); + }else{ + return save_cloud(path, vertices.size(), vertices.data(), colors.data(), nullptr); + } + } + return false; + } + + template + static auto save_cloud(const std::string &path, size_t size, const TV *vertices, const TC *colors, const TV *normals) -> bool{ + + if(!check_input_values(size, vertices != nullptr, colors != nullptr, dimension, nbChannels)){ + return false; + } + + std::ofstream file; + if(!open_file(path, file)){ + return false; + } + + if(colors != nullptr){ + for(size_t ii = 0; ii < size; ++ii){ + write_vertex_line(file, &vertices[ii*dimension], &colors[ii*nbChannels]); + } + }else{ + for(size_t ii = 0; ii < size; ++ii){ + write_vertex_line(file, &vertices[ii*dimension]); + } + } + + if(normals != nullptr){ + for(size_t ii = 0; ii < size; ++ii){ + write_vertex_normal_line(file, &normals[ii*dimension]); + } + } + + file.close(); + return true; + } + +private: + + + template + static auto write_vertex_line(std::ostream &file, const TV *v) -> void{ + if constexpr (dimension == 2){ + write_vertex(file, v[0], v[1]); + } else if constexpr (dimension == 3){ + write_vertex(file, v[0], v[1], v[2]); + } + } + + template + static auto write_vertex_line(std::ostream &file, const TV *v, const TC *c) -> void{ + if constexpr (dimension == 2 && nbChannels == 3){ + write_vertex(file, v[0], v[1], c[0], c[1], c[2]); + }else if constexpr (dimension == 2 && nbChannels == 4){ + write_vertex(file, v[0], v[1], c[0], c[1], c[2], c[3]); + }else if constexpr (dimension == 3 && nbChannels == 3){ + write_vertex(file, v[0], v[1], v[2], c[0], c[1], c[2]); + }else if constexpr (dimension == 3 && nbChannels == 4){ + write_vertex(file, v[0], 
v[1], v[2], c[0], c[1], c[2], c[3]); + } + } + + template + static auto write_vertex_normal_line(std::ostream &file, const TV *vn) -> void{ + if constexpr (dimension == 2){ + write_vertex_normal(file, vn[0], vn[1]); + } else if constexpr (dimension == 3){ + write_vertex_normal(file, vn[0], vn[1], vn[2]); + } + } + + static auto write_vertex(std::ostream &file, float v1, float v2) -> void; + static auto write_vertex(std::ostream &file, float v1, float v2, float v3) -> void; + static auto write_vertex(std::ostream &file, float v1, float v2, float v3, float v4, float v5) -> void; + static auto write_vertex(std::ostream &file, float v1, float v2, float v3, float v4, float v5, float v6) -> void; + static auto write_vertex(std::ostream &file, float v1, float v2, float v3, float v4, float v5, float v6, float v7) -> void; + + static auto write_vertex_normal(std::ostream &file, float v1, float v2) -> void; + static auto write_vertex_normal(std::ostream &file, float v1, float v2, float v3) -> void; + + static auto open_file(const std::string &path, std::ofstream &file) -> bool; + static auto check_buffers_sizes(size_t sizeVertices, size_t sizeColors) -> bool; + static auto check_input_values(size_t size, bool hasVertices, bool hasColors, int dimension, int nbChannels) -> bool; + + static constexpr std::string_view vLine2 = "v {} {}\n"sv; + static constexpr std::string_view vLine3 = "v {} {} {}\n"sv; + static constexpr std::string_view vLine5 = "v {} {} {} {} {}\n"sv; + static constexpr std::string_view vLine6 = "v {} {} {} {} {} {}\n"sv; + static constexpr std::string_view vLine7 = "v {} {} {} {} {} {} {}•\n"sv; + + static constexpr std::string_view vnLine2 = "vn {} {}\n"sv; + static constexpr std::string_view vnLine3 = "vn {} {} {}\n"sv; +}; + +} diff --git a/cpp-projects/base/files/settings.cpp b/cpp-projects/base/files/settings.cpp new file mode 100644 index 0000000..2d1f151 --- /dev/null +++ b/cpp-projects/base/files/settings.cpp @@ -0,0 +1,219 @@ + + 
+/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
**
+** **
+********************************************************************************/
+
+#include "settings.hpp"
+
+// std
+#include
+#include
+#include <stdexcept>
+
+// local
+#include "utility/io_file.hpp"
+#include "utility/io_data.hpp"
+#include "utility/string.hpp"
+#include "utility/logger.hpp"
+
+using namespace tool::files;
+
+// NOTE(review): in this copy of the file the contents of angle brackets (include targets,
+// std::vector element types, cast targets) are missing. They are left untouched below and
+// must be restored from the repository before this file can compile.
+
+// Loads this object from the binary file at `filePath`.
+// A size different from total_data_size() only produces a warning: the content is still
+// passed to init_from_data and the actual size is kept in m_inputFileSize.
+// Returns false only when the file cannot be read.
+auto BaseSettings::init_from_binary_file(const std::string &filePath) -> bool{
+    Logger::message(std::format("Open [{}] binary file with path [{}]\n", file_description(), filePath));
+    if(auto content = File::read_content(filePath); content.has_value()){
+
+        if(content->size() != total_data_size()){
+            Logger::warning(std::format("Input [{}] file with path [{}] of size [{}] different from expected [{}]\n",
+                file_description(), filePath, content->size(), total_data_size()));
+        }
+        m_inputFileSize = content.value().size();
+        init_from_data(reinterpret_cast(content.value().data()));
+    }else{
+        Logger::error(std::format("Cannot open [{}] file with path: [{}]\n", file_description(), filePath));
+        return false;
+    }
+
+    return true;
+}
+
+// Loads this object from the text file at `filePath` through init_from_text.
+// Returns false when the file cannot be read.
+auto BaseSettings::init_from_text_file(const std::string &filePath) -> bool{
+    Logger::message(std::format("Open [{}] text file with path [{}]\n", file_description(), filePath));
+    if(auto content = File::read_content(filePath); content.has_value()){
+        init_from_text(content.value());
+    }else{
+        Logger::error(std::format("Cannot open [{}] file with path: [{}]\n", file_description(), filePath));
+        return false;
+    }
+
+    return true;
+}
+
+// Writes convert_to_text() to the text file at `filePath`. Returns false on write failure.
+auto BaseSettings::save_to_text_file(const std::string &filePath) const -> bool{
+    Logger::message(std::format("Save [{}] to text file with path [{}]\n", file_description(), filePath));
+    if(!File::write_text_content(filePath, convert_to_text())){
+        Logger::error(std::format("Cannot save [{}] file with path: [{}]\n", file_description(), filePath));
+        return false;
+    }
+    return true;
+}
+
+// Writes total_data_size() bytes produced by convert_to_data to the binary file at
+// `filePath`. Returns false on write failure.
+auto BaseSettings::save_to_binary_file(const std::string &filePath) const -> bool{
+    Logger::message(std::format("Save [{}] to binary file with path [{}]\n", file_description(), filePath));
+    std::vector content;
+    content.resize(total_data_size());
+    convert_to_data(content.data());
+    if(!File::write_binary_content(filePath, content)){
+        Logger::error(std::format("Cannot save [{}] file with path: [{}]\n", file_description(), filePath));
+        return false;
+    }
+    return true;
+}
+
+// Saves several settings objects into one text file laid out as
+//   <number of parts>%%%<text of part 0>%%%<text of part 1>...
+// Returns false when `settingsA` is empty or when the file cannot be written.
+auto BaseSettings::save_to_text_file(const std::vector &settingsA, const std::string &filePath) -> bool{
+
+    if(settingsA.size() == 0){
+        Logger::error("Input multi settings array is empty.\n");
+        return false;
+    }
+    std::vector contents;
+    contents.reserve(settingsA.size());
+    for(const auto settings : settingsA){
+        contents.push_back(settings->convert_to_text());
+    }
+    if(!File::write_text_content(filePath, std::format("{}%%%{}", contents.size(), String::join(contents, "%%%")))){
+        Logger::error(std::format("Cannot write multi settings array to path [{}].\n", filePath));
+        return false;
+    }
+    return true;
+}
+
+// Loads several settings objects from a text file written by the multi-part
+// save_to_text_file. The part count stored in the file must match settingsA.size() and the
+// file must actually contain that many parts. Returns false (after logging) on any
+// mismatch, unreadable file or malformed header.
+auto BaseSettings::init_from_text_file(std::vector &settingsA, const std::string &filePath) -> bool{
+
+    if(settingsA.size() == 0){
+        Logger::error("Input multi settings array is empty.\n");
+        return false;
+    }
+
+    // read content
+    auto content = File::read_content(filePath);
+    if(!content.has_value()){
+        Logger::error(std::format("Cannot read file with path [{}].\n", filePath));
+        return false;
+    }
+    if(content.value().length() == 0){
+        Logger::error(std::format("File with path [{}] is empty.\n", filePath));
+        return false;
+    }
+
+    auto contents = String::split(content.value(), "%%%");
+    if(contents.size() <= 1){
+        Logger::error("Invalid content for multi settings file.\n");
+        return false;
+    }
+
+    // std::stoi throws on a non-numeric or out-of-range header: report it as an invalid
+    // file instead of letting the exception escape.
+    size_t nbParts = 0;
+    try{
+        nbParts = std::stoi(contents[0]);
+    }catch(const std::exception &){
+        Logger::error(std::format("Invalid parts count for multi settings file with path [{}].\n", filePath));
+        return false;
+    }
+
+    if(settingsA.size() != nbParts){
+        Logger::error(std::format("Invalid number of parts ([{}] instead of [{}]) for multi settings file.\n", nbParts, settingsA.size()));
+        return false;
+    }
+
+    // contents[0] is the header, so nbParts parts need nbParts + 1 entries; without this
+    // check a truncated file made the loop below index past the end of `contents`.
+    // (contents.size() > 1 is guaranteed by the check above.)
+    if((contents.size() - 1) < nbParts){
+        Logger::error(std::format("Invalid number of parts ([{}] instead of [{}]) for multi settings file.\n", contents.size() - 1, nbParts));
+        return false;
+    }
+
+    for(size_t ii = 0; ii < nbParts; ++ii){
+        settingsA[ii]->init_from_text(contents[ii+1]);
+    }
+
+    return true;
+}
+
+#include
+
+// Saves several settings objects into one binary file laid out as a 16-bit part count
+// followed by the data of each part, in order.
+// Returns false when `settingsA` is empty or when the file cannot be written.
+auto BaseSettings::save_to_binary_file(const std::vector &settingsA, const std::string &filePath) -> bool{
+
+    if(settingsA.size() == 0){
+        Logger::error("Input multi settings array is empty.\n");
+        return false;
+    }
+
+    std::vector content;
+    size_t totalDataSize = 0;
+    for(const auto settings : settingsA){
+        totalDataSize += settings->total_data_size();
+    }
+
+    // NOTE(review): settingsA.size() * sizeof(size_t) extra bytes are reserved here but
+    // nothing below writes per-part sizes, so the buffer ends with that many unused bytes.
+    // Kept as is so the produced file size does not change.
+    content.resize(totalDataSize + sizeof(std::int16_t) + settingsA.size() * sizeof(size_t));
+    auto data = content.data();
+
+    size_t offset = 0;
+    std::int16_t nbParts = static_cast(settingsA.size());
+    write(nbParts, data, offset);
+
+    // parts are written one after the other, starting at the offset left by the header write
+    for(const auto settings : settingsA){
+        settings->convert_to_data(content.data() + offset);
+        offset += settings->total_data_size();
+    }
+
+    if(!File::write_binary_content(filePath, content)){
+        Logger::error(std::format("Cannot write multi settings array to path [{}].\n", filePath));
+        return false;
+    }
+    return true;
+}
+
+// Loads several settings objects from a binary file written by the multi-part
+// save_to_binary_file. The stored part count must be non-zero and match settingsA.size().
+// Returns false (after logging) on unreadable, too short or inconsistent files.
+auto BaseSettings::init_from_binary_file(std::vector &settingsA, const std::string &filePath) -> bool{
+
+    if(settingsA.size() == 0){
+        Logger::error("Input multi settings array is empty.\n");
+        return false;
+    }
+
+    // read content
+    auto content = File::read_content(filePath);
+    if(!content.has_value()){
+        Logger::error(std::format("Cannot read file with path [{}].\n", filePath));
+        return false;
+    }
+    if(content.value().length() == 0){
+        Logger::error(std::format("File with path [{}] is empty.\n", filePath));
+        return false;
+    }
+
+    // the header alone needs sizeof(std::int16_t) bytes: a shorter file would be read
+    // past its end
+    const size_t availableSize = content.value().size();
+    if(availableSize < sizeof(std::int16_t)){
+        Logger::error(std::format("File with path [{}] is too small to contain a multi settings header.\n", filePath));
+        return false;
+    }
+
+    // read nb of elements
+    auto data = reinterpret_cast(content.value().data());
+    std::int16_t nbParts = 0;
+    size_t offset = 0;
+    read(nbParts, data, offset);
+
+    if(nbParts == 0 || (settingsA.size() != nbParts)){
+        Logger::error(std::format("Invalid number of parts ([{}] instead of [{}]) for multi settings file.\n", nbParts, settingsA.size()));
+        return false;
+    }
+
+    // read data per element
+    for(auto settings : settingsA){
+        settings->init_from_data(data + offset);
+        // the size is queried after initialisation, as before
+        offset += settings->total_data_size();
+        // a part that claims more bytes than the file holds means the file is truncated
+        if(offset > availableSize){
+            Logger::error(std::format("File with path [{}] is too small for the multi settings to read.\n", filePath));
+            return false;
+        }
+    }
+
+    return true;
+}
+
diff --git a/cpp-projects/base/files/settings.hpp b/cpp-projects/base/files/settings.hpp
new file mode 100644
index 0000000..b755118
--- /dev/null
+++ b/cpp-projects/base/files/settings.hpp
@@ -0,0 +1,66 @@
+
+/*******************************************************************************
+** Toolset-base **
+** MIT License **
+** Copyright (c) [2018] [Florian Lance] **
+** **
+** Permission is hereby granted, free of charge, to any person obtaining a **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation **
+** the rights to use, copy, modify, merge, publish, distribute, sublicense, **
+** and/or sell copies of the Software, and to permit persons to whom the **
+** Software is furnished to do so, subject to the following conditions: **
+** **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software. **
+** **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER **
+** DEALINGS IN THE SOFTWARE.
** +** ** +********************************************************************************/ + +#pragma once + +// std +#include +#include +#include + +namespace tool::files { + +using namespace std::literals::string_view_literals; + +/* Common base for settings objects that can be loaded from and saved to text or binary files. Derived types implement the text and binary conversions; everything after the special members is protected. */ +struct BaseSettings{ + + /* Polymorphic base: the destructor is virtual so that destroying a derived settings object through a BaseSettings pointer is well defined. Copy and move are explicitly defaulted so they stay available next to the user-declared destructor. */ + BaseSettings() = default; + BaseSettings(const BaseSettings &other) = default; + BaseSettings& operator=(const BaseSettings &other) = default; + BaseSettings(BaseSettings &&other) = default; + BaseSettings& operator=(BaseSettings &&other) = default; + virtual ~BaseSettings() = default; + +protected: + /* single-object file helpers; each reports success through its bool result */ + virtual auto init_from_binary_file(const std::string &filePath) -> bool; + virtual auto init_from_text_file(const std::string &filePath) -> bool; + + virtual auto save_to_text_file(const std::string &filePath) const -> bool; + virtual auto save_to_binary_file(const std::string &filePath) const -> bool; + + /* text conversion, supplied by derived types */ + virtual auto init_from_text(const std::string &text) -> void = 0; + virtual auto convert_to_text() const -> std::string = 0; + + /* binary conversion, supplied by derived types; the multi-object binary routines advance through their buffer by total_data_size() after each element */ + virtual auto init_from_data(std::int8_t *data) -> void = 0; + virtual auto convert_to_data(std::int8_t *data) const -> void = 0; + virtual auto total_data_size() const noexcept-> size_t = 0; + + /* overridable identification; the defaults are 0 and "default" */ + virtual auto type() const noexcept -> std::int32_t {return 0;}; + virtual auto file_description() const noexcept -> std::string_view {return "default"sv;} + + /* multi-object variants: process every element of settingsA against a single file */ + static auto save_to_text_file(const std::vector &settingsA, const std::string &filePath) -> bool; + static auto init_from_text_file(std::vector &settingsA, const std::string &filePath) -> bool; + + static auto save_to_binary_file(const std::vector &settingsA, const std::string &filePath) -> bool; + static auto init_from_binary_file(std::vector &settingsA, const std::string &filePath) -> bool; + + /* NOTE(review): presumably the size of the last file read; not assigned in the code visible here - confirm in settings.cpp */ + size_t m_inputFileSize = 0; +}; + +} diff --git a/cpp-projects/base/files/text_settings.hpp b/cpp-projects/base/files/text_settings.hpp new file mode 100644 index 0000000..c06b416 --- /dev/null +++ b/cpp-projects/base/files/text_settings.hpp @@ -0,0 +1,65 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this 
software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "settings.hpp" +#include "utility/vector.hpp" + +namespace tool::files { + +/* Interface for an object that can be initialised from, and converted to, a text representation. */ +struct SubTextSettings{ + /* Polymorphic interface: the destructor is virtual so that destroying an implementation through a SubTextSettings pointer is well defined. Copy and move stay defaulted. */ + SubTextSettings() = default; + SubTextSettings(const SubTextSettings &other) = default; + SubTextSettings& operator=(const SubTextSettings &other) = default; + SubTextSettings(SubTextSettings &&other) = default; + SubTextSettings& operator=(SubTextSettings &&other) = default; + virtual ~SubTextSettings() = default; + + virtual auto init_from_text(const std::string &text) -> void = 0; + virtual auto convert_to_text() const -> std::string = 0; +}; + +/* Settings persisted as text only: the public file helpers forward to the text-file routines of BaseSettings, and the binary hooks are overridden privately as no-ops with a data size of 0. */ +struct TextSettings : public BaseSettings{ + + /* Loads this object from the text file at filePath; returns the result of init_from_text_file(). */ + auto init_from_file(const std::string &filePath) -> bool{ + return init_from_text_file(filePath); + } + + /* Saves this object to the text file at filePath; returns the result of save_to_text_file(). */ + auto save_to_file(const std::string &filePath) const -> bool{ + return save_to_text_file(filePath); + } + + /* Saves every element of settingsA into one text file, after passing the array through convert(). */ + static auto save_to_file(const std::vector &settingsA, const std::string &filePath) -> bool{ + return BaseSettings::save_to_text_file(convert(settingsA), filePath); + } + + /* Loads every element of settingsA from one text file, after passing the array through convert(). */ + static auto init_from_file(std::vector &settingsA, const std::string &filePath) -> bool{ + auto s = convert(settingsA); + return BaseSettings::init_from_text_file(s, filePath); + } + +private : + + /* binary serialisation is not supported by text settings: nothing is read or written and the reported size is 0 */ + virtual auto init_from_data(std::int8_t *) -> void override {} + virtual auto convert_to_data(std::int8_t *) const -> void override {} + virtual auto total_data_size() const noexcept-> size_t override {return 0;} +}; +} diff --git a/cpp-projects/base/geometry/aabb2.hpp b/cpp-projects/base/geometry/aabb2.hpp new file mode 100644 index 0000000..50c31f8 --- /dev/null +++ b/cpp-projects/base/geometry/aabb2.hpp @@ -0,0 +1,89 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** 
+** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "point2.hpp" +#include "utility/maths_utility.hpp" + +namespace tool::geo { + +/* 2D axis-aligned box described by a center (origin) and a per-axis half extent (size): the corners are origin - size and origin + size. Defaults to origin {0,0} and size {1,1}. */ +template +struct AABB2{ + + + AABB2() = default; + + /* Builds the box from its center o and half extent s. */ + constexpr AABB2(Pt2 o, Vec2 s) noexcept : origin(o), size(s){ + } + + /* Lower corner: component-wise minimum of origin + size and origin - size, so the result does not depend on the sign of size. */ + constexpr Pt2 min() const noexcept{ + const Vec2 p1{origin + size}; + const Vec2 p2{origin - size}; + return Pt2{std::min(p1.x(), p2.x()), std::min(p1.y(), p2.y())}; + } + + /* Upper corner: component-wise maximum of the same two points. NOTE(review): declared to return Vec2 while min() returns Pt2 - presumably interchangeable types; confirm. */ + constexpr Vec2 max() const noexcept{ + const Vec2 p1{origin + size}; + const Vec2 p2{origin - size}; + return Vec2(std::max(p1.x(), p2.x()), std::max(p1.y(), p2.y())); + } + + /* Splits the box into four boxes of half extent h = size * 0.5, centered at origin + (-h.x,-h.y), (-h.x,+h.y), (+h.x,+h.y) and (+h.x,-h.y), in that order. */ + constexpr std::array,4> subdivide() const noexcept{ + const Vec2 midSize = size*acc{0.5}; + return {{AABB2{origin - midSize, midSize}, + AABB2{origin + Vec2{-midSize.x(), midSize.y()}, midSize}, + AABB2{origin + midSize, midSize}, + AABB2{origin + Vec2{midSize.x(), -midSize.y()}, midSize}}}; + } + + Pt2 origin = {0,0}; + Vec2 size = {1,1}; +}; + + +/* Returns true when p lies inside aabb or on its border: every bound comparison also accepts a coordinate that math::almost_equal reports as equal to the bound. */ +template +constexpr bool point_in_aabb(const Pt2 &p, const AABB2 &aabb) noexcept{ + + const Pt2 pMin = aabb.min(); + const Pt2 pMax = aabb.max(); + const bool xMinE = math::almost_equal(p.x(),pMin.x()); + const bool yMinE = 
math::almost_equal(p.y(),pMin.y()); + + if((p.x() > pMin.x() || xMinE) && (p.y() > pMin.y() || yMinE) ){ + + const bool xMaxE = math::almost_equal(p.x(),pMax.x()); + const bool yMaxE = math::almost_equal(p.y(),pMax.y()); + if((p.x() < pMax.x() || xMaxE) && (p.y() < pMax.y() || yMaxE)){ + return true; + } + } + + return false; +} + +} diff --git a/cpp-projects/base/geometry/aabb3.hpp b/cpp-projects/base/geometry/aabb3.hpp new file mode 100644 index 0000000..b61c2c8 --- /dev/null +++ b/cpp-projects/base/geometry/aabb3.hpp @@ -0,0 +1,139 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "geometry/point3.hpp" + +namespace tool::geo { + +/* 3D axis-aligned box stored directly as two points, min and max. The origin/size based members inside the struct and the helper functions after it are commented out. */ +template +struct AABB3{ + + Pt3 min; + Pt3 max; +// AABB3() = default; + +// constexpr AABB3(const Pt3 &o, const Vec3 &s) noexcept : origin(o), size(s){ +// } + +// constexpr auto min() const noexcept -> Vec3{ +// Vec3 p1 = origin + size; +// Vec3 p2 = origin - size; +// return Vec3(std::min(p1.x(), p2.x()), std::min(p1.y(), p2.y()),std::min(p1.z(), p2.z())); +// } + +// constexpr auto max() const noexcept-> Vec3{ +// Vec3 p1 = origin + size; +// Vec3 p2 = origin - size; +// return Vec3(std::max(p1.x(), p2.x()), std::max(p1.y(), p2.y()),std::max(p1.z(), p2.z())); +// } + +// Pt3 origin = {0,0,0}; +// Vec3 size = {1,1,1}; + +}; + + +//template +//constexpr AABB3 aabb_from_points(const Vec3 &min, const Vec3 &max) noexcept{ +// return AABB3((min + max) * acc{0.5}, (max - min) * acc{0.5}); +//} + +//template +//constexpr bool point_in_aabb(const Pt3 &p, const AABB3 &aabb) noexcept{ + +// const Pt3 pMin = aabb.min(); +// const Pt3 pMax = aabb.max(); +// const bool xMinE = tool::almost_equal(p.x(),pMin.x()); +// const bool yMinE = tool::almost_equal(p.y(),pMin.y()); +// const bool zMinE = tool::almost_equal(p.z(),pMin.z()); + +// if((p.x() > pMin.x() || xMinE) && (p.y() > pMin.y() || yMinE) && (p.z() > pMin.z() || zMinE)){ + +// const bool xMaxE = tool::almost_equal(p.x(),pMax.x()); +// const bool yMaxE = tool::almost_equal(p.y(),pMax.y()); +// const bool zMaxE = tool::almost_equal(p.z(),pMax.z()); + +// if((p.x() < pMax.x() || xMaxE) && (p.y() < pMax.y() || yMaxE) && (p.z() < pMax.z() || zMaxE)){ +// return true; +// } +// } + +// return false; +//} + +//template +//constexpr Pt3 closest_point(const AABB3 &aabb, const Pt3 &p) noexcept{ + +// Pt3 res = p; +// const Pt3 pMin = aabb.min(); +// const Pt3 pMax = aabb.max(); + +// res.x() = (res.x() < pMin.x()) ? 
pMin.x() : res.x(); +// res.y() = (res.y() < pMin.y()) ? pMin.y() : res.y(); +// res.z() = (res.z() < pMin.z()) ? pMin.z() : res.z(); + +// res.x() = (res.x() > pMax.x()) ? pMax.x() : res.x(); +// res.y() = (res.y() > pMax.y()) ? pMax.y() : res.y(); +// res.z() = (res.z() > pMax.z()) ? pMax.z() : res.z(); + +// return res; +//} + +//template +//constexpr Interval interval(const AABB3 &aabb, const Vec3 &axis) noexcept{ + +// const Pt3 i = aabb.min(); +// const Pt3 a = aabb.max(); + +// const std::array,8> vertices = { +// Pt3(i.x(), a.y(), a.z()), +// Pt3(i.x(), a.y(), i.z()), +// Pt3(i.x(), i.y(), a.z()), +// Pt3(i.x(), i.y(), i.z()), +// Pt3(a.x(), a.y(), a.z()), +// Pt3(a.x(), a.y(), i.z()), +// Pt3(a.x(), i.y(), a.z()), +// Pt3(a.x(), i.y(), i.z()) +// }; + +// const acc dotV = dot(axis, vertices[0]); +// Interval res{dotV,dotV}; +// for(const auto &pt : vertices){ +// const acc projection = dot(axis,pt); +// res.min() = (projection < res.min()) ? projection : res.min(); +// res.max() = (projection > res.max()) ? 
projection : res.max(); +// } + +// return res; +//} + +} + + diff --git a/cpp-projects/base/geometry/circle.hpp b/cpp-projects/base/geometry/circle.hpp new file mode 100644 index 0000000..206ac35 --- /dev/null +++ b/cpp-projects/base/geometry/circle.hpp @@ -0,0 +1,47 @@ + + + +///******************************************************************************* +//** Toolset-base ** +//** MIT License ** +//** Copyright (c) [2018] [Florian Lance] ** +//** ** +//** Permission is hereby granted, free of charge, to any person obtaining a ** +//** copy of this software and associated documentation files (the "Software"), ** +//** to deal in the Software without restriction, including without limitation ** +//** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +//** and/or sell copies of the Software, and to permit persons to whom the ** +//** Software is furnished to do so, subject to the following conditions: ** +//** ** +//** The above copyright notice and this permission notice shall be included in ** +//** all copies or substantial portions of the Software. ** +//** ** +//** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +//** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +//** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +//** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +//** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +//** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +//** DEALINGS IN THE SOFTWARE. 
** +//** ** +//********************************************************************************/ + +//#pragma once + +//// local +//#include "geometry/point2.hpp" + +//namespace tool::geo { + +//template +//struct Circle{ + +// Circle() = default; + +// constexpr Circle(acc r, const Pt2 &c) noexcept : radius(r), position(c){ +// } + +// acc radius{1}; +// Pt2 position; +//}; +//} diff --git a/cpp-projects/base/geometry/cloud.cpp b/cpp-projects/base/geometry/cloud.cpp new file mode 100644 index 0000000..737b3ad --- /dev/null +++ b/cpp-projects/base/geometry/cloud.cpp @@ -0,0 +1,77 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#include "cloud.hpp" + +using namespace tool::geo; + +/* Appends the points of cloud to this cloud. Nothing happens when either cloud fails is_valid(). When this cloud is empty it simply becomes a copy of cloud. */ +/* Otherwise the vertices are appended, and every attribute this cloud already carries (colors, normals) is kept at one entry per vertex: appended from cloud when cloud carries it, otherwise grown with resize() to the new vertex count. Attributes this cloud does not carry are not adopted from cloud. */ +auto ColoredCloudData::merge(const ColoredCloudData &cloud) -> void{ + + if(!is_valid() || !cloud.is_valid()){ + return; + } + + if(!empty()){ + + vertices.merge(cloud.vertices); + + if(has_colors()){ + if(cloud.has_colors()){ + colors.merge(cloud.colors); + }else{ + /* cloud brings no colors: pad so that colors.size() still equals vertices.size() and is_valid() keeps holding */ + colors.resize(vertices.size()); + } + } + + if(has_normals()){ + if(cloud.has_normals()){ + normals.merge(cloud.normals); + }else{ + /* same padding rule as for colors */ + normals.resize(vertices.size()); + } + } + + }else{ + *this = cloud; + } +} + +/* Sets every existing color entry to color; a cloud without colors is left unchanged in size. */ +auto ColoredCloudData::fill_colors(const geo::Pt3f &color) -> void{ + colors.fill(color); +} + +/* Filters the cloud with the ids returned by vertices.get_outliers_id(target, maxDistance). */ +/* NOTE(review): those ids are the ones KEPT; if get_outliers_id() returns the outliers themselves this keeps them instead of removing them - confirm what the helper returns. */ +auto ColoredCloudData::remove_outliers(const Pt3f &target, float maxDistance) -> void { + keep_from_ids(vertices.get_outliers_id(target, maxDistance)); +} + +/* Applies keep_from_ids(ids) to the vertices and to each attribute array that is present, so the arrays stay aligned. */ +auto ColoredCloudData::keep_from_ids(const std::vector &ids) noexcept -> void{ + + vertices.keep_from_ids(ids); + + if(has_colors()){ + colors.keep_from_ids(ids); + } + + if(has_normals()){ + normals.keep_from_ids(ids); + } +} + + + diff --git a/cpp-projects/base/geometry/cloud.hpp b/cpp-projects/base/geometry/cloud.hpp new file mode 100644 index 0000000..1fb0018 --- /dev/null +++ b/cpp-projects/base/geometry/cloud.hpp @@ -0,0 +1,350 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions 
of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "vertices.hpp" +#include "graphics/colors.hpp" + +namespace tool::geo { + +/* Abstract base for geometric objects: stores a Type tag and a dimension, both fixed at construction, and requires derived types to implement is_empty(). */ +struct Geometry { + + /* Kind of geometry, stored as std::int8_t. Only Unspecified, Triangle and PointCloud are active; the other entries are commented out. */ + enum class Type : std::int8_t{ + Unspecified = 0, + Triangle = 1, + PointCloud = 2, +// PointCloud = 1, +// VoxelGrid = 2, +// Octree = 3, +// LineSet = 4, +// MeshBase = 5, +// TriangleMesh = 6, +// HalfEdgeTriangleMesh = 7, +// Image = 8, +// RGBDImage = 9, +// TetraMesh = 10, +// OrientedBoundingBox = 11, +// AxisAlignedBoundingBox = 12, + }; + + /* Stores the geometry kind and its dimension. */ + Geometry(Type type, int dimension) : m_type(type), m_dim(dimension) { + } + virtual ~Geometry() {} + + // get + constexpr auto type() const -> Type{return m_type;} + constexpr auto dimension() const -> int{return m_dim;} + virtual auto is_empty() const -> bool = 0; + +private: + Type m_type = Type::Unspecified; + int m_dim = 0; +}; + +/* Derived from Geometry with its whole body currently commented out. NOTE(review): in this state it declares no constructor while Geometry only offers Geometry(Type, int), and it does not override is_empty(), so it cannot be instantiated. */ +struct Geometry3D : public Geometry{ + +// ~Geometry3D() override {} + +//protected: +// /// \brief Parameterized Constructor. +// /// +// /// \param type type of object based on GeometryType. +// Geometry3D(GeometryType type) : Geometry(type, 3) {} + +//public: +// Geometry3D& Clear() override = 0; +// bool IsEmpty() const override = 0; +// /// Returns min bounds for geometry coordinates. +// virtual Eigen::Vector3d GetMinBound() const = 0; +// /// Returns max bounds for geometry coordinates. 
+// virtual Eigen::Vector3d GetMaxBound() const = 0; +// /// Returns the center of the geometry coordinates. +// virtual Eigen::Vector3d GetCenter() const = 0; + +// /// Creates the axis-aligned bounding box around the points of the object. +// /// Further details in AxisAlignedBoundingBox::CreateFromPoints() +// virtual AxisAlignedBoundingBox GetAxisAlignedBoundingBox() const = 0; + +// /// Creates an oriented bounding box around the points of the object. +// /// Further details in OrientedBoundingBox::CreateFromPoints() +// /// \param robust If set to true uses a more robust method which works +// /// in degenerate cases but introduces noise to the points +// /// coordinates. +// virtual OrientedBoundingBox GetOrientedBoundingBox( +// bool robust = false) const = 0; + +// /// Creates the minimal oriented bounding box around the points of the +// /// object. Further details in +// /// OrientedBoundingBox::CreateFromPointsMinimal() +// /// \param robust If set to true uses a more robust method which works +// /// in degenerate cases but introduces noise to the points +// /// coordinates. +// virtual OrientedBoundingBox GetMinimalOrientedBoundingBox( +// bool robust = false) const = 0; + +// /// \brief Apply transformation (4x4 matrix) to the geometry coordinates. +// virtual Geometry3D& Transform(const Eigen::Matrix4d& transformation) = 0; + +// /// \brief Apply translation to the geometry coordinates. +// /// +// /// \param translation A 3D vector to transform the geometry. +// /// \param relative If `true`, the \p translation is directly applied to the +// /// geometry. Otherwise, the geometry center is moved to the \p translation. +// virtual Geometry3D& Translate(const Eigen::Vector3d& translation, +// bool relative = true) = 0; +// /// \brief Apply scaling to the geometry coordinates. +// /// Given a scaling factor \f$s\f$, and center \f$c\f$, a given point +// /// \f$p\f$ is transformed according to \f$s (p - c) + c\f$. 
+// /// +// /// \param scale The scale parameter that is multiplied to the +// /// points/vertices of the geometry. +// /// \param center Scale center that is used to resize the geometry. +// virtual Geometry3D& Scale(const double scale, +// const Eigen::Vector3d& center) = 0; + +// /// \brief Apply rotation to the geometry coordinates and normals. +// /// Given a rotation matrix \f$R\f$, and center \f$c\f$, a given point +// /// \f$p\f$ is transformed according to \f$R (p - c) + c\f$. +// /// +// /// \param R A 3x3 rotation matrix +// /// \param center Rotation center that is used for the rotation. +// virtual Geometry3D& Rotate(const Eigen::Matrix3d& R, +// const Eigen::Vector3d& center) = 0; + +// virtual Geometry3D& Rotate(const Eigen::Matrix3d& R); + +// /// Get Rotation Matrix from XYZ RotationType. +// static Eigen::Matrix3d GetRotationMatrixFromXYZ( +// const Eigen::Vector3d& rotation); +// /// Get Rotation Matrix from YZX RotationType. +// static Eigen::Matrix3d GetRotationMatrixFromYZX( +// const Eigen::Vector3d& rotation); +// /// Get Rotation Matrix from ZXY RotationType. +// static Eigen::Matrix3d GetRotationMatrixFromZXY( +// const Eigen::Vector3d& rotation); +// /// Get Rotation Matrix from XZY RotationType. +// static Eigen::Matrix3d GetRotationMatrixFromXZY( +// const Eigen::Vector3d& rotation); +// /// Get Rotation Matrix from ZYX RotationType. +// static Eigen::Matrix3d GetRotationMatrixFromZYX( +// const Eigen::Vector3d& rotation); +// /// Get Rotation Matrix from YXZ RotationType. +// static Eigen::Matrix3d GetRotationMatrixFromYXZ( +// const Eigen::Vector3d& rotation); +// /// Get Rotation Matrix from AxisAngle RotationType. +// static Eigen::Matrix3d GetRotationMatrixFromAxisAngle( +// const Eigen::Vector3d& rotation); +// /// Get Rotation Matrix from Quaternion. +// static Eigen::Matrix3d GetRotationMatrixFromQuaternion( +// const Eigen::Vector4d& rotation); + +//protected: +// /// Compute min bound of a list points. 
+// Eigen::Vector3d ComputeMinBound( +// const std::vector& points) const; +// /// Compute max bound of a list points. +// Eigen::Vector3d ComputeMaxBound( +// const std::vector& points) const; +// /// Computer center of a list of points. +// Eigen::Vector3d ComputeCenter( +// const std::vector& points) const; + +// /// \brief Resizes the colors vector and paints a uniform color. +// /// +// /// \param colors An array of eigen vectors specifies colors in RGB. +// /// \param size The resultant size of the colors array. +// /// \param color The final color in which the colors will be painted. +// void ResizeAndPaintUniformColor(std::vector& colors, +// const size_t size, +// const Eigen::Vector3d& color) const; + +// /// \brief Transforms all points with the transformation matrix. +// /// +// /// \param transformation 4x4 matrix for transformation. +// /// \param points A list of points to be transformed. +// void TransformPoints(const Eigen::Matrix4d& transformation, +// std::vector& points) const; + +// /// \brief Transforms the normals with the transformation matrix. +// /// +// /// \param transformation 4x4 matrix for transformation. +// /// \param normals A list of normals to be transformed. +// void TransformNormals(const Eigen::Matrix4d& transformation, +// std::vector& normals) const; + +// /// \brief Transforms all covariance matrices with the transformation. +// /// +// /// \param transformation 4x4 matrix for transformation. +// /// \param covariances A list of covariance matrices to be transformed. +// void TransformCovariances(const Eigen::Matrix4d& transformation, +// std::vector& covariances) const; + +// /// \brief Apply translation to the geometry coordinates. +// /// +// /// \param translation A 3D vector to transform the geometry. +// /// \param points A list of points to be transformed. +// /// \param relative If `true`, the \p translation is directly applied to the +// /// \p points. 
Otherwise, the center of the \p points is moved to the \p +// /// translation. +// void TranslatePoints(const Eigen::Vector3d& translation, +// std::vector& points, +// bool relative) const; + +// /// \brief Scale the coordinates of all points by the scaling factor \p +// /// scale. +// /// +// /// \param scale The scale factor that is used to resize the geometry +// /// \param points A list of points to be transformed +// /// \param center Scale center that is used to resize the geometry.. +// void ScalePoints(const double scale, +// std::vector& points, +// const Eigen::Vector3d& center) const; + +// /// \brief Rotate all points with the rotation matrix \p R. +// /// +// /// \param R A 3x3 rotation matrix +// /// defines the axis of rotation and the norm the angle around this axis. +// /// \param points A list of points to be transformed. +// /// \param center Rotation center that is used for the rotation. +// void RotatePoints(const Eigen::Matrix3d& R, +// std::vector& points, +// const Eigen::Vector3d& center) const; + +// /// \brief Rotate all normals with the rotation matrix \p R. +// /// +// /// \param R A 3x3 rotation matrix +// /// \param normals A list of normals to be transformed. +// void RotateNormals(const Eigen::Matrix3d& R, +// std::vector& normals) const; + +// /// \brief Rotate all covariance matrices with the rotation matrix \p R. +// /// +// /// \param R A 3x3 rotation matrix +// /// \param covariances A list of covariance matrices to be transformed. 
+// void RotateCovariances(const Eigen::Matrix3d& R, +// std::vector& covariances) const; +}; + +/* empty placeholder type, no members yet */ +struct Cloud2D{ + +}; + + + +/* empty placeholder type, no members yet */ +struct Cloud3D{ + +}; + +/* Point cloud stored as parallel arrays: vertices plus optional colors and normals. An attribute counts as present when its array is non-empty, and a present attribute must hold exactly one entry per vertex (checked by is_valid()). */ +struct ColoredCloudData{ + + ColoredCloudData() = default; + ColoredCloudData(const ColoredCloudData& other) = default; + ColoredCloudData& operator=(const ColoredCloudData& other) = default; + ColoredCloudData(ColoredCloudData&& other) = default; + ColoredCloudData& operator=(ColoredCloudData&& other) = default; + + /* Builds a cloud of size vertices and colors, plus size normals when useNormals is true (delegates to resize()). */ + constexpr explicit ColoredCloudData(size_t size, bool useNormals = false){ + resize(size, useNormals); + } + + /* Builds a one-point cloud from a vertex and its color; no normal is stored. */ + constexpr ColoredCloudData(const geo::Pt3f &vertex, const geo::Pt3f &color){ + vertices.push_back(vertex); + colors.push_back(color); + } + + // get + [[nodiscard]] constexpr auto empty() const noexcept -> bool {return vertices.empty();} + [[nodiscard]] constexpr auto size() const noexcept -> size_t {return vertices.size();} + [[nodiscard]] constexpr auto has_vertices() const noexcept -> bool{return !vertices.empty();} + [[nodiscard]] constexpr auto has_colors() const noexcept -> bool{return !colors.empty();} + [[nodiscard]] constexpr auto has_normals() const noexcept -> bool{return !normals.empty();} + /* True when every present attribute array (colors, normals) has the same length as vertices; absent attributes are ignored. */ + [[nodiscard]] constexpr auto is_valid() const noexcept -> bool{ + if(has_colors()){ + if(vertices.size() != colors.size()){ + return false; + } + } + if(has_normals()){ + if(vertices.size() != normals.size()){ + return false; + } + } + return true; + } + + // geometry modifiers + auto remove_outliers(const Pt3f &target, float maxDistance) -> void; + + // add + auto merge(const ColoredCloudData &cloud)-> void; + + // remove + auto keep_from_ids(const std::vector &ids) noexcept -> void; + + // modify + auto fill_colors(const geo::Pt3f &color) -> void; + + // memory + /* Resizes vertices and colors to size; normals are resized only when useNormals is true. NOTE(review): with useNormals == false, normals that already exist keep their old length, which is_valid() rejects once it differs from size - confirm callers clear normals first. */ + constexpr auto resize(size_t size, bool useNormals = false) -> void{ + + vertices.resize(size); + colors.resize(size); + + if(useNormals){ + normals.resize(size); + } + } + + /* Reserves capacity for vertices and colors, and for normals only when useNormals is true. */ + constexpr auto reserve(size_t capacity, bool useNormals = 
false) -> void{ + + vertices.reserve(capacity); + colors.reserve(capacity); + + if(useNormals){ + normals.reserve(capacity); + } + } + + constexpr auto clear() -> void{ + vertices.clear(); + colors.clear(); + normals.clear(); + } + + constexpr auto shrink_to_fit() -> void{ + vertices.shrink_to_fit(); + colors.shrink_to_fit(); + normals.shrink_to_fit(); + } + + Vertices3D vertices; + ColorsRGB32 colors; + Vertices3D normals; +}; + +} diff --git a/cpp-projects/base/geometry/geometry.hpp b/cpp-projects/base/geometry/geometry.hpp new file mode 100644 index 0000000..883ef38 --- /dev/null +++ b/cpp-projects/base/geometry/geometry.hpp @@ -0,0 +1,75 @@ + + + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + + +#pragma once + +// local +#include "point.hpp" + + +namespace tool::geo { + +//template +//Vector project(const Vector len, const Vector dir){ +// return len.project(dir); +//} + +//template +//Vector perpendicular(const Vector len, const Vector dir){ +// return len.perpendicular(dir); +//} + + +//inline Vec project(const Vector &dir) const { +// return dir * (dot(*this, dir) / dir.square_norm()); +//} + +//inline Vec perpendicular(const Vector &dir) const { +// return (*this) - project(dir); +//} + +//inline Vec reflection(const Vector &normal) const{ +// return (*this) - normal * (dot(*this,normal) * acc{2}); +//} + + +//template +//acc angle(const Pt &l, const Pt &r){ +// acc m = sqrt(l.square_norm()*r.square_norm()); +// return std::acos(dot(l,r)/m); +//} + +//template +//bool is_perpendicular(const vecN &l, const vecN &r, int ulp = 3) noexcept{ +// return almost_equal(dot(l,r), acc{0}, ulp); +//} + + +} diff --git a/cpp-projects/base/geometry/geometry2.hpp b/cpp-projects/base/geometry/geometry2.hpp new file mode 100644 index 0000000..7dc94b6 --- /dev/null +++ b/cpp-projects/base/geometry/geometry2.hpp @@ -0,0 +1,210 @@ + + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or 
substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + + +#pragma once + +// local +//#include "geometry/matrix2.hpp" +//#include "geometry/line2.hpp" +//#include "geometry/circle.hpp" +//#include "geometry/rectangle.hpp" +//#include "geometry/interval.hpp" +//#include "utility/maths_utility.hpp" + +namespace tool::geo { + +//// aliases +//template +//constexpr bool point_on_line(const Pt2 &p, const Line2 &l) noexcept; // OK +//template +//constexpr bool point_in_circle(const Pt2 &p, const Circle &c) noexcept; // OK +//template +//constexpr bool point_in_rectangle(const Pt2 &p, const Rectangle2 &r); // TODO : float accuracy && constexpr +//template +//bool point_in_oriented_rectangle(const Pt2 &p, const OrientedRectangle2 &o); // TODO: constexpr +//template +//constexpr bool line_circle(const Line2 &l, const Circle &c); // TODO: 0 div +//template +//bool line_rectangle(const Line2 &l, const Rectangle2 &r); +//template +//bool line_oriented_rectangle(const Line2 &l, const OrientedRectangle2 &o); +//template +//bool circle_circle(const Circle &c1, const Circle &c2); +//template +//bool circle_rectangle(const Circle &c, const Rectangle2 &r); +//template +//bool circle_oriented_rectangle(const Circle &c, const OrientedRectangle2 &o); +//template +//bool rectangle_rectangle(const Rectangle2 &r1, const Rectangle2 &r2); +//template +//Interval interval(const Rectangle2 &r, const Vec2 
&axis); +//template +//bool overlap_on_axis(const Rectangle2 &r1, const Rectangle2 &r2, const Vec2 &axis); + + +//template +//constexpr bool point_on_line(const Pt2 &p, const Line2 &l) noexcept{ + +// // find the slope +// const Vec2 d = l.vec(); +// const acc M = d.y()/d.x(); + +// // find the Y-intercept +// const acc B{l.start().y() - l.start().x()*M}; + +// // check the equation +// return math::almost_equal(p.y(), M*p.x() + B); +//} + +//template +//constexpr bool point_in_circle(const Pt2 &p, const Circle &c) noexcept{ +// const auto length = Line2(p, c.position).square_length(); +// const auto squareRadius = c.radius*c.radius; +// return (length < c.radius*c.radius || math::almost_equal(length, squareRadius) ); +//} + +//template +//constexpr bool point_in_rectangle(const Pt2 &p, const Rectangle2 &r){ +// const auto min = r.min(); +// const auto max = r.max(); +// return min.x() <= p.y() && +// p.x() <= max.x() && +// p.y() <= max.y(); +//} + +//template +//bool point_in_oriented_rectangle(const Pt2 &p, const OrientedRectangle2 &o){ +// Vec2 rotVector = p - o.position; +// acc theta = -math::deg_2_rad(o.rotation); +// Mat2 zRotation2x2 = {cos(theta), sin(theta), +// -sin(theta), cos(theta)}; +// return point_in_rectangle(Pt2((zRotation2x2*rotVector) + o.halfExtends), Rectangle2(Pt2(), o.halfExtends * acc{2})); +//} + + +//template +//constexpr bool line_circle(const Line2 &l, const Circle &c){ + +// const Vec2 ab = l.vec(); +// const acc t = dot(c.position - l.start, ab) / dot(ab,ab); // TODO: check 0 div +// if(t < acc{0} || t > acc{1} || alomst_equal(t,acc{1}) || almost_equal(t,acc{0})){ +// return false; +// } + +// return Line2(c.position, l.start + ab * t).square_length() < (c.radius * c.radius); +//} + + +//template +//bool line_rectangle(const Line2 &l, const Rectangle2 &r){ + +// if(point_in_rectangle(l.start, r) || point_in_rectangle(l.end, r)){ +// return true; +// } + +// Vec2 norm = normalized(vec(l.start,l.end)); +// norm.x() = (norm.x() != 0) ? 
(1 / norm.x()) : 0; // TODO +// norm.y() = (norm.y() != 0) ? (1 / norm.y()) : 0; // TODO +// Vec2 minV = (r.min() - l.start) * norm; +// Vec2 maxV = (r.max() - l.start) * norm; + +// acc tMin = std::max(std::min(minV.x(), maxV.x()), std::min(minV.y(), maxV.y())); +// acc tMax = std::min(std::max(minV.x(), maxV.x()), std::max(minV.y(), maxV.y())); + +// if(tMax < 0 || tMin > tMax){ +// return false; +// } + +// acc t = (tMin < 0) ? tMax : tMin; +// return t > 0 && (t * t < l.square_length()); +//} + + +//template +//bool line_oriented_rectangle(const Line2 &l, const OrientedRectangle2 &o){ +// return true; // TODO +//} + +//template +//bool circle_circle(const Circle &c1, const Circle &c2){ +// return true; // TODO +//} + + +//template +//bool circle_rectangle(const Circle &c, const Rectangle2 &r){ +// return true; // TODO +//} + + +//template +//bool circle_oriented_rectangle(const Circle &c, const OrientedRectangle2 &o){ +// return true; // TODO +//} + +//template +//bool rectangle_rectangle(const Rectangle2 &r1, const Rectangle2 &r2){ +// return true; // TODO +//} + +//template +//Interval interval(const Rectangle2 &r, const Vec2 &axis){ +// Interval res; // TODO +// return res; +//} + +//template +//bool overlap_on_axis(const Rectangle2 &r1, const Rectangle2 &r2, const Vec2 &axis){ +// return true; // TODO +//} + +// ### 2D collisions +// implement a shitload of functions +// rectangle_rectangle_SAT +// generic_SAT +// interval or axis +// overlap_on_axis rect or axis +// rectangle_oriented_rectangle +// oriented_rectangle_oriented_rectangle + +// ### 2D Optimizations +// containing_circle +// containing_rectangle +// bounding_shape +// point_in_shape + +// quad_tree.h +// QuadTreeData +// QuadTreeNode +// ... 
+ +// broad phase collisions + + +} diff --git a/cpp-projects/base/geometry/geometry3.hpp b/cpp-projects/base/geometry/geometry3.hpp new file mode 100644 index 0000000..58c5d9e --- /dev/null +++ b/cpp-projects/base/geometry/geometry3.hpp @@ -0,0 +1,213 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// std +#include +#include + +// local +//#include "geometry/aabb3.hpp" +//#include "geometry/sphere.hpp" +//#include "geometry/obb3.hpp" +//#include "geometry/mesh.hpp" +//#include "geometry/line3.hpp" +//#include "geometry/raycast.hpp" + +namespace tool::geo { + +//inline Vec3 project(const Vec3 &dir) const{return Vec::project(dir);} +//inline Vec3 perpendicular(const Vec3 &dir) const{return Vec::perpendicular(dir);} +//inline Vec3 reflection(const Vec3 &normal) const{return Vec::reflection(normal);} + + +//// ############################################################################### SPHERE +//template +//bool line_test(const Sphere &s, const Line3 &l); + +//// ############################################################################### PLANE +//template +//Plane3 from_triangle(const Triangle3 &t); + +//// ############################################################################### TRIANGLE +//template +//bool overlap_on_axis(const AABB3 &aabb, const Triangle3 &t, const Vec3 &axis); +//template +//bool triangle_AABB(const Triangle3 &t, const AABB3 &a); + +//// ############################################################################### MESH +//template +//Mesh mesh_from_aabb(const AABB3 &aabb); +//template +//Mesh blend_meshes(const std::vector> &meshes); +//template +//acc mesh_ray(const Mesh &mesh, const Ray3 &ray); + + +//template +//bool line_test(const Sphere &s, const Line3 &l){ +// return (s.position-(closest_point(l, s.position))).square_norm() <= (s.radius*s.radius); +//} + +//template +//Plane3 from_triangle(const Triangle3 &t){ + +// Vec3 normal = normalized(cross(vec(t.a(),t.b()),vec(t.a(),t.c()))); +// return Plane3(normal, dot(normal, t.a())); +//} + +//template +//bool overlap_on_axis(const AABB3 &aabb, const Triangle3 &t, const Vec3 &axis){ + +// Interval a = interval(aabb, axis); +// Interval b = interval(t, axis); +// 
return ((b.min() <= a.max()) && (a.min() <= b.max())); +//} + +//template +//bool triangle_AABB(const Triangle3 &t, const AABB3 &a){ + +// Vec3 f0 = vec(t.a(),t.b()); +// Vec3 f1 = vec(t.b(),t.c()); +// Vec3 f2 = vec(t.c(),t.a()); + +// Vec3 u0 = {1,0,0}; +// Vec3 u1 = {0,1,0}; +// Vec3 u2 = {0,0,1}; + +// std::array,13> tests = { +// u0, u1, u2, cross(f0,f1), +// cross(u0, f0), cross(u0, f1), cross(u0, f2), +// cross(u1, f0), cross(u1, f1), cross(u1, f2), +// cross(u2, f0), cross(u2, f1), cross(u2, f2) +// }; + +// for(const auto &test : tests){ +// if(!overlap_on_axis(a, t, test)){ +// return false; // separating axis found +// } +// } +// return true; // separating axis not found +//} + +//template +//Mesh mesh_from_aabb(const AABB3 &aabb){ + +// Mesh mesh; +// Pt3 p0 = aabb.origin + Vec3(-aabb.size.x(),-aabb.size.y(),-aabb.size.z()); +// Pt3 p1 = aabb.origin + Vec3(+aabb.size.x(),-aabb.size.y(),-aabb.size.z()); +// Pt3 p2 = aabb.origin + Vec3(+aabb.size.x(),+aabb.size.y(),-aabb.size.z()); +// Pt3 p3 = aabb.origin + Vec3(-aabb.size.x(),+aabb.size.y(),-aabb.size.z()); +// Pt3 p4 = aabb.origin + Vec3(-aabb.size.x(),-aabb.size.y(),+aabb.size.z()); +// Pt3 p5 = aabb.origin + Vec3(+aabb.size.x(),-aabb.size.y(),+aabb.size.z()); +// Pt3 p6 = aabb.origin + Vec3(+aabb.size.x(),+aabb.size.y(),+aabb.size.z()); +// Pt3 p7 = aabb.origin + Vec3(-aabb.size.x(),+aabb.size.y(),+aabb.size.z()); + +// mesh.vertices = {p0,p1,p2,p3,p4,p5,p6,p7}; +// mesh.triIds = {TriIds(0,2,1), TriIds(0,3,2), TriIds(4,1,5), TriIds(4,0,1), TriIds(7,5,6), TriIds(7,4,5), +// TriIds(1,6,5), TriIds(1,2,6), TriIds(4,3,0), TriIds(4,7,3), TriIds(3,6,2), TriIds(3,7,6)}; + +// return mesh; +//} + +//template +//Mesh blend_meshes(const std::vector> &meshes){ + +// Mesh result; +// size_t offset = 0; +// for(const auto& mesh : meshes){ + +// std::copy(mesh.vertices.begin(), mesh.vertices.end(), std::back_inserter(result.vertices)); +// std::vector idTriangles; +// std::copy(mesh.triIds.begin(), 
mesh.triIds.end(), std::back_inserter(idTriangles)); + +// for(auto &idT : idTriangles){ +// idT.ids += offset; +// } + +// offset += mesh.vertices.size(); +// std::move(idTriangles.begin(), idTriangles.end(), std::back_inserter(result.triIds)); +// } + +// return result; +//} + +//template +//acc mesh_ray(const Mesh &mesh, const Ray3 &ray){ + +// if (!mesh.accelerator) { + +// for (size_t ii = 0; ii < mesh.triIds.size(); ++ii) { +// acc result = raycast_triangle(mesh.triangle(ii), ray); +// if(!(result < 0)){ // TODO +// return result; +// } +// } +// }else { + +// std::list*> toProcess; +// toProcess.push_front(mesh.accelerator.get()); + +// // Recursivley walk the BVH tree +// while (!toProcess.empty()) { + +// BVHNode* iterator = *(toProcess.begin()); +// toProcess.erase(toProcess.begin()); + +// if (iterator->triangles.size() >= 0) { + +// // Iterate trough all triangles of the node +// for (size_t ii = 0; ii < iterator->triangles.size(); ++ii) { + +// // Triangle indices in BVHNode index the mesh +// RaycastResult raycast; +// raycast_triangle(mesh.triangle(iterator->triangles[ii]), ray, &raycast); +// acc r = raycast.t; +// if (r >= 0) { // TODO +// return r; +// } +// } +// } + +// for (int ii = 8 - 1; ii >= 0; --ii) { +// if(iterator->children[ii] == nullptr){ +// continue; +// } + +// // Only push children whos bounds intersect the test geometry +// RaycastResult raycast; +// raycast_aabb(iterator->children[ii]->bounds, ray, &raycast); +// if (raycast.t >= 0) { +// toProcess.push_front(iterator->children[ii].get()); +// } +// } +// } +// } +// return -1; +//} +} diff --git a/cpp-projects/base/geometry/interval.hpp b/cpp-projects/base/geometry/interval.hpp new file mode 100644 index 0000000..1119db0 --- /dev/null +++ b/cpp-projects/base/geometry/interval.hpp @@ -0,0 +1,48 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** 
Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// loca +#include "point2.hpp" + +namespace tool::geo { + +template +struct Interval{ + + Interval() = default; + constexpr Interval(acc vmin, acc vmax) noexcept: v({vmin,vmax}){} + + constexpr auto min() const noexcept -> acc {return v(0);} + constexpr auto max() const noexcept -> acc {return v(1);} + constexpr auto min() noexcept -> acc &{return v(0);} + constexpr auto max() noexcept -> acc &{return v(1);} + + Pt2 v; +}; +} diff --git a/cpp-projects/base/geometry/line2.hpp b/cpp-projects/base/geometry/line2.hpp new file mode 100644 index 0000000..37a556c --- /dev/null +++ b/cpp-projects/base/geometry/line2.hpp @@ -0,0 +1,65 @@ + +///******************************************************************************* +//** Toolset-base ** +//** MIT License ** +//** Copyright (c) [2018] [Florian Lance] ** +//** ** +//** Permission is hereby granted, free of charge, to any person obtaining a ** +//** copy of this software and associated documentation files (the "Software"), ** +//** to deal in the Software without restriction, including without limitation ** +//** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +//** and/or sell copies of the Software, and to permit persons to whom the ** +//** Software is furnished to do so, subject to the following conditions: ** +//** ** +//** The above copyright notice and this permission notice shall be included in ** +//** all copies or substantial portions of the Software. ** +//** ** +//** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +//** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +//** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +//** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +//** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +//** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +//** DEALINGS IN THE SOFTWARE. ** +//** ** +//********************************************************************************/ + +//#pragma once + +//// local +//#include "geometry/point2.hpp" + +//namespace tool::geo { + +//template +//struct Line2{ + +// Line2() = default; + +// constexpr Line2(const Pt2 &s, const Pt2 &e) noexcept : pts{s,e}{ +// } + +// constexpr Line2(std::initializer_list> l) noexcept { +// std::move(l.begin(), l.end(), pts.begin()); +// } + +// inline acc length() const noexcept{ +// return (end()-start()).norm(); +// } + +// constexpr acc square_length() const noexcept{ +// return (end()-start()).square_norm(); +// } + +// constexpr Pt2 start() const noexcept{return pts[0];} +// constexpr Pt2 end() const noexcept{return pts[1];} + +// constexpr Pt2& start() noexcept{return pts[0];} +// constexpr Pt2& end() noexcept{return pts[1];} + +// constexpr Vec2 vec() const noexcept {return end()-start();} + + +// std::array,2> pts{}; +//}; +//} diff --git a/cpp-projects/base/geometry/line3.hpp b/cpp-projects/base/geometry/line3.hpp new file mode 100644 index 0000000..3b518e3 --- /dev/null +++ b/cpp-projects/base/geometry/line3.hpp @@ -0,0 +1,76 @@ + + +///******************************************************************************* +//** Toolset-base ** +//** MIT License ** +//** Copyright (c) [2018] [Florian Lance] ** +//** ** +//** Permission is hereby granted, free of charge, to any person obtaining a ** +//** copy of this software and associated documentation files (the "Software"), ** +//** to deal in the Software without restriction, including without limitation ** +//** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +//** and/or sell copies of the 
Software, and to permit persons to whom the ** +//** Software is furnished to do so, subject to the following conditions: ** +//** ** +//** The above copyright notice and this permission notice shall be included in ** +//** all copies or substantial portions of the Software. ** +//** ** +//** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +//** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +//** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +//** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +//** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +//** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +//** DEALINGS IN THE SOFTWARE. ** +//** ** +//********************************************************************************/ + +//#pragma once + +//// local +//#include "geometry/point3.hpp" + +//namespace tool::geo { + + +//template +//struct Line3{ + +// Line3() = default; + +// constexpr Line3(const Pt3 &s, const Pt3 &e) noexcept: pts({s,e}){ +// } + +// constexpr Line3(std::initializer_list> l) noexcept { +// std::move(l.begin(), l.end(), pts.begin()); +// } + +// inline acc length() const noexcept{ +// return (end()-start()).norm(); +// } + +// constexpr acc square_length() const noexcept{ +// return (end()-start()).square_norm(); +// } + +// constexpr Pt3 start() const noexcept{return pts[0];} +// constexpr Pt3 end() const noexcept{return pts[1];} + +// constexpr Pt3& start() noexcept{return pts[0];} +// constexpr Pt3& end() noexcept{return pts[1];} + +// constexpr Vec3 vec() const noexcept {return end()-start();} + +// std::array,2> pts{}; +//}; + + +//template +//constexpr Pt3 closest_point(const Line3 &l, const Pt3 &p) { + +// Vec3 lVec = l.vec(); +// acc t = dot(p-l.start(),lVec) / dot(lVec, lVec); // TODO : check 0 div +// t = std::max(acc{0}, std::min(t, acc{1})); +// return l.start() + 
lVec*t; +//} +//} diff --git a/cpp-projects/base/geometry/matrix.hpp b/cpp-projects/base/geometry/matrix.hpp new file mode 100644 index 0000000..64faafe --- /dev/null +++ b/cpp-projects/base/geometry/matrix.hpp @@ -0,0 +1,594 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
template<typename acc, int _rows, int _cols>
struct Matrix;

// Column vector: a matrix with _rows rows and a single column
template<typename acc, int _rows>
using ColVec = Matrix<acc, _rows, 1>;

// Row vector: a matrix with a single row and _cols columns
template<typename acc, int _cols>
using RowVec = Matrix<acc, 1, _cols>;

// Fixed-size row major matrix stored in a flat std::array:
// element (row, col) lives at array[row*_cols + col].
// operator() and at() perform no bounds checking.
template<typename acc, int _rows, int _cols>
struct Matrix{

    static_assert((_rows > 0) && (_cols > 0), "Mat must have a number of rows and columns > 0" );

    Matrix() = default;
    Matrix(const Matrix& other) = default;
    Matrix& operator=(const Matrix& other) = default;
    Matrix(Matrix&& other) = default;
    Matrix& operator=(Matrix&& other) = default;

    // Builds the matrix from its _rows*_cols elements given in row major order.
    // Intentionally not explicit: `return {{...}};` in this struct relies on the implicit conversion.
    constexpr Matrix(const std::array<acc, _rows*_cols> &values) noexcept : array(values){}
    constexpr Matrix(std::array<acc, _rows*_cols> &&values) noexcept : array(std::move(values)){}

    // dimensions
    constexpr auto rows() const noexcept -> int{return _rows;}
    constexpr auto cols() const noexcept -> int{return _cols;}
    constexpr auto size() const noexcept -> int{return rows()*cols();}

    // element access, by flat index or by (row, col)
    constexpr auto operator()(int id) const -> acc { return array[id];}
    constexpr auto operator()(int id) -> acc& {return array[id];}
    constexpr auto operator()(int row, int col) const -> acc{ return array[row*_cols + col];}
    constexpr auto operator()(int row, int col) -> acc&{return array[row*_cols + col];}

    // same as operator(); no additional checks are done
    constexpr auto at(int id) const -> acc {return (*this)(id);}
    constexpr auto at(int id) -> acc& {return (*this)(id);}
    constexpr auto at(int row, int col) const -> acc {return (*this)(row,col);}
    constexpr auto at(int row, int col) -> acc&{return (*this)(row,col);}

    // Returns a copy of the requested row, or a zero-filled vector if the index is out of range.
    constexpr auto row(int row) const noexcept -> RowVec<acc,_cols>{
        RowVec<acc,_cols> r{};
        if(row < 0 || row >= _rows){
            return r;
        }
        for(int jj = 0; jj < _cols; ++jj){
            r(jj) = at(row, jj);
        }
        return r;
    }

    // Returns a copy of the requested column, or a zero-filled vector if the index is out of range.
    constexpr auto col(int col) const noexcept -> ColVec<acc,_rows>{
        ColVec<acc,_rows> c{};
        if(col < 0 || col >= _cols){
            return c;
        }
        for(int ii = 0; ii < _rows; ++ii){
            c(ii) = at(ii, col);
        }
        return c;
    }

    // named component access, only for row or column matrices
    constexpr auto x() noexcept -> acc& {
        static_assert((_rows == 1 || _cols == 1), "X only available for row or column matrices.");
        return array[0];
    }
    constexpr auto x() const noexcept -> acc {
        static_assert((_rows == 1 || _cols == 1), "X only available for row or column matrices.");
        return array[0];
    }

    constexpr auto y() noexcept -> acc& {
        static_assert((_rows == 1 || _cols == 1), "Y only available for row or column matrices.");
        static_assert(_rows*_cols > 1, "Y only available for row or column matrices of a dimension > 1.");
        return array[1];
    }
    constexpr auto y() const noexcept -> acc {
        static_assert((_rows == 1 || _cols == 1), "Y only available for row or column matrices.");
        static_assert(_rows*_cols > 1, "Y only available for row or column matrices of a dimension > 1.");
        return array[1];
    }

    constexpr auto z() noexcept -> acc& {
        static_assert((_rows == 1 || _cols == 1), "Z only available for row or column matrices.");
        static_assert(_rows*_cols > 2, "Z only available for row or column matrices of a dimension > 2.");
        return array[2];
    }
    constexpr auto z() const noexcept -> acc {
        static_assert((_rows == 1 || _cols == 1), "Z only available for row or column matrices.");
        static_assert(_rows*_cols > 2, "Z only available for row or column matrices of a dimension > 2.");
        return array[2];
    }

    constexpr auto w() noexcept -> acc& {
        static_assert((_rows == 1 || _cols == 1), "W only available for row or column matrices.");
        static_assert(_rows*_cols > 3, "W only available for row or column matrices of a dimension > 3.");
        return array[3];
    }
    constexpr auto w() const noexcept -> acc {
        static_assert((_rows == 1 || _cols == 1), "W only available for row or column matrices.");
        static_assert(_rows*_cols > 3, "W only available for row or column matrices of a dimension > 3.");
        return array[3];
    }

    // First three components, always returned as a row vector.
    constexpr auto xyz() const noexcept -> RowVec<acc,3> {
        static_assert((_rows == 1 || _cols == 1), "XYZ only available for row or column matrices.");
        static_assert(_rows*_cols > 2, "XYZ only available for row or column matrices of a dimension > 2.");
        return {{x(),y(),z()}};
    }

    // Square matrix with ones on the diagonal and zeros elsewhere.
    static constexpr auto identity() noexcept -> Matrix{
        static_assert(_rows == _cols, "Identity only available for square matrices.");
        Matrix m{}; // zero-filled
        for(int ii = 0; ii < _rows; ++ii){
            m(ii,ii) = acc{1};
        }
        return m;
    }

    // True when every diagonal element is almost_equal to 1 and every other element to 0.
    constexpr auto is_identity() const noexcept -> bool{
        for(int ii = 0; ii < _rows; ++ii){
            for(int jj = 0; jj < _cols; ++jj){
                const acc expected = (ii == jj) ? acc{1} : acc{0};
                if(!almost_equal(at(ii,jj), expected)){
                    return false;
                }
            }
        }
        return true;
    }

    // Element-wise static_cast to another scalar type.
    template<typename acc2>
    constexpr auto conv() const noexcept -> Matrix<acc2,_rows,_cols>{
        Matrix<acc2,_rows,_cols> mc{};
        for(int ii = 0; ii < _rows*_cols; ++ii){
            mc(ii) = static_cast<acc2>(at(ii));
        }
        return mc;
    }

    // Row-vector convention: result(j) = sum over i of pt(i) * at(i,j).
    constexpr auto multiply_point(const RowVec<acc,_cols> &pt) const noexcept -> RowVec<acc,_cols>{

        // a string literal used as condition is always true, so the supported sizes are checked explicitly
        static_assert(_cols >= 2 && _cols <= 4, "multiply_point is only implemented for 2, 3 or 4 columns.");

        if constexpr(_cols == 2){
            return {{
                pt.x() * at(0,0) + pt.y() * at(1,0),
                pt.x() * at(0,1) + pt.y() * at(1,1)
            }};
        } else if constexpr(_cols == 3){
            // NOTE(review): this branch also adds row 3 (at(3,j)), so it reads out of bounds
            // unless the matrix has at least 4 rows -- confirm the intended dimensions.
            return {{
                pt.x() * at(0,0) + pt.y() * at(1,0) + pt.z() * at(2,0) + at(3,0),
                pt.x() * at(0,1) + pt.y() * at(1,1) + pt.z() * at(2,1) + at(3,1),
                pt.x() * at(0,2) + pt.y() * at(1,2) + pt.z() * at(2,2) + at(3,2)
            }};
        }else{ // _cols == 4, guaranteed by the static_assert above
            return{{
                pt.x() * at(0,0) + pt.y() * at(1,0) + pt.z() * at(2,0) + pt.w() * at(3,0),
                pt.x() * at(0,1) + pt.y() * at(1,1) + pt.z() * at(2,1) + pt.w() * at(3,1),
                pt.x() * at(0,2) + pt.y() * at(1,2) + pt.z() * at(2,2) + pt.w() * at(3,2),
                pt.x() * at(0,3) + pt.y() * at(1,3) + pt.z() * at(2,3) + pt.w() * at(3,3)
            }};
        }
    }

    // Row-vector convention: result(j) = dot(vec, column j of the first _cols rows).
    constexpr auto multiply_vector(const RowVec<acc,_cols> &vec) const noexcept -> RowVec<acc,_cols>{

        static_assert(_cols >= 2 && _cols <= 4, "multiply_vector is only implemented for 2, 3 or 4 columns.");

        if constexpr(_cols == 2){
            return {{
                dot(vec, RowVec<acc,2>{{at(0,0), at(1,0)}}),
                dot(vec, RowVec<acc,2>{{at(0,1), at(1,1)}})
            }};
        } else if constexpr(_cols == 3){
            return {{
                dot(vec, RowVec<acc,3>{{at(0,0), at(1,0), at(2,0)}}),
                dot(vec, RowVec<acc,3>{{at(0,1), at(1,1), at(2,1)}}),
                dot(vec, RowVec<acc,3>{{at(0,2), at(1,2), at(2,2)}})
            }};
        }else{ // _cols == 4, guaranteed by the static_assert above
            return {{
                dot(vec, RowVec<acc,4>{{at(0,0), at(1,0), at(2,0), at(3,0)}}),
                dot(vec, RowVec<acc,4>{{at(0,1), at(1,1), at(2,1), at(3,1)}}),
                dot(vec, RowVec<acc,4>{{at(0,2), at(1,2), at(2,2), at(3,2)}}),
                dot(vec, RowVec<acc,4>{{at(0,3), at(1,3), at(2,3), at(3,3)}})
            }};
        }
    }

    // compound assignment operators: each one updates this matrix and returns a copy of the result
    constexpr auto operator+=(const Matrix &r) noexcept -> Matrix{
        return (*this) = (*this)+r;
    }

    constexpr auto operator+=(acc v) noexcept -> Matrix{
        return (*this) = (*this)+v;
    }

    constexpr auto operator-=(const Matrix &r) noexcept -> Matrix{
        return (*this) = (*this)-r;
    }

    constexpr auto operator-=(acc v) noexcept -> Matrix{
        return (*this) = (*this)-v;
    }

    // only compiles when the product has the same dimensions as this matrix
    template<int _rowsR, int _colsR>
    constexpr auto operator*=(const Matrix<acc,_rowsR,_colsR> &r) noexcept -> Matrix{
        return (*this) = (*this)*r;
    }

    constexpr auto operator*=(acc v) noexcept -> Matrix{
        return (*this) = (*this)*v;
    }

    constexpr auto operator/=(acc v) noexcept -> Matrix{
        return (*this) = (*this)/v;
    }

    // row major storage, zero-filled by default
    std::array<acc, _rows*_cols> array{};
};
return std::equal(std::cbegin(l.array), std::cend(l.array), std::cbegin(r.array), + [ulp](const acc &v1, const acc &v2) -> bool{ + return almost_equal(v1,v2,ulp); + } + ); +} + +template +constexpr auto add(const Matrix &l, const Matrix &r) noexcept -> Matrix{ + Matrix res; + std::transform(l.array.cbegin(), l.array.cend(), r.array.cbegin(), std::begin(res.array), std::plus()); + return res; +} +template +constexpr auto add(const Matrix &m, acc value) noexcept -> Matrix{ + Matrix res; + std::transform(m.array.cbegin(), m.array.cend(), std::begin(res.array), std::bind(std::plus(), std::placeholders::_1, value)); + return res; +} +template +constexpr auto add(acc value, const Matrix &m) noexcept -> Matrix{ + Matrix res; + std::transform(m.array.cbegin(), m.array.cend(), std::begin(res.array), std::bind(std::plus(), std::placeholders::_1, value)); + return res; +} + +template +constexpr auto substract(const Matrix &l, const Matrix &r) noexcept -> Matrix{ + Matrix res; + std::transform(l.array.cbegin(), l.array.cend(), r.array.cbegin(), std::begin(res.array), std::minus()); + return res; +} +template +constexpr auto substract(const Matrix &m, acc value) noexcept -> Matrix{ + Matrix res; + std::transform(m.array.cbegin(), m.array.cend(), std::begin(res.array), std::bind(std::minus(), std::placeholders::_1, value)); + return res; +} + +template +constexpr auto multiply(const Matrix &l, const Matrix &r) noexcept -> Matrix{ + + static_assert(_colsL == _rowsR, "Invalid dimensions for multiplication."); + + Matrix res; + for(int ii = 0; ii < l.rows(); ++ii){ + for(int jj = 0; jj < r.cols(); ++jj){ + for(int kk = 0; kk < r.rows(); ++kk){ + res(ii,jj) += l(ii,kk) * r(kk,jj); + } + } + } + return res; +} +template +constexpr auto multiply(const Matrix &m, acc value) noexcept -> Matrix{ + Matrix res; + std::transform(m.array.cbegin(), m.array.cend(), std::begin(res.array), std::bind(std::multiplies(), std::placeholders::_1, value)); + return res; +} + +template +constexpr auto 
divide(const Matrix &m, acc value) noexcept -> Matrix{ + if(value > acc{0}){ + Matrix res; + std::transform(m.array.cbegin(), m.array.cend(), std::begin(res.array), std::bind(std::divides(), std::placeholders::_1, value)); + return res; + } + return m; +} + +template +constexpr auto transpose(const Matrix &m) noexcept -> Matrix { + Matrix dest; + for(int ii = 0; ii < _rows*_cols; ++ii){ + dest.array[ii] = m.array[_rows*(ii % _rows) + (ii / _rows)]; + } + return dest; +} + +template +constexpr auto cut(const Matrix &m, int row, int col) noexcept -> Matrix{ + Matrix res; + int index = 0; + for(int ii = 0; ii < _rows; ++ii){ + for(int jj = 0; jj < _cols; ++jj){ + if(ii != row && jj != col){ + res(index++) = m(_cols*ii+jj); + } + } + } + return res; +} + +//template +//constexpr auto cofactor(const Matrix &m) noexcept -> Matrix{ +// Matrix res; +// for(int ii = 0; ii < _rows; ++ii){ +// for(int jj = 0; jj < _cols; ++jj){ +// int t = _cols * jj +ii; +// res(t) = m(t)*static_cast(std::pow(acc{-1}, ii + jj)); +// } +// } +// return res; +//} + +//template +//constexpr auto adjugate(const Matrix &m) noexcept -> Matrix{ +// return transpose(cofactor(m)); +//} + +//template +//constexpr auto minor(const Matrix &m) noexcept -> Matrix{ + +// if constexpr (_rows == 2 && _cols == 2){ +// return {m(1,1), m(1,0), m(0,1), m(0,0)}; +// }else{ +// Matrix res; +// for(int ii = 0; ii < _rows; ++ii){ +// for(int jj = 0; jj < _cols; ++jj){ +// res(ii,jj) = determinant(cut(m,ii,jj)); +// } +// } +// return res; +// } +//} + +template +constexpr auto trace(const Matrix &m) noexcept -> acc{ + static_assert(_rows != _cols, "Trace can only be computed from a square matrix."); + acc t{0}; + for(int ii = 0; ii < _rows; ++ii){ + t += m(ii,ii); + } + return t; +} + +template +constexpr auto determinant(const Matrix &m) noexcept -> acc { + static_assert("Not implemented."); +// acc det{0}; +// auto cof = cofactor(m); +// for(int ii = 0; ii < 4; ++ii){ +// det += m(ii) * cof(0,ii); +// } +// return 
det; +} + +template +constexpr auto inverse(const Matrix &m) noexcept -> Matrix{ + static_assert("Not implemented."); +// else{ +// return adjugate(m)*(acc{1}/det); +// } +} + + + + +template +constexpr auto invert(const RowVec &p) noexcept -> RowVec{ + return multiply(p,acc{-1}); +} + +/** + * Sum of all the components of vec. + * The initial value is acc{0} so the accumulation is done in acc: a plain 0 would make + * std::accumulate work in int and truncate every partial sum of a floating-point vector. + */ +template +constexpr auto sum(const RowVec &vec) noexcept -> acc{ + return std::accumulate(vec.array.begin(), vec.array.end(), acc{0}); +} + +template +constexpr auto dot(const RowVec &l, const RowVec &r) noexcept -> acc { + return std::inner_product(l.array.cbegin(), l.array.cend(), r.array.cbegin(), acc{0}); +} + +template +constexpr auto vec(const RowVec &l, const RowVec &r) noexcept -> RowVec{ + return r-l; +} + +template +constexpr auto square_norm(const RowVec &p) noexcept -> acc{ + return dot(p,p); +} + +template +inline auto norm(const RowVec &p) noexcept -> acc{ + return sqrt(square_norm(p)); +} + +template +inline auto normalize(const RowVec &vec) noexcept -> RowVec{ + return divide(vec, norm(vec)); +} + +// operators +template +constexpr auto operator+(const Matrix &l, const Matrix &r) noexcept-> Matrix{ + return add(l,r); +} + +template +constexpr auto operator+(const Matrix &m, acc value) noexcept -> Matrix{ + return add(m, value); +} + +template +constexpr auto operator-(const Matrix &l, const Matrix &r) noexcept-> Matrix{ + return substract(l,r); +} + +template +constexpr auto operator-(const Matrix &m, acc value) noexcept -> Matrix{ + return substract(m,value); +} + +template +constexpr auto operator*(const Matrix &l, const Matrix &r) noexcept -> Matrix{ + return multiply(l,r); +} + +template +constexpr auto operator*(const Matrix &m, acc value) noexcept -> Matrix{ + return multiply(m,value); +} + +template +constexpr auto operator*(acc value, const Matrix &m) noexcept -> Matrix{ + return multiply(m,value); +} + +template +constexpr auto operator/(const Matrix &m, acc value) noexcept -> Matrix{ + return divide(m,value); +} + +template +constexpr auto operator==(const
Matrix &lhs, const Matrix &rhs) noexcept -> bool { return compare(lhs,rhs); } +template +constexpr auto operator!=(const Matrix &lhs, const Matrix &rhs) noexcept -> bool { return !compare(lhs,rhs); } + +template +constexpr auto operator<(const Matrix &lhs, const Matrix &rhs) noexcept -> bool{ + if(lhs != rhs){ + return std::lexicographical_compare(lhs.array.begin(), lhs.array.end(), rhs.array.begin(), rhs.array.end()); + } + return false; +} +template +constexpr auto operator> (const Matrix &lhs, const Matrix &rhs) noexcept -> bool { return rhs < lhs;} + +template +constexpr auto operator<=(const Matrix &lhs, const Matrix &rhs) noexcept -> bool { return !(lhs > rhs); } + +template +constexpr auto operator>=(const Matrix &lhs, const Matrix &rhs) noexcept -> bool { return !(lhs < rhs);} + +template +constexpr auto operator<(const Matrix &m, acc value) noexcept -> bool{ + for(const auto &mv : m.array){ + if(mv < value){ + continue; + } + return false; + } + return true; +} + +template +constexpr auto operator>(const Matrix &m, acc value) noexcept -> bool{ + for(const auto &mv : m.array){ + if(mv > value){ + continue; + } + return false; + } + return true; +} + +template +auto operator<<(std::ostream &flux, const Matrix &m) -> std::ostream&{ + for(int ii = 0; ii < _rows; ++ii){ + flux << "[ "; + for(int jj = 0; jj < _cols; ++jj){ + flux << m(ii,jj) << " "; + } + flux << "]\n"; + } + return flux; +} + + + +} diff --git a/cpp-projects/base/geometry/matrix2.hpp b/cpp-projects/base/geometry/matrix2.hpp new file mode 100644 index 0000000..d363b2c --- /dev/null +++ b/cpp-projects/base/geometry/matrix2.hpp @@ -0,0 +1,129 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the 
Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "geometry/matrix.hpp" + +namespace tool::geo { + +template +struct Matrix2; + +template +using Mat2 = Matrix2; +using Mat2f = Mat2; +using Mat2d = Mat2; + +template +struct Matrix2 : Matrix{ + + Matrix2() = default; + Matrix2(const Matrix2& other) = default; + Matrix2& operator=(const Matrix2& other) = default; + Matrix2(Matrix2&& other) = default; + Matrix2& operator=(Matrix2&& other) = default; + + constexpr Matrix2(const Matrix &m) noexcept{ + this->array = m.array; + } + constexpr Matrix2(Matrix &&m) noexcept{ + this->array = std::move(m.array); + } + constexpr Matrix2(RowVec r1, RowVec r2) noexcept{ + this->array = { + r1.x(), r1.y(), + r2.x(), r2.y() + }; + } + constexpr Matrix2(ColVec c1, ColVec c2) noexcept{ + this->array = { + c1.x(), c2.x(), + c1.y(), c2.y() + }; + } + constexpr Matrix2(acc v00, acc v01 = acc{0}, acc v10 = acc{0}, acc v11 = acc{0}) noexcept{ + this->array = { + v00,v01, + 
v10,v11 + }; + } +}; + +template +constexpr auto operator*(const Matrix &m, const ColVec &v) noexcept -> ColVec{ + return {{ + m(0,0) * v.x() + m(0,1) * v.y(), + m(1,0) * v.x() + m(1,1) * v.y() + }}; +} + +template +constexpr auto operator*(const RowVec &v, const Matrix &m) noexcept -> RowVec{ + return {{ + m(0,0) * v.x() + m(1,0) * v.y(), + m(0,1) * v.x() + m(1,1) * v.y() + }}; +} + +template +constexpr auto operator*(const Matrix &l, const Matrix &r) noexcept -> Matrix{ + return{{ + l(0,0) * r(0,0) + l(0,1) * r(1,0), + l(0,0) * r(0,1) + l(0,1) * r(1,1), + l(1,0) * r(0,0) + l(1,1) * r(1,0), + l(1,0) * r(0,1) + l(1,1) * r(1,1) + }}; +} + +template +constexpr auto trace(const Matrix &m) noexcept -> acc{ + return m(0) + m(3); +} + +template +constexpr auto determinant(const Matrix &m) noexcept -> acc { + return m(0,0)*m(1,1)-m(0,1)*m(1,0); +} + +template +constexpr auto inverse(const Matrix &m) noexcept -> Matrix{ + + const acc det = determinant(m); + if(almost_equal(det, acc{0})){ + return {}; + } + + const acc invDet = acc{1}/ det; + return {{ + + m(1,1) * invDet, + - m(0,1) * invDet, + - m(1,0) * invDet, + + m(0,0) * invDet + }}; +} +} diff --git a/cpp-projects/base/geometry/matrix3.hpp b/cpp-projects/base/geometry/matrix3.hpp new file mode 100644 index 0000000..78013c4 --- /dev/null +++ b/cpp-projects/base/geometry/matrix3.hpp @@ -0,0 +1,285 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the 
following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// std +#include + +// local +#include "utility/constants.hpp" +#include "geometry/matrix.hpp" + +namespace tool::geo { + +template +struct Matrix3; + +template +using Mat3 = Matrix3; +using Mat3f = Mat3; +using Mat3d = Mat3; + +template +struct Matrix3 : Matrix{ + + Matrix3() = default; + Matrix3(const Matrix3& other) = default; + Matrix3& operator=(const Matrix3& other) = default; + Matrix3(Matrix3&& other) = default; + Matrix3& operator=(Matrix3&& other) = default; + + constexpr Matrix3(const Matrix &m) noexcept{ + this->array = m.array; + } + constexpr Matrix3(Matrix &&m) noexcept{ + this->array = std::move(m.array); + } + constexpr Matrix3(RowVec r1, RowVec r2, RowVec r3) noexcept{ + this->array = { + r1.x(), r1.y(), r1.z(), + r2.x(), r2.y(), r2.z(), + r3.x(), r3.y(), r3.z() + }; + } + constexpr Matrix3(ColVec c1, ColVec c2, ColVec c3) noexcept{ + this->array = { + c1.x(), c2.x(), c3.x(), + c1.y(), c2.y(), c3.y(), + c1.z(), c2.z(), c3.z() + }; + } + + constexpr Matrix3(acc v00, acc v01 = acc{0}, acc v02 = acc{0}, + acc v10 = acc{0}, acc v11 = acc{0}, acc v12 = acc{0}, + acc v20 = acc{0}, acc v21 = acc{0}, acc v22 = acc{0}) noexcept{ + this->array = { + v00,v01,v02, + 
v10,v11,v12, + v20,v21,v22 + }; + } +}; + +template +constexpr auto operator*(const Matrix &m, const ColVec &v) noexcept -> ColVec{ + return {{ + m(0,0) * v.x() + m(0,1) * v.y() + m(0,2) * v.z(), + m(1,0) * v.x() + m(1,1) * v.y() + m(1,2) * v.z(), + m(2,0) * v.x() + m(2,1) * v.y() + m(2,2) * v.z() + }}; +} + +template +constexpr auto operator*(const RowVec &v, const Matrix &m) noexcept -> RowVec{ + return {{ + m(0,0) * v.x() + m(1,0) * v.y() + m(2,0) * v.z(), + m(0,1) * v.x() + m(1,1) * v.y() + m(2,1) * v.z(), + m(0,2) * v.x() + m(1,2) * v.y() + m(2,2) * v.z() + }}; +} + +template +constexpr auto operator*(const Matrix &l, const Matrix &r) noexcept -> Matrix{ + + const acc A00 = l(0,0); + const acc A01 = l(0,1); + const acc A02 = l(0,2); + const acc A10 = l(1,0); + const acc A11 = l(1,1); + const acc A12 = l(1,2); + const acc A20 = l(2,0); + const acc A21 = l(2,1); + const acc A22 = l(2,2); + + const acc B00 = r(0,0); + const acc B01 = r(0,1); + const acc B02 = r(0,2); + const acc B10 = r(1,0); + const acc B11 = r(1,1); + const acc B12 = r(1,2); + const acc B20 = r(2,0); + const acc B21 = r(2,1); + const acc B22 = r(2,2); + + return {{ + A00 * B00 + A01 * B10 + A02 * B20, + A00 * B01 + A01 * B11 + A02 * B21, + A00 * B02 + A01 * B12 + A02 * B22, + A10 * B00 + A11 * B10 + A12 * B20, + A10 * B01 + A11 * B11 + A12 * B21, + A10 * B02 + A11 * B12 + A12 * B22, + A20 * B00 + A21 * B10 + A22 * B20, + A20 * B01 + A21 * B11 + A22 * B21, + A20 * B02 + A21 * B12 + A22 * B22 + }}; +} + +template +constexpr auto trace(const Matrix &m) noexcept -> acc{ + return m(0) + m(4) + m(8); +} + +template +constexpr auto determinant(const Matrix &m) noexcept -> acc { + return + + m(0,0) * (m(1,1) * m(2,2) - m(1,2) * m(2,1)) + - m(0,1) * (m(1,0) * m(2,2) - m(1,2) * m(2,0)) + + m(0,2) * (m(1,0) * m(2,1) - m(1,1) * m(2,0)); +} + +template +constexpr auto inverse(const Matrix &m) noexcept -> Matrix{ + + const acc det = determinant(m); + if(almost_equal(det, acc{0})){ + return {}; + } + + const 
acc invDet = acc{1}/ det; + return{{ + + (m(1,1) * m(2,2) - m(1,2) * m(2,1)) * invDet, + - (m(0,1) * m(2,2) - m(0,2) * m(2,1)) * invDet, + + (m(0,1) * m(1,2) - m(0,2) * m(1,1)) * invDet, + - (m(1,0) * m(2,2) - m(1,2) * m(2,0)) * invDet, + + (m(0,0) * m(2,2) - m(0,2) * m(2,0)) * invDet, + - (m(0,0) * m(1,2) - m(0,2) * m(1,0)) * invDet, + + (m(1,0) * m(2,1) - m(1,1) * m(2,0)) * invDet, + - (m(0,0) * m(2,1) - m(0,1) * m(2,0)) * invDet, + + (m(0,0) * m(1,1) - m(0,1) * m(1,0)) * invDet + }}; +} + +/** + * @param pitch: angle in degrees + */ +template +constexpr auto x_rotation_m3x3(acc pitch) noexcept -> Matrix{ + pitch = deg_2_rad(pitch); + const acc cosA = cos(pitch); + const acc sinA = sin(pitch); + return {{ + 1, 0, 0, + 0, cosA, -sinA, + 0, sinA, cosA + }}; +} + +/** + * @param head: angle in degrees + */ +template +static constexpr auto y_rotation_m3x3(acc head) noexcept -> Matrix{ + head = deg_2_rad(head); + const acc cosA = cos(head); + const acc sinA = sin(head); + return {{ + cosA, 0, sinA, + 0, 1, 0, + -sinA, 0, cosA + }}; +} + +/** + * @param roll: angle in degrees + */ +template +constexpr auto z_rotation_m3x3(acc roll) noexcept -> Matrix{ + roll = deg_2_rad(roll); + const acc cosA = cos(roll); + const acc sinA = sin(roll); + return {{ + cosA, -sinA, 0, + sinA, cosA, 0, + 0, 0, 1 + }}; +} + +/** + * @param angles: pitch (x), head (y) and roll (z), used as radians (no deg_2_rad conversion is applied here) + * equivalent to the product Z * X * Y of the roll, pitch and head matrices, each shaped like the one returned by z_rotation_m3x3 / x_rotation_m3x3 / y_rotation_m3x3 (those helpers take degrees) + */ +template +constexpr auto rotation_m3x3(const RowVec &angles) noexcept -> Matrix{ + + const auto cosH = std::cos(angles.y()); + const auto cosP = std::cos(angles.x()); + const auto cosR = std::cos(angles.z()); + const auto sinH = std::sin(angles.y()); + const auto sinP = std::sin(angles.x()); + const auto sinR = std::sin(angles.z()); + + return {{ + cosR*cosH-sinR*sinP*sinH, -sinR*cosP, cosR*sinH+sinR*sinP*cosH, + sinR*cosH+cosR*sinP*sinH, cosR*cosP, sinR*sinH-cosR*sinP*cosH, + -cosP*sinH, sinP, cosP*cosH + }}; +} + +template +auto
axis_angle_m3x3(const RowVec &axis, acc angle) noexcept -> Matrix{ + + angle = deg_2_rad(angle); + auto u = normalize(axis); + const auto cA = cos(angle); + const auto sA = sin(angle); + const auto ux = u.x(); + const auto uy = u.y(); + const auto uz = u.z(); + const auto ux2 = ux*ux; + const auto uy2 = uy*uy; + const auto uz2 = uz*uz; + + return {{ + cA + ux2*(1-cA), ux*uy*(1-cA) - uz*sA, ux*uz*(1-cA) + uy*sA, + uy*ux*(1-cA) + uz*sA, cA+uy2*(1-cA), uy*uz*(1-cA)-ux*sA, + uz*ux*(1-cA)-uy*sA, uz*uy*(1-cA) +ux*sA, cA +uz2*(1-cA) + }}; +} + +/** + * @return head/pitch/roll angles in degrees + * NOTE(review): pitch is read from m(1,2) while rotation_m3x3 stores sin(pitch) at (2,1), so this looks like it expects the transpose of that layout - confirm against callers + */ +template +constexpr auto to_hpr_angles(const Matrix &m) noexcept -> RowVec{ // head/pitch/roll + + const auto p = std::asin(m(1,2)); + if(almost_equal(std::abs(p), PI_2)){ // gimbal lock: |pitch| is 90 degrees, head is set to 0 and the remaining angle is taken from the first row + return{ + 0, + d180_PI*p, + d180_PI*(std::atan2(m(0,1),m(0,0))) + }; + } + + return {{ + d180_PI*std::atan2(-m(0,2),m(2,2)), + d180_PI*p, + d180_PI*std::atan2(-m(1,0),m(1,1)) + }}; +} + + +} diff --git a/cpp-projects/base/geometry/matrix4.hpp b/cpp-projects/base/geometry/matrix4.hpp new file mode 100644 index 0000000..9d3f573 --- /dev/null +++ b/cpp-projects/base/geometry/matrix4.hpp @@ -0,0 +1,525 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software.
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "utility/constants.hpp" +#include "geometry/point3.hpp" +#include "geometry/point4.hpp" +#include "geometry/matrix3.hpp" + +namespace tool::geo { + + +template +struct Matrix4; + +template +using Mat4 = Matrix4; +using Mat4f = Mat4; +using Mat4d = Mat4; + +template +struct Matrix4 : Matrix{ + + Matrix4() = default; + Matrix4(const Matrix4& other) = default; + Matrix4& operator=(const Matrix4& other) = default; + Matrix4(Matrix4&& other) = default; + Matrix4& operator=(Matrix4&& other) = default; + + constexpr Matrix4(const Matrix &m) noexcept{ + this->array = m.array; + } + constexpr Matrix4(Matrix &&m) noexcept{ + this->array = std::move(m.array); + } + + /** + * Embeds the nine row-major elements m(0)..m(8) of a 3x3 matrix in the upper-left block; + * the fourth row and fourth column are those of the identity. + */ + constexpr explicit Matrix4(Matrix m) noexcept{ + this->array = { + m(0),m(1),m(2),0, + m(3),m(4),m(5),0, + m(6),m(7),m(8),0, + 0, 0, 0, 1 + }; + } + + constexpr Matrix4(RowVec r1, RowVec r2, RowVec r3, RowVec r4) noexcept{ + this->array = { + r1.x(), r1.y(), r1.z(), r1.w(), + r2.x(), r2.y(), r2.z(), r2.w(), + r3.x(), r3.y(), r3.z(), r3.w(), + r4.x(), r4.y(), r4.z(), r4.w() + }; + } + constexpr Matrix4(ColVec c1, ColVec c2, ColVec c3, ColVec c4) noexcept{ + this->array = { + c1.x(), c2.x(), c3.x(), c4.x(), + c1.y(), c2.y(), c3.y(), c4.y(), + c1.z(), c2.z(), c3.z(), c4.z(), + c1.w(), c2.w(), c3.w(), c4.w() + }; + } + + constexpr Matrix4(acc v00, acc v01 = acc{0}, acc v02 =
acc{0}, acc v03 = acc{0}, + acc v10 = acc{0}, acc v11 = acc{0}, acc v12 = acc{0}, acc v13 = acc{0}, + acc v20 = acc{0}, acc v21 = acc{0}, acc v22 = acc{0}, acc v23 = acc{0}, + acc v30 = acc{0}, acc v31 = acc{0}, acc v32 = acc{0}, acc v33 = acc{0}) noexcept{ + this->array = { + v00,v01,v02,v03, + v10,v11,v12,v13, + v20,v21,v22,v23, + v30,v31,v32,v33 + }; + } + constexpr Matrix4(acc *data, size_t size) noexcept{ + if(size <= this->array.size()){ + std::copy(data, data + size, this->array.begin()); + } + } + + constexpr auto t(int id) const noexcept -> acc{return this->at(id,3);} + constexpr auto t(int id) noexcept -> acc&{return this->at(id,3);} + constexpr auto s(int id) const noexcept -> acc{return this->at(id,id);} + constexpr auto s(int id) noexcept -> acc&{return this->at(id,id);} + constexpr auto r(int id) const noexcept -> acc{ + switch (id) { + case 0: return this->at(0); + case 1: return this->at(1); + case 2: return this->at(2); + case 3: return this->at(4); + case 4: return this->at(5); + case 5: return this->at(6); + case 6: return this->at(8); + case 7: return this->at(9); + case 8: return this->at(10); + default: return this->at(0); + } + } + constexpr auto r(int id) noexcept -> acc&{ + switch (id) { + case 0: return this->at(0); + case 1: return this->at(1); + case 2: return this->at(2); + case 3: return this->at(4); + case 4: return this->at(5); + case 5: return this->at(6); + case 6: return this->at(8); + case 7: return this->at(9); + case 8: return this->at(10); + default: return this->at(0); + } + } +}; + + +template +constexpr auto operator*(const Matrix &m, const ColVec &v) noexcept -> ColVec{ + return {{ + m(0,0) * v.x() + m(0,1) * v.y() + m(0,2) * v.z(), + m(1,0) * v.x() + m(1,1) * v.y() + m(1,2) * v.z(), + m(2,0) * v.x() + m(2,1) * v.y() + m(2,2) * v.z(), + }}; +} + +template +constexpr auto operator*(const Matrix &m, const ColVec &v) noexcept -> ColVec{ + return {{ + m(0,0) * v.x() + m(0,1) * v.y() + m(0,2) * v.z() + m(0,3) * v.w(), + m(1,0) 
* v.x() + m(1,1) * v.y() + m(1,2) * v.z() + m(1,3) * v.w(), + m(2,0) * v.x() + m(2,1) * v.y() + m(2,2) * v.z() + m(2,3) * v.w(), + m(3,0) * v.x() + m(3,1) * v.y() + m(3,2) * v.z() + m(3,3) * v.w(), + }}; +} + +template +constexpr auto operator*(const RowVec &v, const Matrix &m) noexcept -> RowVec{ + return {{ + m(0,0) * v.x() + m(1,0) * v.y() + m(2,0) * v.z() + m(3,0) * v.w(), + m(0,1) * v.x() + m(1,1) * v.y() + m(2,1) * v.z() + m(3,1) * v.w(), + m(0,2) * v.x() + m(1,2) * v.y() + m(2,2) * v.z() + m(3,2) * v.w(), + m(0,3) * v.x() + m(1,3) * v.y() + m(2,3) * v.z() + m(3,3) * v.w(), + }}; +} + +template +constexpr auto operator*(const RowVec &v, const Matrix &m) noexcept -> RowVec{ + return {{ + m(0,0) * v.x() + m(1,0) * v.y() + m(2,0) * v.z(), + m(0,1) * v.x() + m(1,1) * v.y() + m(2,1) * v.z(), + m(0,2) * v.x() + m(1,2) * v.y() + m(2,2) * v.z(), + }}; +} + +template +constexpr auto operator*(const Matrix &l, const Matrix &r) noexcept -> Matrix{ + + const ColVec A0 = l.col(0); + const ColVec A1 = l.col(1); + const ColVec A2 = l.col(2); + const ColVec A3 = l.col(3); + + const ColVec B0 = r.col(0); + const ColVec B1 = r.col(1); + const ColVec B2 = r.col(2); + const ColVec B3 = r.col(3); + + return Mat4( + A0 * B0(0) + A1 * B0(1) + A2 * B0(2) + A3 * B0(3), + A0 * B1(0) + A1 * B1(1) + A2 * B1(2) + A3 * B1(3), + A0 * B2(0) + A1 * B2(1) + A2 * B2(2) + A3 * B2(3), + A0 * B3(0) + A1 * B3(1) + A2 * B3(2) + A3 * B3(3) + ); +} + +template +constexpr auto trace(const Matrix &m) noexcept -> acc{ + return m(0) + m(5) + m(10) + m(15); +} + +template +constexpr auto determinant(const Matrix &m) noexcept -> acc { + + const acc subFactor00 = m(2,2) * m(3,3) - m(2,3) * m(3,2); + const acc subFactor01 = m(1,2) * m(3,3) - m(1,3) * m(3,2); + const acc subFactor02 = m(1,2) * m(2,3) - m(1,3) * m(2,2); + const acc subFactor03 = m(0,2) * m(3,3) - m(0,3) * m(3,2); + const acc subFactor04 = m(0,2) * m(2,3) - m(0,3) * m(2,2); + const acc subFactor05 = m(0,2) * m(1,3) - m(0,3) * m(1,2); + + auto 
detCof = RowVec{{ + + (m(1,1) * subFactor00 - m(2,1) * subFactor01 + m(3,1) * subFactor02), + - (m(0,1) * subFactor00 - m(2,1) * subFactor03 + m(3,1) * subFactor04), + + (m(0,1) * subFactor01 - m(1,1) * subFactor03 + m(3,1) * subFactor05), + - (m(0,1) * subFactor02 - m(1,1) * subFactor04 + m(2,1) * subFactor05) + }}; + return + m(0,0) * detCof(0) + m(1,0) * detCof(1) + + m(2,0) * detCof(2) + m(3,0) * detCof(3); +} + + +template +constexpr auto inverse(const Matrix &m) noexcept -> Matrix{ + + const acc det = determinant(m); + if(almost_equal(det, acc{0})){ + return {}; + } + + acc c00 = m(2,2) * m(3,3) - m(2,3) * m(3,2); + acc c02 = m(2,1) * m(3,3) - m(2,3) * m(3,1); + acc c03 = m(2,1) * m(3,2) - m(2,2) * m(3,1); + acc c04 = m(1,2) * m(3,3) - m(1,3) * m(3,2); + acc c06 = m(1,1) * m(3,3) - m(1,3) * m(3,1); + acc c07 = m(1,1) * m(3,2) - m(1,2) * m(3,1); + acc c08 = m(1,2) * m(2,3) - m(1,3) * m(2,2); + acc c10 = m(1,1) * m(2,3) - m(1,3) * m(2,1); + acc c11 = m(1,1) * m(2,2) - m(1,2) * m(2,1); + acc c12 = m(0,2) * m(3,3) - m(0,3) * m(3,2); + acc c14 = m(0,1) * m(3,3) - m(0,3) * m(3,1); + acc c15 = m(0,1) * m(3,2) - m(0,2) * m(3,1); + acc c16 = m(0,2) * m(2,3) - m(0,3) * m(2,2); + acc c18 = m(0,1) * m(2,3) - m(0,3) * m(2,1); + acc c19 = m(0,1) * m(2,2) - m(0,2) * m(2,1); + acc c20 = m(0,2) * m(1,3) - m(0,3) * m(1,2); + acc c22 = m(0,1) * m(1,3) - m(0,3) * m(1,1); + acc c23 = m(0,1) * m(1,2) - m(0,2) * m(1,1); + + RowVec f0{{c00, c00, c02, c03}}; + RowVec f1{{c04, c04, c06, c07}}; + RowVec f2{{c08, c08, c10, c11}}; + RowVec f3{{c12, c12, c14, c15}}; + RowVec f4{{c16, c16, c18, c19}}; + RowVec f5{{c20, c20, c22, c23}}; + + RowVec v0{{m(0,1), m(0,0), m(0,0), m(0,0)}}; + RowVec v1{{m(1,1), m(1,0), m(1,0), m(1,0)}}; + RowVec v2{{m(2,1), m(2,0), m(2,0), m(2,0)}}; + RowVec v3{{m(3,1), m(3,0), m(3,0), m(3,0)}}; + + RowVec i0 = v1 * f0 - v2 * f1 + v3 * f2; + RowVec i1 = v0 * f0 - v2 * f3 + v3 * f4; + RowVec i2 = v0 * f1 - v1 * f3 + v3 * f5; + RowVec i3 = v0 * f2 - v1 * f4 + v2 * 
f5; + + RowVec sA{{+1, -1, +1, -1}}; + RowVec sB{{-1, +1, -1, +1}}; + + return Mat4( + transpose(i0 * sA), + transpose(i1 * sB), + transpose(i2 * sA), + transpose(i3 * sB) + ) * acc{1}/ det; +} + +template +constexpr auto translation_v3(const Matrix &m) noexcept -> RowVec{ + return {{m(3,0), m(3,1), m(3,2)}}; +} +template +constexpr auto translation_v4(const Matrix &m) noexcept -> RowVec{ + return {{m(3,0), m(3,1), m(3,2), acc{1}}}; +} +template +constexpr auto translation_m4x4(const RowVec &t) noexcept -> Matrix{ + return {{ + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + t.x(), t.y(), t.z(), 1 + }}; +} +template +constexpr auto translate(const Matrix &m, const RowVec &t) noexcept -> Matrix { + return Mat4( + m.row(0), + m.row(1), + m.row(2), + m.row(0)*t.x() + m.row(1)*t.y() + m.row(2)*t.z() + m.row(3) + ); +} + +/** + * @param pitch: angle in degrees + */ +template +constexpr auto x_rotation_m4x4(acc pitch) noexcept -> Matrix{ + pitch = deg_2_rad(pitch); + const acc cosA = cos(pitch); + const acc sinA = sin(pitch); + return {{ + 1, 0, 0, 0, + 0, cosA, -sinA, 0, + 0, sinA, cosA, 0, + 0, 0, 0, 1 + }}; +} +/** + * @param head: angle in degrees + */ +template +static constexpr auto y_rotation_m4x4(acc head) noexcept -> Matrix{ + head = deg_2_rad(head); + const acc cosA = cos(head); + const acc sinA = sin(head); + return {{ + cosA, 0, sinA, 0, + 0, 1, 0, 0, + -sinA, 0, cosA, 0, + 0, 0, 0, 1 + }}; +} +/** + * @param roll: angle in degrees + */ +template +constexpr auto z_rotation_m4x4(acc roll) noexcept -> Matrix{ + roll = deg_2_rad(roll); + const acc cosA = cos(roll); + const acc sinA = sin(roll); + return {{ + cosA, -sinA, 0, 0, + sinA, cosA, 0, 0, + 0, 0, 1, 0, + 0, 0, 0, 1 + }}; +} +template +constexpr auto rotation_m3x3(const Matrix &m) noexcept -> Matrix{ + return {{ + m(0,0),m(0,1),m(0,2), + m(1,0),m(1,1),m(1,2), + m(2,0),m(2,1),m(2,2) + }}; +} +template +constexpr auto rotation_m4x4(const Matrix &m) noexcept -> Matrix { + return {{ + m(0,0),m(0,1),m(0,2),0, + 
m(1,0),m(1,1),m(1,2),0, + m(2,0),m(2,1),m(2,2),0, + 0 ,0 ,0 ,1 + }}; +} +template +constexpr auto rotation_m4x4(const RowVec &angles) noexcept -> Matrix { + auto m = rotation_m3x3(angles); + return {{ + m(0,0),m(0,1),m(0,2),0, + m(1,0),m(1,1),m(1,2),0, + m(2,0),m(2,1),m(2,2),0, + 0 ,0 ,0 ,1 + }}; +} + +template +constexpr auto rotate(const Matrix &m, const RowVec &angles) noexcept -> Matrix { + return m*rotation_m4x4(angles*PI_180); +} +template +constexpr auto rotate(const Matrix &m, const RowVec &v, acc angle) noexcept -> Matrix { + + angle = deg_2_rad(angle); + + const acc a = angle; + const acc c = cos(a); + const acc s = sin(a); + + auto axis = normalize(v); + auto temp = ((acc{1} - c) * axis); + + Mat3 r = { + temp(0) * axis(0) + c, + temp(1) * axis(0) - axis(2) * s, + temp(2) * axis(0) + axis(1) * s, + temp(0) * axis(1) + axis(2) * s, + c + temp(1) * axis(1), + temp(2) * axis(1) - axis(0) * s, + temp(0) * axis(2) - axis(1) * s, + temp(1) * axis(2) + axis(0) * s, + temp(2) * axis(2) + c + }; + + return Mat4( + m.col(0) * r(0,0) + m.col(1) * r(1,0) + m.col(2) * r(2,0), + m.col(0) * r(0,1) + m.col(1) * r(1,1) + m.col(2) * r(2,1), + m.col(0) * r(0,2) + m.col(1) * r(1,2) + m.col(2) * r(2,2), + m.col(3) + ); +} + +template +constexpr auto scale_m4x4(const RowVec &s) noexcept -> Matrix { + return {{ + s.x(), 0, 0, 0, + 0, s.y(), 0, 0, + 0, 0, s.z(), 0, + 0, 0, 0, 1 + }}; +} +template +constexpr auto scale(const Matrix &m, const RowVec &s) noexcept -> Matrix { + return Mat4( + m.row(0) * s.x(), + m.row(1) * s.y(), + m.row(2) * s.z(), + m.row(3) + ); +} + +template +constexpr auto perspective(acc fovY, acc aspectRatio, acc zNear, acc zFar) -> Matrix { + // fovy = 2 atan(w/(2d)) + // w: width of the object perpendicular to the line of sight + // d: distance of the object + const auto c = acc{1}/std::tan(fovY/2); + const auto diffZ = zFar-zNear; + return {{ + c/aspectRatio, 0, 0, 0, + 0, c, 0, 0, + 0, 0, -(zFar+zNear)/diffZ, -1, + 0, 0, -(2*zFar*zNear)/diffZ, 0 + }}; 
+} + +template +constexpr auto transform(const RowVec &scale, const RowVec &rotation, const RowVec &translate) -> Matrix { +// return translation_m4x4(translate)*rotation_m4x4(rotation*PI_180)*scale_m4x4(scale); + return scale_m4x4(scale)*rotation_m4x4(rotation*PI_180)*translation_m4x4(translate); +} + + + + + + +template +constexpr auto axis_angle_m4x4(const Vec3 &axis, acc angle) noexcept -> Matrix{ + return {axis_angle_m3x3(axis,angle)}; +} + + + +//template +//constexpr auto transform2(const RowVec &scale, const RowVec &rotation, const RowVec &translate) -> Matrix { +// Mat4 tr = rotation_m4x4(rotation*PI_180)*scale_m4x4(scale); +// tr.t(0) = translate.x(); +// tr.t(1) = translate.y(); +// tr.t(2) = translate.z(); +// return tr; +//// return translation_m4x4(translate)*rotation_m4x4(rotation*PI_180)*scale_m4x4(scale); +//} + +template +constexpr auto look_at(const Pt &eye, const Pt &at, const Pt &up) noexcept -> Matrix { + +// auto f = normalize(at - eye); +// auto s = normalize(cross(f, up)); +// auto u = cross(s, f); +// return{{ +// s.x(), s.y(), s.z(), -dot(s, eye), +// u.x(), u.y(), u.z(), -dot(u, eye), +// -f.x(),-f.y(),-f.z(), dot(f, eye), +// 0, 0, 0, 1 +// }}; + + + auto zaxis = normalize(at - eye); + auto xaxis = normalize(cross(zaxis, up)); + auto yaxis = cross(xaxis, zaxis); + zaxis = acc{-1}*zaxis; + + return {{ + xaxis.x(), yaxis.x(), zaxis.x(), 0, + xaxis.y(), yaxis.y(), zaxis.y(), 0, + xaxis.z(), yaxis.z(), zaxis.z(), 0, + -dot(xaxis, eye), -dot(yaxis, eye), -dot(zaxis, eye), 1 + }}; +} + + +template +constexpr auto orthographic(acc left, acc right, acc bottom, acc top, acc zNear, acc zFar) -> Matrix { +// return Mat4{ +// acc{2}/(right-left),0,0,0, +// 0,acc{2}/(top-bottom),0,0, +// 0,0,acc{-2}/(zFar-zNear),0, +// -((right+left)/(right-left)), -((top+bottom)/(top-bottom)), -((zFar+zNear)/(zFar-zNear)), 1 +// }; + return {{ + acc{2}/(right-left),0,0,-((right+left)/(right-left)), + 0,acc{2}/(top-bottom),0,-((top+bottom)/(top-bottom)), + 
0,0,acc{-2}/(zFar-zNear),-((zFar+zNear)/(zFar-zNear)), + 0, 0, 0, 1 + }}; +} + +} + + diff --git a/cpp-projects/base/geometry/mesh.cpp b/cpp-projects/base/geometry/mesh.cpp new file mode 100644 index 0000000..b1498d1 --- /dev/null +++ b/cpp-projects/base/geometry/mesh.cpp @@ -0,0 +1,607 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#include "mesh.hpp" + +// local +#include "utility/logger.hpp" + +using namespace tool::geo; + +auto BoneData::add_bone_data(int boneID, float weight) -> bool{ + + for(size_t ii = 0; ii < idS.size(); ++ii){ + if(almost_equal(weights[ii], 0.f)){ + idS[ii] = boneID; + weights[ii] = weight; + return true; + } + } + return false; +} + +auto Mesh::mean_position() const noexcept -> Pt3f{ + if(vertices.empty()){ + return {}; + } + return std::accumulate(vertices.begin(), vertices.end(), geo::Pt3f{0,0,0})/static_cast(vertices.size()); +} + +auto Mesh::generate_normals() noexcept -> void{ + + if(vertices.size() == 0 || triIds.size() == 0){ + Logger::warning("[Mesh::generate_normals] No vertices or triangles.\n"); + return; + } + + // compute triangles normals + std::vector triNormals; + triNormals.reserve(triIds.size()); + for(const auto &triId : triIds){ + triNormals.push_back(normalize( + cross( + vec(vertices[triId.id1()], vertices[triId.id2()]), + vec(vertices[triId.id3()], vertices[triId.id1()]) + ) + ) + ); + } + + // resize array + normals.resize(vertices.size()); + + // compute normals + std::vector verticesInTrianglesMsk(vertices.size(), 0); + + for(size_t ii = 0; ii < triIds.size(); ++ii){ + + for(int jj = 0; jj < 3; ++jj){ + + size_t id = triIds[ii].ids(jj); + verticesInTrianglesMsk[id] = 1; + + Vec3f currNormal = triNormals[ii]; + if(jj >= 1){ + if(dot(currNormal, normals[id]) < 0.f){ + currNormal = invert(currNormal); + } + } + normals[id] += currNormal; + } + } + + // normalize and inverse + auto center = mean_position(); + for(size_t ii = 0; ii < vertices.size(); ++ii){ + if(verticesInTrianglesMsk[ii] != 1){// in this case vertices doesn't belong to a triangle + normals[ii] = vec(vertices[ii], center); + } + normals[ii] = invert(normalize(normals[ii])); + } +} + +auto Mesh::generate_tangents() noexcept -> void{ + + if(normals.size() != vertices.size()){ + 
Logger::warning("[Mesh::generate_tangents] Invalid normals number of no vertices.\n"); + return; + } + + if(normals.size() != tCoords.size()){ + Logger::warning("[Mesh::generate_tangents] No texture coordinates defined.\n"); + return; + } + + std::vector tan1Accum(vertices.size()); + std::vector tan2Accum(vertices.size()); + tangents.resize(vertices.size()); + + for(const auto &tri : triIds){ + + const Pt3f &p1 = vertices[tri.ids.x()]; + const Pt3f &p2 = vertices[tri.ids.y()]; + const Pt3f &p3 = vertices[tri.ids.z()]; + + const Vec2f &tc1 = tCoords[tri.ids.x()]; + const Vec2f &tc2 = tCoords[tri.ids.y()]; + const Vec2f &tc3 = tCoords[tri.ids.z()]; + + Vec3f q1 = p2 - p1; + Vec3f q2 = p3 - p1; + float s1 = tc2.x() - tc1.x(), s2 = tc3.x() - tc1.x(); + float t1 = tc2.y() - tc1.y(), t2 = tc3.y() - tc1.y(); + float r = 1 / (s1 * t2 - s2 * t1); + Vec3f tan1{(t2*q1.x() - t1*q2.x()) * r, (t2*q1.y() - t1*q2.y()) * r, (t2*q1.z() - t1*q2.z()) * r}; + Vec3f tan2{(s1*q2.x() - s2*q1.x()) * r, (s1*q2.y() - s2*q1.y()) * r, (s1*q2.z() - s2*q1.z()) * r}; + + tan1Accum[tri.ids.x()] += tan1; + tan1Accum[tri.ids.y()] += tan1; + tan1Accum[tri.ids.z()] += tan1; + + tan2Accum[tri.ids.x()] += tan2; + tan2Accum[tri.ids.y()] += tan2; + tan2Accum[tri.ids.z()] += tan2; + } + + for(size_t ii = 0; ii < vertices.size(); ++ii){ + + const Vec3f &n = normals[ii]; + Vec3f &t1 = tan1Accum[ii]; + Vec3f &t2 = tan2Accum[ii]; + + // Gram-Schmidt orthogonalize + tangents[ii] = to_pt4(normalize(t1 -(dot(n,t1) * n)), 1.f); + + // store handedness in w + tangents[ii].w() = (dot(cross(n,t1), t2 ) < 0.f) ? 
-1.f : 1.f; + } +} + +auto Mesh::check() const -> void{ + + if(vertices.size() == 0){ + std::cout << "no vertices\n"; + return; + } + + // auto minI = std::min_element(std::begin(vertices), std::end(vertices)); + // auto maxI = std::max_element(std::begin(vertices), std::end(vertices)); + auto comp = ([](const Pt3f &pt){ + return pt == Pt3f{}; + }); + + auto countNull = std::count_if(std::begin(vertices), std::end(vertices), comp); + auto sum = std::accumulate(std::begin(vertices), std::end(vertices), Pt3f{}); + auto mean = sum / static_cast(vertices.size()); + std::cout << " [point] " << sum << " " << mean << " " << countNull << "\n"; + + if(normals.size() == 0){ + std::cout << "no normals\n"; + return; + } + + countNull = std::count_if(std::begin(normals), std::end(normals), comp); + sum = std::accumulate(std::begin(normals), std::end(normals), Pt3f{}); + mean = sum / static_cast(normals.size()); + std::cout << " [normal] " << sum << " " << mean << " " << countNull << "\n"; + + + if(bones.size() != 0){ + + + + size_t total0 = 0; + size_t totalNon0 = 0; + + size_t count0 = 0; + size_t count1 = 0; + size_t count2 = 0; + size_t count3 = 0; + size_t count4 = 0; + + std::unordered_map bonesIdCount; + for(const auto &bone : bones){ + + size_t localCount = 0; + + for(size_t ii = 0; ii < bone.weights.size(); ++ii){ + const auto &w = bone.weights[ii]; + const auto &id = bone.idS[ii]; + if(!almost_equal(w, 0.f)){ + ++localCount; + + if(bonesIdCount.count(id) == 0){ + bonesIdCount[id] = 1; + }else{ + ++bonesIdCount[id]; + } + } + } + + count0 += localCount == 0 ? 1 : 0; + count1 += localCount == 1 ? 1 : 0; + count2 += localCount == 2 ? 1 : 0; + count3 += localCount == 3 ? 1 : 0; + count4 += localCount == 4 ? 
1 : 0; + + total0 += bone.weights.size()-localCount; + totalNon0 += localCount; + + } + + std::cout << " [bones]\n" + << " count0: " << count0 << " count1: " << count1 << " count2: " << count2 << " count3: " << count3 << " count4: " << count4 << "\n" + << " pTotalNon0: " << (1.f*totalNon0/(vertices.size()*4))<< " pTotal0: " << (1.f*total0/(vertices.size()*4)) << "\n" + << " p0: " << (1.f*count0 / (vertices.size())) << " p1: " << (1.f*count1 / (vertices.size())) << " p2: " << (1.f*count2 / (vertices.size())) + << " p3: " << (1.f*count3 / (vertices.size())) << " p4: " << (1.f*count4 / (vertices.size())) << "\n"; + + // std::cout << "bones mapping\n"; + // for(auto &m : bonesMapping){ + // auto id = m.second; + + // if(bonesIdCount.count(id) != 0){ + // std::cout << "bone " << m.first << " " << bonesIdCount[id] << " \n" << bonesInfo[id].offset << "\n" << bonesInfo[id].final << "\n"; + // bonesIdCount.erase(id); + // }else{ + // std::cout << "bone " << m.first << " no referenced \n"; + // } + // } + + std::cout << "id remaining: " << bonesIdCount.size() << "\n"; + } +} + + + + +//void convertFacesToAdjancencyFormat(){ + +// std::vector> vertices; +// std::vector> normals; +// std::vector triIds; +// std::vector tCoords; +// std::vector colors; +// std::vector> tangents; +// std::vector bones; + +// Elements with adjacency info +// std::vector elAdj(faces.size() * 2); + +// // Copy and make room for adjacency info +// for( GLuint i = 0; i < faces.size(); i+=3) +// { +// elAdj[i*2 + 0] = faces[i]; +// elAdj[i*2 + 1] = std::numeric_limits::max(); +// elAdj[i*2 + 2] = faces[i+1]; +// elAdj[i*2 + 3] = std::numeric_limits::max(); +// elAdj[i*2 + 4] = faces[i+2]; +// elAdj[i*2 + 5] = std::numeric_limits::max(); +// } + +// // Find matching edges +// for( GLuint i = 0; i < elAdj.size(); i+=6) +// { +// // A triangle +// GLuint a1 = elAdj[i]; +// GLuint b1 = elAdj[i+2]; +// GLuint c1 = elAdj[i+4]; + +// // Scan subsequent triangles +// for(GLuint j = i+6; j < elAdj.size(); 
j+=6) +// { +// GLuint a2 = elAdj[j]; +// GLuint b2 = elAdj[j+2]; +// GLuint c2 = elAdj[j+4]; + +// // Edge 1 == Edge 1 +// if( (a1 == a2 && b1 == b2) || (a1 == b2 && b1 == a2) ) +// { +// elAdj[i+1] = c2; +// elAdj[j+1] = c1; +// } +// // Edge 1 == Edge 2 +// if( (a1 == b2 && b1 == c2) || (a1 == c2 && b1 == b2) ) +// { +// elAdj[i+1] = a2; +// elAdj[j+3] = c1; +// } +// // Edge 1 == Edge 3 +// if ( (a1 == c2 && b1 == a2) || (a1 == a2 && b1 == c2) ) +// { +// elAdj[i+1] = b2; +// elAdj[j+5] = c1; +// } +// // Edge 2 == Edge 1 +// if( (b1 == a2 && c1 == b2) || (b1 == b2 && c1 == a2) ) +// { +// elAdj[i+3] = c2; +// elAdj[j+1] = a1; +// } +// // Edge 2 == Edge 2 +// if( (b1 == b2 && c1 == c2) || (b1 == c2 && c1 == b2) ) +// { +// elAdj[i+3] = a2; +// elAdj[j+3] = a1; +// } +// // Edge 2 == Edge 3 +// if( (b1 == c2 && c1 == a2) || (b1 == a2 && c1 == c2) ) +// { +// elAdj[i+3] = b2; +// elAdj[j+5] = a1; +// } +// // Edge 3 == Edge 1 +// if( (c1 == a2 && a1 == b2) || (c1 == b2 && a1 == a2) ) +// { +// elAdj[i+5] = c2; +// elAdj[j+1] = b1; +// } +// // Edge 3 == Edge 2 +// if( (c1 == b2 && a1 == c2) || (c1 == c2 && a1 == b2) ) +// { +// elAdj[i+5] = a2; +// elAdj[j+3] = b1; +// } +// // Edge 3 == Edge 3 +// if( (c1 == c2 && a1 == a2) || (c1 == a2 && a1 == c2) ) +// { +// elAdj[i+5] = b2; +// elAdj[j+5] = b1; +// } +// } +// } + +// // Look for any outside edges +// for( GLuint i = 0; i < elAdj.size(); i+=6) +// { +// if( elAdj[i+1] == std::numeric_limits::max() ) elAdj[i+1] = elAdj[i+4]; +// if( elAdj[i+3] == std::numeric_limits::max() ) elAdj[i+3] = elAdj[i]; +// if( elAdj[i+5] == std::numeric_limits::max() ) elAdj[i+5] = elAdj[i+2]; +// } + +// // Copy all data back into el +// faces = elAdj; +//} + + +// AABB3 AABB() const{ + +// if(vertices.size() == 0){ +// return AABB3(); +// } + +// Pt3 min = vertices[0]; +// Pt3 max = vertices[0]; +// for(const auto &vertex : vertices){ +// min.x() = std::min(vertex.x(), min.x()); +// min.y() = std::min(vertex.y(), min.y()); +// 
min.z() = std::min(vertex.z(), min.z()); + +// max.x() = std::max(vertex.x(), max.x()); +// max.y() = std::max(vertex.y(), max.y()); +// max.z() = std::max(vertex.z(), max.z()); +// } +// return aabb_from_points(min, max); +// } + + +//template +//bool load_from_obj(const std::string &path, Mesh &mesh){ + +// std::ifstream fileStream(path); +// if(!fileStream.is_open()){ +// std::cerr << "-Error : load_from_obj -> Can't open file " << path << std::endl; +// return false; +// } + +// bool endFile = false; +// int nbVertices = 0; +// int nbNormals = 0; +// int nbTextureCoords = 0; +// int nbTri = 0; + +// while(!endFile){ + +// std::string line; +// std::getline(fileStream, line); +// if(line.size() == 0){ +// endFile = true; +// break; +// } + +// if(line[0] == 'v'){ +// if(line[1] == ' ') +// ++nbVertices; +// else if(line[1] == 'n') +// ++nbNormals; +// else if(line[1] == 't') +// ++nbTextureCoords; + +// } +// else if(line[0] == 'f'){ +// ++nbTri; +// }else if(line[0] == '#'){ +// continue; +// }else if(line[0] == 'm'){ +// continue; +// } +// } + +// std::vector> points(nbVertices); +// std::vector> normals(nbNormals); +// std::vector> colors(nbVertices); +// std::vector> textureCoords(nbTextureCoords); +// std::vector triIds(nbTri); + + +// fileStream.clear(); +// fileStream.seekg(0, std::ios::beg); +// endFile = false; + +// int currentVertexId = 0; +// int currentNormalId = 0; +// int currentTextureCoordId = 0; +// int currentTriId = 0; + +// while(!endFile){ + +// std::string line; +// std::getline(fileStream, line); +// if(line.size() == 0){ +// endFile = true; +// break; +// } + +// std::vector elements = String::split(line, ' '); +// if(elements[0] == "v"){ + +// if(elements.size() >= 4){ +// points[currentVertexId] = Pt3(static_cast(std::stod(elements[1])), +// static_cast(std::stod(elements[2])), +// static_cast(std::stod(elements[3]))); +// } + +// if(elements.size() == 7){ +// colors[currentVertexId] = 
Pt4(std::stof(elements[4]),std::stof(elements[5]),std::stof(elements[6]), 0.f); +// } +// currentVertexId++; + +// }else if(elements[0] == "vt"){ + +// if(elements.size() == 3){ +// textureCoords[currentTextureCoordId++] = Pt2{std::stof(elements[1]),std::stof(elements[2])}; +// } +// }else if(elements[0] == "vn"){ + +// if(elements.size() == 4){ +// normals[currentNormalId++] = Pt3(static_cast(std::stod(elements[1])), +// static_cast(std::stod(elements[2])), +// static_cast(std::stod(elements[3]))); +// } +// }else if(elements[0] == "f"){ + +// if(elements.size() == 4){ +// triIds[currentTriId++] = TriIds(std::stoi(String::split(elements[1], '/')[0])-1,std::stoi(String::split(elements[2], '/')[0])-1,std::stoi(String::split(elements[3], '/')[0])-1); +// } +// } +// } + +// mesh.vertices = std::move(points); +// mesh.normals = std::move(normals); +// mesh.triIds = std::move(triIds); +// mesh.tCoords = std::move(textureCoords); +// mesh.colors = std::move(colors); + + +// return true; +//} + + +//// mesh +//template +//void split_BVHNode(BVHNode *node, Mesh &mesh, int depth){ + +// if(depth-- == 0){ +// return; +// } + +// // only split if this node contains triangles +// if(node->triangles.size() > 0){ + +// for(auto &child : node->children){ +// child = std::make_unique>(); +// } + +// Pt3 c = node->bounds.origin; +// Vec3 e = node->bounds.size*acc{0.5}; +// node->children[0]->bounds = AABB3(c + Vec3(-e.x(), +e.y(), -e.z()), e); +// node->children[1]->bounds = AABB3(c + Vec3(+e.x(), +e.y(), -e.z()), e); +// node->children[2]->bounds = AABB3(c + Vec3(-e.x(), +e.y(), +e.z()), e); +// node->children[3]->bounds = AABB3(c + Vec3(+e.x(), +e.y(), +e.z()), e); +// node->children[4]->bounds = AABB3(c + Vec3(-e.x(), -e.y(), -e.z()), e); +// node->children[5]->bounds = AABB3(c + Vec3(+e.x(), -e.y(), -e.z()), e); +// node->children[6]->bounds = AABB3(c + Vec3(-e.x(), -e.y(), +e.z()), e); +// node->children[7]->bounds = AABB3(c + Vec3(+e.x(), -e.y(), +e.z()), e); +// } + +// // 
if this node was just split +// if(node->triangles.size() > 0){ + +// for(auto &child : node->children){ + +// child->triangles.reserve(node->triangles.size()); +// for(auto triId : node->triangles){ + +// if(triangle_AABB(mesh.triangle(triId), child->bounds)){ +// child->triangles.emplace_back(triId); +// } +// } +// } +// } + +// node->triangles.clear(); +// for(auto &child : node->children){ +// if(child != nullptr){ +// if(child->triangles.size() > 0){ +// split_BVHNode(child.get(), mesh, depth); +// }else{ +// child = nullptr; +// } +// } +// } +//} + +//template +//void accelerate_mesh(Mesh &mesh){ + +// if(mesh.accelerator){ +// return; +// } + +// mesh.accelerator = std::make_unique>(); +// mesh.accelerator->bounds = mesh.AABB(); +// mesh.accelerator->triangles.resize(mesh.triIds.size()); + +// std::iota(mesh.accelerator->triangles.begin(), mesh.accelerator->triangles.end(), acc{0}); +// split_BVHNode(mesh.accelerator.get(), mesh, 3); +//} +// bool save_to_obj(const std::string &path) const{ + +// std::ofstream flowOBJ(path); + +// // open file to write +// if(!flowOBJ.is_open()){ +// std::cerr << "-Error : save_to_obj -> Can't open obj file " << path << std::endl; +// return false; +// } + +// if(vertices.size() == 0){ +// std::cerr << "-Error : save_to_obj-> surface is empty " << std::endl; +// return false; +// } + +// std::ostringstream osV; +// std::vector>::const_iterator itC = colors.cbegin(); +// for(typename std::vector>::const_iterator itV = vertices.cbegin(); itV != vertices.cend(); ++itV) +// { +// osV.str(std::string()); +// osV << "v " << itV->x() << " " << itV->y() << " " << itV->z(); +// osV << "\n"; +// flowOBJ << osV.str(); +// } + +// for(auto itT = triIds.begin(); itT != triIds.end(); ++itT){ +// osV.str(std::string()); +// osV << "f "; +// osV << (itT->ids[0] +1) << " " << (itT->ids[1] + 1) << " " << (itT->ids[2] + 1); +// osV << "\n"; +// flowOBJ << osV.str(); +// } + +// return true; +// } + + diff --git 
a/cpp-projects/base/geometry/mesh.hpp b/cpp-projects/base/geometry/mesh.hpp new file mode 100644 index 0000000..1ef736c --- /dev/null +++ b/cpp-projects/base/geometry/mesh.hpp @@ -0,0 +1,83 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// std +#include +#include +#include +#include + +// local +#include "geometry/point2.hpp" +#include "geometry/point3.hpp" +#include "geometry/point4.hpp" +#include "geometry/triangle3.hpp" + +namespace tool::geo { + + +static constexpr int nbBonesPerVertex = 4; + +struct BoneData{ + + std::array idS = {}; + std::array weights = {}; + + auto add_bone_data(int boneID, float weight) -> bool; +}; + +struct Mesh{ + + std::vector vertices; + std::vector normals; + std::vector triIds; + std::vector tCoords; + std::vector colors; + std::vector tangents; + std::vector bones; + // BVHNodeP accelerator = nullptr; + + Mesh() = default; + Mesh(const Mesh& other) = default; + Mesh& operator=(const Mesh& other) = default; + Mesh(Mesh&& other) = default; + Mesh& operator=(Mesh&& other) = default; + + constexpr auto triangle(size_t id) const -> Triangle3{ + return {vertices[triIds[id].id1()],vertices[triIds[id].id2()],vertices[triIds[id].id3()]}; + } + + auto mean_position() const noexcept -> geo::Pt3f; + auto generate_normals() noexcept -> void; + auto generate_tangents() noexcept -> void; + + auto check() const -> void; + +}; +} + diff --git a/cpp-projects/base/geometry/obb3.hpp b/cpp-projects/base/geometry/obb3.hpp new file mode 100644 index 0000000..46889f2 --- /dev/null +++ b/cpp-projects/base/geometry/obb3.hpp @@ -0,0 +1,128 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to 
permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "geometry/matrix3.hpp" +#include "geometry/point3.hpp" + +namespace tool::geo { + +template +struct OBB3{ + + OBB3() = default; + + constexpr OBB3(const Pt3 &p, const Vec3 &s, const Mat3 &o = geo::Mat3::identity()) noexcept : position(p), size(s), orientation(o){ + } + + constexpr auto min() const noexcept -> Vec3{ + const Vec3 p1 = position + size; + const Vec3 p2 = position - size; + return Vec3(std::min(p1.x(), p2.x()), std::min(p1.y(), p2.y()),std::min(p1.z(), p2.z())); + } + + constexpr auto max() const noexcept -> Vec3{ + const Vec3 p1 = position + size; + const Vec3 p2 = position - size; + return Vec3(std::max(p1.x(), p2.x()), std::max(p1.y(), p2.y()),std::max(p1.z(), p2.z())); + } + + auto is_point_inside(const Pt3 &p) const noexcept -> bool{ + + const Vec3 dir = p - position; + + for(int ii = 0; ii < 3; ++ii){ + const int id = ii*3; + const acc distance = dot(dir,{orientation[id], orientation[id + 1], orientation[id + 2]}); + + const bool equal1 = almost_equal(distance, size[ii]); + const bool equal2 = almost_equal(distance, -size[ii]); + + if(distance > size[ii] && 
!equal1){ + return false; + } + if(distance < -size[ii] && !equal2){ + return false; + } + } + return true; + } + + Pt3 position = {0,0,0}; + Vec3 size = {1,1,1}; + Mat3 orientation{}; +}; +} + + +//template +//static constexpr bool point_in_obb(const Pt3 &p, const OBB3 &obb) noexcept { + +// const Vec3 dir = p - obb.position; +// for(int ii = 0; ii < 3; ++ii){ +// const int id = ii*3; +// const acc distance = dot(dir,{obb.orientation[id], obb.orientation[id + 1],obb.orientation[id + 2]}); + +// const bool equal1 = almost_equal(distance, obb.size[ii]); +// const bool equal2 = almost_equal(distance, -obb.size[ii]); + +// if(distance > obb.size[ii] && !equal1){ +// return false; +// } +// if(distance < -obb.size[ii] && !equal2){ +// return false; +// } +// } +// return true; +//} + + +//template +//constexpr Pt3 closest_point(const OBB3 &obb, const Pt3 &p) noexcept{ // TODO : manage float accuracy + +// Pt3 res = p; +// const Vec3 dir = p - obb.position; + +// for(int ii = 0; ii < 3; ++ii){ +// int id = ii*3; +// Vec3 axis = {obb.orientation[id], obb.orientation[id + 1],obb.orientation[id + 2]}; + +// acc distance = dot(dir,axis); +// if(distance > obb.size[ii]){ +// distance = obb.size[ii]; +// } +// if(distance < -obb.size[ii]){ +// distance = -obb.size[ii]; +// } + +// res += (axis*distance); + +// } + +// return res; +//} diff --git a/cpp-projects/base/geometry/octree.hpp b/cpp-projects/base/geometry/octree.hpp new file mode 100644 index 0000000..1a8901d --- /dev/null +++ b/cpp-projects/base/geometry/octree.hpp @@ -0,0 +1,228 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, 
copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + + +//// std +//#include +//#include +//#include + +//// local +//#include "geometry/aabb3.hpp" +//#include "utility/vector_utility.hpp" + +//namespace tool::geo { + +//struct Color4{ +//// std::array rgba; +// float r,g,b,a; +//}; + +//template +//struct Voxel{ +// Point3 p; +// Color4 c; +//}; + +//// 4 ----- 5 +//// / | / | +//// 7 ----- 6 | +//// | 0 --|-- 1 +//// | / | / +//// 3 ----- 2 + +//// +Y +Z +//// | / +//// 0 --- +X + + + +//template +//constexpr std::array, 8> mu = { +//// Pt3{+1,+1,-1}, // 0 +//// Pt3{-1,+1,-1}, // 1 +//// Pt3{-1,+1,+1}, // 2 +//// Pt3{+1,+1,+1}, // 3 +//// Pt3{+1,-1,-1}, // 4 +//// Pt3{-1,-1,-1}, // 5 +//// Pt3{-1,-1,+1}, // 6 +//// Pt3{+1,-1,+1}, // 7 +// Pt3{-1,-1,+1}, // 0 +// Pt3{+1,-1,+1}, // 1 +// Pt3{+1,-1,-1}, // 2 +// Pt3{-1,-1,-1}, // 3 +// Pt3{-1,+1,+1}, // 4 +// Pt3{+1,+1,+1}, // 5 +// Pt3{+1,+1,-1}, // 6 +// Pt3{-1,+1,-1}, // 7 + +//}; + +//constexpr std::array, 8> mu3 = { +// Pt3{0,0,1}, // 0 +// Pt3{1,0,1}, // 1 +// Pt3{1,0,0}, // 2 +// Pt3{0,0,0}, // 3 
+// Pt3{0,1,1}, // 4 +// Pt3{1,1,1}, // 5 +// Pt3{1,1,0}, // 6 +// Pt3{0,1,0} // 7 +//}; + + +////constexpr std::array,8>,8> mu4{{ + +////// {Pt3{-1,-1,+0}, Pt3{+0,-1,+0}, Pt3{+0,+0,+0}, Pt3{-1,+0,+0}, Pt3{-1,-1,+1}, Pt3{+0,-1,+1}, Pt3{+0,+0,+1}, Pt3{-1,+0,+1}}, // 0 +////// {Pt3{+0,-1,+0}, Pt3{+1,-1,+0}, Pt3{+1,+0,+0}, Pt3{+0,+0,+0}, Pt3{+0,-1,+1}, Pt3{+1,-1,+1}, Pt3{+1,+0,+1}, Pt3{+0,+0,+1}}, // 1 +////// {Pt3{+0,-1,-1}, Pt3{+1,-1,-1}, Pt3{+1,+0,-1}, Pt3{+0,+0,-1}, Pt3{+0,-1,+0}, Pt3{+1,-1,+0}, Pt3{+1,+0,+0}, Pt3{+0,+0,+0}}, // 2 +////// {Pt3{-1,-1,-1}, Pt3{+0,-1,-1}, Pt3{+0,+0,-1}, Pt3{-1,+0,-1}, Pt3{-1,-1,+0}, Pt3{+0,-1,+0}, Pt3{+0,+0,+0}, Pt3{-1,+0,+0}}, // 3 +////// {Pt3{-1,+0,+0}, Pt3{+0,+0,+0}, Pt3{+0,+1,+0}, Pt3{-1,+1,+0}, Pt3{-1,+0,+1}, Pt3{+0,+0,+1}, Pt3{+0,+1,+1}, Pt3{-1,+1,+1}}, // 4 +////// {Pt3{+0,+0,+0}, Pt3{+1,+0,+0}, Pt3{+1,+1,+0}, Pt3{+0,+1,+0}, Pt3{+0,+0,+1}, Pt3{+1,+0,+1}, Pt3{+1,+1,+1}, Pt3{+0,+1,+1}}, // 5 +////// {Pt3{+0,+0,-1}, Pt3{+1,+0,-1}, Pt3{+1,+1,-1}, Pt3{+0,+1,-1}, Pt3{+0,+0,+0}, Pt3{+1,+0,+0}, Pt3{+1,+1,+0}, Pt3{+0,+1,+0}}, // 6 +////// {Pt3{-1,+0,-1}, Pt3{+0,+0,-1}, Pt3{+0,+1,-1}, Pt3{-1,+1,-1}, Pt3{-1,+0,+0}, Pt3{+0,+0,+0}, Pt3{+0,+1,+0}, Pt3{-1,+1,+0}}, // 7 + +//// {Pt3{-1,-1,+0}, Pt3{+0,-1,+0}, Pt3{+0,+0,+0}, Pt3{-1,+0,+0}, Pt3{-1,-1,+1}, Pt3{+0,-1,+1}, Pt3{+0,+0,+1}, Pt3{-1,+0,+1}}, // 0 +//// {Pt3{+0,-1,+0}, Pt3{+1,-1,+0}, Pt3{+1,+0,+0}, Pt3{+0,+0,+0}, Pt3{+0,-1,+1}, Pt3{+1,-1,+1}, Pt3{+1,+0,+1}, Pt3{+0,+0,+1}}, // 1 +//// {Pt3{+0,-1,-1}, Pt3{+1,-1,-1}, Pt3{+1,+0,-1}, Pt3{+0,+0,-1}, Pt3{+0,-1,+0}, Pt3{+1,-1,+0}, Pt3{+1,+0,+0}, Pt3{+0,+0,+0}}, // 2 +//// {Pt3{-1,-1,-1}, Pt3{+0,-1,-1}, Pt3{+0,+0,-1}, Pt3{-1,+0,-1}, Pt3{-1,-1,+0}, Pt3{+0,-1,+0}, Pt3{+0,+0,+0}, Pt3{-1,+0,+0}}, // 3 +//// {Pt3{-1,+0,+0}, Pt3{+0,+0,+0}, Pt3{+0,+1,+0}, Pt3{-1,+1,+0}, Pt3{-1,+0,+1}, Pt3{+0,+0,+1}, Pt3{+0,+1,+1}, Pt3{-1,+1,+1}}, // 4 +//// {Pt3{+0,+0,+0}, Pt3{+1,+0,+0}, Pt3{+1,+1,+0}, Pt3{+0,+1,+0}, Pt3{+0,+0,+1}, Pt3{+1,+0,+1}, Pt3{+1,+1,+1}, 
Pt3{+0,+1,+1}}, // 5 +//// {Pt3{+0,+0,-1}, Pt3{+1,+0,-1}, Pt3{+1,+1,-1}, Pt3{+0,+1,-1}, Pt3{+0,+0,+0}, Pt3{+1,+0,+0}, Pt3{+1,+1,+0}, Pt3{+0,+1,+0}}, // 6 +//// {Pt3{-1,+0,-1}, Pt3{+0,+0,-1}, Pt3{+0,+1,-1}, Pt3{-1,+1,-1}, Pt3{-1,+0,+0}, Pt3{+0,+0,+0}, Pt3{+0,+1,+0}, Pt3{-1,+1,+0}}, // 7 +////}}; + + + + +//template +//struct VoxelOctreeNode; + +//template +//using VoxelOctreeNodeUP = std::unique_ptr>; + +//template +//struct VoxelOctreeNode { + +// VoxelOctreeNode(AABB3 b, Pt3 id = {0,0,0}) : bounds(b), idNode(id){} + +// AABB3 bounds; +// std::unique_ptr,8>> children = nullptr; + +// void get_finals_nodes(std::vector*> &finalNodes) { + +// if(ids.size() > 0){ +// finalNodes.emplace_back(this); +// return; +// } + +// if(children){ +// for(const auto &child : *children){ +// child->get_finals_nodes(finalNodes); +// } +// } +// } + +// template +// auto sorted_nodes(size_t depth) { + +// using VoxN = VoxelOctreeNode*; +// size_t side = static_cast(math::ipow(2,depth)); +// std_v3 sortedNodes(side, std_v2(side, std::vector(side, nullptr))); + +// std::vector*> nodes; +// get_finals_nodes(nodes); + +// for(const auto node : nodes){ +// sortedNodes[node->idNode.x()][node->idNode.y()][node->idNode.z()] = node; +// } +// return sortedNodes; +// } + +////private: + +// std::vector ids; +// Pt3 idNode; +//}; + + + + +//template +//void split_tree(const std::vector> &voxels, VoxelOctreeNode *node, int depth, std::vector idVoxels = {}){ + +// if(depth-- <= 0){ +// // store ids +// node->ids = std::move(idVoxels); +// return; +// } + +// // fill id +// if(idVoxels.size() == 0){ +// idVoxels.resize(voxels.size()); +// std::iota(idVoxels.begin(), idVoxels.end(), 0); +// } + +// if(!node->children){ +// node->children = std::make_unique,8>>(); + +// const Pt3 o = node->bounds.origin; +// const Vec3 s = node->bounds.size * acc{0.5}; +// for(size_t ii = 0; ii < 8; ++ii){ +// (*node->children)[ii] = std::make_unique>(AABB3{o + mu[ii]*s, s}, node->idNode + mu3[ii]* 
math::ipow(2,depth)); +// } +//// std::cout << " d " << ipow(2,depth) << "\n"; +// } + +// std::array, 8> idsPerChild; +// for(size_t ii = 0; ii < idsPerChild.size(); ++ii){ +// idsPerChild[ii].reserve(idVoxels.size()); +// } + +// for(size_t ii = 0; ii < idVoxels.size(); ++ii){ +// bool insideAABB = false; +// for(size_t jj = 0; jj < node->children->size(); ++jj){ +// if(point_in_aabb(voxels[idVoxels[ii]].p,(*node->children)[jj]->bounds)){ +// idsPerChild[jj].emplace_back(idVoxels[ii]); +// insideAABB = true; +// break; +// } +// } +// if(!insideAABB){ +// std::cout << "not inside anything: " << voxels[idVoxels[ii]].p << "\n"; +// } +// } + +// for(size_t ii = 0; ii < idsPerChild.size(); ++ii){ +// if(idsPerChild[ii].size() > 0){ +// split_tree(voxels, (*node->children)[ii].get(), depth, std::move(idsPerChild[ii])); +// } +// } +//} + + +//} diff --git a/cpp-projects/base/geometry/plane3.hpp b/cpp-projects/base/geometry/plane3.hpp new file mode 100644 index 0000000..a9809e1 --- /dev/null +++ b/cpp-projects/base/geometry/plane3.hpp @@ -0,0 +1,50 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. 
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "geometry/point3.hpp" + +namespace tool::geo { + +template +struct Plane3; + +using Plane3f = Plane3; +using Plane3d = Plane3; + +template +struct Plane3{ + Vec3 normal = {1,0,0}; + acc distance{0}; +}; + +template +constexpr auto plane_equation(const Pt3 &pt, const Plane3 &plane) -> acc{ + return dot(pt, plane.normal) - plane.distance; +} +} diff --git a/cpp-projects/base/geometry/point.hpp b/cpp-projects/base/geometry/point.hpp new file mode 100644 index 0000000..66033cc --- /dev/null +++ b/cpp-projects/base/geometry/point.hpp @@ -0,0 +1,41 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. 
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "matrix.hpp" + +namespace tool::geo { + +template +using Point = RowVec; + +template +using Pt = Point; + + +} diff --git a/cpp-projects/base/geometry/point2.hpp b/cpp-projects/base/geometry/point2.hpp new file mode 100644 index 0000000..4044b6a --- /dev/null +++ b/cpp-projects/base/geometry/point2.hpp @@ -0,0 +1,77 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "geometry/point.hpp" + +namespace tool::geo { + +template +struct Point2; + +template +using Pt2 = Point2; +using Pt2f = Pt2; +using Pt2d = Pt2; + +template +using Vec2 = Pt2; +using Vec2f = Vec2; +using Vec2d = Vec2; + +template +struct Point2 : Point{ // two-component point layered on the generic Point base; Vec2 names the same type + + Point2() = default; + Point2(const Point2& other) = default; + Point2& operator=(const Point2& other) = default; + Point2(Point2&& other) = default; + Point2& operator=(Point2&& other) = default; + + constexpr Point2(const Point &p) noexcept{ // converting constructor: copies the array of a base Point + this->array = p.array; + } + constexpr Point2(Point &&p) noexcept{ // converting constructor: moves the array out of a base Point + this->array = std::move(p.array); + } + constexpr Point2(acc x, acc y = acc{0}) noexcept{ // component constructor; y defaults to zero and the constructor is not explicit, so a lone scalar converts implicitly + this->array = {x,y}; + } +}; + +template +constexpr auto operator*(const RowVec &l, const RowVec &r) noexcept -> RowVec{ // component-wise product of two row vectors (not a dot product) + return {{l.x()*r.x(),l.y()*r.y()}}; +} + +template +constexpr auto operator*(const ColVec &l, const ColVec &r) noexcept -> ColVec{ // component-wise product of two column vectors + return {{l.x()*r.x(),l.y()*r.y()}}; +} + +} diff --git a/cpp-projects/base/geometry/point3.hpp b/cpp-projects/base/geometry/point3.hpp new file mode 100644 index 0000000..b0bf069 --- /dev/null +++ b/cpp-projects/base/geometry/point3.hpp @@ -0,0 +1,95 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without
limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "geometry/point.hpp" + +namespace tool::geo { + +template +struct Point3; + +template +using Pt3 = Point3; +using Pt3f = Pt3; +using Pt3d = Pt3; + +template +using Vec3 = Pt3; +using Vec3f = Pt3; +using Vec3d = Pt3; + +using Col3 = Pt3; +using Col3f= Pt3f; + +template +using Normal3 = Vec3; + +template +struct Point3 : Point{ + + Point3() = default; + Point3(const Point3& other) = default; + Point3& operator=(const Point3& other) = default; + Point3(Point3&& other) = default; + Point3& operator=(Point3&& other) = default; + + constexpr Point3(const Point &p) noexcept{ + this->array = p.array; + } + constexpr Point3(Point &&p) noexcept{ + this->array = std::move(p.array); + } + constexpr Point3(acc x, acc y = acc{0}, acc z = acc{0}) noexcept{ + this->array = {x,y,z}; + } +}; + +template +constexpr auto operator*(const RowVec &l, const RowVec &r) noexcept -> RowVec{ + return {{l.x()*r.x(),l.y()*r.y(),l.z()*r.z()}}; +} + +template +constexpr auto 
operator*(const ColVec &l, const ColVec &r) noexcept -> ColVec{ + return {{l.x()*r.x(),l.y()*r.y(),l.z()*r.z()}}; +} + +template +constexpr auto cross(const RowVec &l, const RowVec &r) noexcept -> RowVec{ + static_assert(std::numeric_limits::is_iec559, "'cross' accepts only floating-point inputs"); + return {{ + l.y() * r.z() - l.z() * r.y(), + l.z() * r.x() - l.x() * r.z(), + l.x() * r.y() - l.y() * r.x() + }}; +} + +}; + + diff --git a/cpp-projects/base/geometry/point4.hpp b/cpp-projects/base/geometry/point4.hpp new file mode 100644 index 0000000..cec1144 --- /dev/null +++ b/cpp-projects/base/geometry/point4.hpp @@ -0,0 +1,91 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "geometry/point.hpp" + +namespace tool::geo { + +template +struct Point4; + +template +using Pt4 = Point4; +using Pt4f = Pt4; +using Pt4d = Pt4; + +template +using Vec4 = Pt4; +using Vec4f = Pt4; +using Vec4d = Pt4; + +using Col4 = Pt4; +using Col4f= Pt4f; + +using RGBA = Col4; +using RGBAf = Col4f; + + +template +struct Point4 : Point{ // four-component point layered on the generic Point base; Vec4, Col4 and RGBA name the same type + + Point4() = default; + Point4(const Point4& other) = default; + Point4& operator=(const Point4& other) = default; + Point4(Point4&& other) = default; + Point4& operator=(Point4&& other) = default; + + constexpr Point4(const Point &p) noexcept{ // converting constructor: copies the array of a base Point + this->array = p.array; + } + constexpr Point4(Point &&p) noexcept{ // converting constructor: moves the array out of a base Point + this->array = std::move(p.array); + } + + constexpr Point4(acc x, acc y = acc{0}, acc z = acc{0}, acc w = acc{0}) noexcept{ // component constructor; y, z and w default to zero, so a lone scalar converts implicitly + this->array = {x,y,z,w}; + } +}; + +template +constexpr auto operator*(const RowVec &l, const RowVec &r) noexcept -> RowVec{ // component-wise product of two row vectors (not a dot product) + return {{l.x()*r.x(),l.y()*r.y(),l.z()*r.z(),l.w()*r.w()}}; +} + +template +constexpr auto operator*(const ColVec &l, const ColVec &r) noexcept -> ColVec{ // component-wise product of two column vectors + return {{l.x()*r.x(),l.y()*r.y(),l.z()*r.z(),l.w()*r.w()}}; +} + +template +constexpr auto to_pt4(const Point &pt, acc w) noexcept -> Pt4{ // extends a point with a fourth component: copies pt.x(), pt.y(), pt.z() and appends w + return {pt.x(),pt.y(),pt.z(),w}; +} + + +} diff --git a/cpp-projects/base/geometry/point4.hpp.KGJtTk b/cpp-projects/base/geometry/point4.hpp.KGJtTk new file mode 100644 index 0000000..5c8590b --- /dev/null +++ b/cpp-projects/base/geometry/point4.hpp.KGJtTk @@ -0,0 +1,58 @@ + +/******************************************************************************* +** Toolbox-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction,
including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "geometry/point.hpp" + +namespace tool::geo { // NOTE(review): this file carries a random-looking suffix (.KGJtTk) and re-declares the names of point4.hpp -- looks like a leftover temporary copy; confirm it is meant to be tracked + +template +using Point4 = Point; // here Point4 is a plain alias of Point, not a derived struct + +template +using Pt4 = Point4; +using Pt4f = Pt4; +using Pt4d = Pt4; + +template +using Vec4 = Pt4; +using Vec4f = Pt4; +using Vec4d = Pt4; + +using Col4 = Pt4; +using Col4f= Pt4f; + +using RGBA = Col4; +using RGBAf = Col4f; + +template +constexpr auto to_pt4(Point4 pt, acc w) noexcept -> Pt4{ // takes pt by value, copies pt.x(), pt.y(), pt.z() and appends w as fourth component + return {pt.x(),pt.y(),pt.z(),w}; +} + +} diff --git a/cpp-projects/base/geometry/quaternion.hpp b/cpp-projects/base/geometry/quaternion.hpp new file mode 100644 index 0000000..1b0e973 --- /dev/null +++ b/cpp-projects/base/geometry/quaternion.hpp @@ -0,0 +1,310 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated
documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "geometry/point3.hpp" +#include "utility/math.hpp" + +namespace tool::geo { + +template +struct Quaternion; + +template +using Quat = Quaternion; +using Quatf = Quat; +using Quatd = Quat; + +template +struct Quaternion{ // quaternion with vector part (x,y,z) and scalar part w; default-initialised to (0,0,0,1) + + acc x{0}; /**< i factor */ + acc y{0}; /**< j factor */ + acc z{0}; /**< k factor */ + acc w{1}; /**< scalar */ + + Quaternion() = default; + constexpr Quaternion(acc x, acc y, acc z, acc w) : x(x), y(y), z(z), w(w){} + + static constexpr auto from_axis(const Vec3 &axe, const acc angle) noexcept -> Quaternion{ // axis-angle factory: the angle is halved and converted with deg_2_rad; axe is used as given (not normalized here) + const acc radAngle = deg_2_rad(angle/2); + const acc s = std::sin(radAngle); + return {axe.x() * s, axe.y() * s, axe.z() * s, std::cos(radAngle)}; + } + + // TODO: test?
+ static constexpr auto from_euler(const Vec3 &eulerAngles) noexcept -> Quaternion{ // product of three from_axis rotations about (1,0,0), (0,1,0), (0,0,1), fed with eulerAngles.x(), .y(), .z() in that order + return + from_axis({1,0,0}, eulerAngles.x()) * + from_axis({0,1,0}, eulerAngles.y()) * + from_axis({0,0,1}, eulerAngles.z()); + } + + // operators (compound forms forward to the free operators declared further down) + constexpr auto operator+() const noexcept -> Quaternion{return *this;} + constexpr auto operator-() const noexcept -> Quaternion{return {-x, -y, -z, -w};} + constexpr auto operator+=(acc value) noexcept -> Quaternion&{(*this) = *this + value;return *this;} + constexpr auto operator-=(acc value) noexcept -> Quaternion&{(*this) = *this - value;return *this;} + constexpr auto operator*=(acc value) noexcept -> Quaternion&{(*this) = *this * value;return *this;} + constexpr auto operator/=(acc value) -> Quaternion&{(*this) = *this / value;return *this;} + constexpr auto operator+=(const Quaternion &q) noexcept -> Quaternion&{(*this) = *this + q;return *this;} + constexpr auto operator-=(const Quaternion &q) noexcept -> Quaternion&{(*this) = *this - q;return *this;} + constexpr auto operator*=(const Quaternion &q) noexcept -> Quaternion&{(*this) = *this * q;return *this;} +}; + +// functions +template +constexpr auto add(const Quaternion &q1, const Quaternion &q2) noexcept -> Quaternion{ // component-wise sum + return {q1.x+q2.x,q1.y+q2.y,q1.z+q2.z,q1.w+q2.w}; +} + +template +constexpr auto add(const Quaternion &q, acc value) noexcept -> Quaternion{ // NOTE(review): the scalar is added to x (the i factor), not to w (the scalar part) -- confirm this is intended + return {q.x+value, q.y, q.z, q.w}; +} + +template +constexpr auto substract(const Quaternion &q1, const Quaternion &q2) noexcept -> Quaternion{ // component-wise difference + return {q1.x-q2.x,q1.y-q2.y,q1.z-q2.z,q1.w-q2.w}; +} + +template +constexpr auto substract(const Quaternion &q, acc value) noexcept -> Quaternion{ // NOTE(review): the scalar is subtracted from x only, mirroring add(q, value) -- confirm this is intended + return {q.x-value, q.y, q.z, q.w}; +} + +template +constexpr auto multiply(const Quaternion &q, acc value) noexcept -> Quaternion{ // scales all four components + return {q.x*value, q.y*value, q.z*value, q.w*value}; +} + +template +constexpr auto multiply(const Quaternion &q1, const Quaternion &q2) noexcept -> Quaternion{ // Hamilton product q1*q2 (not commutative) + return { + q1.w*q2.x + q1.x*q2.w
+ q1.y*q2.z - q1.z*q2.y, + q1.w*q2.y - q1.x*q2.z + q1.y*q2.w + q1.z*q2.x, + q1.w*q2.z + q1.x*q2.y - q1.y*q2.x + q1.z*q2.w, + q1.w*q2.w - q1.x*q2.x - q1.y*q2.y - q1.z*q2.z + }; +} + +template +constexpr auto divide(const Quaternion &q, acc value) noexcept -> Quaternion{ // divides every component by value; only a zero divisor returns q unchanged (negative divisors are honoured) + if(value != acc{0}){ + return {q.x/value, q.y/value, q.z/value, q.w/value}; + } + return q; +} + +template +constexpr auto compare(const Quaternion &l, const Quaternion &r, int ulp = 3) noexcept -> bool{ // true when all four components are almost_equal within the given ulp tolerance + return + almost_equal(l.x, r.x, ulp) && + almost_equal(l.y, r.y, ulp) && + almost_equal(l.z, r.z, ulp) && + almost_equal(l.w, r.w, ulp); +} + +template +constexpr auto square_norm(const Quaternion &q) noexcept -> acc{ // sum of the squared components + return (q.x*q.x + q.y*q.y + q.z*q.z + q.w*q.w); +} + +template +auto norm(const Quaternion &q) noexcept -> acc{ // square root of square_norm + return std::sqrt(square_norm(q)); +} + +template +constexpr auto conjugate(const Quaternion &q) noexcept -> Quaternion{ // negates the vector part, keeps w + return {-q.x,-q.y,-q.z,q.w}; +} + +template +constexpr auto dot(const Quaternion &q1, const Quaternion &q2) noexcept -> acc{ // four-component dot product + return {q1.x*q2.x + q1.y*q2.y + q1.z*q2.z + q1.w*q2.w}; +} + +template +auto normalize(const Quaternion &q) -> Quaternion{ // q divided by its norm; a zero-norm input comes back unchanged because divide skips a zero divisor + return divide(q, norm(q)); +} + +template +auto inverse(const Quaternion &q) -> Quaternion{ // conjugate of the normalized quaternion; for non-unit q this differs from the algebraic inverse conj(q)/|q|^2 by a scale factor + return conjugate(normalize(q)); +} + +template +auto angle(const Quaternion &q) noexcept -> acc{ // twice acos(w/norm), i.e. a value in radians, whereas from_axis converts its angle argument with deg_2_rad + return std::acos(q.w/norm(q))*2; +} + +template +constexpr auto axis(const Quaternion &q) noexcept -> Vec3{ // normalized vector part of the normalized quaternion + auto q1 = normalize(q); + return normalize(Vec3{q1.x,q1.y,q1.z}); +} + +template +constexpr auto pitch(const Quaternion &q) -> acc{ // atan2-based angle (radians); falls back to 2*atan2(x, w) when both atan2 arguments are almost zero + + const acc y = acc{2} * (q.y * q.z + q.w * q.x); + const acc x = q.w * q.w - q.x * q.x - q.y * q.y + q.z * q.z; + + if(almost_equal(x,acc{0}) && almost_equal(y,acc{0})){ + return acc{2} * std::atan2(q.x, q.w); + } + return std::atan2(y, x); +} + +template +constexpr auto yaw(const Quaternion &q) -> acc{ // asin-based angle (radians); the argument is clamped to [-1, 1] so rounding cannot push asin out of its domain + return std::asin(std::clamp(acc{-2} * (q.x * q.z - q.w * q.y),acc{-1},
acc{1})); +} + +template +constexpr auto roll(const Quaternion &q) -> acc{ // atan2-based angle (radians) + return std::atan2(acc{2} * (q.x * q.y + q.w * q.z), q.w * q.w + q.x * q.x - q.y * q.y - q.z * q.z); +} + +template +constexpr auto euler_angles(const Quaternion &q) -> Vec3{ // packs (pitch, yaw, roll); NOTE(review): these are radians while from_euler feeds deg_2_rad -- confirm callers convert + return {pitch(q),yaw(q),roll(q)}; +} + +template +auto slerp(const Quaternion &q1, const Quaternion &q2, acc t) -> Quaternion{ // spherical interpolation between the normalized inputs; t is used as given (not clamped here) + + // only unit quaternions are valid rotations. + Quaternion v1 = normalize(q1); + Quaternion v2 = normalize(q2); + + // compute the cosine of the angle between the two vectors. + acc dotV = dot(v1, v2); + + // if the dot product is negative, slerp won't take + // the shorter path. Note that v0 and -v0 are equivalent when + // the negation is applied to all four components. Fix by + // reversing one quaternion. + if (dotV < 0) { + v2 = -v2; + dotV = -dotV; + } + + const acc dotThreshold = static_cast(0.9995); + if (dotV > dotThreshold){ + // if the inputs are too close for comfort, linearly interpolate and normalize the result.
+ return normalize(v1 + (v2 - v1)*t); + } + + // Since dot is in range [0, DOT_THRESHOLD], acos is safe + acc theta_0 = std::acos(dotV); // theta_0 = angle between input vectors + acc theta = theta_0*t; // theta = angle between v0 and result + acc sin_theta = std::sin(theta); // compute this value only once + acc sin_theta_0 = std::sin(theta_0); // compute this value only once + acc s0 = std::cos(theta) - dotV * sin_theta / sin_theta_0; // == sin(theta_0 - theta) / sin(theta_0) + acc s1 = sin_theta / sin_theta_0; + + return (v1*s0) + (v2*s1); +} + + +// operators (thin wrappers over the named free functions above) +template +constexpr auto operator+(const Quaternion &q, acc value) noexcept -> Quaternion{ + return add(q,value); +} + +template +constexpr auto operator+(const Quaternion &q1, const Quaternion &q2) noexcept -> Quaternion{ + return add(q1,q2); +} + +template +constexpr auto operator-(const Quaternion &q, acc value) noexcept -> Quaternion{ + return substract(q,value); +} + +template +constexpr auto operator-(const Quaternion &q1, const Quaternion &q2) noexcept -> Quaternion{ + return substract(q1,q2); +} + +template +constexpr auto operator*(const Quaternion &q, acc value) noexcept -> Quaternion{ + return multiply(q, value); +} + +template +constexpr auto operator*(const Quaternion &q1, const Quaternion &q2) noexcept -> Quaternion{ + return multiply(q1,q2); +} + +template +constexpr auto operator/(const Quaternion &q, acc value) -> Quaternion{ + return divide(q, value); +} + +template +constexpr static auto operator==(const Quaternion &l, const Quaternion &r) -> bool{ // approximate equality: forwards to compare with its default ulp + return compare(l,r); +} + +template +auto operator<<(std::ostream &flux, const Quaternion &q) -> std::ostream&{ // prints the components as [x, y, z, w] + flux <<"[" << q.x << ", " << q.y << ", " << q.z << ", " << q.w << "]"; + return flux; +} + +} + + +///** +// * Unary division with other Quaternion. +// * +// * Warning: if the norm of y is zero, the result is +// * 4 NaNs, but maybe it should be inf.
+// */ +//template +//Quaternion operator/=(const Quaternion& y) { + +// T n2 = y.norm_squared(); + +// T at = _a * y.a() + _b * y.b() + _c * y.c() + _d * y.d(); +// T bt = -_a * y.b() + _b * y.a() - _c * y.d() + _d * y.c(); +// T ct = -_a * y.c() + _b * y.d() + _c * y.a() - _d * y.b(); +// T dt = -_a * y.d() - _b * y.c() + _c * y.b() + _d * y.a(); + +// _a = at / n2; +// _b = bt / n2; +// _c = ct / n2; +// _d = dt / n2; + +// return *this; +//} diff --git a/cpp-projects/base/geometry/ray3.hpp b/cpp-projects/base/geometry/ray3.hpp new file mode 100644 index 0000000..88135a8 --- /dev/null +++ b/cpp-projects/base/geometry/ray3.hpp @@ -0,0 +1,58 @@ + +///******************************************************************************* +//** Toolset-base ** +//** MIT License ** +//** Copyright (c) [2018] [Florian Lance] ** +//** ** +//** Permission is hereby granted, free of charge, to any person obtaining a ** +//** copy of this software and associated documentation files (the "Software"), ** +//** to deal in the Software without restriction, including without limitation ** +//** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +//** and/or sell copies of the Software, and to permit persons to whom the ** +//** Software is furnished to do so, subject to the following conditions: ** +//** ** +//** The above copyright notice and this permission notice shall be included in ** +//** all copies or substantial portions of the Software. ** +//** ** +//** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +//** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +//** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +//** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +//** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +//** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +//** DEALINGS IN THE SOFTWARE. ** +//** ** +//********************************************************************************/ + +//#pragma once + +//// local +//#include "geometry/point3.hpp" + +//namespace tool::geo { + + +//template +//struct Ray3{ + +// Ray3() = default; + +// constexpr Ray3(const Pt3 &o, const Vec3 &d) noexcept : origin(o), direction(d){ +// } + +// void normalize_direction(){ +// direction.normalize();; +// } + +// Pt3 origin; +// Vec3 direction; +//}; + + +//template +//Ray3 from_points(const Pt3 &from, const Pt3 &to){ +// return Ray3(from, normalized(vec(from,to))); +//} + + +//} diff --git a/cpp-projects/base/geometry/raycast.hpp b/cpp-projects/base/geometry/raycast.hpp new file mode 100644 index 0000000..54c0faa --- /dev/null +++ b/cpp-projects/base/geometry/raycast.hpp @@ -0,0 +1,214 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. 
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +//#include "geometry/point3.hpp" +//#include "geometry/ray3.hpp" +//#include "geometry/triangle3.hpp" +//#include "geometry/plane3.hpp" +//#include "geometry/aabb3.hpp" + +/*namespace tool::geo { + +template +struct RaycastResult { + + Vec3 point = {0,0,0}; + Vec3 normal = {0, 0, 1}; + acc t = acc{-1}; + bool hit = false; +}; + +template +acc raycast_plane(const Plane3 &plane, const Ray3 &ray){ + acc nd = dot(ray.direction, plane.normal); + acc pn = dot(ray.origin, plane.normal); + + if(nd >= acc{0}){ + return acc{-1}; + } + acc t = (plane.distance - pn) / nd; + if(t >= acc{0}){ + return t; + } + + return acc{-1}; +} + +template +bool raycast_plane(const Plane3& plane, const Ray3& ray, RaycastResult* outResult) { + + if(outResult != nullptr){ + *outResult = RaycastResult(); + } + + acc nd = dot(ray.direction, plane.normal); + acc pn = dot(ray.origin, plane.normal); + + // nd must be negative, and not 0 + // if nd is positive, the ray and plane normals + // point in the same direction. No intersection. 
+ if (!(nd < 0)) { + return false; + } + + acc t = (plane.distance - pn) / nd; + + // t must be positive + if (!(t < 0)) { + if (outResult != 0) { + outResult->t = t; + outResult->hit = true; + outResult->point = ray.origin + ray.direction * t; + outResult->normal = normalize(plane.normal); + } + return true; + } + + return false; +} + +template +acc raycast_triangle(const Triangle3 &tri, const Ray3 &ray){ + + Plane3 plane = from_triangle(tri); + acc t = raycast_plane(plane, ray); + if(t < acc{0}){ + return t; + } + + Pt3 result = ray.origin + ray.direction * t; + Pt3 bary = barycentric(result, tri); + if( bary.x() >= 0.f && bary.x() <= 1.f && + bary.y() >= 0.f && bary.y() <= 1.f && + bary.z() >= 0.f && bary.z() <= 1.f){ + return t; + } + return acc{-1}; +} + +template +bool raycast_triangle(const Triangle3 &triangle, const Ray3& ray, RaycastResult *outResult){ + + if(outResult != nullptr){ + *outResult = RaycastResult(); + } + + Plane3 plane = from_triangle(triangle); + RaycastResult planeResult; + if (!raycast_plane(plane, ray, &planeResult)) { + return false; + } + + acc t = planeResult.t; + Pt3 result = ray.origin + ray.direction * t; + Vec3 barycentricPt = barycentric(result, triangle); + if (!(barycentricPt.x() < 0) && !(barycentricPt.x() > 1) && + !(barycentricPt.y() < 0) && !(barycentricPt.y() > 1) && + !(barycentricPt.z() < 0) && !(barycentricPt.z() > 1)) { + + if (outResult != nullptr) { + outResult->t = t; + outResult->hit = true; + outResult->point = ray.origin + ray.direction * t; + outResult->normal = plane.normal; + } + + return true; + } + + return false; +} + +template +bool raycast_aabb(const AABB3& aabb, const Ray3& ray, RaycastResult* outResult) { + + if(outResult != nullptr){ + *outResult = RaycastResult(); + } + + Pt3 pmin = aabb.min(); + Pt3 pmax = aabb.max(); + + // Any component of direction could be 0! 
+ // Address this by using a small number, close to + // 0 in case any of directions components are 0 + acc t1 = (pmin.x() - ray.origin.x()) / (almost_equal(ray.direction.x(), acc{0}) ? static_cast(0.00001) : ray.direction.x()); + acc t2 = (pmax.x() - ray.origin.x()) / (almost_equal(ray.direction.x(), acc{0}) ? static_cast(0.00001) : ray.direction.x()); + acc t3 = (pmin.y() - ray.origin.y()) / (almost_equal(ray.direction.y(), acc{0}) ? static_cast(0.00001) : ray.direction.y()); + acc t4 = (pmax.y() - ray.origin.y()) / (almost_equal(ray.direction.y(), acc{0}) ? static_cast(0.00001) : ray.direction.y()); + acc t5 = (pmin.z() - ray.origin.z()) / (almost_equal(ray.direction.z(), acc{0}) ? static_cast(0.00001) : ray.direction.z()); + acc t6 = (pmax.z() - ray.origin.z()) / (almost_equal(ray.direction.z(), acc{0}) ? static_cast(0.00001) : ray.direction.z()); + + acc tmin = std::max(std::max(std::min(t1, t2), std::min(t3, t4)), std::min(t5, t6)); + acc tmax = std::min(std::min(std::max(t1, t2), std::max(t3, t4)), std::max(t5, t6)); + + // if tmax < 0, ray is intersecting AABB + // but entire AABB is behing it's origin + if (tmax < 0) { + return false; + } + + // if tmin > tmax, ray doesn't intersect AABB + if (tmin > tmax) { + return false; + } + + acc result = tmin; + + // If tmin is < 0, tmax is closer + if (tmin < 0) { + result = tmax; + } + + if (outResult != nullptr) { + outResult->t = result; + outResult->hit = true; + outResult->point = ray.origin + ray.direction * result; + + std::array,6> normals = { + Vec3(-1, 0, 0), + Vec3(1, 0, 0), + Vec3(0, -1, 0), + Vec3(0, 1, 0), + Vec3(0, 0, -1), + Vec3(0, 0, 1) + }; + std::array t = {t1, t2, t3, t4, t5, t6}; + for(size_t ii = 0; ii < normals.size(); ++ii){ + if(almost_equal(result, t[ii])){ + outResult->normal = normals[ii]; + } + } + } + + + return true; +} +} +*/ diff --git a/cpp-projects/base/geometry/rectangle.hpp b/cpp-projects/base/geometry/rectangle.hpp new file mode 100644 index 0000000..0be6727 --- /dev/null +++ 
b/cpp-projects/base/geometry/rectangle.hpp @@ -0,0 +1,68 @@ + +///******************************************************************************* +//** Toolset-base ** +//** MIT License ** +//** Copyright (c) [2018] [Florian Lance] ** +//** ** +//** Permission is hereby granted, free of charge, to any person obtaining a ** +//** copy of this software and associated documentation files (the "Software"), ** +//** to deal in the Software without restriction, including without limitation ** +//** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +//** and/or sell copies of the Software, and to permit persons to whom the ** +//** Software is furnished to do so, subject to the following conditions: ** +//** ** +//** The above copyright notice and this permission notice shall be included in ** +//** all copies or substantial portions of the Software. ** +//** ** +//** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +//** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +//** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +//** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +//** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +//** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +//** DEALINGS IN THE SOFTWARE. 
** +//** ** +//********************************************************************************/ + +//#pragma once + +//// local +//#include "geometry/point2.hpp" + +//namespace tool::geo { + +//template +//struct Rectangle2{ + +// Rectangle2(const Pt2 &o, const Vec2 &s) : origin(o), size(s) { +// } + +// Vec2 min() const{ +// const Vec2 p2 = origin + size; +// return Vec2(std::min(origin.x(),p2.x()), std::min(origin.y(),p2.y())); +// } + +// Vec2 max() const{ +// const Vec2 p2 = origin + size; +// return Vec2(std::max(origin.x(),p2.x()), std::max(origin.y(),p2.y())); +// } + +// static Rectangle2 from_min_max(const Pt2 &min, const Pt2 &max) { +// return Rectangle2(min, max-min); +// } + +// Pt2 origin; +// Vec2 size = {1,1}; +//}; + +//template +//struct OrientedRectangle2{ + +// OrientedRectangle2(const Pt2 &p, const Vec2 &ext, acc rot = 0) : position(p), halfExtends(ext), rotation(rot){ +// } + +// Pt2 position; +// Vec2 halfExtends = {1,1}; +// acc rotation = 0; +//}; +//} diff --git a/cpp-projects/base/geometry/sphere.hpp b/cpp-projects/base/geometry/sphere.hpp new file mode 100644 index 0000000..37ee286 --- /dev/null +++ b/cpp-projects/base/geometry/sphere.hpp @@ -0,0 +1,50 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. 
** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "geometry/point3.hpp" + +namespace tool::geo { + +template +struct Sphere{ // sphere described by a centre and a radius + Vec3 position; // centre of the sphere + acc radius; // no default member initializer: set it before use +}; + +template +constexpr auto point_in_sphere(const Pt3 &p, const Sphere &s) -> bool{ // compares the squared distance to the centre with the squared radius (no square root); strict '<', so a point exactly on the surface counts as outside + return (p-s.position).square_norm() < (s.radius*s.radius); +} + +//template +//Pt3 closest_point(const Sphere &s, const Pt3 &p){ +// return (normalize(p - s.position) * s.radius) + s.position; +//} + +} diff --git a/cpp-projects/base/geometry/transform.hpp b/cpp-projects/base/geometry/transform.hpp new file mode 100644 index 0000000..0c6109d --- /dev/null +++ b/cpp-projects/base/geometry/transform.hpp @@ -0,0 +1,237 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included
in **
+** all copies or substantial portions of the Software.                        **
+**                                                                            **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING    **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER        **
+** DEALINGS IN THE SOFTWARE.                                                  **
+**                                                                            **
+********************************************************************************/
+
+#pragma once
+
+// std
+#include <cmath>
+
+// local
+#include "geometry/matrix4.hpp"
+#include "geometry/quaternion.hpp"
+#include "utility/vector.hpp"
+
+namespace tool::geo {
+
+    // NOTE(review): the template parameter lists of this header were missing in
+    // this dump and have been reconstructed from usage - confirm against the repository.
+
+    /**
+     * @brief Node of a transform hierarchy (work in progress).
+     *
+     * Stores a local rotation / translation / scaling and the three local axes,
+     * plus non-owning links to a parent and to children. No operation is
+     * implemented yet: the commented blocks below are an old camera
+     * implementation and a list of operations (Unity-like naming) kept as a to-do.
+     */
+    template <typename acc>
+    class Transform {
+
+    private:
+
+        Quat<acc> rotation    = {0,0,0,1}; // identity, stored as {x,y,z,w}
+        Vec3<acc> translation = {0,0,0};
+        Vec3<acc> scaling     = {1,1,1};
+
+        Vec3<acc> right   = {1,0,0};
+        Vec3<acc> up      = {0,1,0};
+        Vec3<acc> forward = {0,0,1}; // was misspelled "foward"; private and not referenced anywhere
+
+    public:
+
+        Transform *parent = nullptr;
+        // NOTE(review): the element type was lost in this dump; Transform* is
+        // assumed to mirror `parent` - confirm.
+        std::vector<Transform*> children;
+
+
+//        constexpr Vec3 local_euler_angles()const{
+//            return rotation.
+//        }
+
+//    Camera(){
+//        right = geo::normalized(geo::cross(up, direction));
+//    }
+
+//    Camera(Pt3 p, Vec3 d, Vec3 u = {0.,1.,0.}) :
+//        position(p),up(u),direction(d), rPosition(p), rUp(u),rDirection(d) {
+//        right = geo::normalized(geo::cross(up, direction));
+//    }
+
+//    void set_direction(double yaw, double pitch, double roll){
+//        direction = Mat4::axis_angle(up, yaw).multiply_vector(direction);
+//        direction = Mat4::axis_angle(right, pitch).multiply_vector(direction);
+//        direction.normalize();
+//        up = geo::normalized(geo::cross(direction, right));
+//        up = Mat4::axis_angle(direction, roll).multiply_vector(up);
+//        up.normalize();
+//        right = geo::normalized(geo::cross(up, direction));
+//    }
+
+//    void move_up(double amount){
+//        position += up*amount;
+//    }
+
+//    void move_down(double amount){
+//        position -= up*amount;
+//    }
+
+//    void move_front(double amount){
+//        position += direction*amount;
+//    }
+
+//    void move_left(double amount){
+//        position += geo::cross(direction, up)*amount;
+//    }
+
+//    void move_right(double amount){
+//        position -= geo::cross(direction, up)*amount;
+//    }
+
+//    void move_back(double amount){
+//        position -= direction*amount;
+//    }
+
+//    geo::Mat4 view(){
+//        return Mat4::look_at(position, direction, up);
+//    }
+
+//    void reset(){
+//        position = rPosition;
+//        direction = rDirection;
+//        up = rUp;
+//        right = geo::normalized(geo::cross(up, direction));
+//    }
+
+//    Pt3 position = {0.,0.,0.};
+//    Vec3 up = {0.,1.,0.};
+//    Vec3 direction = {0.,0.,1.};
+//    Vec3 right;
+
+
+//    localEulerAngles           The rotation as Euler angles in degrees relative to the parent transform's rotation.
+//    localPosition              Position of the transform relative to the parent transform.
+//    localRotation              The rotation of the transform relative to the transform rotation of the parent.
+//    localScale                 The scale of the transform relative to the GameObjects parent.
+
+//    DetachChildren             Unparents all children.
+//    Find                       Finds a child by n and returns it.
+//    GetChild                   Returns a transform child by index.
+//    GetSiblingIndex            Gets the sibling index.
+//    InverseTransformDirection  Transforms a direction from world space to local space. The opposite of Transform.TransformDirection.
+//    InverseTransformPoint      Transforms position from world space to local space.
+//    InverseTransformVector     Transforms a vector from world space to local space. The opposite of Transform.TransformVector.
+//    IsChildOf                  Is this transform a child of parent?
+//    LookAt                     Rotates the transform so the forward vector points at /target/'s current position.
+//    Rotate                     Use Transform.Rotate to rotate GameObjects in a variety of ways. The rotation is often provided as an Euler angle and not a Quaternion.
+//    RotateAround               Rotates the transform about axis passing through point in world coordinates by angle degrees.
+//    SetAsFirstSibling          Move the transform to the start of the local transform list.
+//    SetAsLastSibling           Move the transform to the end of the local transform list.
+//    SetParent                  Set the parent of the transform.
+//    SetPositionAndRotation     Sets the world space position and rotation of the Transform component.
+//    SetSiblingIndex            Sets the sibling index.
+//    TransformDirection         Transforms direction from local space to world space.
+//    TransformPoint             Transforms position from local space to world space.
+//    TransformVector            Transforms vector from local space to world space.
+//    Translate                  Moves the transform in the direction and distance of translation.
+
+
+
+    };
+
+
+    /**
+     * @brief Build the 4x4 rotation matrix corresponding to a quaternion.
+     *
+     * The formula is the one for unit quaternions; q is not normalized here.
+     * The 16 values are listed in the order expected by the Mat4 initializer,
+     * the last line/column being {0,0,0,1} (no translation).
+     *
+     * @param q quaternion, components read as q.x, q.y, q.z, q.w
+     * @return rotation matrix
+     */
+    template <typename acc>
+    constexpr Mat4<acc> to_mat4(const Quat<acc> &q){
+
+        const acc xx = q.x * q.x;
+        const acc xy = q.x * q.y;
+        const acc xz = q.x * q.z;
+        const acc xw = q.x * q.w;
+
+        const acc yy = q.y * q.y;
+        const acc yz = q.y * q.z;
+        const acc yw = q.y * q.w;
+
+        const acc zz = q.z * q.z;
+        const acc zw = q.z * q.w;
+
+        return {
+            1 - 2 * ( yy + zz ), 2 * ( xy + zw ),     2 * ( xz - yw ),     0,
+            2 * ( xy - zw ),     1 - 2 * ( xx + zz ), 2 * ( yz + xw ),     0,
+            2 * ( xz + yw ),     2 * ( yz - xw ),     1 - 2 * ( xx + yy ), 0,
+            0,                   0,                   0,                   1
+        };
+    }
+
+    /**
+     * @brief Convert the rotation part of a 4x4 matrix to a quaternion.
+     *
+     * Inverse of to_mat4, using the same element layout. With that layout:
+     *   1 + m00 + m11 + m22 = 4w²      m12 - m21 = 4xw     m01 + m10 = 4xy
+     *   1 + m00 - m11 - m22 = 4x²      m20 - m02 = 4yw     m02 + m20 = 4xz
+     *   1 - m00 + m11 - m22 = 4y²      m01 - m10 = 4zw     m12 + m21 = 4yz
+     *   1 - m00 - m11 + m22 = 4z²
+     * The largest of the four squared terms is used as pivot. They sum to 4
+     * for a rotation matrix, so the pivot is >= 1 and the divisor never
+     * vanishes. The former version only used the w pivot (its x branch was
+     * guarded by "trace < 0", which cannot happen with a 4x4 trace) and
+     * divided by zero for 180 degree rotations.
+     *
+     * The result is returned with w >= 0, as the former version did
+     * (q and -q describe the same rotation).
+     *
+     * @param m matrix whose upper-left 3x3 block is expected to be a pure rotation (not checked)
+     * @return quaternion {x, y, z, w}
+     */
+    template <typename acc>
+    Quat<acc> to_quaternion(const Mat4<acc> &m){
+
+        const acc m00 = m.at(0,0);
+        const acc m01 = m.at(0,1);
+        const acc m02 = m.at(0,2);
+        const acc m10 = m.at(1,0);
+        const acc m11 = m.at(1,1);
+        const acc m12 = m.at(1,2);
+        const acc m20 = m.at(2,0);
+        const acc m21 = m.at(2,1);
+        const acc m22 = m.at(2,2);
+
+        const acc fourWW = acc{1} + m00 + m11 + m22;
+        const acc fourXX = acc{1} + m00 - m11 - m22;
+        const acc fourYY = acc{1} - m00 + m11 - m22;
+        const acc fourZZ = acc{1} - m00 - m11 + m22;
+
+        acc x{};
+        acc y{};
+        acc z{};
+        acc w{};
+
+        if(fourWW >= fourXX && fourWW >= fourYY && fourWW >= fourZZ){
+            // w pivot: same expressions as the former trace-based branch
+            const acc s = acc{0.5} / std::sqrt(fourWW); // 1/(4w)
+            x = (m12 - m21)*s;
+            y = (m20 - m02)*s;
+            z = (m01 - m10)*s;
+            w = acc{0.25} / s;
+        }else if(fourXX >= fourYY && fourXX >= fourZZ){
+            const acc s = std::sqrt(fourXX)*acc{2}; // 4|x|
+            x = acc{0.25}*s;
+            y = (m10 + m01)/s;
+            z = (m20 + m02)/s;
+            w = (m12 - m21)/s;
+        }else if(fourYY >= fourZZ){
+            const acc s = std::sqrt(fourYY)*acc{2}; // 4|y|
+            x = (m01 + m10)/s;
+            y = acc{0.25}*s;
+            z = (m12 + m21)/s;
+            w = (m20 - m02)/s;
+        }else{
+            const acc s = std::sqrt(fourZZ)*acc{2}; // 4|z|
+            x = (m02 + m20)/s;
+            y = (m12 + m21)/s;
+            z = acc{0.25}*s;
+            w = (m01 - m10)/s;
+        }
+
+        // keep the sign convention of the w-pivot branch
+        if(w < acc{0}){
+            x = -x;
+            y = -y;
+            z = -z;
+            w = -w;
+        }
+
+        return {x, y, z, w};
+
+        // reference kept from the original file (NOTE(review): looks like glm's quat_cast - confirm):
+
+//        T fourXSquaredMinus1 = m[0][0] - m[1][1] - m[2][2];
+//        T fourYSquaredMinus1 = m[1][1] - m[0][0] - m[2][2];
+//        T fourZSquaredMinus1 = m[2][2] - m[0][0] - m[1][1];
+//        T fourWSquaredMinus1 = m[0][0] + m[1][1] + m[2][2];
+
+//        int biggestIndex = 0;
+//        T fourBiggestSquaredMinus1 = fourWSquaredMinus1;
+//        if(fourXSquaredMinus1 > fourBiggestSquaredMinus1)
+//        {
+//            fourBiggestSquaredMinus1 = fourXSquaredMinus1;
+//            biggestIndex = 1;
+//        }
+//        if(fourYSquaredMinus1 > fourBiggestSquaredMinus1)
+//        {
+//            fourBiggestSquaredMinus1 = fourYSquaredMinus1;
+//            biggestIndex = 2;
+//        }
+//        if(fourZSquaredMinus1 > fourBiggestSquaredMinus1)
+//        {
+//            fourBiggestSquaredMinus1 = fourZSquaredMinus1;
+//            biggestIndex = 3;
+//        }
+
+//        T biggestVal = sqrt(fourBiggestSquaredMinus1 + static_cast(1)) * static_cast(0.5);
+//        T mult = static_cast(0.25) / biggestVal;
+
+//        switch(biggestIndex)
+//        {
+//        case 0:
+//            return qua(biggestVal, (m[1][2] - m[2][1]) * mult, (m[2][0] - m[0][2]) * mult, (m[0][1] - m[1][0]) * mult);
+//        case 1:
+//            return qua((m[1][2] - m[2][1]) * mult, biggestVal, (m[0][1] + m[1][0]) * mult, (m[2][0] + m[0][2]) * mult);
+//        case 2:
+//            return qua((m[2][0] - m[0][2]) * mult, (m[0][1] + m[1][0]) * mult, biggestVal, (m[1][2] + m[2][1]) * mult);
+//        case 3:
+//            return qua((m[0][1] - m[1][0]) * mult, (m[2][0] + m[0][2]) * mult, (m[1][2] + m[2][1]) * mult, biggestVal);
+//        default: // Silence a -Wswitch-default warning in GCC. Should never actually get here. Assert is just for sanity.
+//            assert(false);
+//            return qua(1, 0, 0, 0);
+//        }
+    }
+
+}
diff --git a/cpp-projects/base/geometry/transformation.hpp b/cpp-projects/base/geometry/transformation.hpp
new file mode 100644
index 0000000..c5fc137
--- /dev/null
+++ b/cpp-projects/base/geometry/transformation.hpp
@@ -0,0 +1,1000 @@
+
+//#pragma once
+
+///**
+// * \file Transformation.h
+// * \brief defines Mat3 and Transformation
+// * \author Florian Lance
+// * \date 01/02/15
+// */
+
+//// std
+//#include
+//#include
+
+//// hbp
+////#include "Plane3.h"
+//#include "point3.hpp"
+
+//namespace geo {
+
+
+//template
+//class Mat3;
+
+//template
+//class Mat4;
+
+//template
+///**
+// * @brief The Quaternion class
+// */
+//class Quaternion
+//{
+//public :
+
+//    Quaternion(const Quaternion&) = default;
+//    Quaternion(Quaternion&&) = default;
+//    Quaternion& operator=(const Quaternion&) = default;
+//    Quaternion& operator=(Quaternion&&) = default;
+//    virtual ~Quaternion() = default;
+
+//    /**
+//     * @brief Quaternion default constructor
+//     */
+//    Quaternion() : w(1),x(0),y(0),z(0)
+//    {}
+
+//    /**
+//     * @brief Constructor which init the quaternion with an axe and an angle
+//     * @param [in] axe : normalize axe vector
+//     * @param [in] angle : in degrees
+//     */
+//    Quaternion(const Vec3 &axe, const TAcc angle)
+//    {
+//        init_with_angle_and_axis(axe, angle);
+//    }
+
+//    /**
+//     * @brief Init the quaternion with an axe and an angle
+//     * @param [in] axe : normalize axe vector
+//     * @param [in] angle : in degrees
+// */ +// void init_with_angle_and_axis(const Vec3 &axe, const TAcc angle) +// { +// TAcc radAngle = deg2rad(angle/2); +// TAcc s = std::sin(radAngle); +// x = axe.x() * s; +// y = axe.y() * s; +// z = axe.z() * s; +// w = cos(radAngle); +// } + +// /** +// * @brief Convert quaternion to an axe and an angle +// * @param [out] angle : in degrees +// * @param [out] axe : rotation axe vector +// */ +// void convert_axe_and_angle(TAcc &angle, Vec3 &axe) const +// { +// normalize(); +// angle = rad2deg(acos(w) * 2); +// axe.x() = x; +// axe.y() = y; +// axe.z() = z; +// } + +// /** +// * @brief Return the 4x4 matrix convertion of the quaternion +// */ +// Mat4 convert_to_matrix() const +// { +// Mat4 mat; +// TAcc xx = x * x; +// TAcc xy = x * y; +// TAcc xz = x * z; +// TAcc xw = x * w; + +// TAcc yy = y * y; +// TAcc yz = y * z; +// TAcc yw = y * w; + +// TAcc zz = z * z; +// TAcc zw = z * w; + +// mat[0] = 1 - 2 * ( yy + zz ); +// mat[1] = 2 * ( xy - zw ); +// mat[2] = 2 * ( xz + yw ); + +// mat[4] = 2 * ( xy + zw ); +// mat[5] = 1 - 2 * ( xx + zz ); +// mat[6] = 2 * ( yz - xw ); + +// mat[8] = 2 * ( xz - yw ); +// mat[9] = 2 * ( yz + xw ); +// mat[10] = 1 - 2 * ( xx + yy ); + +// mat[3] = mat[7] = mat[11] = mat[12] = mat[13] = mat[14] = 0; +// mat[15] = 1; + +// return mat; +// } + +// /** +// * @brief Return the norm +// */ +// TAcc square_norm() const +// { +// return (w*w + x*x + y*y + z*z); +// } + +// /** +// * @brief Return the norm +// */ +// TAcc norm() const +// { +// return sqrt(square_norm()); +// } + +// /** +// * @brief Return the norm +// * @param [in] q +// */ +// static TAcc norm(const Quaternion &q) +// { +// return q.norm(); +// } + +// /** +// * @brief Normalize the quaternion +// */ +// void normalize() +// { +// TAcc magnitude = norm(); +// w /= magnitude; +// x /= magnitude; +// y /= magnitude; +// z /= magnitude; +// } + +// /** +// * @brief Return the multiplication of two quaternion +// * @param [in] q +// */ +// Quaternion 
&operator*=(const Quaternion &q) +// { +// multiply(q); +// return *this; +// } + +// /** +// * @brief multiply with the input quaternion +// * @param [in] q +// */ +// void multiply(const Quaternion &q) +// { +// w = w*q.w - x*q.x - y*q.y - z*q.z; +// x = w*q.x + x*q.w + y*q.z - z*q.y; +// y = w*q.y - x*q.z + y*q.w + z*q.x; +// z = w*q.z + x*q.y - y*q.x + z*q.w; +// } + + +// TAcc w; /**< scalar */ +// TAcc x; /**< i factor */ +// TAcc y; /**< j factor */ +// TAcc z; /**< k factor */ + +//}; + + +//template +///** +// * @brief A 3x3 rotation matrix class +// */ +//class Mat3 +//{ +//public : + +// Mat3(const Mat3&) = default; +// Mat3(Mat3&&) = default; +// Mat3& operator=(const Mat3&) = default; +// Mat3& operator=(Mat3&&) = default; +// virtual ~Mat3() = default; + +// /** +// * @brief Mat3 constructor +// * @param identity : if true init matrice with identity +// */ +// Mat3(bool identity = false) +// { +// if(identity) +// m_mat[0] = m_mat[4] = m_mat[8] = 1; +// } + +// /** +// * @brief operator = +// * @param array +// * @return +// */ +// Mat3& operator=(std::array array) noexcept +// { +// m_mat = array; +// return *this; +// } + +// /** +// * @brief operator += , add another matrix +// * @param [in] mat : matrix to be added +// */ +// void operator+=(const Mat3& mat) +// { +// for(int ii = 0; ii < 9; ++ii) +// m_mat[ii] += mat.m_mat[ii]; +// } + +// /** +// * @brief Return a value of the matrix +// * @param [in] idRow +// * @param [in] idCol +// */ +// inline TAcc& at(cint idRow, cint idCol) {return m_mat[idRow*3 + idCol];} + + +// /** +// * @brief Return a value of the matrix +// * @param [in] idRow +// * @param [in] idCol +// */ +// inline const TAcc& at(cint idRow, cint idCol) const {return m_mat[idRow*3 + idCol];} + + +// /** +// * @brief Return a value of the matrix +// * @param [in] idRow +// * @param [in] idCol +// */ +// inline TAcc& operator()(cint idRow, cint idCol){return m_mat[idRow*3 + idCol]; } + + +// /** +// * @brief Return a value of the 
matrix +// * @param [in] idRow +// * @param [in] idCol +// */ +// inline const TAcc& operator()(cint idRow, cint idCol) const {return m_mat[idRow*3 + idCol];} + + +// /** +// * @brief Return a value of the matrix +// * @param [in] id +// */ +// inline TAcc& operator[](cint id){ return m_mat[id];} + +// /** +// * @brief Return a value of the matrix +// * @param [in] id +// */ +// const TAcc& operator[](cint id) const { return m_mat[id];} + +// /** +// * @brief mat +// * @return +// */ +// inline std::array& mat() noexcept {return m_mat;} + +// /** +// * @brief mat +// * @return +// */ +// inline const std::array& mat() const noexcept {return m_mat;} + +// /** +// * @brief operator *=, apply a factor on all the matrix +// * @param [in] f +// */ +// Mat3& operator*=(const TAcc f) +// { +// for(auto &&elem : m_mat) +// elem *= f; + +// return *this; +// } + +// /** +// * @brief Apply the rotation to the input point +// * @param [in,out] pt +// */ +// inline void apply_rotation(Pt3 &pt) const noexcept +// { +// pt = Point3(m_mat[0] * pt.x() + m_mat[1] * pt.y() + m_mat[2] * pt.z(), +// m_mat[3] * pt.x() + m_mat[4] * pt.y() + m_mat[5] * pt.z(), +// m_mat[6] * pt.x() + m_mat[7] * pt.y() + m_mat[8] * pt.z()); +// } + +// /** +// * @brief Return the determinant of the rotation matrix +// */ +// TAcc determinant() const +// { +// return at(0,0)*(at(1,1)*at(2,2)-at(2,1)*at(1,2)) +// -at(0,1)*(at(1,0)*at(2,2)-at(1,2)*at(2,0)) +// +at(0,2)*(at(1,0)*at(2,1)-at(1,1)*at(2,0)); +// } + +// /** +// * @brief Transpose the rotation matrix +// */ +// void transpose() +// { +// Mat3 tMat; +// tMat.at(0,0) = at(0,0); +// tMat.at(0,1) = at(1,0); +// tMat.at(1,0) = at(0,1); +// tMat.at(1,1) = at(1,1); +// tMat.at(1,2) = at(2,1); +// tMat.at(2,1) = at(1,2); +// tMat.at(2,2) = at(2,2); +// (*this) = std::move(tMat); +// } + + +// /** +// * @brief Invert the matrix +// */ +// void invert() +// { +// TAcc det = determinant(); +// if(almost_equal(det,0, 3)) +// { +// std::cerr << "-Error : 
Mat3::invert -> determinant is null. " << std::endl; +// return; +// } + +// TAcc invdet = 1/det; +// Mat3 invMat; +// invMat.at(0,0) = (at(1,1)*at(2,2)-at(2,1)*at(1,2))*invdet; +// invMat.at(0,1) = (at(0,2)*at(2,1)-at(0,1)*at(2,2))*invdet; +// invMat.at(0,2) = (at(0,1)*at(1,2)-at(0,2)*at(1,1))*invdet; +// invMat.at(1,0) = (at(1,2)*at(2,0)-at(1,0)*at(2,2))*invdet; +// invMat.at(1,1) = (at(0,0)*at(2,2)-at(0,2)*at(2,0))*invdet; +// invMat.at(1,2) = (at(1,0)*at(0,2)-at(0,0)*at(1,2))*invdet; +// invMat.at(2,0) = (at(1,0)*at(2,1)-at(2,0)*at(1,1))*invdet; +// invMat.at(2,1) = (at(2,0)*at(0,1)-at(0,0)*at(2,1))*invdet; +// invMat.at(2,2) = (at(0,0)*at(1,1)-at(1,0)*at(0,1))*invdet; + +// (*this) = std::move(invMat); +// } + +// /** +// * @brief display +// * @param [in,out] flux +// */ +// inline void display(std::ostream &flux) const{flux << "[" << m_mat[0] << " " << m_mat[1] << " " << m_mat[2] << "]" << "\n" +// << "[" << m_mat[3] << " " << m_mat[4] << " " << m_mat[5] << "]" << "\n" +// << "[" << m_mat[6] << " " << m_mat[7] << " " << m_mat[8] << "]" << "\n";} + +// /** +// * @brief Display the matrix +// */ +// void display()const +// { +// for(int ii = 0; ii < 9; ++ii) +// { +// if(ii%3 == 0 && ii !=0) +// std::cout << "\n"; + +// std::cout << m_mat[ii] << " "; +// } +// } + +//private : + +// std::array m_mat = {{0,0,0,0,0,0,0,0,0}}; +//}; + + +//template +///** +// * @brief A 4x4 transformation matrix class +// */ +//class Mat4 +//{ +//public : + + +// constexpr Mat4(const Mat4&) = default; +// Mat4(Mat4&&) = default; +// Mat4& operator=(const Mat4&) = default; +// Mat4& operator=(Mat4&&) = default; +// virtual ~Mat4() = default; + +// /** +// * @brief Mat4 constructor +// * @param identity : if true init matrice with identity +// */ +// constexpr Mat4(cbool identity = false) noexcept +// { +// if(identity) +// m_mat[0] = m_mat[5] = m_mat[10] = m_mat[15] = 1; +// } + +// /** +// * @brief Mat4 constructor +// * @param [in] rotation +// * @param [in] translation +// */ 
+// constexpr Mat4(Mat3 &rotation, Pt3 &translation) noexcept +// { +// m_mat ={rotation[0], rotation[1], rotation[2], translation[0], +// rotation[3], rotation[4], rotation[5], translation[1], +// rotation[6], rotation[7], rotation[8], translation[2], +// 0 , 0 , 0 , 1}; +// } + +// /** +// * @brief operator = +// * @param array +// * @return +// */ +// Mat4& operator=(std::array array) noexcept +// { +// m_mat = array; +// return *this; +// } + + +// /** +// * @brief operator += , add another matrix +// * @param [in] mat : matrix to be added +// */ +// void operator+=(const Mat3& mat) +// { +// for(int ii = 0; ii < 16; ++ii) +// m_mat[ii] += mat.m_mat[ii]; +// } + +// /** +// * @brief Return a value of the matrix +// * @param [in] idRow +// * @param [in] idCol +// */ +// inline TAcc& at(cint idRow, cint idCol) {return m_mat[idRow*4 + idCol];} + + +// /** +// * @brief Return a value of the matrix +// * @param [in] idRow +// * @param [in] idCol +// */ +// inline const TAcc& at(cint idRow, cint idCol) const {return m_mat[idRow*4 + idCol];} + + +// /** +// * @brief Return a value of the matrix +// * @param [in] idRow +// * @param [in] idCol +// */ +// inline TAcc& operator()(cint idRow, cint idCol){return m_mat[idRow*4 + idCol]; } + + +// /** +// * @brief Return a value of the matrix +// * @param [in] idRow +// * @param [in] idCol +// */ +// inline const TAcc& operator()(cint idRow, cint idCol) const {return m_mat[idRow*4 + idCol];} + + +// /** +// * @brief Return a value of the matrix +// * @param [in] id +// */ +// inline TAcc& operator[](cint id){ return m_mat[id];} + +// /** +// * @brief Return a value of the matrix +// * @param [in] id +// */ +// const TAcc& operator[](cint id) const { return m_mat[id];} + +// /** +// * @brief mat +// * @return +// */ +// inline std::array& mat() noexcept {return m_mat;} + +// /** +// * @brief mat +// * @return +// */ +// inline const std::array& mat() const noexcept {return m_mat;} + + +// /** +// * @brief operator *=, apply a 
factor on all the matrix +// * @param [in] f +// */ +// Mat4& operator*=(const TAcc f) +// { +// for(auto &&elem : m_mat) +// elem *= f; + +// return *this; +// } + +//private : + +// std::array m_mat = {{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}}; +//}; + + +//template +///** +// * @brief slow basic 4x4 multiplication +// * @param m1 +// * @param m2 +// */ +//static Mat4 multiply(const Mat4 &m1, const Mat4 &m2) +//{ +// Mat4 res; +// for(int ii = 0; ii < 4; ++ii) +// { +// for(int jj = 0; jj < 4; ++jj) +// { +// res.at(ii,jj) = m1.at(ii,0) * m2.at(0,jj) + m1.at(ii,1) * m2.at(1,jj) + m1.at(ii,2)* m2.at(2,jj) + m1.at(ii,3) * m2.at(3,jj); +// } +// } +// return res; +//} + + + +//template +///** +// * @brief A transformation class (3x3 rotation + 3x1 translation) +// */ +//class Transformation3 +//{ +//public : + +// Transformation3(const Transformation3&) = default; +// Transformation3(Transformation3&&) = default; +// Transformation3& operator=(const Transformation3&) = default; +// Transformation3& operator=(Transformation3&&) = default; +// virtual ~Transformation3() = default; + +// /** +// * @brief Transformation3 constructor +// * @param [in] identity +// */ +// Transformation3(cbool identity = false) : rotation(Mat3(identity)), translation(Pt3()) +// {} + +// /** +// * @brief Transformation3 constructor +// * @param [in] rot +// * @param [in] trans +// */ +// Transformation3(const Mat3 &rot, const Vec3 &trans = Vec3(0,0,0)) : rotation(rot), translation(trans) {} + + +// /** +// * @brief Transformation3 constructor +// * @param [ın] mat +// */ +// Transformation3(const Mat4 &mat) +// { +// rotation = {mat[0], mat[1], mat[2], +// mat[4], mat[5], mat[6], +// mat[8], mat[9], mat[10]}; + +// translation = Pt3(mat[3], mat[7], mat[11]); +// } + +// /** +// * @brief Transformation3D constructor +// * @param [in] trmTransfoFilePath : path of the transformation to be loaded +// */ +// Transformation3(const std::string &transfoFilePath); + +// /** +// * @brief Load a 
transformation file (txt or trm) +// * @param [in] transfoFilePath +// * @return true if sucess else false +// */ +// bool load(const std::string &transfoFilePath); + +// /** +// * @brief Save a transformation to trm format +// * @param [in] trmTransfoFileSavePath +// * @return true if sucess else false +// */ +// bool save_to_trm(const std::string &trmTransfoFileSavePath); + +// /** +// * @brief Save a transformation to txt format +// * @param [in] txtTransfoFileSavePath +// * @return true if sucess else false +// */ +// bool save_to_txt(const std::string &txtTransfoFileSavePath); + +// /** +// * @brief Return the transformed input point +// * @param [in] point +// */ +// inline Point3 transform(const Point3 &point) const; + +// /** +// * @brief Apply the transformation to the input point +// * @param [in,out] point +// */ +// inline void apply_transformation(Point3 &point) const; + +// /** +// * @brief Apply the translation to the input point +// * @param [in,out] point +// */ +// inline void apply_translation(Point3 &point) const; + +// /** +// * @brief Apply the rotation to the input point +// * @param [in,out] point +// */ +// inline void apply_rotation(Point3 &point) const; + +// /** +// * @brief Compose with the input transformation +// * @param [in] transfo +// */ +// void compose_transfo(Transformation3 &transfo) +// { +// Mat4 t1(rotation, translation); +// Mat4 t2(transfo.rotation, transfo.translation); +// Mat4 t3 = multiply(t1,t2); +// *this = Transformation3(t3); +// } + +// /** +// * @brief Invert the transformation +// */ +// void invert() +// { +// rotation.invert(); +// Mat3 negRot = rotation; +// negRot *= -1; +// negRot.apply_rotation(translation); +// } + +// /** +// * @brief Return the transformation matrix +// */ +// inline std::array& rot_mat(){return rotation.mat();} + +// /** +// * @brief Return the transformation matrix +// */ +// inline const std::array& rot_mat()const {return rotation.mat();} + +// /** +// * @brief add the 
transformation coordinates in the flux +// * @param [in,out] flux +// */ +// inline void display(std::ostream &flux) const{flux << rotation << translation;} + +// Mat3 rotation; /**< rotation matrix */ +// Point3 translation;/**< translation vector */ + + +//private : + +// /** +// * @brief Load a trm transformation file +// * @param [in] trmTransfoFilePath : path of the transformation to be loaded +// * @return true if sucess else false +// */ +// bool load_trm(const std::string &trmTransfoFilePath); + +// /** +// * @brief Load a raw txt transformation file +// * @param [in] rawTxtTransfoFilePath : path of the transformation to be loaded +// * @return true if sucess else false +// */ +// bool load_raw_txt(const std::string &rawTxtTransfoFilePath); + +//}; + +//template +///** +// * @brief operator << +// * @param flux +// * @param pt +// * @return +// */ +//static std::ostream &operator<<(std::ostream &flux, Mat3 const& mat3) +//{ +// mat3.display(flux); +// return flux; +//} + + +//template +///** +// * @brief operator << +// * @param flux +// * @param pt +// * @return +// */ +//static std::ostream &operator<<(std::ostream &flux, Transformation3 const& transform) +//{ +// transform.display(flux); +// return flux; +//} + +//// ############################################## CLASS DEFINITIONS + +//template +//bool Transformation3::load_trm(const std::string &trmTransfoFilePath) +//{ +// // open file +// std::ifstream fileStream(trmTransfoFilePath); + +// if (!fileStream.is_open()) +// { +// std::cerr << "-Error : Transformation3::loadTrmTransformation -> Can't open file : " << trmTransfoFilePath << std::endl; +// return false; +// } + +// // retrieve the full file in one string +// std::stringstream buffer; +// buffer << fileStream.rdbuf(); +// std::string fileString = buffer.str(); + +// // replace all backlines with comas +// for(size_t ii = 0; ii < fileString.size(); ++ii) +// { +// if(fileString[ii] == '\n') +// { +// fileString[ii] = ' '; +// } +// } + +// // 
split the string with comas +// std::vector splited = split(fileString, ' '); +// for(int ii = 0; ii < static_cast(splited.size()); ++ii) +// { +// std::istringstream buffer(splited[ii]); + +// if(ii < 3) +// { +// buffer >> translation(ii); +// } +// else +// { +// buffer >> rot_mat()[ii-3]; +// } +// } + +// return true; +//} + +//template +//bool Transformation3::load_raw_txt(const std::string &rawTxtTransfoFilePath) +//{ +// // open file +// std::ifstream fileStream(rawTxtTransfoFilePath); + +// if (!fileStream.is_open()) +// { +// std::cerr << "-Error : Transformation3::loadRawTxtTransformation -> Can't open file : " << rawTxtTransfoFilePath << std::endl; +// return false; +// } + +// std::string line; +// std::vector elements; +// for(int ii = 0; ii < 3; ++ii) +// { +// std::getline(fileStream, line); +// elements = split(line, ' '); +// if(elements.size() == 4) +// { +// rotation[ii*3] = static_cast(std::stod(elements[0])); +// rotation[ii*3+1] = static_cast(std::stod(elements[1])); +// rotation[ii*3+2] = static_cast(std::stod(elements[2])); +// translation[ii] = static_cast(std::stod(elements[3])); +// } +// else +// return false; +// } + +// return true; +//} + +//template +//bool Transformation3::load(const std::string &transfoFilePath) +//{ +// if(transfoFilePath.size() >= 5) +// { +// std::string ext =transfoFilePath.substr(transfoFilePath.size()-3); +// std::transform(ext.begin(), ext.end(),ext.begin(), ::toupper); + +// if(ext == "TXT") +// { +// return load_raw_txt(transfoFilePath); +// } + +// if(ext == "TRM") +// { +// return load_trm(transfoFilePath); +// } +// } +// std::cerr << "-ERROR : Transformation3::loadTransformation -> invalid file save path : " << transfoFilePath << "\n"; + +// return false; +//} + +//template +//bool Transformation3::save_to_trm(const std::string &trmTransfoFileSavePath) +//{ +// std::ofstream flowTRM(trmTransfoFileSavePath); + +// // open file to write +// if(!flowTRM.is_open()) +// { +// std::cerr << "-Error : 
Transformation3::saveToTrm -> Can't open trm file. " << std::endl; +// return false; +// } + +// flowTRM << translation[0] << " " << translation[1] << " " << translation[2] << "\n"; +// flowTRM << rotation[0] << " " << rotation[1] << " " << rotation[2] << "\n"; +// flowTRM << rotation[3] << " " << rotation[4] << " " << rotation[5] << "\n"; +// flowTRM << rotation[6] << " " << rotation[7] << " " << rotation[8] << "\n"; + +// return true; +//} + +//template +//bool Transformation3::save_to_txt(const std::string &txtTransfoFileSavePath) +//{ +// std::ofstream flowTxt(txtTransfoFileSavePath); + +// // open file to write +// if(!flowTxt.is_open()) +// { +// std::cerr << "-Error : Transformation3::saveToTxt -> Can't open txt file. " << std::endl; +// return false; +// } + +// flowTxt << rotation[0] << " " << rotation[1] << " " << rotation[2] << " " << translation[0] << "\n"; +// flowTxt << rotation[3] << " " << rotation[4] << " " << rotation[5] << " " << translation[1] << "\n"; +// flowTxt << rotation[6] << " " << rotation[7] << " " << rotation[8] << " " << translation[2] << "\n"; +// flowTxt << 0 << " " << 0 << " " << 0 << " " << 1<< "\n"; + +// return true; +//} + +//template +//Transformation3::Transformation3(const std::string &transfoFilePath) +//{ +// load(transfoFilePath); +//} + +//template +//inline Point3 Transformation3::transform(const Point3 &pt) const //noexcept +//{ +// return Point3( rotation[0] * pt.x() + rotation[1] * pt.y() + rotation[2] * pt.z() + translation.x(), +// rotation[3] * pt.x() + rotation[4] * pt.y() + rotation[5] * pt.z() + translation.y(), +// rotation[6] * pt.x() + rotation[7] * pt.y() + rotation[8] * pt.z() + translation.z()); +//} + +//template +//inline void Transformation3::apply_transformation(Point3 &pt) const// noexcept +//{ +// pt = transform(pt); +//} + +//template +//inline void Transformation3::apply_translation(Point3 &pt) const// noexcept +//{ +// pt += translation; +//} + +//template +//inline void 
Transformation3::apply_rotation(Point3 &pt) const// noexcept +//{ +// rotation.apply_rotation(pt); +//} + + +//// ############################################## STATIC + +//template +///** +// * @brief skew-symmetric cross-product matrix of v +// * @param v +// * @return +// */ +//static inline Mat3 skewSymmetricXProduct(const Vec3 v) +//{ +// Mat3 mat; +// mat[1] = -v.z(); +// mat[2] = v.y(); +// mat[3] = v.z(); +// mat[5] = -v.x(); +// mat[6] = -v.y(); +// mat[7] = v.x(); +// return mat; +//} + +//template +///** +// * @brief squareSkewSymmetricXProduct +// * @param v +// * @return +// */ +//static inline Mat3 squareSkewSymmetricXProduct(const Vec3 v) +//{ +// Mat3 mat; +// mat[0] = -v.z()*v.z() - v.y()*v.y(); +// mat[1] = v.x()*v.y(); +// mat[2] = v.x() * v.z(); +// mat[3] = v.x()*v.y(); +// mat[4] = -v.z()*v.z() - v.x()*v.x(); +// mat[5] = v.y()*v.z(); +// mat[6] = v.x()*v.z(); +// mat[7] = v.y()*v.z(); +// mat[8] = -v.y()*v.y() - v.x()*v.x(); +// return mat; +//} + + +//template +///** +// * @brief slow basic 3x3 multiplication +// * @param m1 +// * @param m2 +// * @return +// */ +//static Mat3 multiply(const Mat3 &m1, const Mat3 &m2) +//{ +// Mat3 res; +// for(int ii = 0; ii < 3; ++ii) +// { +// for(int jj = 0; jj < 3; ++jj) +// { +// res.at(ii,jj) = m1.at(ii,0) * m2.at(0,jj) + m1.at(ii,1) * m2.at(1,jj) + m1.at(ii,2)* m2.at(2,jj); +// } +// } +// return res; +//} + + +//template +///** +// * @brief rotationMatrixFromVectors +// * @param v1 +// * @param v2 +// * @return +// */ +//static Mat3 rotationMatrixFromVectors(const Vec3 &v1, const Vec3 &v2) +//{ +// Point3 a = v1; a.normalize(); +// Point3 b = v2; b.normalize(); +// Point3 V = Pt3::cross_product(a,b); +// TAcc s = V.norm(); +// TAcc c = Pt3::dot_product(a,b); + +// if(s == static_cast(0)) +// return Mat3(true); + +// Mat3 vx = skewSymmetricXProduct(V); +// Mat3 vx_vx = squareSkewSymmetricXProduct(V); +// vx_vx *= (1-c)/(s*s); +// vx += Mat3(true); +// vx += vx_vx; + +// return vx; +//} + +//} + diff 
--git a/cpp-projects/base/geometry/triangle3.hpp b/cpp-projects/base/geometry/triangle3.hpp new file mode 100644 index 0000000..e6f238b --- /dev/null +++ b/cpp-projects/base/geometry/triangle3.hpp @@ -0,0 +1,114 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
**
+** **
+********************************************************************************/
+
+#pragma once
+
+// local
+#include "geometry/point3.hpp"
+//#include "geometry/interval.hpp"
+
+namespace tool::geo {
+// Triple of vertex indices for one triangle; id1/id2/id3 read ids.x()/ids.y()/ids.z().
+struct TriIds{
+    constexpr size_t id1() const{return ids.x();}
+    constexpr size_t id2() const{return ids.y();}
+    constexpr size_t id3() const{return ids.z();}
+    Pt3 ids; // the three indices, stored as one 3-component point
+};
+
+template
+struct Triangle3{
+    // read-only accessors: each returns a copy of v[0], v[1] or v[2]
+    constexpr auto a() const noexcept -> Pt3 {return v[0];}
+    constexpr auto b() const noexcept -> Pt3 {return v[1];}
+    constexpr auto c() const noexcept -> Pt3 {return v[2];}
+    // mutable accessors: each returns a reference into v, so a vertex can be edited in place
+    constexpr auto a() noexcept -> Pt3&{return v[0];}
+    constexpr auto b() noexcept -> Pt3&{return v[1];}
+    constexpr auto c() noexcept -> Pt3&{return v[2];}
+    // the three vertices, in a, b, c order
+    std::array,3> v;
+};
+// NOTE(review): the helpers below are disabled; `normal` as written averages a(), b(), c() without `t.` and must be reworked before re-enabling.
+//template
+//constexpr auto normal(const Triangle3 &t) -> Normal3{
+//    return ((a() + b() + c())/acc{3});
+//}
+
+//template
+//constexpr auto interval(const Triangle3 &t, const Vec3 &axis) -> Interval{
+
+//    Interval res;
+//    res.min() = dot(axis, t.a());
+//    res.max() = res.min();
+
+//    for(int ii = 1; ii < t.v.size(); ++ii){
+//        acc value = dot(axis, t.v[ii]);
+//        res.min() = std::min(res.min(), value);
+//        res.max() = std::max(res.max(), value); // corrected: the upper bound must grow with std::max (was std::min)
+//    }
+
+//    return res;
+//}
+
+//template
+//constexpr auto barycentric(const Pt3 &p, const Triangle3 &t) -> Pt3{
+
+//    const Vec3 ab = t.b() - t.a();
+//    const Vec3 bc = t.c() - t.b();
+//    const Vec3 ca = t.a() - t.c();
+
+//    Vec3 v = ab - project(ab,vec(t.c(), t.b()));
+//    const acc a = 1 - (dot(v, vec(t.a(),p)) / dot(v,ab));
+
+//    v = bc - project(bc, vec(t.a(),t.c()));
+//    const acc b = 1 - (dot(v,vec(t.b(),p)) / dot(v, bc));
+
+//    v = ca - project(ca, ab);
+//    const acc c = 1 - (dot(v, vec(t.c(),p)) / dot(v, ca));
+
+//    return Pt3(a,b,c);
+//}
+
+//template
+//constexpr auto generate_point(const Pt2 &factors, const Triangle3 &t) noexcept -> Pt3{
+//    Vec3 ab = vec(t.a(),t.b());
+//    Vec3 ac = vec(t.a(),t.c());
+//    return t.a() + ab*factors[0] + ac*factors[1];
+//}
+
+//template
+//constexpr auto barycenter_to_point(const Pt3 &barycenter, const Triangle3 &t) noexcept -> Pt3{
+
+//    // TODO check x + y + z == 1
+//    return Pt3{
+//        barycenter.x()*t.a().x() + barycenter.y()*t.b().x() + barycenter.z()*t.c().x(),
+//        barycenter.x()*t.a().y() + barycenter.y()*t.b().y() + barycenter.z()*t.c().y(),
+//        barycenter.x()*t.a().z() + barycenter.y()*t.b().z() + barycenter.z()*t.c().z()
+//    };
+//}
+}
diff --git a/cpp-projects/base/geometry/vertices.cpp b/cpp-projects/base/geometry/vertices.cpp
new file mode 100644
index 0000000..37c1156
--- /dev/null
+++ b/cpp-projects/base/geometry/vertices.cpp
@@ -0,0 +1,350 @@
+
+
+/*******************************************************************************
+** Toolset-base **
+** MIT License **
+** Copyright (c) [2018] [Florian Lance] **
+** **
+** Permission is hereby granted, free of charge, to any person obtaining a **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation **
+** the rights to use, copy, modify, merge, publish, distribute, sublicense, **
+** and/or sell copies of the Software, and to permit persons to whom the **
+** Software is furnished to do so, subject to the following conditions: **
+** **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software. **
+** **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER **
+** DEALINGS IN THE SOFTWARE.
**
+** **
+********************************************************************************/
+
+#include "vertices.hpp"
+
+//// std
+//#include
+
+using namespace tool::geo;
+// Index of the smallest vertex (std::min_element with Pt3f's operator<); returns 0 when empty, same as a real index 0.
+auto Vertices3D::min_id() const noexcept -> size_t{
+    if(empty()){
+        return {};
+    }
+    return std::distance(values.cbegin(), std::min_element(values.cbegin(), values.cend()));
+}
+// Smallest vertex by Pt3f's operator<; a value-initialized Pt3f when empty.
+auto Vertices3D::min() const noexcept -> Pt3f{
+    if(empty()){
+        return {};
+    }
+    return *std::min_element(values.cbegin(), values.cend());
+}
+// Index of the largest vertex by Pt3f's operator<; 0 when empty.
+auto Vertices3D::max_id() const noexcept -> size_t{
+    if(empty()){
+        return {};
+    }
+    return std::distance(values.cbegin(), std::max_element(values.cbegin(), values.cend()));
+}
+// Largest vertex by Pt3f's operator<; a value-initialized Pt3f when empty.
+auto Vertices3D::max() const noexcept -> Pt3f{
+    if(empty()){
+        return {};
+    }
+    return *std::max_element(values.cbegin(), values.cend());
+}
+// Index of the vertex with the smallest x; 0 when empty.
+auto Vertices3D::min_x_id() const noexcept -> size_t{
+    if(empty()){
+        return {};
+    }
+    auto minX = std::min_element(values.cbegin(), values.cend(), [](const Pt3f &a, const Pt3f &b){
+        return a.x() < b.x();
+    });
+    return std::distance(values.cbegin(), minX);
+}
+// Smallest x coordinate; 0.f when empty.
+auto Vertices3D::min_x() const noexcept -> float{
+    if(empty()){
+        return {};
+    }
+    auto minX = std::min_element(values.cbegin(), values.cend(), [](const Pt3f &a, const Pt3f &b){
+        return a.x() < b.x();
+    });
+    return minX->x();
+}
+// Index of the vertex with the largest x; 0 when empty.
+auto Vertices3D::max_x_id() const noexcept -> size_t{
+    if(empty()){
+        return {};
+    }
+    auto maxX = std::max_element(values.cbegin(), values.cend(), [](const Pt3f &a, const Pt3f &b){
+        return a.x() < b.x();
+    });
+    return std::distance(values.cbegin(), maxX);
+}
+// Largest x coordinate; 0.f when empty.
+auto Vertices3D::max_x() const noexcept -> float{
+    if(empty()){
+        return {};
+    }
+    auto maxX = std::max_element(values.cbegin(), values.cend(), [](const Pt3f &a, const Pt3f &b){
+        return a.x() < b.x();
+    });
+    return maxX->x();
+}
+// Index of the vertex with the smallest y; 0 when empty.
+auto Vertices3D::min_y_id() const noexcept -> size_t{
+    if(empty()){
+        return {};
+    }
+    auto minY = std::min_element(values.cbegin(), values.cend(), [](const Pt3f &a, const Pt3f &b){
+        return a.y() < b.y();
+    });
+    return std::distance(values.cbegin(), minY);
+}
+// Smallest y coordinate; 0.f when empty.
+auto Vertices3D::min_y() const noexcept -> float{
+    if(empty()){
+        return {};
+    }
+    auto minY = std::min_element(values.cbegin(), values.cend(), [](const Pt3f &a, const Pt3f &b){
+        return a.y() < b.y();
+    });
+    return minY->y();
+}
+// Index of the vertex with the largest y; 0 when empty.
+auto Vertices3D::max_y_id() const noexcept -> size_t{
+    if(empty()){
+        return {};
+    }
+    auto maxY = std::max_element(values.cbegin(), values.cend(), [](const Pt3f &a, const Pt3f &b){
+        return a.y() < b.y();
+    });
+    return std::distance(values.cbegin(), maxY);
+}
+// Largest y coordinate; 0.f when empty.
+auto Vertices3D::max_y() const noexcept -> float{
+    if(empty()){
+        return {};
+    }
+    auto maxY = std::max_element(values.cbegin(), values.cend(), [](const Pt3f &a, const Pt3f &b){
+        return a.y() < b.y();
+    });
+    return maxY->y();
+}
+// Index of the vertex with the smallest z; 0 when empty. (Uses begin() where the siblings use cbegin().)
+auto Vertices3D::min_z_id() const noexcept -> size_t{
+    if(empty()){
+        return {};
+    }
+    auto minZ = std::min_element(values.begin(), values.cend(), [](const Pt3f &a, const Pt3f &b){
+        return a.z() < b.z();
+    });
+    return std::distance(values.begin(), minZ);
+}
+// Smallest z coordinate; 0.f when empty.
+auto Vertices3D::min_z() const noexcept -> float{
+    if(empty()){
+        return {};
+    }
+    auto minZ = std::min_element(values.begin(), values.cend(), [](const Pt3f &a, const Pt3f &b){
+        return a.z() < b.z();
+    });
+    return minZ->z();
+}
+// Index of the vertex with the largest z; 0 when empty.
+auto Vertices3D::max_z_id() const noexcept -> size_t{
+    if(empty()){
+        return {};
+    }
+    auto maxZ = std::max_element(values.cbegin(), values.cend(), [](const Pt3f &a, const Pt3f &b){
+        return a.z() < b.z();
+    });
+    return std::distance(values.cbegin(), maxZ);
+}
+// Largest z coordinate; 0.f when empty.
+auto Vertices3D::max_z() const noexcept -> float{
+    if(empty()){
+        return {};
+    }
+    auto maxZ = std::max_element(values.cbegin(), values.cend(), [](const Pt3f &a, const Pt3f &b){
+        return a.z() < b.z();
+    });
+    return maxZ->z();
+}
+// Sum of all vertices (std::accumulate starting from {0,0,0}); a value-initialized Pt3f when empty.
+auto Vertices3D::sum() const noexcept -> Pt3f {
+    if(empty()){
+        return {};
+    }
+    return std::accumulate(values.cbegin(), values.cend(), geo::Pt3f{0,0,0});
+}
+
+auto Vertices3D::mean_position() const noexcept -> Pt3f{ // sum() divided by the vertex count; value-initialized Pt3f when empty
+    if(empty()){
+        return {};
+    }
+    return sum()/static_cast(size());
+}
+// Axis-aligned bounding box built from the per-axis minima and maxima; value-initialized AABB3 when empty.
+auto Vertices3D::aabb() const noexcept -> AABB3{
+
+    if(empty()){
+        return {};
+    }
+
+    auto [minX, maxX] = std::minmax_element(values.cbegin(), values.cend(), [](const Pt3f &a, const Pt3f &b){
+        return a.x() < b.x();
+    });
+    auto [minY, maxY] = std::minmax_element(values.cbegin(), values.cend(), [](const Pt3f &a, const Pt3f &b){
+        return a.y() < b.y();
+    });
+    auto [minZ, maxZ] = std::minmax_element(values.cbegin(), values.cend(), [](const Pt3f &a, const Pt3f &b){
+        return a.z() < b.z();
+    });
+
+    return {
+        {minX->x(),minY->y(),minZ->z()},
+        {maxX->x(),maxY->y(),maxZ->z()}
+    };
+}
+// Least-squares sphere fit: center = (A^T A)^-1 A^T B, radius = RMS distance of the vertices to that center.
+auto Vertices3D::sphere() const -> Sphere{
+
+    if(empty()){
+        return {{},0.f};
+    }
+
+    Pt3f mean = mean_position();
+
+    // compute A/B
+    Mat3f A; // NOTE(review): accumulated with += below, so this assumes Mat3f/Vec3f default-construct to zero — confirm
+    Vec3f B;
+    auto n = static_cast(size());
+    for(const auto &v : values){
+
+        Vec3f diff = v-mean;
+
+        A.array[0] += (v.x() * diff.x());
+        A.array[1] += (v.x() * diff.y());
+        A.array[2] += (v.x() * diff.z());
+
+        A.array[3] += (v.y() * diff.x());
+        A.array[4] += (v.y() * diff.y());
+        A.array[5] += (v.y() * diff.z());
+
+        A.array[6] += (v.z() * diff.x());
+        A.array[7] += (v.z() * diff.y());
+        A.array[8] += (v.z() * diff.z());
+        // the 1/n normalisation of A is applied once, after the loop
+
+        auto d = dot(v, v);
+        B += (diff*d)/n;
+    }
+    A /= n; // fix: this was inside the loop, which re-divided the partial sums on every iteration
+    A *= 2.;
+    Mat3f trA = transpose(A);
+    Mat3f bb = inverse((trA*A)) * trA;
+
+    Pt3f center = transpose(bb * transpose(B));
+    float r = 0.f;
+    for(const auto &v : values){
+        r += square_norm(v - center);
+    }
+    r = sqrt(r/n);
+
+    return {center,r};
+}
+// Replaces every vertex by (to_pt4(vertex, 1) * transformation).xyz(), in place.
+auto Vertices3D::apply_transformation(const Mat4f &transformation) noexcept -> void{
+
+    std::transform(values.begin(), values.end(), values.begin(),
+        [&](geo::Pt3f vertex) {
+            return (to_pt4(vertex,1.f)*transformation).xyz();
+        }
+    );
+}
+// Sorts the vertices in place in ascending order (std::less).
+auto Vertices3D::sort_ascendant() noexcept -> void{
+    std::sort(values.begin(), values.end(), std::less());
+}
+// Sorts the vertices in place in descending order (std::greater).
+auto Vertices3D::sort_descendant() noexcept -> void{
+    std::sort(values.begin(), values.end(), std::greater());
+}
+// Despite the name, returns the indices of vertices whose squared distance to target is below maxDistance^2, i.e. the ones remove_outliers keeps.
+auto Vertices3D::get_outliers_id(const Pt3f &target, float maxDistance) noexcept -> std::vector{
+
+    if(empty()){
+        return {};
+    }
+
+    std::vector ids;
+    auto squareMaxDistance = maxDistance*maxDistance; // compare squared distances to avoid a sqrt per vertex
+    for(size_t ii = 0; ii < size(); ++ii){
+        if(square_norm(vec(values[ii], target)) < squareMaxDistance){
+            ids.push_back(ii);
+        }
+    }
+
+    return ids;
+}
+// Indices of vertices on the selected side of the plane: plane_equation >= 0 when keepAbove, < 0 otherwise.
+auto Vertices3D::get_from_plane_id(const Plane3f &plane, bool keepAbove) noexcept -> std::vector{
+
+    if(empty()){
+        return {};
+    }
+
+    std::vector ids;
+    for(size_t ii = 0; ii < size(); ++ii){
+        auto v = plane_equation(values[ii], plane);
+        if((!keepAbove && v < 0.f) || (keepAbove && v >= 0.f) ){
+            ids.push_back(ii);
+        }
+    }
+    return ids;
+}
+// Keeps only the vertices closer than maxDistance to target, compacting them to the front and shrinking the buffer.
+auto Vertices3D::remove_outliers(const Pt3f &target, float maxDistance) noexcept -> void {
+
+    if(empty()){
+        return;
+    }
+
+    auto squareMaxDistance = maxDistance*maxDistance;
+    size_t currIdToKeep = 0; // write cursor: never ahead of the element being read
+    for(auto &v : values){
+        if(square_norm(vec(v, target)) < squareMaxDistance){
+            values[currIdToKeep++] = std::move(v);
+        }
+    }
+    resize(currIdToKeep);
+}
+// Keeps only the vertices on the selected side of the plane (same test as get_from_plane_id) and shrinks the buffer.
+auto Vertices3D::remove_from_plane(const Plane3f &plane, bool keepAbove) noexcept -> void{
+
+    if(empty()){
+        return;
+    }
+
+    size_t currIdToKeep = 0;
+    for(auto &v : values){
+        auto pe = plane_equation(v, plane);
+        if((!keepAbove && pe < 0.f) || (keepAbove && pe >= 0.f) ){
+            values[currIdToKeep++] = std::move(v);
+        }
+    }
+    resize(currIdToKeep);
+}
+
+
+
+
diff --git a/cpp-projects/base/geometry/vertices.hpp b/cpp-projects/base/geometry/vertices.hpp
new file mode 100644
index 0000000..1755368
--- /dev/null
+++ b/cpp-projects/base/geometry/vertices.hpp
@@ -0,0 +1,75 @@
+
+/*******************************************************************************
+** Toolset-base **
+** MIT License **
+** Copyright (c) [2018] [Florian Lance] **
+** **
+** Permission is hereby granted, free of charge, to any person
obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
**
+** **
+********************************************************************************/
+
+#pragma once
+
+// local
+#include "plane3.hpp"
+#include "sphere.hpp"
+#include "aabb3.hpp"
+#include "matrix4.hpp"
+#include "utility/buffer_vector.hpp"
+
+namespace tool::geo {
+// Buffer of 3D float vertices with min/max queries, bounding volumes, filtering and in-place transforms.
+struct Vertices3D : public Buffer{
+
+    // get
+    // # geometry (every getter returns a value-initialized result, i.e. 0 / 0.f / default point, on an empty buffer)
+    auto min_id() const noexcept -> size_t;
+    auto max_id() const noexcept -> size_t;
+    auto min_x_id() const noexcept -> size_t;
+    auto max_x_id() const noexcept -> size_t;
+    auto min_y_id() const noexcept -> size_t;
+    auto max_y_id() const noexcept -> size_t;
+    auto min_z_id() const noexcept -> size_t;
+    auto max_z_id() const noexcept -> size_t;
+    auto min() const noexcept -> geo::Pt3f;
+    auto max() const noexcept -> geo::Pt3f;
+    auto min_x() const noexcept -> float;
+    auto max_x() const noexcept -> float;
+    auto min_y() const noexcept -> float;
+    auto max_y() const noexcept -> float;
+    auto min_z() const noexcept -> float;
+    auto max_z() const noexcept -> float;
+    auto sum() const noexcept -> geo::Pt3f;
+    auto mean_position() const noexcept -> geo::Pt3f;
+    auto aabb() const noexcept -> AABB3;
+    auto sphere() const -> Sphere; // TO CHECK (least-squares sphere fit; not declared noexcept)
+
+    // id (both return the indices of the vertices that the matching remove_* call would keep)
+    auto get_outliers_id(const Pt3f &target, float maxDistance) noexcept -> std::vector;
+    auto get_from_plane_id(const Plane3f &plane, bool keepAbove) noexcept -> std::vector; // TO CHECK
+    // remove (compact the kept vertices to the front, then shrink the buffer)
+    auto remove_outliers(const Pt3f &target, float maxDistance) noexcept -> void;
+    auto remove_from_plane(const Plane3f &plane, bool keepAbove) noexcept -> void; // TO CHECK
+
+    // modify
+    auto apply_transformation(const Mat4f &transformation) noexcept -> void;
+    auto sort_ascendant() noexcept -> void;
+    auto sort_descendant() noexcept -> void;
+};
+}
diff --git a/cpp-projects/base/geometry/voxel.hpp b/cpp-projects/base/geometry/voxel.hpp
new file mode 100644
index 0000000..23d0a24
--- /dev/null
+++ b/cpp-projects/base/geometry/voxel.hpp
@@ -0,0 +1,115 @@
+
+
+
+/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
**
+** **
+********************************************************************************/
+
+#pragma once
+
+// local
+#include "geometry/point3.hpp"
+
+namespace tool::geo{
+// One grid cell: its integer index in the grid and its colour.
+struct Voxel {
+public:
+
+    geo::Pt3 index;
+    geo::Pt3f color;
+};
+// Voxel packed into a single 64-bit word: 3 x 13-bit indices, 3 x 8-bit colour channels, 1 discard bit.
+struct CVoxel{
+    std::uint64_t
+        xIndex : 13,
+        yIndex : 13,
+        zIndex : 13,
+        red : 8,
+        blue : 8,
+        green : 8,
+        discard : 1;
+};
+
+//struct CV{
+
+
+
+// b32
+// y11 z11 x10 y10 z10 x9 y9 z9 x8 y8 z8 x7 y7 z7 x6 y6 z6 x5 y5 z5 x4 y4 z4 x3 y3 z3 x2 y2 z2 x1 y1 z1
+// rgb compression
+// g compression
+// b compression
+// extra discard x13 y13 z13 x12 y12 z12 x11
+
+// sort by b32 value
+//};
+
+
+// Accumulates the colours of all points that fall into one voxel so their average can be read back.
+struct AvgColorVoxel {
+    // Colourless variant: only records the index of a still-empty accumulator; numOfPoints is left untouched.
+    constexpr auto add(const tool::geo::Pt3 &index) noexcept -> void{
+        if(numOfPoints == 0){
+            voxel.index = index;
+        }
+    }
+    // Adds one colour sample: the first call sets index and colour, later calls for the same index add to the sum.
+    constexpr auto add(const tool::geo::Pt3 &index, const tool::geo::Pt3f &color) noexcept -> void {
+        if(numOfPoints > 0){
+            if(index == voxel.index){ // fix: was `!=`, so samples of this very voxel were dropped and nothing was averaged
+                voxel.color += color;
+                numOfPoints++;
+            }
+        }else{
+            voxel = {index, color};
+            ++numOfPoints;
+        }
+    }
+    // Same as above, taking the index and colour from an existing Voxel.
+    constexpr auto add(const Voxel &v) noexcept -> void {
+        if(numOfPoints > 0){
+            if(v.index == voxel.index){ // fix: was `!=` (see the overload above); samples for another index are ignored
+                voxel.color += v.color;
+                numOfPoints++;
+            }
+        }else{
+            voxel = v;
+            ++numOfPoints;
+        }
+    }
+    // Accumulated colour divided by the sample count; the raw stored colour when nothing was counted (avoids dividing by 0).
+    constexpr auto get_average_color() const noexcept -> tool::geo::Pt3f{
+        if(numOfPoints > 0){
+            return voxel.color / (1.f*numOfPoints);
+        }else{
+            return voxel.color;
+        }
+    }
+
+    int numOfPoints = 0; // number of colour samples summed into voxel.color
+    Voxel voxel;         // voxel index plus the running colour sum
+};
+
+
+}
diff --git a/cpp-projects/base/geometry/voxel_grid.cpp b/cpp-projects/base/geometry/voxel_grid.cpp
new file mode 100644
index 0000000..18dda73
--- /dev/null
+++ b/cpp-projects/base/geometry/voxel_grid.cpp
@@ -0,0 +1,110 @@
+
+
+/*******************************************************************************
+** Toolset-base **
+** MIT License **
+** Copyright (c) [2018] [Florian Lance] **
+** **
+** Permission is hereby granted, free of charge, to any person obtaining a **
+** copy of this software
and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
**
+** **
+********************************************************************************/
+
+#include "voxel_grid.hpp"
+
+// local
+#include "utility/logger.hpp"
+
+#include
+#include
+
+using namespace tool::geo;
+// Stores the voxel edge length and the two bounds; note that `size` holds maxBound (the upper corner), not an extent.
+VoxelGrid::VoxelGrid(float voxelSize, Pt3f minBound, Pt3f maxBound) : voxelSize(voxelSize), origin(minBound), size(maxBound){
+}
+// Transforms every cloud vertex by `model` and accumulates those passing the bounds test into their voxel bucket.
+auto VoxelGrid::add_cloud(const ColoredCloudData &cloud, const Mat4f &model) -> void{
+
+    bool hasColors = cloud.has_colors();
+    for (size_t ii = 0; ii < cloud.size(); ++ii) {
+
+        const auto &v = cloud.vertices[ii];
+        Pt4f v4{v.x(),v.y(),v.z(),1};
+        auto vertex = (v4 * model).xyz();
+
+        if(vertex > origin && vertex < size){
+            // voxel index = floor((vertex - origin) / voxelSize) on each axis
+            Pt3f refCoord = (vertex - origin) / voxelSize;
+            Pt3 voxelIndex = {
+                static_cast(std::floor(refCoord(0))),
+                static_cast(std::floor(refCoord(1))),
+                static_cast(std::floor(refCoord(2)))
+            };
+
+            if (hasColors) {
+                voxelindexToAccPoint[voxelIndex].add(voxelIndex, cloud.colors[ii]);
+            } else {
+                voxelindexToAccPoint[voxelIndex].add(voxelIndex);
+            }
+        }
+    }
+
+}
+// Rebuilds `grid` from the accumulated buckets (one Voxel per bucket, coloured with its average), then clears the buckets.
+auto VoxelGrid::compute_grid() -> void{
+
+    if(voxelindexToAccPoint.empty()){
+        return; // nothing accumulated: the previous grid is left untouched
+    }
+
+    grid.clear();
+    grid.reserve(voxelindexToAccPoint.size());
+
+    for (const auto &accpoint : voxelindexToAccPoint) {
+        grid[accpoint.second.voxel.index] = Voxel{
+            accpoint.second.voxel.index,
+            accpoint.second.get_average_color()
+        };
+    }
+
+    voxelindexToAccPoint.clear();
+}
+// Writes one point per voxel into `cloud`: position = origin + index * voxelSize, colour = voxel colour.
+auto VoxelGrid::convert_to_cloud(ColoredCloudData &cloud) -> void{
+
+    cloud.resize(grid.size());
+
+    size_t id = 0;
+    for(const auto &voxel : grid){
+        cloud.vertices[id] = origin + voxel.second.index.conv() * voxelSize;
+        cloud.colors[id] = voxel.second.color;
+        id++;
+    }
+}
+// Builds a grid from `cloud` with an identity model matrix; logs an error and returns an empty grid if voxelSize is not strictly positive.
+auto VoxelGrid::create_from_point_cloud_within_bounds(const ColoredCloudData &cloud, float voxelSize, Pt3f minBound, Pt3f maxBound) -> VoxelGrid{
+
+    VoxelGrid voxelGrid(voxelSize, minBound, maxBound);
+    if(voxelSize <= 0.f){ // fix: was `< 0.f`, which let 0 through to the division in add_cloud and contradicted the message below
+        tool::Logger::error("[VoxelGrid::create_from_point_cloud_within_bounds] Voxel size <= 0.\n");
+        return voxelGrid;
+    }
+    voxelGrid.add_cloud(cloud, Mat4f::identity());
+    voxelGrid.compute_grid();
+    return voxelGrid;
+}
diff --git a/cpp-projects/base/geometry/voxel_grid.hpp b/cpp-projects/base/geometry/voxel_grid.hpp
new file mode 100644
index 0000000..6f8eaa4
--- /dev/null
+++ b/cpp-projects/base/geometry/voxel_grid.hpp
@@ -0,0 +1,63 @@
+
+/*******************************************************************************
+** Toolset-base **
+** MIT License **
+** Copyright (c) [2018] [Florian Lance] **
+** **
+** Permission is hereby granted, free of charge, to any person obtaining a **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation **
+** the rights to use, copy, modify, merge, publish, distribute, sublicense, **
+** and/or sell copies of the Software, and to permit persons to whom the **
+** Software is furnished to do so, subject to the following conditions: **
+** **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software. **
+** **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER **
+** DEALINGS IN THE SOFTWARE.
**
+** **
+********************************************************************************/
+
+#pragma once
+
+// local
+#include "geometry/voxel.hpp"
+#include "geometry/cloud.hpp"
+#include "utility/unordered_map.hpp"
+
+namespace tool::geo{
+// Sparse voxel grid built from coloured point clouds: accumulate with add_cloud, then finalize with compute_grid.
+struct VoxelGrid{
+    // Hash functor for integer 3D indices: hashes the raw bytes of the point with wyhash.
+    struct Pt3IntHashRepresentation {
+        using is_avalanching = void;
+        [[nodiscard]] auto operator()(const tool::geo::Pt3 &f) const noexcept -> uint64_t {
+            static_assert(std::has_unique_object_representations_v>); // byte-wise hashing is only valid without padding bits
+            return ankerl::unordered_dense::detail::wyhash::hash(&f, sizeof(f));
+        }
+    };
+
+    VoxelGrid(float voxelSize, tool::geo::Pt3f minBound, tool::geo::Pt3f maxBound);
+    auto add_cloud(const ColoredCloudData &cloud, const Mat4f &model) -> void;
+    auto compute_grid() -> void;
+
+    auto convert_to_cloud(ColoredCloudData &cloud) -> void;
+
+    static auto create_from_point_cloud_within_bounds(const ColoredCloudData &cloud, float voxelSize, tool::geo::Pt3f minBound, tool::geo::Pt3f maxBound) -> VoxelGrid;
+
+    float voxelSize = 0.1f; // edge length of one voxel
+    Pt3f origin = {};       // lower bound (the constructor stores minBound here)
+    Pt3f size = {};         // upper bound (the constructor stores maxBound here, not an extent)
+    umap, Voxel, Pt3IntHashRepresentation> grid; // finalized voxels, keyed by voxel index
+
+private:
+    // per-voxel colour accumulators filled by add_cloud and emptied by compute_grid
+    umap, AvgColorVoxel, Pt3IntHashRepresentation> voxelindexToAccPoint;
+};
+}
diff --git a/cpp-projects/base/graphics/camera.hpp b/cpp-projects/base/graphics/camera.hpp
new file mode 100644
index 0000000..0e3d3cd
--- /dev/null
+++ b/cpp-projects/base/graphics/camera.hpp
@@ -0,0 +1,316 @@
+
+
+/*******************************************************************************
+** Toolset-base **
+** MIT License **
+** Copyright (c) [2018] [Florian Lance] **
+** **
+** Permission is hereby granted, free of charge, to any person obtaining a **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation **
+** the rights to use, copy, modify, merge, publish, distribute, sublicense, **
+** and/or sell copies of the Software, and to permit persons to whom the **
+** Software is furnished
to do so, subject to the following conditions: **
+** **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software. **
+** **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL **
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING **
+** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER **
+** DEALINGS IN THE SOFTWARE. **
+** **
+********************************************************************************/
+
+#pragma once
+
+// local
+#include "geometry/matrix4.hpp"
+#include "graphics/screen.hpp"
+
+namespace tool::graphics{
+// Bundle of model/view/projection matrices plus the products derived from them; every update_* setter refreshes the derived ones.
+struct CameraMatrices{
+    using Mat3 = geo::Mat3d;
+    using Mat4 = geo::Mat4d;
+
+    Mat4 m; // model matrix
+    Mat4 v; // view matrix
+    Mat4 p; // projection matrix
+
+    Mat3 normal; // normal matrix
+    Mat4 mv; // model*view matrix
+    Mat4 mvp; // model*view*projection matrix
+    // Recomputes mv, normal and mvp from the current m, v, p (matrices are multiplied left to right: m * v * p).
+    void update(){
+        mv = m * v;
+        normal = geo::rotation_m3x3(mv);
+        mvp = mv * p;
+    }
+
+    void update_m(const Mat4 &model){
+        m = model;
+        update();
+    }
+
+    void update_v(const Mat4 &view){
+        v = view;
+        update();
+    }
+
+    void update_p(const Mat4 &proj){
+        p = proj;
+        update();
+    }
+
+    void update_vp(const Mat4 &view, const Mat4 &proj){
+        v = view;
+        p = proj;
+        update();
+    }
+
+    void update_mvp(const Mat4 &model, const Mat4 &view, const Mat4 &proj){
+        m = model;
+        v = view;
+        p = proj;
+        update();
+    }
+
+// static Mat3 normal_matrix(const Mat4 &modelView){
+// return geo::rotation_m3x3(modelView);
+//// const auto &mv = modelView;
+//// return {
+//// mv.at(0,0),mv.at(0,1),mv.at(0,2),
+//// mv.at(1,0),mv.at(1,1),mv.at(1,2),
+//// mv.at(2,0),mv.at(2,1),mv.at(2,2),
+//// };
+// }
+};
+
+class Camera{ // camera holding position/direction/up, the cached view and projection matrices, and a non-owning Screen pointer
+public:
+
+    using Vec3 = geo::Vec3d;
+    using Pt2 = geo::Pt2d;
+    using Pt3 = geo::Pt3d;
+    using Mat3 = geo::Mat3d;
+    using Mat4 = geo::Mat4d;
+
+    enum class Mode : int{
+        Perspective, Orhtographic // NOTE: "Orhtographic" is misspelled, but it is part of the public enum and kept as-is
+    };
+    // Stores the initial pose (also kept for reset_init_values) and builds the right vector, view and projection.
+    Camera(Screen *screen, Pt3 position = {0.,0.,0.}, Vec3 direction = {0.,0.,1.}, Vec3 up = {0.,1.,0.}) :
+        m_screen(screen),m_position(position),m_up(up),m_direction(direction), m_initPosition(position), m_initUp(up), m_initDirection(direction) {
+
+        update_right();
+        update_view();
+        update_projection();
+    }
+
+    constexpr Pt3 position() const noexcept{
+        return m_position;
+    }
+
+    constexpr Vec3 up() const noexcept{
+        return m_up;
+    }
+
+    constexpr Vec3 direction() const noexcept{
+        return m_direction;
+    }
+
+    constexpr Mat4 view() const noexcept{
+        return m_view;
+    }
+    // Recomputes the look-at matrix from the current pose (same expression as update_view) without touching the cached m_view.
+    Mat4 look_at() const{
+        return geo::look_at(m_position, m_position+m_direction, m_up);
+    }
+    // Upper-left 3x3 block of the given model-view matrix.
+    static Mat3 normal(const Mat4 &modelView){
+        const auto &mv = modelView;
+        return {
+            mv.at(0,0),mv.at(0,1),mv.at(0,2),
+            mv.at(1,0),mv.at(1,1),mv.at(1,2),
+            mv.at(2,0),mv.at(2,1),mv.at(2,2),
+        };
+    }
+    // Builds a CameraMatrices from the cached view/projection; the model matrix is left default-constructed.
+    CameraMatrices generate_camera_matrices(){
+        CameraMatrices camM;
+        camM.v = m_view;
+        camM.p = m_projection;
+        camM.update();
+        return camM;
+    }
+
+    constexpr Mat4 projection() const noexcept{
+        return m_projection;
+    }
+
+    constexpr double fov() const noexcept{
+        return m_fov;
+    }
+
+    constexpr Pt2 z_range() const noexcept{
+        return m_zRange;
+    }
+
+    constexpr Screen *screen() const noexcept{
+        return m_screen;
+    }
+
+    // view (each move_* shifts m_position and refreshes the cached view matrix)
+    void move_up(double amount){
+        m_position += m_up*amount;
+        update_view();
+    }
+
+    void move_down(double amount){
+        m_position -= m_up*amount;
+        update_view();
+    }
+
+    void move_front(double amount){
+        m_position += m_direction*amount;
+        update_view();
+    }
+
+    void move_left(double amount){
+        m_position -= geo::cross(m_direction, m_up)*amount; // NOTE(review): uses cross(direction, up), the opposite operand order to m_right — confirm the intended handedness
+        update_view();
+    }
+
+    void move_right(double amount){
+        m_position += geo::cross(m_direction, m_up)*amount;
+        update_view();
+    }
+
+    void move_back(double amount){
+        m_position -= m_direction*amount;
+        update_view();
+    }
+
+    void set_position(Pt3 position){
+        m_position = position;
+        update_view();
+    }
+
+    void set_up_vector(Vec3 up){
+        m_up = normalize(up);
+        update_right();
+        update_view();
+    }
+    // Rotates the current direction by yaw (around up) then pitch (around right), rebuilds up and applies roll around the new direction.
+    void set_direction(double yaw, double pitch, double roll){
+
+        // compute direction vector
+        m_direction = axis_angle_m3x3(m_up, yaw).multiply_vector(m_direction);
+        m_direction = normalize(axis_angle_m3x3(m_right, pitch).multiply_vector(m_direction));
+        // compute up vector
+        update_up();
+        m_up = axis_angle_m3x3(m_direction, roll).multiply_vector(m_up);
+        // m_up.normalize();
+
+        // update right vector
+        update_right();
+        // update view matrix
+        update_view();
+    }
+    // Sets direction and up directly (both are normalized); up is not re-orthogonalized against direction.
+    void set_direction(Vec3 direction, Vec3 up = {0.,1.,0.}){
+
+        m_direction = normalize(direction);
+        m_up = normalize(up);
+
+        // update right vector
+        update_right();
+        // update view matrix
+        update_view();
+    }
+
+    // projection
+    void set_screen(Screen *screen){
+        m_screen = screen;
+        update_projection();
+    }
+
+    void set_fov(double fov){
+        m_fov = fov;
+        if(m_mode == Mode::Perspective){ // the field of view is only used by the perspective projection
+            update_projection();
+        }
+    }
+
+    void set_range(double min, double max){
+        m_zRange = {min,max};
+        update_projection();
+    }
+    // Restores the pose captured by the constructor; the projection is left unchanged.
+    void reset_init_values(){
+        m_position = m_initPosition;
+        m_direction = m_initDirection;
+        m_up = m_initUp;
+        update_right();
+        update_view();
+    }
+    // Rebuilds m_projection for the current mode from the screen aspect ratio; does nothing when no screen is set.
+    void update_projection(){
+        if(m_screen != nullptr){
+            const auto ratio = 1.*m_screen->width()/ m_screen->height(); // NOTE(review): no guard against height() == 0 — confirm callers never pass such a screen
+            if(m_mode == Mode::Perspective){
+                m_projection = geo::perspective(deg_2_rad(m_fov), ratio, m_zRange.x(), m_zRange.y());
+            }else if(m_mode == Mode::Orhtographic){
+                m_projection = geo::orthographic(
+                    -ratio, ratio,
+                    -1., 1.,
+                    m_zRange.x(), m_zRange.y()
+                );
+            }
+        }
+    }
+
+    void set_mode(Mode mode){
+        m_mode = mode;
+        update_projection();
+    }
+
+private :
+
+    void update_right(){
+        m_right = geo::normalize(geo::cross(m_up, m_direction));
+    }
+
+    void update_up(){
+        m_up = geo::normalize(geo::cross(m_direction, m_right));
+    }
+
+    void update_view(){
+        m_view = geo::look_at(m_position, m_position+m_direction, m_up);
+    }
+
+    Screen *m_screen = nullptr; // never deleted in this class
+
+    Pt3 m_position = {0.,0.,0.};
+    Vec3 m_up = {0.,1.,0.};
+    Vec3 m_direction = {0.,0.,1.};
+    Vec3 m_right;
+
+    Pt3 m_initPosition = {0.,0.,0.};
+    Vec3 m_initUp = {0.,1.,0.};
+    Vec3 m_initDirection = {1.,0.,0.}; // differs from m_direction's default {0,0,1}; the constructor always overwrites it with its `direction` argument
+
+    Mat4 m_view;
+    Mat4 m_projection;
+
+    double m_fov = 60.0; // in degrees: converted with deg_2_rad before use
+    Pt2 m_zRange = {0.1, 10000.}; // depth-range pair handed to perspective()/orthographic(); set_range stores {min, max}
+
+    Mode m_mode = Mode::Perspective;
+};
+}
+
diff --git a/cpp-projects/base/graphics/color.cpp b/cpp-projects/base/graphics/color.cpp
new file mode 100644
index 0000000..4e8752f
--- /dev/null
+++ b/cpp-projects/base/graphics/color.cpp
@@ -0,0 +1,116 @@
+
+
+/*******************************************************************************
+** Toolset-base **
+** MIT License **
+** Copyright (c) [2018] [Florian Lance] **
+** **
+** Permission is hereby granted, free of charge, to any person obtaining a **
+** copy of this software and associated documentation files (the "Software"), **
+** to deal in the Software without restriction, including without limitation **
+** the rights to use, copy, modify, merge, publish, distribute, sublicense, **
+** and/or sell copies of the Software, and to permit persons to whom the **
+** Software is furnished to do so, subject to the following conditions: **
+** **
+** The above copyright notice and this permission notice shall be included in **
+** all copies or substantial portions of the Software. **
+** **
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "color.hpp" + +using namespace tool; + +auto Convert::to_hsv(const ColorRGB32 &rgbf) noexcept -> ColorHSV{ + + ColorHSV hsv; + + float fCMax = std::max(std::max(rgbf.r(), rgbf.g()), rgbf.b()); + float fCMin = std::min(std::min(rgbf.r(), rgbf.g()), rgbf.b()); + float fDelta = fCMax - fCMin; + + if(fDelta > 0) { + if(fCMax == rgbf.r()) { + hsv.h() = 60.f * (fmod(((rgbf.g() - rgbf.b()) / fDelta), 6.f)); + } else if(fCMax == rgbf.g()) { + hsv.h() = 60.f * (((rgbf.b() - rgbf.r()) / fDelta) + 2.f); + } else if(fCMax == rgbf.b()) { + hsv.h() = 60.f * (((rgbf.r() - rgbf.g()) / fDelta) + 4.f); + } + + if(fCMax > 0) { + hsv.s() = fDelta / fCMax; + } else { + hsv.s() = 0; + } + + hsv.v() = fCMax; + + } else { + hsv.h() = 0; + hsv.s() = 0; + hsv.v() = fCMax; + } + + if(hsv.h() < 0) { + hsv.h() = 360 + hsv.h(); + } + + return hsv; +} + +auto Convert::to_rgb32(const ColorHSV &hsv) noexcept -> ColorRGB32{ + + float fC = hsv.v() * hsv.s(); // Chroma + float fHPrime = fmod(hsv.h() / 60.f, 6.f); + float fX = fC * (1.f - fabs(fmod(fHPrime, 2.f) - 1.f)); + float fM = hsv.v() - fC; + + ColorRGB32 rgb32; + if(0 <= fHPrime && fHPrime < 1) { + rgb32.r() = fC; + rgb32.g() = fX; + rgb32.b() = 0; + } else if(1 <= fHPrime && fHPrime < 2) { + rgb32.b() = fX; + rgb32.g() = fC; + rgb32.r() = 0; + } else if(2 <= fHPrime && fHPrime < 3) { + rgb32.b() = 0; + rgb32.g() = fC; + rgb32.r() = fX; + } else if(3 <= fHPrime && fHPrime < 4) { + rgb32.b() = 0; + rgb32.g() = fX; + rgb32.r() = fC; + } else if(4 <= fHPrime && fHPrime < 5) { + rgb32.b() = fX; + rgb32.g() = 0; + rgb32.r() = fC; + } else if(5 <= fHPrime && 
fHPrime < 6) { + rgb32.b() = fC; + rgb32.g() = 0; + rgb32.r() = fX; + } else { + rgb32.b() = 0; + rgb32.g() = 0; + rgb32.r() = 0; + } + + rgb32.b() += fM; + rgb32.g() += fM; + rgb32.r() += fM; + return rgb32; +} + +auto Convert::to_hsv(const ColorRGBA8 &rgba) noexcept -> ColorHSV{ + return Convert::to_hsv(ColorRGB32(Convert::to_rgba32(rgba).rgb())); +} diff --git a/cpp-projects/base/graphics/color.hpp b/cpp-projects/base/graphics/color.hpp new file mode 100644 index 0000000..27785c7 --- /dev/null +++ b/cpp-projects/base/graphics/color.hpp @@ -0,0 +1,222 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "geometry/point3.hpp" +#include "geometry/point4.hpp" + +namespace tool{ + +/** + * @brief ColorHSV + * h Hue: [0, 360] + * s Saturation: [0, 1] + * v Value: [0, 1] + */ +struct ColorHSV{ + + ColorHSV() = default; + explicit constexpr ColorHSV(float h, float s, float v) : m_col({h,s,v}){} + + inline auto h() noexcept -> float& {return m_col.x();} + constexpr auto h() const noexcept -> float {return m_col.x();} + + inline auto s() noexcept -> float& {return m_col.y();} + constexpr auto s() const noexcept -> float {return m_col.y();} + + inline auto v() noexcept -> float& {return m_col.z();} + constexpr auto v() const noexcept -> float {return m_col.z();} + +private: + geo::Pt3f m_col = {0.f,0.f,0.f}; +}; + +/** + * @brief ColorRGB8 + * r Red: [0, 255] + * g Green: [0, 255] + * b Blue: [0, 255] + */ +struct ColorRGB8{ + + ColorRGB8() = default; + ColorRGB8(const ColorRGB8& other) = default; + ColorRGB8& operator=(const ColorRGB8& other) = default; + ColorRGB8(ColorRGB8&& other) = default; + ColorRGB8& operator=(ColorRGB8&& other) = default; + + constexpr explicit ColorRGB8(std::uint8_t r, std::uint8_t g, std::uint8_t b) : m_col({r,g,b}){} + constexpr explicit ColorRGB8(const geo::Pt3 &rgb) : m_col(rgb){} + + inline auto r() noexcept -> std::uint8_t& {return m_col.x();} + constexpr auto r() const noexcept -> std::uint8_t {return m_col.x();} + + inline auto g() noexcept -> std::uint8_t& {return m_col.y();} + constexpr auto g() const noexcept -> std::uint8_t {return m_col.y();} + + inline auto b() noexcept -> std::uint8_t& {return m_col.z();} + constexpr auto b() const noexcept -> std::uint8_t {return m_col.z();} + + inline auto rgb() noexcept -> geo::Pt3& {return m_col;} + constexpr auto rgb() const noexcept -> geo::Pt3 {return m_col;} + +private: + geo::Pt3 m_col = {0,0,0}; +}; + +/** + * @brief ColorRGB8 + * r Red: [0, 255] + * g Green: 
[0, 255] + * b Blue: [0, 255] + */ +struct ColorRGBA8{ + + ColorRGBA8() = default; + ColorRGBA8(const ColorRGBA8& other) = default; + ColorRGBA8& operator=(const ColorRGBA8& other) = default; + ColorRGBA8(ColorRGBA8&& other) = default; + ColorRGBA8& operator=(ColorRGBA8&& other) = default; + + constexpr explicit ColorRGBA8(std::uint8_t r, std::uint8_t g, std::uint8_t b, std::uint8_t a) : m_col({r,g,b, a}){} + constexpr explicit ColorRGBA8(const geo::Pt4 &rgba) : m_col(rgba){} + + inline auto r() noexcept -> std::uint8_t& {return m_col.x();} + constexpr auto r() const noexcept -> std::uint8_t {return m_col.x();} + + inline auto g() noexcept -> std::uint8_t& {return m_col.y();} + constexpr auto g() const noexcept -> std::uint8_t {return m_col.y();} + + inline auto b() noexcept -> std::uint8_t& {return m_col.z();} + constexpr auto b() const noexcept -> std::uint8_t {return m_col.z();} + + inline auto a() noexcept -> std::uint8_t& {return m_col.w();} + constexpr auto a() const noexcept -> std::uint8_t {return m_col.w();} + + constexpr auto rgb() const noexcept -> geo::Pt3 {return m_col.xyz();} + + inline auto rgba() noexcept -> geo::Pt4& {return m_col;} + constexpr auto rgba() const noexcept -> geo::Pt4 {return m_col;} + +private: + geo::Pt4 m_col = {0,0,0,0}; +}; + + +/** + * @brief ColorRGB32 + * r Red: [0, 1] + * g Green: [0, 1] + * b Blue: [0, 1] + */ +struct ColorRGB32{ + + ColorRGB32() = default; + ColorRGB32(const ColorRGB32& other) = default; + ColorRGB32& operator=(const ColorRGB32& other) = default; + ColorRGB32(ColorRGB32&& other) = default; + ColorRGB32& operator=(ColorRGB32&& other) = default; + + constexpr explicit ColorRGB32(float r, float g, float b) : m_col({r,g,b}){} + constexpr explicit ColorRGB32(const geo::Pt3f &rgb) : m_col(rgb){} + + inline auto r() noexcept -> float& {return m_col.x();} + constexpr auto r() const noexcept -> float {return m_col.x();} + + inline auto g() noexcept -> float& {return m_col.y();} + constexpr auto g() const noexcept 
-> float {return m_col.y();} + + inline auto b() noexcept -> float& {return m_col.z();} + constexpr auto b() const noexcept -> float {return m_col.z();} + + inline auto rgb() noexcept -> geo::Pt3f& {return m_col;} + constexpr auto rgb() const noexcept -> geo::Pt3f {return m_col;} + + inline auto clamp(float min, float max) -> void{ + m_col = { + std::clamp(m_col(0), min, max), + std::clamp(m_col(1), min, max), + std::clamp(m_col(2), min, max), + }; + } + +private: + geo::Pt3f m_col = {0.f,0.f,0.f}; +}; + +/** + * @brief ColorRGB32 + * r Red: [0, 1] + * g Green: [0, 1] + * b Blue: [0, 1] + * a Transparency: [0, 1] + */ +struct ColorRGBA32{ + + ColorRGBA32() = default; + ColorRGBA32(const ColorRGBA32& other) = default; + ColorRGBA32& operator=(const ColorRGBA32& other) = default; + ColorRGBA32(ColorRGBA32&& other) = default; + ColorRGBA32& operator=(ColorRGBA32&& other) = default; + + constexpr explicit ColorRGBA32(float r, float g, float b, float a) : m_col({r,g,b, a}){} + constexpr explicit ColorRGBA32(const geo::Pt4f &rgba) : m_col(rgba){} + + inline auto r() noexcept -> float& {return m_col.x();} + constexpr auto r() const noexcept -> float {return m_col.x();} + + inline auto g() noexcept -> float& {return m_col.y();} + constexpr auto g() const noexcept -> float {return m_col.y();} + + inline auto b() noexcept -> float& {return m_col.z();} + constexpr auto b() const noexcept -> float {return m_col.z();} + + inline auto a() noexcept -> float& {return m_col.w();} + constexpr auto a() const noexcept -> float {return m_col.w();} + + constexpr auto rgb() const noexcept -> geo::Pt3f {return m_col.xyz();} + + inline auto rgba() noexcept -> geo::Pt4f& {return m_col;} + constexpr auto rgba() const noexcept -> geo::Pt4f {return m_col;} + +private: + geo::Pt4f m_col = {0.f,0.f,0.f, 0.f}; +}; + +struct Convert{ + static constexpr auto to_rgba32(const ColorRGBA8 &rgba8) noexcept -> ColorRGBA32{ + return ColorRGBA32{geo::Pt4f(rgba8.rgba().conv())/255.f}; + } + + static auto 
to_hsv(const ColorRGB32 &rgbf) noexcept -> ColorHSV; + static auto to_hsv(const ColorRGBA8 &rgba) noexcept -> ColorHSV; + static auto to_rgb32(const ColorHSV &hsv) noexcept -> ColorRGB32; +}; + +} diff --git a/cpp-projects/base/graphics/colors.hpp b/cpp-projects/base/graphics/colors.hpp new file mode 100644 index 0000000..4e88f3b --- /dev/null +++ b/cpp-projects/base/graphics/colors.hpp @@ -0,0 +1,36 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "utility/buffer_vector.hpp" +//#include "color.hpp" +#include "geometry/point3.hpp" + +namespace tool { + using ColorsRGB32 = Buffer; +} diff --git a/cpp-projects/base/graphics/light.hpp b/cpp-projects/base/graphics/light.hpp new file mode 100644 index 0000000..0556f2e --- /dev/null +++ b/cpp-projects/base/graphics/light.hpp @@ -0,0 +1,43 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
/**
 * @brief Description of one light: its position and its ambient, diffuse and
 *        specular intensities.
 *
 * NOTE(review): no default member initializers are given here; whether the
 * fields start zeroed depends on the geo point types — confirm before reading
 * a default-constructed LightInfo.
 */
struct LightInfo{
    geo::Pt4f Position; // light position in eye coords
    geo::Vec3f La; // ambient light intensity
    geo::Vec3f Ld; // diffuse light intensity
    geo::Vec3f Ls; // specular light intensity
};
/**
 * @brief Reflectivity coefficients of a surface (ambient, diffuse, specular)
 *        plus its specular shininess factor.
 *
 * NOTE(review): the offsets in the comments below (0/16/32/44, total size 48)
 * look like a layout where each 3-component vector starts on a 16-byte
 * boundary — presumably they mirror a shader uniform block. Whether the C++
 * struct really has that layout depends on the size and alignment of
 * geo::Vec3f, which is not visible here: confirm (e.g. static_assert on
 * sizeof/offsetof) before copying this struct into a GPU buffer as-is.
 */
struct MaterialInfo {

    // [size: 48]
    geo::Vec3f Ka; // Ambient reflectivity [offset: 0]
    geo::Vec3f Kd; // Diffuse reflectivity [offset: 16]
    geo::Vec3f Ks; // Specular reflectivity [offset: 32]
    float Shininess; // Specular shininess factor [offset: 44]
};
of reflected light +}; + +} + diff --git a/cpp-projects/base/graphics/model.cpp b/cpp-projects/base/graphics/model.cpp new file mode 100644 index 0000000..a344f6c --- /dev/null +++ b/cpp-projects/base/graphics/model.cpp @@ -0,0 +1,102 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + + +#include "model.hpp" + +// std +#include + +// local +#include "utility/logger.hpp" + +auto tool::graphics::Model::get_all_tree_models() -> std::vector{ + + std::vector models; + models.push_back(this); + + for(const auto &child : children){ + auto childModels = child->get_all_tree_models(); + models.insert(std::end(models), std::begin(childModels), std::end(childModels)); + } + + return models; +} + +auto tool::graphics::Model::get_models_with_no_children(std::vector &models) -> void{ + + if(children.size() == 0){ + models.push_back(this); + return; + } + + for(auto &child : children){ + child->get_models_with_no_children(models); + } +} + +auto tool::graphics::Model::display_hierarchy() const -> void{ + + Logger::message(std::format("{} with name [{}] has [{}] meshes, [{}] animations and [{}] sub nodes.\n"sv, + parent != nullptr ? "Node"sv : "Root node"sv, + name, + gmeshes.size(), + animations.size(), + children.size()) + ); + if(gmeshes.size() > 0){ + Logger::message("#Meshes:\n"); + size_t id = 0; + for(const auto &gmesh : gmeshes){ + Logger::message(std::format("subMesh: [{}]\n", id++)); + const geo::Mesh *mesh = &gmesh->mesh; + Logger::message(std::format("\tvertices: [{}]\n\tnormals: [{}]\n\ttriangles: [{}]\n\ttextures coords: [{}]\n\ttangents: [{}]\n\tbones: [{}]\n"sv, + mesh->vertices.size(), + mesh->normals.size(), + mesh->triIds.size(), + mesh->tCoords.size(), + mesh->tangents.size(), + mesh->bones.size() + )); + } + } + if(animations.size() > 0){ + Logger::message("#Animations:\n"); + size_t id = 0; + for(const auto &animation : animations){ + Logger::message(std::format("\tanimation: [{}][{}]\n\tduration: [{}]\n\tticks per second: [{}]\n"sv, + id++, + animation.name, + animation.duration, + animation.ticksPerSecond + )); + } + } + + for(const auto &child : children){ + child->display_hierarchy(); + } +} diff --git a/cpp-projects/base/graphics/model.hpp 
b/cpp-projects/base/graphics/model.hpp new file mode 100644 index 0000000..90c62d6 --- /dev/null +++ b/cpp-projects/base/graphics/model.hpp @@ -0,0 +1,392 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. 
** +** ** +********************************************************************************/ + +#pragma once + +// std +#include + +// base +#include "geometry/matrix4.hpp" +#include "geometry/quaternion.hpp" + +// local +#include "geometry/mesh.hpp" +#include "texture.hpp" +#include "material.hpp" + +// assimp +#include "assimp/scene.h" +#include "assimp/matrix4x4.h" + +namespace tool::graphics { + +template +struct GMesh{ + std::string name; + Material *material = nullptr; + geo::Mesh mesh; +}; + +struct BoneInfo{ +// geo::Mat4f offset = geo::Mat4f::identity(); + aiMatrix4x4t offset; + geo::Mat4f final = geo::Mat4f::identity(); +}; + +struct AnimationKeys{ + + std::vector positionTimes; + std::vector positionKeys; + + std::vector rotationTimes; + std::vector rotationKeys; + + std::vector scalingTimes; + std::vector scalingKeys; + + + auto find_position(float time) const noexcept -> size_t{ + for(size_t ii = 0; ii < positionTimes.size()-1; ++ii){ + if(time < static_cast(positionTimes[ii+1])){ + return ii; + } + } + return 0; + } + + auto find_rotation(float time) const noexcept -> size_t{ + for(size_t ii = 0; ii < rotationTimes.size()-1; ++ii){ + if(time < static_cast(rotationTimes[ii+1])){ + return ii; + } + } + return 0; + } + + auto find_scaling(float time) const noexcept -> size_t{ + for(size_t ii = 0; ii < scalingTimes.size()-1; ++ii){ + if(time < static_cast(scalingTimes[ii+1])){ + return ii; + } + } + return 0; + } + + auto calculate_interpolated_scaling(float time) const noexcept -> geo::Pt3f{ + + if(scalingKeys.empty()){ + return {}; + } + + if(scalingKeys.size() == 1){ + return scalingKeys[0]; + } + + size_t scalingId =find_scaling(time); + size_t nextScalingIndex = scalingId +1; + + float deltaTime = static_cast(scalingTimes[nextScalingIndex] - scalingTimes[scalingId]); + float factor = (time - static_cast(scalingTimes[scalingId]))/deltaTime; + + geo::Pt3f start = scalingKeys[scalingId]; + geo::Pt3f end = scalingKeys[nextScalingIndex]; + geo::Pt3f delta 
= end - start; + + return start + delta * factor; + } + + auto calculate_interpolated_position(float time) const noexcept -> geo::Pt3f{ + + if(positionKeys.empty()){ + return {}; + } + + if(positionKeys.size() == 1){ + return positionKeys[0]; + } + + size_t positionId = find_position(time); + size_t nextPositionIndex = positionId +1; + + float deltaTime = static_cast(positionTimes[nextPositionIndex] - positionTimes[positionId]); + float factor = (time - static_cast(positionTimes[positionId]))/deltaTime; + + geo::Pt3f start = positionKeys[positionId]; + geo::Pt3f end = positionKeys[nextPositionIndex]; + geo::Pt3f delta = end - start; + + return start + delta * factor; + } + + auto calculate_interpolated_rotation( float time) -> geo::Quatf{ + + if(rotationKeys.empty()){ + return {}; + } + + if(rotationKeys.size() == 1){ + return rotationKeys[0]; + } + + size_t rotationId = find_rotation(time); + size_t nextRotationIndex = rotationId +1; + + float deltaTime = static_cast(rotationTimes[nextRotationIndex] - rotationTimes[rotationId]); + float factor = (time - static_cast(rotationTimes[rotationId]))/deltaTime; + + geo::Quatf start = rotationKeys[rotationId]; + geo::Quatf end = rotationKeys[nextRotationIndex]; + return normalize(slerp(start, end, factor)); + } +}; + +struct Animation{ + std::string name; + double duration; + double ticksPerSecond; +}; + + + +struct BonesHierarchy{ + std::string boneName; +// geo::Mat4f tr; + aiMatrix4x4t tr; + std::vector children; +}; + + +class Model{ + +public: + + // info + std::string name; + std::string directory = ""; + + // meshes + std::vector>> gmeshes; + + // materials + std::vector materials; + + // textures + std::unordered_map textures; + + // animations + std::vector animations; + std::unordered_map> animationsKeys; + + // bones + std::vector bonesInfo; + std::map bonesMapping; + BonesHierarchy bonesHierachy; + + aiMatrix4x4t globalInverseTr; + geo::Mat4f globalInverseTr2; + + // transform + geo::Mat4f transform; + + // 
hierarchy + Model *parent = nullptr; + std::vector> children; + +public: + + Model() = default; + Model(Model *parentModel) : parent(parentModel){} + + constexpr auto is_root() const noexcept -> bool{return parent == nullptr;} + + auto display_hierarchy() const -> void; + auto get_all_tree_models() -> std::vector; + auto get_models_with_no_children(std::vector &models) -> void; + + void clean(){ + + std::vector childrenToRemove; + for(size_t ii = 0; ii < children.size(); ++ii){ + if(children[ii]->gmeshes.size() == 0 && children[ii]->children.size() == 0 && children[ii]->animations.size() == 0){ + childrenToRemove.push_back(ii); + } + } + + std::reverse(std::begin(childrenToRemove), std::end(childrenToRemove)); + for(const auto &id : childrenToRemove){ + children.erase(std::begin(children) + static_cast(id)); + } + + if(gmeshes.size() == 0 && children.size() == 0 && animations.size() == 0){ + parent->clean(); + } + } + + + + + + + + void get_bones_transform(BonesHierarchy *bh, std::string_view animationName, float time, const aiMatrix4x4t &parentTr){ + + auto nodeTr = bh->tr; + + if(animationsKeys.count(animationName) != 0){ + + if(animationsKeys[animationName].count(bh->boneName) != 0){ + + AnimationKeys *keys = &animationsKeys[animationName][bh->boneName]; + + // compute interpolation + geo::Pt3f scaling = keys->calculate_interpolated_scaling(time); + geo::Quatf rotation = keys->calculate_interpolated_rotation(time); + geo::Pt3f position = keys->calculate_interpolated_position(time); + + aiVector3D Scaling; + Scaling.x = scaling.x(); + Scaling.y = scaling.y(); + Scaling.z = scaling.z(); + + aiQuaternion Rotation; + Rotation.x = rotation.x; + Rotation.y = rotation.y; + Rotation.z = rotation.z; + Rotation.w = rotation.w; + + aiVector3D Position; + Position.x = position.x(); + Position.y = position.y(); + Position.z = position.z(); + + nodeTr = aiMatrix4x4t(Scaling, Rotation, Position); + + +// aiVector3t aiTr,aiRot,aiSc; +// q.Decompose(aiSc,aiRot,aiTr); +// auto r 
= geo::Pt3f{aiRot.x,aiRot.y,aiRot.z}; +// nodeTr = geo::Mat4f::transform( +// geo::Pt3f{aiSc.x,aiSc.y,aiSc.z}, +//// geo::Pt3f{rad_2_deg(r.x()),rad_2_deg(r.y()),rad_2_deg(r.z())}, +// geo::Pt3f{(r.x()),(r.y()),(r.z())}, +// geo::Pt3f{aiTr.x,aiTr.y,aiTr.z} +// ); + + +// const auto &m = q;//.Transpose(); +// nodeTr = geo::Mat4f +// { +// m.a1,m.a2,m.a3,m.a4, +// m.b1,m.b2,m.b3,m.b4, +// m.c1,m.c2,m.c3,m.c4, +// m.d1,m.d2,m.d3,m.d4, +// }; + + + } + } + + auto globalTr = parentTr * nodeTr; +// geo::Mat4f globalTr = nodeTr*parentTr; + if(animationsKeys.count(animationName) != 0){ + + std::string bName = bh->boneName; +// std::cout << "----------> " << bName << " "; +// if(bonesMapping.count(bName) != 0){ + + +// } +// auto pos = std::find(std::begin(bonesMapping)); +// if (pos != std::string::npos) { +// bName.erase(pos, bName.size()-pos); +// } + + if(bonesMapping.count(bName) != 0){ +// std::cout << "find bone " << bName << "\n"; + + auto id = bonesMapping[bName]; + auto m = globalInverseTr* globalTr * bonesInfo[id].offset; + +// bonesInfo[id].final = +// { +// m.a1,m.a2,m.a3,m.a4, +// m.b1,m.b2,m.b3,m.b4, +// m.c1,m.c2,m.c3,m.c4, +// m.d1,m.d2,m.d3,m.d4, +// }; + bonesInfo[id].final = + { + m.a1,m.b1,m.c1,m.d1, + m.a2,m.b2,m.c2,m.d2, + m.a3,m.b3,m.c3,m.d3, + m.a4,m.b4,m.c4,m.d4, + }; + +// std::cout << "final " << bonesInfo[id].final << "\n"; + +// bonesInfo[id].final = globalInverseTr.value()* globalTr * bonesInfo[id].offset; +// bonesInfo[id].final = globalInverseTr.value();//* bonesInfo[id].offset; + +// bonesInfo[id].final = bonesInfo[id].offset* globalTr* globalInverseTr.value(); +// std::cout << "final: " << id << " " << bonesInfo[id].final << "\n"; + }else{ +// std::cerr << "Cannot find bone " << bName << " in mapping: " << bonesMapping.size() << "\n"; + } + } + + for(auto &b : bh->children){ + get_bones_transform(&b, animationName, time, globalTr); + } + } + + + auto update_animation(std::string_view animationName, float time) -> bool{ + + const Animation 
*animationP = nullptr; + for(const auto &animation : animations){ + if(animation.name == animationName){ + animationP = &animation; + break; + } + } + if(animationP == nullptr){ + return false; + } + + float TicksPerSecond = almost_equal(animationP->ticksPerSecond, 0.0) ? static_cast(animationP->ticksPerSecond) : 25.f; + float TimeInTicks = time * TicksPerSecond; + float AnimationTime = fmod(TimeInTicks, static_cast(animationP->duration)); + + // retrieve bones transforms + aiMatrix4x4t m; + get_bones_transform(&bonesHierachy, animationName, AnimationTime, m); + return true; + } +}; +} + diff --git a/cpp-projects/base/graphics/screen.hpp b/cpp-projects/base/graphics/screen.hpp new file mode 100644 index 0000000..a297483 --- /dev/null +++ b/cpp-projects/base/graphics/screen.hpp @@ -0,0 +1,79 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// local +#include "geometry/point2.hpp" + +namespace tool::graphics{ + +class Screen{ + +public: + + constexpr Screen(unsigned int width = 800, unsigned int height = 800, int xPos = 0, int yPos = 0) : m_size(width,height), m_position(xPos,yPos){ + } + + constexpr auto resize(unsigned int width, unsigned int height) noexcept -> void{ + m_size = {width, height}; + } + + constexpr auto width() const noexcept -> unsigned int{ + return m_size.x(); + } + + constexpr auto height() const noexcept -> unsigned int{ + return m_size.y(); + } + + constexpr auto size_pixels() const noexcept -> unsigned int{ + return m_size.x() * m_size.y(); + } + + constexpr auto size() const noexcept -> geo::Pt2{ + return m_size; + } + + constexpr auto x_pos() const noexcept -> int{ + return m_position.x(); + } + + constexpr auto y_pos() const noexcept -> int{ + return m_position.y(); + } + + constexpr auto position() const noexcept -> geo::Pt2{ + return m_position; + } + +private: + geo::Pt2 m_size = {800,800}; + geo::Pt2 m_position = {0,0}; +}; +} + diff --git a/cpp-projects/base/graphics/texture.cpp b/cpp-projects/base/graphics/texture.cpp new file mode 100644 index 0000000..ca7ae11 --- /dev/null +++ b/cpp-projects/base/graphics/texture.cpp @@ -0,0 +1,279 @@ + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal 
in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#include "texture.hpp" + +// std +#include +#include +#include + +// stb_image +#include "thirdparty/stb/stb_image.h" +#include "thirdparty/stb/stb_image_write.h" + +using namespace tool::graphics; + +Texture2D::Texture2D(const std::string &pathTexture, bool flip, int targetNbChannels) : path(pathTexture), m_flipped(flip){ + if(!load_2d_image_file_data(pathTexture, m_flipped, targetNbChannels)){ + std::cerr << "[Texture2D] Error during file texture loading.\n"; + } +} + +void Texture::copy_2d_data(int width, int height, int nbChannels, unsigned char *data){ + m_data.resize(width*height*nbChannels); + m_sizes = {width, height, 1}; + m_nbChannels = nbChannels; + m_hdr = false; + std::copy(data, data + width*height*m_nbChannels, std::begin(m_data)); +} + +void Texture::copy_2d_data(int width, int height, int nbChannels, const std::vector &data){ + if(data.size() < static_cast(width*height*nbChannels)){ + std::cerr << 
"[Texture2D] Cannot copy data to texture, invalid array size.\n"; + return; + } + m_sizes = {width, height, 1}; + m_nbChannels = nbChannels; + m_hdr = false; + m_data = data; +} + +void Texture::copy_2d_data(int width, int height, const std::vector > &data){ + + if(data.size() < static_cast(width*height)){ + std::cerr << "[Texture2D] Cannot copy data to texture, invalid array size.\n"; + return; + } + m_sizes = {width, height, 1}; + m_nbChannels = 3; + m_hdr = false; + + m_data.resize(static_cast(width*height*m_nbChannels)*4); + std::copy(std::begin(data), std::end(data), reinterpret_cast*>(m_data.data())); +} + +void Texture::copy_2d_data(int width, int height, int nbChannels, const std::vector &data){ + if(data.size() < static_cast(width*height*nbChannels)){ + std::cerr << "[Texture2D] Cannot copy data to texture, invalid array size.\n"; + return; + } + m_sizes = {width, height, 1}; + m_nbChannels = nbChannels; + m_hdr = true; + + m_data.resize(static_cast(width*height*m_nbChannels)*4); + std::copy(std::begin(data), std::end(data), reinterpret_cast(m_data.data())); +} + +void Texture::copy_2d_data(int width, int height, const std::vector> &data){ + + if(data.size() < static_cast(width*height)){ + std::cerr << "[Texture2D] Cannot copy data to texture, invalid array size.\n"; + return; + } + + m_sizes = {width, height, 1}; + m_nbChannels = 3; + m_hdr = true; + + m_data.resize(static_cast(width*height*m_nbChannels)*4); + std::copy(std::begin(data), std::end(data), reinterpret_cast*>(m_data.data())); +} + + +bool Texture::load_2d_image_file_data(const std::string &pathTexture, bool flip, int targetNbChannels){ + + if(targetNbChannels <= 0 || targetNbChannels > 4){ + std::cerr << "[STB] Invalid input target nb of channels with texture: " << pathTexture << "\n"; + return false; + } + + // set flipping + stbi_set_flip_vertically_on_load(flip); + + // N=#comp components + // 1 grey + // 2 grey, alpha + // 3 red, green, blue + // 4 red, green, blue, alpha + + int width; + 
int height; + + // check extension + namespace fs = std::filesystem; + if(fs::path(pathTexture).extension() == u8".hdr"){ + + m_hdr = true; + + float *loadedDataF = nullptr; + + // load data with stb + loadedDataF = stbi_loadf(pathTexture.c_str(), &width, &height, &m_nbChannels, targetNbChannels); + if(!loadedDataF){ + std::cerr << "[STB] Cannot load texture: " << pathTexture << "\n"; + return false; + } + + m_data.resize(static_cast(width*height*targetNbChannels)*4); + std::copy(loadedDataF, loadedDataF + width*height*targetNbChannels, reinterpret_cast(m_data.data())); + + // clean + stbi_image_free(loadedDataF); + }else{ + + unsigned char *loadedData = nullptr; + + // load data with stb + loadedData = stbi_load(pathTexture.c_str(), &width, &height, &m_nbChannels, targetNbChannels); + if(!loadedData){ + std::cerr << "[STB] Cannot load texture: " << pathTexture << "\n"; + return false; + } + + m_data.resize(static_cast(width*height*targetNbChannels)); + std::copy(loadedData, loadedData + width*height*targetNbChannels, m_data.data()); + + // clean + stbi_image_free(loadedData); + } + + m_sizes = {width, height, 1}; + if(targetNbChannels == 0){ + targetNbChannels = m_nbChannels; + } + +// std::cout << "tex: " << pathTexture << " " << width << " " << height << " " << m_nbChannels << " " << targetNbChannels << " " << m_data.size() << " " << is_hdr()<< "\n"; + // update channels nb + m_nbChannels = targetNbChannels; + + return true; +} + +bool Texture::write_2d_image_file_data(const std::string &pathTexture){ + + // check extension + namespace fs = std::filesystem; + const auto path = fs::path(pathTexture); + const auto parentDir = path.parent_path(); + if(!fs::exists(parentDir)){ + std::cerr << "[STB] Cannot save texture at path " << pathTexture << ", invalid path.\n"; + return false; + } + + const auto extension = path.extension(); + const auto filePath = path.string(); + + int res = 0; + if(extension == u8".png"){ + if (!m_hdr){ + res = 
stbi_write_png(filePath.c_str(), m_sizes[0], m_sizes[1], m_nbChannels, m_data.data(), m_sizes[0]*m_nbChannels); + }else{ + std::vector data(m_sizes[0]*m_sizes[1]*m_nbChannels); + auto hdrData = reinterpret_cast(m_data.data()); + for(size_t ii = 0; ii < data.size(); ++ii){ + data[ii] = static_cast(hdrData[ii]*255); + } + res = stbi_write_png(filePath.c_str(), m_sizes[0], m_sizes[1], m_nbChannels, data.data(), m_sizes[0]*m_nbChannels); + } + }else if(extension == u8".bmp"){ + if (!m_hdr){ + res = stbi_write_bmp(filePath.c_str(), m_sizes[0], m_sizes[1], m_nbChannels, m_data.data()); + }else{ + std::vector data(m_sizes[0]*m_sizes[1]*m_nbChannels); + auto hdrData = reinterpret_cast(m_data.data()); + for(size_t ii = 0; ii < data.size(); ++ii){ + data[ii] = static_cast(hdrData[ii]*255); + } + res = stbi_write_bmp(filePath.c_str(), m_sizes[0], m_sizes[1], m_nbChannels, data.data()); + } + }else if(extension == u8".tga"){ + if (!m_hdr){ + res = stbi_write_tga(filePath.c_str(), m_sizes[0], m_sizes[1], m_nbChannels, m_data.data()); + }else{ + std::vector data(m_sizes[0]*m_sizes[1]*m_nbChannels); + auto hdrData = reinterpret_cast(m_data.data()); + for(size_t ii = 0; ii < data.size(); ++ii){ + data[ii] = static_cast(hdrData[ii]*255); + } + res = stbi_write_tga(filePath.c_str(), m_sizes[0], m_sizes[1], m_nbChannels, data.data()); + } + }else if(extension == u8".jpg"){ + if (!m_hdr){ + res = stbi_write_jpg(filePath.c_str(), m_sizes[0], m_sizes[1], m_nbChannels, m_data.data(), 100); + }else{ + std::vector data(m_sizes[0]*m_sizes[1]*m_nbChannels); + auto hdrData = reinterpret_cast(m_data.data()); + for(size_t ii = 0; ii < data.size(); ++ii){ + data[ii] = static_cast(hdrData[ii]*255); + } + res = stbi_write_jpg(filePath.c_str(), m_sizes[0], m_sizes[1], m_nbChannels, data.data(), 100); + } + }else if(extension == u8".hdr"){ + if (!m_hdr){ + std::vector data(m_sizes[0]*m_sizes[1]*m_nbChannels); + for(size_t ii = 0; ii < m_data.size(); ++ii){ + data[ii] = static_cast(m_data[ii])/255.f; 
+ } + res = stbi_write_hdr(filePath.c_str(), m_sizes[0], m_sizes[1], m_nbChannels, reinterpret_cast(data.data())); + }else{ + res = stbi_write_hdr(filePath.c_str(), m_sizes[0], m_sizes[1], m_nbChannels, reinterpret_cast(m_data.data())); + } + }else{ + std::cerr << "[STB] Cannot save texture at path " << pathTexture << ", extension not managed.\n"; + return false; + } + + if(res == 0){ + std::cerr << "[STB] Fail to save texture at path " << pathTexture << ".\n"; + return false; + } + + return true; +} + +bool CubeMap::load_2d_images_files(const std::array &pathTextures, bool flip, int targetNbChannels){ + + for(size_t ii = 0; ii < pathTextures.size(); ++ii){ + if(!textures[ii].load_2d_image_file_data(pathTextures[ii].v, flip, targetNbChannels)){ + std::cerr << "[CubeMap] Error during files textures loading.\n"; + return false; + } + } + return true; +} + +bool CubeMap::load_2d_images_files(const std::string &basePath, std::array &extensions, bool flip, int targetNbChannels){ + + for(size_t ii = 0; ii < extensions.size(); ++ii){ + if(!textures[ii].load_2d_image_file_data(basePath + extensions[ii], flip, targetNbChannels)){ + std::cerr << "[CubeMap] Error during files textures loading.\n"; + return false; + } + } + return true; +} diff --git a/cpp-projects/base/graphics/texture.hpp b/cpp-projects/base/graphics/texture.hpp new file mode 100644 index 0000000..6b22a4f --- /dev/null +++ b/cpp-projects/base/graphics/texture.hpp @@ -0,0 +1,264 @@ + + +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, 
and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ + +#pragma once + +// std +#include +#include + +// local +#include "utility/array.hpp" +#include "utility/vector.hpp" +#include "utility/tuple_array.hpp" +#include "geometry/point2.hpp" +#include "geometry/point3.hpp" +#include "geometry/point4.hpp" + +namespace tool::graphics{ + +using namespace std::literals::string_view_literals; + +enum class TextureType : std::int8_t { + diffuse, // The texture is combined with the result of the diffuse lighting equation. + ambiant, // The texture is combined with the result of the ambient lighting equation. + specular, // The texture is combined with the result of the specular lighting equation. + normal, // The texture is a (tangent space) normal-map. + height, // The texture is a height map. By convention, higher gray-scale values stand for higher elevations from the base height. + light_map, // Lightmap texture (aka Ambient Occlusion). The texture contains a scaling value for the final color value of a pixel. Its intensity is not affected by incoming light. + opacity, // The texture defines per-pixel opacity. 
+ emissive, // The texture is added to the result of the lighting calculation. It isn't influenced by incoming light. + shininess, // The texture defines the glossiness of the material. The glossiness is in fact the exponent of the specular (phong) lighting equation. + reflection, // Contains the color of a perfect mirror reflection. Rarely used, almost never for real-time applications. + displacement, // Displacement texture. + SizeEnum +}; + +enum class TextureOperation : std::int8_t{ + multiply, // T = T1 * T2 + add, // T = T1 + T2 + substract, // T = T1 - T2 + divide, // T = T1 / T2 + smooth_add, // T = (T1 + T2) - (T1 * T2) + singed_add, // T = T1 + (T2-0.5) + SizeEnum +}; + +enum class TextureMapping : std::int8_t{ + UV, // mapping coordinates are taken from an UV channel + sphere, // spherical mapping + cylinder, // cylindrical mapping + box, // cubic mapping + plane, // planar mapping + other, // undefined + SizeEnum +}; + +enum class TextureMapMode : std::int8_t{ + wrap, // texture coordinate u|v is translated to u%1|v%1 + clamp, // texture coordinates outside [0...1] are clamped to the nearest valid value. + decal, // if the texture coordinates for a pixel are outside [0...1] the texture is not applied to that pixel + mirro, // a texture coordinate u|v becomes u%1|v%1 if (u-(u%1))%2 is zero and 1-(u%1)|1-(v%1) otherwise + SizeEnum +}; + +enum class TextureWrapMode : std::int8_t{ + clamp_to_edge, // Causes s coordinates to be clamped to the range [12N,1−12N], + // where N is the size of the texture in the direction of clamping. + clamp_to_border, // Evaluates s coordinates in a similar manner to GL_CLAMP_TO_EDGE. + // However, in cases where clamping would have occurred in GL_CLAMP_TO_EDGE mode, + // the fetched texel data is substituted with the values specified by GL_TEXTURE_BORDER_COLOR. 
+ mirrored_repeat, // Causes the s coordinate to be set to the fractional part of the texture coordinate if the integer part of s is even; + // if the integer part of s is odd, then the s texture coordinate is set to 1−frac(s), where frac(s) represents the fractional part of s. + repeat, // Causes the integer part of the s coordinate to be ignored; the GL uses only the fractional part, thereby creating a repeating pattern. + mirror_clamp_to_edge, // Causes the scoordinate to be repeated as for GL_MIRRORED_REPEAT for one repetition of the texture, at which point the coordinate + SizeEnum // to be clamped as in GL_CLAMP_TO_EDGE. Initially, GL_TEXTURE_WRAP_S is set to GL_REPEAT. +}; + +enum class TextureMode : std::int8_t{ + texture_1d, // Images in this texture all are 1-dimensional. They have width, but no height or depth. + texture_2d, // Images in this texture all are 2-dimensional. They have width and height, but no depth. + texture_3d, // Images in this texture all are 3-dimensional. They have width, height, and depth. + rectangle, // The image in this texture (only one image. No mipmapping) is 2-dimensional. Texture coordinates used for these textures are not normalized. + buffer, // The image in this texture (only one image. No mipmapping) is 1-dimensional. The storage for this data comes from a Buffer Object. + cubemap, // There are exactly 6 distinct sets of 2D images, all of the same size. They act as 6 faces of a cube. + texture_1d_array, // Images in this texture all are 1-dimensional. However, it contains multiple sets of 1-dimensional images, all within one texture. The array length is part of the texture's size. + texture_2d_array, // Images in this texture all are 2-dimensional. However, it contains multiple sets of 2-dimensional images, all within one texture. The array length is part of the texture's size. + cubemap_array, // Images in this texture are all cube maps. It contains multiple sets of cube maps, all within one texture. 
The array length * 6 (number of cube faces) is part of the texture size. + texture_2d_multisample, // The image in this texture (only one image. No mipmapping) is 2-dimensional. Each pixel in these images contains multiple samples instead of just one value. + texture_2d_multisample_array,// Combines 2D array and 2D multisample types. No mipmapping. + SizeEnum +}; + +// The texture minifying function is used whenever the level-of-detail function used when sampling from the texture determines that the texture should be minified. +// There are six defined minifying functions. Two of them use either the nearest texture elements or a weighted average of multiple texture elements to compute the texture value. The other four use mipmaps. +// A mipmap is an ordered set of arrays representing the same image at progressively lower resolutions. If the texture has dimensions 2n×2m, +// there are max(n,m)+1 mipmaps. The first mipmap is the original texture, with dimensions 2n×2m. Each subsequent mipmap has dimensions 2k−1×2l−1, where 2k×2l are the dimensions of the previous mipmap, +// until either k=0 or l=0. At that point, subsequent mipmaps have dimension 1×2l−1 or 2k−1×1 until the final mipmap, which has dimension 1×1. +// To define the mipmaps, call glTexImage1D, glTexImage2D, glTexImage3D, glCopyTexImage1D, or glCopyTexImage2D with the level argument indicating the order of the mipmaps. Level 0 is the original texture; +// level max(n,m) is the final 1×1 +// As more texture elements are sampled in the minification process, fewer aliasing artifacts will be apparent. +// While the GL_NEAREST and GL_LINEAR minification functions can be faster than the other four, they sample only one or multiple texture elements to determine the texture value of +// the pixel being rendered and can produce moire patterns or ragged transitions. The initial value of GL_TEXTURE_MIN_FILTER is GL_NEAREST_MIPMAP_LINEAR. 
+enum class TextureMinFilter : std::int8_t{ + nearest, // Returns the value of the texture element that is nearest (in Manhattan distance) to the specified texture coordinates. + linear, // Returns the weighted average of the four texture elements that are closest to the specified texture coordinates. + // These can include items wrapped or repeated from other parts of a texture, depending on the values of GL_TEXTURE_WRAP_S and GL_TEXTURE_WRAP_T, and on the exact mapping. + nearest_mipmap_nearest, // Chooses the mipmap that most closely matches the size of the pixel being textured and uses the GL_NEAREST criterion + // (the texture element closest to the specified texture coordinates) to produce a texture value. + linear_mimmap_nearest, // Chooses the mipmap that most closely matches the size of the pixel being textured and uses the GL_LINEAR criterion + // (a weighted average of the four texture elements that are closest to the specified texture coordinates) to produce a texture value. + nearest_mipmap_linear, // Chooses the two mipmaps that most closely match the size of the pixel being textured and uses the GL_NEAREST criterion + // (the texture element closest to the specified texture coordinates ) to produce a texture value from each mipmap. The final texture value is a weighted average of those two values. + linear_mipmap_linear, // Chooses the two mipmaps that most closely match the size of the pixel being textured and uses the GL_LINEAR criterion + // (a weighted average of the texture elements that are closest to the specified texture coordinates) to produce a texture value from each mipmap. The final texture value is a weighted average of those two values. + SizeEnum +}; + +// The texture magnification function is used whenever the level-of-detail function used when sampling from the texture determines that the texture should be magnified. +// It sets the texture magnification function to either GL_NEAREST or GL_LINEAR (see below). 
+// GL_NEAREST is generally faster than GL_LINEAR, but it can produce textured images with sharper edges because the transition between texture elements is not as smooth. +// The initial value of GL_TEXTURE_MAG_FILTER is GL_LINEAR. +enum class TextureMagFilter : std::int8_t{ + nearest,// Returns the value of the texture element that is nearest (in Manhattan distance) to the specified texture coordinates. + linear, // Returns the weighted average of the texture elements that are closest to the specified texture coordinates. + // These can include items wrapped or repeated from other parts of a texture, depending on the values of GL_TEXTURE_WRAP_S and GL_TEXTURE_WRAP_T, and on the exact mapping. + SizeEnum +}; + +enum class TextureMaxLevel : std::int8_t{ + SizeEnum +}; + + +using TTexType = std::tuple< + TextureType, std::string_view>; +static constexpr TupleArray textureTypes{{ + TTexType{TextureType::diffuse, "diffuse"sv }, + TTexType{TextureType::ambiant, "ambiant"sv }, + TTexType{TextureType::specular, "specular"sv }, + TTexType{TextureType::normal, "normal"sv }, + TTexType{TextureType::height, "height"sv }, + TTexType{TextureType::light_map, "lightMap"sv }, + TTexType{TextureType::opacity, "opacity"sv }, + TTexType{TextureType::emissive, "emissive"sv }, + TTexType{TextureType::shininess, "shininess"sv }, + TTexType{TextureType::reflection, "reflection"sv }, + TTexType{TextureType::displacement, "displacement"sv}, +}}; + + +[[maybe_unused]] static std::string_view to_string(TextureType t) { + return textureTypes.at<0,1>(t); +} + +struct TextureOptions{ + TextureType type = TextureType::diffuse; + TextureOperation operation = TextureOperation::add; + TextureMapping mapping = TextureMapping::UV; + geo::Pt3 mapMode{TextureMapMode::wrap,TextureMapMode::wrap,TextureMapMode::wrap}; + + // mixed + TextureWrapMode wrapS = TextureWrapMode::repeat; // Sets the wrap parameter for texture coordinate s + TextureWrapMode wrapT = TextureWrapMode::repeat; // Sets the wrap 
parameter for texture coordinate t + TextureWrapMode wrapR = TextureWrapMode::repeat; // Sets the wrap parameter for texture coordinate r + TextureMinFilter minFilter = TextureMinFilter::nearest_mipmap_linear; + TextureMagFilter magFilter = TextureMagFilter::linear; + + geo::Pt4f borderColor{0.0f,0.0f,0.0f,0.0f}; + + int maxLevel = 1000; // Sets the index of the highest defined mipmap level. This is an integer value. The initial value is 1000. + int minLOD = -1000; // Sets the minimum level-of-detail parameter. This floating-point value limits the selection of highest resolution mipmap (lowest mipmap level). The initial value is -1000. + int maxLOD = 1000; // Sets the maximum level-of-detail parameter. This floating-point value limits the selection of the lowest resolution mipmap (highest mipmap level). The initial value is 1000. +}; + + +struct TextureAlias{ + std::string v; +}; + +struct TexturePath{ + std::string v; +}; + +struct Texture{ + + constexpr int nb_channels()const{return m_nbChannels;} + + void copy_2d_data(int width, int height, int nbChannels, unsigned char *data); + void copy_2d_data(int width, int height, int nbChannels, const std::vector &data); + void copy_2d_data(int width, int height, const std::vector> &data); + void copy_2d_data(int width, int height, int nbChannels, const std::vector &data); + void copy_2d_data(int width, int height, const std::vector> &data); + + bool load_2d_image_file_data(const std::string &pathTexture, bool flip, int targetNbChannels = 3); + bool write_2d_image_file_data(const std::string &pathTexture); + unsigned char *data(){return m_data.data();} + + constexpr bool is_hdr() const {return m_hdr;} + +protected: + + bool m_hdr = false; + std::vector m_data; + int m_nbChannels = 0; + std::vector m_sizes; +}; + +struct Texture2D : public Texture{ + + std::string path; + + Texture2D() = default; + explicit Texture2D(const std::string &pathTexture, bool flip = true, int targetNbChannels = 4); + + constexpr geo::Pt2 size() 
const { return geo::Pt2{m_sizes[0], m_sizes[1]};} + +private : + + bool m_flipped = false; + +}; + +struct CubeMap{ + // posx.png","negx.png","posy.png","negy.png","posz.png","negz.png + std::array textures; + + CubeMap() = default; + + bool load_2d_images_files(const std::array &pathTextures, bool flip, int targetNbChannels = 3); + bool load_2d_images_files(const std::string &basePath, std::array &extensions, bool flip, int targetNbChannels = 3); + +}; + +struct TextureInfo { + Texture2D *texture = nullptr; + TextureOptions options; +}; +using TexturesInfo = std::vector; + +} diff --git a/cpp-projects/base/input/joypad.hpp b/cpp-projects/base/input/joypad.hpp new file mode 100644 index 0000000..adefa10 --- /dev/null +++ b/cpp-projects/base/input/joypad.hpp @@ -0,0 +1,185 @@ +/******************************************************************************* +** Toolset-base ** +** MIT License ** +** Copyright (c) [2018] [Florian Lance] ** +** ** +** Permission is hereby granted, free of charge, to any person obtaining a ** +** copy of this software and associated documentation files (the "Software"), ** +** to deal in the Software without restriction, including without limitation ** +** the rights to use, copy, modify, merge, publish, distribute, sublicense, ** +** and/or sell copies of the Software, and to permit persons to whom the ** +** Software is furnished to do so, subject to the following conditions: ** +** ** +** The above copyright notice and this permission notice shall be included in ** +** all copies or substantial portions of the Software. ** +** ** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ** +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ** +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ** +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ** +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ** +** FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ** +** DEALINGS IN THE SOFTWARE. ** +** ** +********************************************************************************/ +#pragma once + +// std +#include +#include +#include + +// base +#include "utility/tuple_array.hpp" + +namespace tool::input { + +using namespace std::literals::string_view_literals; + +class Joypad{ + +public: + enum class Button : int { + // P1 + A_P1 = 0, B_P1, X_P1, Y_P1, + RightBumper_P1, LeftBumper_P1, Start_P1, Back_P1, + LeftStickClick_P1, RightStickClick_P1, + // P2 + A_P2, B_P2, X_P2, Y_P2, + RightBumper_P2, LeftBumper_P2, Start_P2, Back_P2, + LeftStickClick_P2, RightStickClick_P2, + // counter + SizeEnum + }; + + enum class Axis : int { + // P1 + LeftJoystickX_P1 = 0, RightJoystickX_P1, + LeftJoystickY_P1, RightJoystickY_P1, + LeftTrigger_P1, RightTrigger_P1, + DPadX_P1, DPadY_P1, + // P2 + LeftJoystickX_P2, RightJoystickX_P2, + LeftJoystickY_P2, RightJoystickY_P2, + LeftTrigger_P2, RightTrigger_P2, + DPadX_P2, DPadY_P2, + // counter + SizeEnum + }; + + using Code = int; + using Player = int; + using Name = std::string_view; + using TButton = std::tuple< + Button, Name, Code, Player>; + static constexpr TupleArray buttons ={{ + TButton + {Button::A_P1 ,"A_P1"sv ,0 , 1}, + {Button::B_P1 ,"B_P1"sv ,1 , 1}, + {Button::X_P1 ,"X_P1"sv ,2 , 1}, + {Button::Y_P1 ,"Y_P1"sv ,3 , 1}, + {Button::RightBumper_P1 ,"RightBumper_P1"sv ,4 , 1}, + {Button::LeftBumper_P1 ,"LeftBumper_P1"sv ,5 , 1}, + {Button::Start_P1 ,"Start_P1"sv ,6 , 1}, + {Button::Back_P1 ,"Back_P1"sv ,7 , 1}, + {Button::LeftStickClick_P1 ,"LeftStickClick_P1"sv ,8 , 1}, + {Button::RightStickClick_P1 ,"RightStickClick_P1"sv ,9 , 1}, + {Button::A_P2 ,"A_P2"sv ,10 , 2}, + {Button::B_P2 ,"B_P2"sv ,11 , 2}, + {Button::X_P2 ,"X_P2"sv 
,12 , 2}, + {Button::Y_P2 ,"Y_P2"sv ,13 , 2}, + {Button::RightBumper_P2 ,"RightBumper_P2"sv ,14 , 2}, + {Button::LeftBumper_P2 ,"LeftBumper_P2"sv ,15 , 2}, + {Button::Start_P2 ,"Start_P2"sv ,16 , 2}, + {Button::Back_P2 ,"Back_P2"sv ,17 , 2}, + {Button::LeftStickClick_P2 ,"LeftStickClick_P2"sv ,18 , 2}, + {Button::RightStickClick_P2 ,"RightStickClick_P2"sv ,19 , 2}, + }}; + + static constexpr Name get_name(Button button) { + return buttons.at<0,1>(button); + } + + static constexpr Code get_code(Button button) { + return buttons.at<0,2>(button); + } + + static constexpr Player get_player(Button button) { + return buttons.at<0,3>(button); + } + + static constexpr std::optional get_button_name(Code code) { + return buttons.optional_at<2,1>(code); + } + + static constexpr std::optional get_button_code(Name name) { + return buttons.optional_at<1,2>(name); + } + + static constexpr std::optional