Skip to content

Commit

Permalink
Optimized code of guided filter (reduce by 20 percent)
Browse files Browse the repository at this point in the history
  • Loading branch information
t-taniai committed Oct 13, 2017
1 parent ff87db6 commit 3ae369c
Show file tree
Hide file tree
Showing 8 changed files with 166 additions and 35 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ x64/Release/LocalExpansionStereo.iobj
*.idb
LocalExpansionStereo/results/
results/
data/MiddV3/Adirondack/
data/MiddV3/Vintage/
data/MiddV3/trainingH/Adirondack
data/MiddV3/trainingH/Vintage
9 changes: 7 additions & 2 deletions LocalExpansionStereo/CostVolumeEnergy.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,13 @@ class CostVolumeEnergy :
}
else if (params.filterName == "GF")
{
filter[0] = std::make_unique<FastGuidedImageFilter<double>>(imL, params.windR / 2, params.filter_param1, 1.0/255);
filter[1] = std::make_unique<FastGuidedImageFilter<double>>(imR, params.windR / 2, params.filter_param1, 1.0/255);
filter[0] = std::make_unique<FastGuidedImageFilter<double>>(imL, params.windR / 2, params.filter_param1, 1.0 / 255);
filter[1] = std::make_unique<FastGuidedImageFilter<double>>(imR, params.windR / 2, params.filter_param1, 1.0 / 255);
}
else if (params.filterName == "GFfloat")
{
filter[0] = std::make_unique<FastGuidedImageFilter<float>>(imL, params.windR / 2, params.filter_param1, 1.0 / 255);
filter[1] = std::make_unique<FastGuidedImageFilter<float>>(imR, params.windR / 2, params.filter_param1, 1.0 / 255);
}
else //if (params.filterName == "")
{
Expand Down
173 changes: 146 additions & 27 deletions LocalExpansionStereo/GuidedFilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
// OpenCV
#include <opencv2/opencv.hpp>
#include <memory>
//#include "Utilities.hpp"


class IJointFilter
Expand Down Expand Up @@ -38,10 +37,10 @@ class GuidedImageFilter : public IJointFilter
cv::Mat N;
static const int DEPTH = cv::DataDepth<Type>::value;

cv::Mat boxfilter(cv::Mat I, int r) const
cv::Mat boxfilter(const cv::Mat& I) const
{
cv::Mat q;
cv::boxFilter(I, q, -1, cv::Size(2 * r + 1, 2 * r + 1), cv::Point(-1, -1), false, cv::BORDER_CONSTANT);
cv::boxFilter(I, q, -1, cv::Size(2 * R + 1, 2 * R + 1), cv::Point(-1, -1), false, cv::BORDER_CONSTANT);
return q;
}

Expand All @@ -67,22 +66,22 @@ class GuidedImageFilter : public IJointFilter

cv::split(realI, Ichannels);

N = boxfilter(cv::Mat_<Type>::ones(realI.size()), R);
mean_I_r = boxfilter(Ichannels[0], R) / N;
mean_I_g = boxfilter(Ichannels[1], R) / N;
mean_I_b = boxfilter(Ichannels[2], R) / N;
N = boxfilter(cv::Mat_<Type>::ones(realI.size()));
mean_I_r = boxfilter(Ichannels[0]) / N;
mean_I_g = boxfilter(Ichannels[1]) / N;
mean_I_b = boxfilter(Ichannels[2]) / N;

// variance of I in each local patch: the matrix Sigma in Eqn (14).
// Note the variance in each local patch is a 3x3 symmetric matrix:
// rr, rg, rb
// Sigma = rg, gg, gb
// rb, gb, bb
cv::Mat var_I_rr = boxfilter(Ichannels[0].mul(Ichannels[0]), R) / N - mean_I_r.mul(mean_I_r) + eps;
cv::Mat var_I_rg = boxfilter(Ichannels[0].mul(Ichannels[1]), R) / N - mean_I_r.mul(mean_I_g);
cv::Mat var_I_rb = boxfilter(Ichannels[0].mul(Ichannels[2]), R) / N - mean_I_r.mul(mean_I_b);
cv::Mat var_I_gg = boxfilter(Ichannels[1].mul(Ichannels[1]), R) / N - mean_I_g.mul(mean_I_g) + eps;
cv::Mat var_I_gb = boxfilter(Ichannels[1].mul(Ichannels[2]), R) / N - mean_I_g.mul(mean_I_b);
cv::Mat var_I_bb = boxfilter(Ichannels[2].mul(Ichannels[2]), R) / N - mean_I_b.mul(mean_I_b) + eps;
cv::Mat var_I_rr = boxfilter(Ichannels[0].mul(Ichannels[0])) / N - mean_I_r.mul(mean_I_r) + eps;
cv::Mat var_I_rg = boxfilter(Ichannels[0].mul(Ichannels[1])) / N - mean_I_r.mul(mean_I_g);
cv::Mat var_I_rb = boxfilter(Ichannels[0].mul(Ichannels[2])) / N - mean_I_r.mul(mean_I_b);
cv::Mat var_I_gg = boxfilter(Ichannels[1].mul(Ichannels[1])) / N - mean_I_g.mul(mean_I_g) + eps;
cv::Mat var_I_gb = boxfilter(Ichannels[1].mul(Ichannels[2])) / N - mean_I_g.mul(mean_I_b);
cv::Mat var_I_bb = boxfilter(Ichannels[2].mul(Ichannels[2])) / N - mean_I_b.mul(mean_I_b) + eps;

// Inverse of Sigma + eps * I
invrr = var_I_gg.mul(var_I_bb) - var_I_gb.mul(var_I_gb);
Expand All @@ -108,17 +107,13 @@ class GuidedImageFilter : public IJointFilter
return std::make_shared<GuidedImageFilter>(realI(rect), R, eps);
}

cv::Mat filter(const cv::Mat& _p) const override
cv::Mat filter_mat(const cv::Mat& p) const
{
cv::Mat p;
if (_p.depth() != DEPTH) _p.convertTo(p, DEPTH);
else p = _p;

cv::Mat mean_p = boxfilter(p, R) / N;
cv::Mat mean_p = boxfilter(p) / N;

cv::Mat mean_Ip_r = boxfilter(Ichannels[0].mul(p), R) / N;
cv::Mat mean_Ip_g = boxfilter(Ichannels[1].mul(p), R) / N;
cv::Mat mean_Ip_b = boxfilter(Ichannels[2].mul(p), R) / N;
cv::Mat mean_Ip_r = boxfilter(Ichannels[0].mul(p)) / N;
cv::Mat mean_Ip_g = boxfilter(Ichannels[1].mul(p)) / N;
cv::Mat mean_Ip_b = boxfilter(Ichannels[2].mul(p)) / N;

// covariance of (I, p) in each local patch.
cv::Mat cov_Ip_r = mean_Ip_r - mean_I_r.mul(mean_p);
Expand All @@ -132,10 +127,133 @@ class GuidedImageFilter : public IJointFilter
cv::Mat b = mean_p - a_r.mul(mean_I_r) - a_g.mul(mean_I_g) - a_b.mul(mean_I_b); // Eqn. (15) in the paper;

cv::Mat q =
( boxfilter(a_r, R).mul(Ichannels[0])
+ boxfilter(a_g, R).mul(Ichannels[1])
+ boxfilter(a_b, R).mul(Ichannels[2])
+ boxfilter(b, R)) / N; // Eqn. (16) in the paper;
(boxfilter(a_r).mul(Ichannels[0])
+ boxfilter(a_g).mul(Ichannels[1])
+ boxfilter(a_b).mul(Ichannels[2])
+ boxfilter(b)) / N; // Eqn. (16) in the paper;
return q;
}

// Loop-fused evaluation of the guided filter, used by filter() in place of
// filter_mat(). Instead of chaining whole-matrix cv::Mat expressions, the
// per-pixel arithmetic is done in three explicit row loops, which reduces
// redundant data access.
// Not explicitly vectorized but hopefully done by auto vectorization of the compiler.
// Benchmark for Adirondack:
//   Desktop) 498 sec -> 408 sec (18% reduction)
//   Laptop)  302 sec -> 237 sec (22% reduction)
//
// p      : input image. Must be single-channel with element type Type and
//          have the same size as N (filter() converts the depth before
//          calling this function).
// return : filtered image with the same size and depth as p.
cv::Mat filter_raw(const cv::Mat& p) const
{
    // Every loop below indexes p, N, the guide channels and the cached
    // statistics with the same (row, col) through raw row pointers. Nothing
    // else guards those accesses, so reject inputs whose layout does not
    // match instead of reading past the end of a row or matrix.
    CV_Assert(p.type() == DEPTH);
    CV_Assert(p.size() == N.size());

    const int rows = p.rows, cols = p.cols;

    // boxfilter() is unnormalized, so this holds window sums of p; the
    // division by N happens per pixel in the second pass.
    cv::Mat mean_p = boxfilter(p);

    cv::Mat mean_Ip_r(p.size(), p.depth());
    cv::Mat mean_Ip_g(p.size(), p.depth());
    cv::Mat mean_Ip_b(p.size(), p.depth());

    // Pass 1: per-pixel products I_c * p for each guide channel c.
    for (int i = 0; i < rows; i++)
    {
        auto pp = p.ptr<Type>(i);
        auto pmean_Ip_r = mean_Ip_r.ptr<Type>(i);
        auto pmean_Ip_g = mean_Ip_g.ptr<Type>(i);
        auto pmean_Ip_b = mean_Ip_b.ptr<Type>(i);

        auto pI_r = Ichannels[0].ptr<Type>(i);
        auto pI_g = Ichannels[1].ptr<Type>(i);
        auto pI_b = Ichannels[2].ptr<Type>(i);

        for (int j = 0; j < cols; j++)
        {
            auto vp = pp[j];
            pmean_Ip_r[j] = pI_r[j] * vp;
            pmean_Ip_g[j] = pI_g[j] * vp;
            pmean_Ip_b[j] = pI_b[j] * vp;
        }
    }
    // Window sums of the products (still unnormalized).
    mean_Ip_r = boxfilter(mean_Ip_r);
    mean_Ip_g = boxfilter(mean_Ip_g);
    mean_Ip_b = boxfilter(mean_Ip_b);


    cv::Mat a_r(p.size(), p.depth());
    cv::Mat a_g(p.size(), p.depth());
    cv::Mat a_b(p.size(), p.depth());
    cv::Mat b(p.size(), p.depth());

    // Pass 2: per-pixel linear coefficients (a_r, a_g, a_b) and offset b.
    for (int i = 0; i < rows; i++)
    {
        auto pa_r = a_r.ptr<Type>(i);
        auto pa_g = a_g.ptr<Type>(i);
        auto pa_b = a_b.ptr<Type>(i);

        auto pN = N.ptr<Type>(i);
        auto pmean_p = mean_p.ptr<Type>(i);
        auto pmean_Ip_r = mean_Ip_r.ptr<Type>(i);
        auto pmean_Ip_g = mean_Ip_g.ptr<Type>(i);
        auto pmean_Ip_b = mean_Ip_b.ptr<Type>(i);

        auto pmean_I_r = mean_I_r.ptr<Type>(i);
        auto pmean_I_g = mean_I_g.ptr<Type>(i);
        auto pmean_I_b = mean_I_b.ptr<Type>(i);

        auto pinvrr = invrr.ptr<Type>(i);
        auto pinvrg = invrg.ptr<Type>(i);
        auto pinvrb = invrb.ptr<Type>(i);
        auto pinvgg = invgg.ptr<Type>(i);
        auto pinvgb = invgb.ptr<Type>(i);
        auto pinvbb = invbb.ptr<Type>(i);

        auto pb = b.ptr<Type>(i);
        for (int j = 0; j < cols; j++)
        {
            auto n = pN[j];
            auto mp = pmean_p[j] / n;   // local mean of p
            auto mIr = pmean_I_r[j];
            auto mIg = pmean_I_g[j];
            auto mIb = pmean_I_b[j];

            // Covariance of (I, p) in the local patch.
            auto cov_Ip_r = pmean_Ip_r[j] / n - mIr*mp;
            auto cov_Ip_g = pmean_Ip_g[j] / n - mIg*mp;
            auto cov_Ip_b = pmean_Ip_b[j] / n - mIb*mp;

            // a = (symmetric inverse matrix stored in inv**) * cov_Ip.
            pa_r[j] = pinvrr[j] * cov_Ip_r + pinvrg[j] * cov_Ip_g + pinvrb[j] * cov_Ip_b;
            pa_g[j] = pinvrg[j] * cov_Ip_r + pinvgg[j] * cov_Ip_g + pinvgb[j] * cov_Ip_b;
            pa_b[j] = pinvrb[j] * cov_Ip_r + pinvgb[j] * cov_Ip_g + pinvbb[j] * cov_Ip_b;

            // b = mean_p - a . mean_I
            pb[j] = mp - pa_r[j] * mIr - pa_g[j] * mIg - pa_b[j] * mIb;
        }
    }

    // Window sums of the coefficients; normalized by N in the last pass.
    a_r = boxfilter(a_r);
    a_g = boxfilter(a_g);
    a_b = boxfilter(a_b);
    b = boxfilter(b);

    // Pass 3: q = (sum(a) . I + sum(b)) / N, written in place into b.
    for (int i = 0; i < rows; i++)
    {
        auto pa_r = a_r.ptr<Type>(i);
        auto pa_g = a_g.ptr<Type>(i);
        auto pa_b = a_b.ptr<Type>(i);
        auto pb = b.ptr<Type>(i);
        auto pN = N.ptr<Type>(i);

        auto pI_r = Ichannels[0].ptr<Type>(i);
        auto pI_g = Ichannels[1].ptr<Type>(i);
        auto pI_b = Ichannels[2].ptr<Type>(i);

        for (int j = 0; j < cols; j++)
        {
            pb[j] = (pb[j] + pa_r[j] * pI_r[j] + pa_g[j] * pI_g[j] + pa_b[j] * pI_b[j]) / pN[j];
        }
    }
    return b;
}
cv::Mat filter(const cv::Mat& _p) const override
{
cv::Mat p;
if (_p.depth() != DEPTH) _p.convertTo(p, DEPTH);
else p = _p;

// This code is the largest bottleneck of the whole algorithm.
cv::Mat q = filter_raw(p);
//cv::Mat q = filter_mat(p);

cv::Mat _q;

Expand Down Expand Up @@ -185,6 +303,7 @@ class FastGuidedImageFilter : public GuidedImageFilter<Type>
auto filter = std::make_shared<FastGuidedImageFilter>();
filter->R = R;
filter->eps = eps;

filter->I = I(rect);
filter->realI = realI(rect);
filter->mean_I_r = mean_I_r(rect);
Expand All @@ -202,7 +321,7 @@ class FastGuidedImageFilter : public GuidedImageFilter<Type>
filter->invgb = invgb(rect);
filter->invbb = invbb(rect);

filter->N = boxfilter(cv::Mat_<Type>::ones(rect.size()), R);
filter->N = boxfilter(cv::Mat_<Type>::ones(rect.size()));
return filter;
}
};
Expand Down
7 changes: 6 additions & 1 deletion LocalExpansionStereo/StereoEnergy.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ struct Parameters
float filter_param1;
int windR;
int neighborNum;
std::string filterName; // "BF" or "GF" or ""
std::string filterName; // "BF" or "GF" or "GFfloat" or ""

Parameters(float lambda = 20, int windR = 20, std::string filterName = "BF", float filter_param1 = 10)
: alpha(0.9)
Expand Down Expand Up @@ -566,6 +566,11 @@ class NaiveStereoEnergy : public StereoEnergy
filter[0] = std::make_unique<FastGuidedImageFilter<double>>(imL, params.windR / 2, params.filter_param1, 1.0 / 255);
filter[1] = std::make_unique<FastGuidedImageFilter<double>>(imR, params.windR / 2, params.filter_param1, 1.0 / 255);
}
else if (params.filterName == "GFfloat")
{
filter[0] = std::make_unique<FastGuidedImageFilter<float>>(imL, params.windR / 2, params.filter_param1, 1.0 / 255);
filter[1] = std::make_unique<FastGuidedImageFilter<float>>(imR, params.windR / 2, params.filter_param1, 1.0 / 255);
}
else //if (params.filterName == "")
{
filter[0] = nullptr;
Expand Down
1 change: 1 addition & 0 deletions LocalExpansionStereo/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ struct Options

const Parameters paramsBF = Parameters(20, 20, "BF", 10);
const Parameters paramsGF = Parameters(1.0, 20, "GF", 0.0001);
const Parameters paramsGFfloat = Parameters(1.0, 20, "GFfloat", 0.0001); // Slightly faster

struct Calib
{
Expand Down
2 changes: 0 additions & 2 deletions data/MiddV3/Adirondack/README.txt

This file was deleted.

3 changes: 3 additions & 0 deletions data/MiddV3/trainingH/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Download an example file set from
http://www2.hci.iis.u-tokyo.ac.jp/datasets/data/LocalExpStereo/Adirondack.zip
then extract it here as a directory named "Adirondack"
2 changes: 1 addition & 1 deletion demo.bat
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ set resultsroot=%~dp0results
mkdir "%resultsroot%"
"%bin%" -targetDir "%datasetroot%\MiddV2\cones" -outputDir "%resultsroot%\cones" -mode MiddV2 -smooth_weight 1 -doDual 1
"%bin%" -targetDir "%datasetroot%\MiddV2\teddy" -outputDir "%resultsroot%\teddy" -mode MiddV2 -smooth_weight 1
"%bin%" -targetDir "%datasetroot%\MiddV3\Adirondack" -outputDir "%resultsroot%\Adirondack" -mode MiddV3 -smooth_weight 0.5
"%bin%" -targetDir "%datasetroot%\MiddV3\trainingH\Adirondack" -outputDir "%resultsroot%\Adirondack" -mode MiddV3 -smooth_weight 0.5

0 comments on commit 3ae369c

Please sign in to comment.