diff --git a/demos/helix-example/helix-example/Documentation.pdf b/demos/helix-example/helix-example/Documentation.pdf new file mode 100644 index 000000000..fed5a7ec0 Binary files /dev/null and b/demos/helix-example/helix-example/Documentation.pdf differ diff --git a/demos/helix-example/helix-example/Graph_from_file.ipynb b/demos/helix-example/helix-example/Graph_from_file.ipynb new file mode 100644 index 000000000..bc9b01b60 --- /dev/null +++ b/demos/helix-example/helix-example/Graph_from_file.ipynb @@ -0,0 +1 @@ +{"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3 (ipykernel)","language":"python"},"language_info":{"name":"python","version":"3.10.14","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":5,"nbformat":4,"cells":[{"id":"3a9756e8-5eb8-4e31-a2de-1c08b0b10229","cell_type":"code","source":"import matplotlib.pyplot as plt\n\nfig = plt.figure()\nax = plt.axes(projection=\"3d\")\n\nx, y, z = [], [], []\nwith open(\"output.txt\") as file:\n lines = file.readlines()\nfor line in lines:\n if line.strip() == \"end\":\n ax.plot3D(x, y, z)\n x.clear()\n y.clear()\n z.clear()\n else:\n a, b, c = [float(i) for i in line.split()]\n x.append(a)\n y.append(b)\n z.append(c)\n\nax.set_box_aspect([1,1,1])\nplt.show()","metadata":{"trusted":true},"outputs":[{"output_type":"display_data","data":{"text/plain":"
","image/png":""},"metadata":{}}],"execution_count":3},{"id":"50b3a6d1-98c0-4ddc-b216-d388a21502e3","cell_type":"code","source":"","metadata":{"trusted":true},"outputs":[],"execution_count":null}]} \ No newline at end of file diff --git a/demos/helix-example/helix-example/Helix.ipynb b/demos/helix-example/helix-example/Helix.ipynb new file mode 100644 index 000000000..3945d1480 --- /dev/null +++ b/demos/helix-example/helix-example/Helix.ipynb @@ -0,0 +1 @@ +{"metadata":{"kernelspec":{"display_name":"C++17-Clad-v1.7","language":"C++17","name":"xcpp17-clad"},"language_info":{"codemirror_mode":"text/x-c++src","file_extension":".cpp","mimetype":"text/x-c++src","name":"c++","version":"17"}},"nbformat_minor":5,"nbformat":4,"cells":[{"id":"270d637f-81da-472b-a904-783c072d9fa2","cell_type":"markdown","source":"# Helix fitter tutorial - CLAD & Jupyter Notebook ","metadata":{}},{"id":"0447116e-4a4f-412a-9de3-2a8601e11f43","cell_type":"markdown","source":"## Introduction\n\nParticle tracking is an important part of the processing and analysis of data received from particle detectors, such as the Compact Muon Solenoid (CMS). Tracking is the step that determines the momentum of charged particles escaping from the collision point. It identifies individual particles by reconstructing their trajectories from points where charged particle “hits” were measured by the detector and interpreting them.$^{[2]}$ Due to the Lorentz force, charged particles move in a helical motion when affected by the magnetic field (neglecting other effects due to material interactions, etc). This means we can figure out a specific particle trajectory through the detector by fitting a helix function to data points in such a way that the distance from the data points and the helix would be minimized. In mathematical terms, we need to find optimal helix parameters by minimizing a loss function composed of the sum of least squared distances, thus giving the best estimation of these parameters. 
For this purpose we can use Clad to efficiently minimize the loss function. ","metadata":{}},{"id":"7749b5ea-505f-4d8e-b97c-00efc5ab1c6e","cell_type":"markdown","source":"## Levenberg-Marquardt algorithm\n\nTo solve this nonlinear least squares problem we will be using the Levenberg-Marquardt algorithm.$^{(1)}$ The Levenberg-Marquardt algorithm combines two optimization methods: gradient descent and Gauss-Newton. Its behaviour changes based on how close the current coefficients are to the optimal value. When the coefficients are far from optimal, it uses gradient descent, which takes steps in the direction of steepest descent. When the coefficients are close to optimal, it uses Gauss-Newton, which assumes the problem is locally quadratic and finds the minimum of this quadratic. This combination allows the algorithm to provide a more reliable and efficient optimization than the other two methods mentioned.$^{[3]}$ ","metadata":{}},{"id":"8510d725-4b06-4b9d-be3b-8d378e6e5704","cell_type":"markdown","source":"## Setup","metadata":{}},{"id":"756e353f-f5bb-43b2-b310-578e1d1a3a9f","cell_type":"code","source":"#include \n#include \n#include \n#include \n#include \n#include \n#include \"clad/Differentiator/Differentiator.h\"\n//---------------------\n#include \nstd::ofstream outfile(\"output.txt\");\nauto MY_PI = 3.14159265359;","metadata":{"trusted":true},"outputs":[],"execution_count":1},{"id":"643072ca-02f4-489f-aebb-c25741041b8b","cell_type":"code","source":"namespace clad::custom_derivatives::std {\ntemplate \nCUDA_HOST_DEVICE ValueAndPushforward atan2_pushforward(T y, T x, T d_y,\n T d_x) {\n return {::std::atan2(y, x),\n -(y / ((x * x) + (y * y))) * d_x + x / ((x * x) + (y * y)) * d_y};\n}\n\ntemplate \nCUDA_HOST_DEVICE void atan2_pullback(T y, T x, U d_z, T* d_y, T* d_x) {\n *d_y += x / ((x * x) + (y * y)) * d_z;\n\n *d_x += -(y / ((x * x) + (y * y))) * d_z;\n}\n\ntemplate \nCUDA_HOST_DEVICE ValueAndPushforward acos_pushforward(T x, T d_x) {\n return 
{::std::acos(x), ((-1) / (::std::sqrt(1 - x * x))) * d_x};\n}\n}\nnamespace clad::custom_derivatives\n{\n using std::atan2_pushforward;\n using std::atan2_pullback;\n using std::acos_pushforward;\n}","metadata":{"trusted":true},"outputs":[],"execution_count":2},{"id":"5f50b5c9-1e0a-4a67-9dd5-4074d5bd5ccd","cell_type":"code","source":"double DistanceSquare(double x1, double y1, double z1, double x2, double y2, double z2)\n{\n return (x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2) + (z1 - z2) * (z1 - z2);\n}","metadata":{"trusted":true},"outputs":[],"execution_count":3},{"id":"ba5a4f3f-310d-496c-8d27-86e40564847d","cell_type":"code","source":"double Distance(double x1, double y1, double z1, double x2, double y2, double z2)\n{\n return std::sqrt(DistanceSquare(x1, y1, z1, x2, y2, z2));\n}","metadata":{"trusted":true},"outputs":[],"execution_count":4},{"id":"192c2378-4827-4837-826d-6269e1bbe262","cell_type":"code","source":"double DistanceSquareA(double v[3], double x2, double y2, double z2)\n{\n return DistanceSquare(v[0], v[1], v[2], x2, y2, z2);\n}","metadata":{"trusted":true},"outputs":[],"execution_count":5},{"id":"c9c455a3-0487-419e-a823-b837b4003cdc","cell_type":"code","source":"double DistanceA(double v[3], double x2, double y2, double z2)\n{\n return Distance(v[0], v[1], v[2], x2, y2, z2);\n}","metadata":{"trusted":true},"outputs":[],"execution_count":6},{"id":"a10a4720-2b20-4225-a5fb-c6c1da5396c7","cell_type":"code","source":"// All the matrices are written as 1D arrays!\nvoid MatrixMultiply(double *a, double *b, int arows, int acols, int bcols, double *output)\n{\n for (int i = 0; i < arows; i++)\n {\n for (int j = 0; j < bcols; j++)\n {\n double sum = 0;\n for (int k = 0; k < acols; k++)\n sum = sum + a[i * acols + k] * b[k * bcols + j];\n output[i * bcols + j] = sum;\n }\n }\n}","metadata":{"trusted":true},"outputs":[],"execution_count":7},{"id":"fd7cef3a-ff78-4439-8a38-a94318f355c5","cell_type":"code","source":"void Transpose(double *input, int rows, int 
cols, double *output)\n{\n for (int i = 0; i < rows; ++i)\n {\n for (int j = 0; j < cols; ++j)\n {\n int i_input = i * cols + j;\n\n int i_output = j * rows + i;\n\n output[i_output] = input[i_input];\n }\n }\n}","metadata":{"trusted":true},"outputs":[],"execution_count":8},{"id":"7b7f0f1c-6b15-4535-b86f-48371c1659c9","cell_type":"code","source":"void DiagOfSquareM(double *input, int height, double *diag)\n{\n // Works for square matrices only\n for (int i = 0; i < height * height; i++)\n {\n diag[i] = 0;\n }\n for (int i = 0; i < height; i++)\n {\n diag[i * height + i] = input[i * height + i];\n }\n}","metadata":{"trusted":true},"outputs":[],"execution_count":9},{"id":"f782c822-fde6-4396-b283-d07d4f43224a","cell_type":"code","source":"void ScalarMultiply(double *matrix, int rows, int cols, double number, double *output)\n{\n for (int i = 0; i < rows; i++)\n {\n for (int j = 0; j < cols; j++)\n {\n output[i * cols + j] = number * matrix[i * cols + j];\n }\n }\n}\n","metadata":{"trusted":true},"outputs":[],"execution_count":10},{"id":"6c5444d9-7e34-4691-8057-1e65379fee04","cell_type":"code","source":"void CopyMatrix(double *matrix, int size, double *output)\n{\n for (int i = 0; i < size; i++)\n {\n output[i] = matrix[i];\n }\n}","metadata":{"trusted":true},"outputs":[],"execution_count":11},{"id":"1b67b3fa-6a9a-40e5-9ec0-2dced9bd1b4c","cell_type":"code","source":"void AddMatrices(double *a, double *b, int rows, int cols, double *output)\n{\n for (int i = 0; i < rows; i++)\n {\n for (int j = 0; j < cols; j++)\n {\n output[i * cols + j] = a[i * cols + j] + b[i * cols + j];\n }\n }\n}","metadata":{"trusted":true},"outputs":[],"execution_count":12},{"id":"640e3f3f-a242-41f7-83e4-4fc99f2ee9c4","cell_type":"markdown","source":"## The implementation\n\nFirst we define our helix. We chose to do so in the Cartesian coordinates. This is done in **HelixPoint**, but we first define functions that allow our helix to be rotated around the x axis and then the y axis. 
The angle **alph** represents how much the helix is rotated counterclockwise around the x axis, when viewed from the positive direction of the axis. Likewise for **bet** and the y axis:","metadata":{}},{"id":"dcffd80a-03ab-4ed2-ae1d-cfe371b5ed0f","cell_type":"code","source":"void RotateAlph(double x, double y, double z, double alph, double output[3])\n{\n output[0] = x;\n output[1] = y * cos(alph) - z * sin(alph);\n output[2] = y * sin(alph) + z * cos(alph);\n}","metadata":{"trusted":true},"outputs":[],"execution_count":13},{"id":"e4146b67-14d3-45af-bf9e-0710e55aeb3a","cell_type":"code","source":"void RotateBet(double x, double y, double z, double bet, double output[3])\n{\n output[0] = x * cos(bet) + z * sin(bet);\n output[1] = y;\n output[2] = -x * sin(bet) + z * cos(bet);\n}","metadata":{"trusted":true},"outputs":[],"execution_count":14},{"id":"58726266-6fde-4707-a2d7-dd4212d7d8a8","cell_type":"code","source":"void Rotate(double x, double y, double z, double alph, double bet, double output[3])\n{\n double point[3];\n RotateAlph(x, y, z, alph, point);\n RotateBet(point[0], point[1], point[2], bet, output);\n}","metadata":{"trusted":true},"outputs":[],"execution_count":15},{"id":"9c844ca6-d4ae-4d2d-be11-6b8750b81c01","cell_type":"code","source":"void UnRotate(double x, double y, double z, double alph, double bet, double output[3])\n{\n double point[3];\n RotateBet(x, y, z, -bet, point);\n RotateAlph(point[0], point[1], point[2], -alph, output);\n}","metadata":{"trusted":true},"outputs":[],"execution_count":16},{"id":"2bc8bcc6-f292-44ce-84a1-917e1b0cc0a3","cell_type":"code","source":"void HelixPoint(double a, double b, double c, double d, double alph, double bet, double t, double output[3])\n{\n /*Describe a point on a helix in the Cartesian coordinate system.*/\n double x = a * (c + std::cos(t));\n double y = a * (d + std::sin(t));\n double z = a * b * t;\n output[0] = x;\n output[1] = y;\n output[2] = z;\n Rotate(x, y, z, alph, bet, 
output);\n}","metadata":{"trusted":true},"outputs":[],"execution_count":17},{"id":"00a11ec6-4890-4f74-9b93-18921bae7acc","cell_type":"markdown","source":"\nThe function **HelixPoint** describes a circular helix of radius **a** and a slope ⁠**1/b**⁠ (in the net direction of travel, which is the z direction in the code above). The helix can not only be rotated as described above, but also be shifted from the starting coordinates by distances **c** and **d** in the x and y directions, respectively.\n
\n
\n
\nNext, for demonstration purposes, we generate a set of points using **GenerateFlawedPoints**. We pick a set of helix parameters and step **t** from 0.1 upwards in increments of 0.1 to generate an array containing **nr_of_points** data points. Each data point is obtained by calculating the point on the helix with the given parameters at time **t** using **HelixPoint** and then adding uniform random noise in [0, 1) to each coordinate (representing measurement error), so that the data points do not lie on a perfect helix. We will then use these points to estimate the helix parameters. \n
","metadata":{}},{"id":"0b5d9f88-d796-4dc5-89d0-c6618628130e","cell_type":"code","source":"void GenerateFlawedPoints(int nr_of_points, double a, double b, double c, double d, double alph, double bet, double *points)\n{\n /*Generate points on a helix with given params but add noise. */\n auto seed = time(nullptr);\n std::mt19937_64 rng(seed);\n std::uniform_real_distribution uniform(0, 1);\n double output[3];\n double t = 0;\n for (int i = 0; i < nr_of_points; i++)\n {\n t += 0.1;\n HelixPoint(a, b, c, d, alph, bet, t, output);\n points[i * 3] = output[0] + uniform(rng);\n points[i * 3 + 1] = output[1] + uniform(rng);\n points[i * 3 + 2] = output[2] + uniform(rng);\n }\n}\n","metadata":{"trusted":true},"outputs":[],"execution_count":18},{"id":"57160b5b-2e1c-4067-a47a-cb6e4ba36391","cell_type":"markdown","source":"\nLet’s now implement the Levenberg-Marquardt algorithm. The equation that dictates how to update the parameters in the Levenberg-Marquardt algorithm is this:\n$$(J^T W J + \\lambda I)\\, h_{lm} = J^T W (y - \\hat{y})$$\n\t\nIn this equation $h_{lm}$ is the step added to the parameters, and ŷ is the vector of distances between each data point and its closest point on the helix (given a set of helix parameters: **a**, **b**, **c**, **d**, **alph**, **bet**). y is a vector of expected values: in our case, ideally, we expect the distance between each data point and the helix to be 0. \nThe Jacobian J is the matrix of partial derivatives of ŷ with respect to the parameters we are searching for. So in our case it is a matrix of size **nr_of_points** x **6**, where **nr_of_points** is the number of data points and **6** is the number of parameters we have: **a**, **b**, **c**, **d**, **alph**, **bet**. λ is the damping coefficient, which determines the behaviour of the algorithm. 
It is not static: if the sum of the distances squared is smaller than in the previous iteration, we make it smaller, otherwise, we increase its size.\n\n\nTo find the closest distance of a point to a helix, we do some scaling so that our helix is now defined by (cos𝑡,sin𝑡,ℎ𝑡). For a given point 𝑃(i,j,k), let 𝑄 be the closest point on the helix. The line segment connecting 𝑃 and 𝑄 must be perpendicular to the helix's tangent line at 𝑄, which is (−sin𝑡,cos𝑡,ℎ). Knowing that the dot product of two perpendicular vectors is 0 leads to:\n−(cos𝑡−i)sin𝑡+(sin𝑡−j)cos𝑡+(ℎ𝑡−k)ℎ=0\nThis simplifies to 𝐴sin(𝑡+𝐵)+𝐶𝑡+𝐷=0 for some constants 𝐴,𝐵,𝐶,𝐷. $^{[4]}$ \nTo find the solution, we perform a binary search (**SolveSinPlusLin**). \n\n**HelixClosestTime** is the function taking all of this into account and returning the parameter **t**, during which the point is closest to the helix.\n","metadata":{}},{"id":"94b92283-28b1-436b-81ee-8e7e7dcda57e","cell_type":"code","source":"double EvaluateSinPlusLin(double A, double B, double C, double D, double x)\n{\n /*When this equation is equal to zero, the distance between the point and the helix is the shortest.*/\n return A * std::sin(x + B) + C * x + D;\n}","metadata":{"trusted":true},"outputs":[],"execution_count":19},{"id":"6bf86804-c192-425a-9621-099cca3f08fb","cell_type":"code","source":"// A sin (x + B) + C x + D = 0\ndouble SolveSinPlusLin(double A, double B, double C, double D, double mi, double ma)\n{\n /*Binary search to determine x, with which EvaluateSinPlusLin equation equals zero.*/\n for (int i = 0; i < 100; i++)\n {\n double mid = (mi + ma) / 2;\n double vmi = EvaluateSinPlusLin(A, B, C, D, mi);\n double vmid = EvaluateSinPlusLin(A, B, C, D, mid);\n double vma = EvaluateSinPlusLin(A, B, C, D, ma);\n\n if (vmi < 0 and 0 < vmid)\n {\n ma = mid;\n }\n else if (vmid < 0 and 0 < vma)\n {\n mi = mid;\n }\n else if (vmid < 0 and 0 < vmi)\n {\n ma = mid;\n }\n else if (vma < 0 and 0 < vmid)\n {\n mi = mid;\n }\n else\n {\n break;\n 
mi = mid;\n }\n }\n\n double x = (mi + ma) / 2;\n return x;\n}","metadata":{"trusted":true},"outputs":[],"execution_count":20},{"id":"cf0065af-cd01-4986-8a4b-7c8549ffeb52","cell_type":"code","source":"double NextValPiK(double offs, double x)\n{\n\n if (x < 0)\n {\n double v = -NextValPiK(-offs, -x) + 2 * MY_PI;\n return v > x ? v : v + 2 * MY_PI;\n }\n\n double kie = std::floor(x / 2 / MY_PI);\n\n for (int i = -2; i <= 2; i++)\n {\n double v = (kie + i) * 2 * MY_PI + offs;\n\n if (v > x)\n {\n return v;\n }\n }\n\n return 1000000000;\n}","metadata":{"trusted":true},"outputs":[],"execution_count":21},{"id":"5c153969-15d4-4889-977c-74c0086c8825","cell_type":"code","source":"// A cos(x + B) + C = 0\ndouble NextSinPlusInflection(double A, double B, double C, double x)\n{\n /* Identifies the next inflection point of the sine curve.*/\n // cos(x + B) = -C / A\n if (-C / A >= -1 && -C / A <= 1)\n {\n double inv = std::acos(-C / A);\n return std::min(NextValPiK(inv - B, x), NextValPiK(-inv - B, x));\n }\n else\n {\n return 1000000000;\n }\n}","metadata":{"trusted":true},"outputs":[],"execution_count":22},{"id":"e23679a2-d99a-4084-af06-0a96a542c3e4","cell_type":"code","source":"double HelixClosestTime(double a, double b, double c, double d, double alph, double bet, double x, double y, double z)\n{\n /*Calculate t, during which a helix with given params is the closest to a given point.*/\n double point[3];\n UnRotate(x, y, z, alph, bet, point);\n point[0] /= a;\n point[1] /= a;\n point[2] /= a;\n point[0] -= c;\n point[1] -= d;\n double A = std::sqrt(point[0] * point[0] + point[1] * point[1]);\n double B = std::atan2(-point[1], point[0]);\n double C = b * b;\n double D = -point[2] * b;\n\n double mi = point[2] / b - MY_PI;\n double ma = point[2] / b + MY_PI;\n double t1 = SolveSinPlusLin(A, B, C, D, mi, ma);\n\n double ans = t1;\n HelixPoint(a, b, c, d, alph, bet, ans, point);\n double dist = DistanceSquareA(point, x, y, z);\n\n for (double t = mi; t < ma; t = t)\n {\n 
double ttt = NextSinPlusInflection(A, B, C, t);\n\n if (ttt == t)\n {\n break;\n }\n\n double cur = SolveSinPlusLin(A, B, C, D, t, ttt);\n t = ttt;\n HelixPoint(a, b, c, d, alph, bet, cur, point);\n double dist2 = DistanceSquareA(point, x, y, z);\n\n if (dist2 < dist)\n {\n dist = dist2;\n ans = cur;\n }\n }\n\n return ans;\n}","metadata":{"trusted":true},"outputs":[],"execution_count":23},{"id":"b853c722-48c4-46fa-8209-59c5a9ebd952","cell_type":"markdown","source":"\nFor the Levenberg-Marquardt algorithm itself (main implementation is in **LevenbergMarquardt**), we start out by guessing the initial parameters, except **b**. Currently, the code doesn’t support determination of **b** and one right now needs to set **b** to a number close to, but not equal to zero (like 0.1) to find the other parameters.$^{(2)}$ However, that is fine for our purposes, since knowing the magnetic field in an experiment constrains **b** for a given momentum (eg, the ratio of a and b is known). It is also expected for alph and bet to be between -π and π. \n\nNext, we calculate the jacobian using **clad::gradient**.$^{(3)}$ We determine its transpose, find the closest distances of the points to the helix, do some matrix multiplication and we are left with an equation to solve for **h**. We solve it using the Gaussian elimination method (**swap_row**, **ForwardElim** & **BackSub**). 
We update the parameters with **h**, recalculate the sum of all squared distances and change **lambda** accordingly.$^{(4)}$ The process is repeated for some number of iterations or until there is almost no change between the recalculated sum of all squared distances between several iterations.","metadata":{}},{"id":"83151944-9950-4e96-b4f8-e88a9b961f78","cell_type":"code","source":"void swap_row(double *matrix, int size, int i, int j)\n{\n // 6x6\n for (int k = 0; k < size; k++)\n {\n double temp = matrix[i * size + k];\n matrix[i * size + k] = matrix[j * size + k];\n matrix[j * size + k] = temp;\n }\n}","metadata":{"trusted":true},"outputs":[],"execution_count":24},{"id":"a6a02f26-d7c0-4144-9999-98cb4fec6c9a","cell_type":"code","source":"void ForwardElim(double *input, int size, double *res, double *output)\n{\n for (int i = 0; i < size; ++i)\n {\n for (int j = 0; j < size; ++j)\n {\n output[i * size + j] = input[i * size + j];\n }\n }\n for (int i = 0; i < size; i++)\n {\n int i_max = i;\n double v_max = output[i_max * size + i];\n\n for (int j = i + 1; j < size; j++)\n if (std::abs(output[j * size + i]) > std::abs(v_max) && output[j * size + i] != 0)\n v_max = output[j * size + i], i_max = j;\n if (i_max != i)\n {\n swap_row(output, size, i, i_max);\n double temp = res[i];\n res[i] = res[i_max];\n res[i_max] = temp;\n }\n\n if (output[i * size + i] == 0.0)\n {\n std::cerr << \"Mathematical Error!\";\n std::cerr << \"Input that caused the error is:\";\n for (int i = 0; i < size; i++)\n {\n for (int j = 0; j < size; j++)\n {\n std::cerr << output[i * size + j] << \" \";\n }\n std::cerr << std::endl;\n }\n }\n for (int j = i + 1; j < size; j++)\n {\n double ratio = output[j * size + i] / output[i * size + i];\n\n for (int k = 0; k < size; k++)\n {\n output[j * size + k] = output[j * size + k] - ratio * output[i * size + k];\n if (std::abs(output[j * size + k]) <= 1e-15)\n {\n output[j * size + k] = 0;\n }\n }\n res[j] = res[j] - ratio * res[i];\n if 
(std::abs(res[j]) <= 1e-15)\n {\n res[j] = 0;\n }\n }\n }\n\n // std::cerr << \"Forward elimination results:\" << std::endl;\n // std::cerr << \"Left side:\" << std::endl;\n // for (int j = 0; j < size; j++)\n // {\n // for (int k = 0; k < size; k++)\n // {\n // std::cerr << output[j * size + k] << \" \";\n // }\n // std::cerr << std::endl;\n // }\n // std::cerr << \"Right side:\" << std::endl;\n // for (int k = 0; k < size; k++)\n // {\n // std::cerr << res[k] << \" \";\n // }\n // std::cerr << std::endl;\n}","metadata":{"trusted":true},"outputs":[],"execution_count":25},{"id":"55fec92b-49d4-4973-a9dd-de657628ccc5","cell_type":"code","source":"void BackSub(double *input, int size, double *right_side, double *results)\n{\n /*Back substitution and the result of Gaussian elimination*/\n for (int i = (size - 1); i > -1; i--)\n {\n results[i] = right_side[i];\n for (int j = (size - 1); j > i; j--)\n {\n results[i] -= input[i * size + j] * results[j];\n }\n results[i] /= input[i * size + i];\n if (std::abs(results[i]) <= 1e-15)\n {\n results[i] = 0;\n }\n }\n}","metadata":{"trusted":true},"outputs":[],"execution_count":26},{"id":"774a5a5e-17a9-4048-b116-01ed8ea86fc4","cell_type":"code","source":"void CheckSolution(double *input, int size, double *right_side, double *results)\n{\n for (int i = 0; i < size; i++)\n {\n double sum = 0;\n for (int j = 0; j < size; j++)\n {\n sum += input[i * size + j] * results[j];\n }\n if (std::abs(sum - right_side[i]) >= 1e-5)\n {\n std::cerr << \"Wrong solution \" << sum << \" \" << right_side[i] << std::endl;\n std::abort();\n }\n }\n}","metadata":{"trusted":true},"outputs":[],"execution_count":27},{"id":"d17256ab-669a-430c-a2d6-9252f0f3bd2d","cell_type":"code","source":"double DistanceToPoint(double a, double b, double c, double d, double alph, double bet, double x, double y, double z)\n{\n /*Calculate the distance to a single point. 
*/\n double t = HelixClosestTime(a, b, c, d, alph, bet, x, y, z);\n double output[3];\n HelixPoint(a, b, c, d, alph, bet, t, output);\n double dist = DistanceA(output, x, y, z);\n dist += 0.001 * ((a * a) + (b * b) + (c * c) + (d * d) + (alph * alph) + (bet * bet));\n\n return dist;\n}","metadata":{"trusted":true},"outputs":[],"execution_count":28},{"id":"3ecd9d50-f097-4d45-a92b-6a53da7e3464","cell_type":"code","source":"void DistancesToAllPoints(double *points, int nr_of_points, double a, double b, double c, double d, double alph, double bet, double *dist)\n{\n /*Calculate the distances to all points. */\n int n = 0;\n for (int i = 0; i < nr_of_points; i++)\n {\n double x = points[i * 3];\n double y = points[i * 3 + 1];\n double z = points[i * 3 + 2];\n dist[n] = DistanceToPoint(a, b, c, d, alph, bet, x, y, z);\n n++;\n }\n}","metadata":{"trusted":true},"outputs":[],"execution_count":29},{"id":"abf1fdd6-d4a5-4c18-800d-854a704c0fc3","cell_type":"code","source":"double SquareErr(double *points, int nr_of_points, double a, double b, double c, double d, double alph, double bet)\n{\n /*Calculate the residual sum of squares. */\n double dist;\n double square_err = 0;\n for (int i = 0; i < nr_of_points; i++)\n {\n double x = points[i * 3];\n double y = points[i * 3 + 1];\n double z = points[i * 3 + 2];\n dist = DistanceToPoint(a, b, c, d, alph, bet, x, y, z);\n square_err += (dist * dist);\n }\n return square_err;\n}","metadata":{"trusted":true},"outputs":[],"execution_count":30},{"id":"5d559ae3-4b0e-431d-848a-d293af8f46d1","cell_type":"code","source":"void Points(int nr_of_points, double a, double b, double c, double d, double alph, double bet)\n{\n /*Generate and print out points on a helix with given params. 
*/\n double t = 0;\n for (int i = 0; i < nr_of_points; i++)\n {\n t += 0.1;\n double output[3];\n HelixPoint(a, b, c, d, alph, bet, t, output);\n double x = output[0], y = output[1], z = output[2];\n outfile << x << \" \" << y << \" \" << z << \"\\n\";\n }\n outfile << \"end\\n\";\n}","metadata":{"trusted":true},"outputs":[],"execution_count":31},{"id":"3f6b8cb6-f309-4832-b25f-5c80328d5c71","cell_type":"code","source":"void Jacobian(double *points, int nr_of_points, double a, double b, double c, double d, double alph, double bet, double *Jacobian)\n{\n /*Construct the nr_of_points x 6 Jacobian.*/\n auto dist_grad = clad::gradient(DistanceToPoint, \"a, b, c, d, alph, bet\");\n for (int i = 0; i < nr_of_points; i++)\n {\n double x = points[i * 3];\n double y = points[i * 3 + 1];\n double z = points[i * 3 + 2];\n double output[3];\n double da = 0, db = 0, dc = 0, dd = 0, dalph = 0, dbet = 0;\n dist_grad.execute(a, b, c, d, alph, bet, x, y, z, &da, &db, &dc, &dd, &dalph, &dbet);\n Jacobian[i * 6] = da;\n Jacobian[i * 6 + 1] = db;\n Jacobian[i * 6 + 2] = dc;\n Jacobian[i * 6 + 3] = dd;\n Jacobian[i * 6 + 4] = dalph;\n Jacobian[i * 6 + 5] = dbet;\n }\n}","metadata":{"trusted":true},"outputs":[],"execution_count":32},{"id":"d525fe6b-7e43-40b1-aa96-86067e9975ea","cell_type":"code","source":"double Lambda(double *points, int nr_of_points, double &a, double &b, double &c, double &d, double &alph, double &bet, double lambda, double &square_err, double *results)\n{\n /*Calculate the damping coefficient lambda for the next iteration of the LevenbergMarquardt function.*/\n double new_lambda;\n double new_square_err = SquareErr(points, nr_of_points, a + results[0], b + results[1], c + results[2], d + results[3], alph + results[4], bet + results[5]);\n // std::cerr << \"SQUARE ERR \" << new_square_err << std::endl;\n if ((new_square_err >= square_err) && (lambda < 1000))\n new_lambda = lambda * 10;\n else\n {\n // std::cerr << \"IMPROVEMENTS!\";\n a += results[0];\n b += 
results[1];\n c += results[2];\n d += results[3];\n alph += results[4];\n bet += results[5];\n new_lambda = lambda / 10;\n square_err = new_square_err;\n }\n return new_lambda;\n}","metadata":{"trusted":true},"outputs":[],"execution_count":33},{"id":"df6c8ed9-2cc0-4cba-ba09-7b6e0f43b8cc","cell_type":"code","source":"void LevenbergMarquardt(double *points, int nr_of_points, double &a, double &b, double &c, double &d, double &alph, double &bet)\n/*Use the Levenberg-Marquardt algorithm to fit a helix on a given set of points. Currently produces all of the parameters of the helix, except b.*/\n{\n double true_b = b;\n a = 6.2122, b = 0.1, c = 1.9835, d = 1.707055, alph = -3.60384, bet = 1.13255; // currently breaks if the parameters are exact as the ones used for (noise free) generated points\n int diff_params = 6;\n double lambda = 1;\n double lambda_change = 1;\n double square_err;\n double jacobian[nr_of_points * diff_params];\n double tjacobian[diff_params * nr_of_points];\n double tjj[diff_params * diff_params];\n double results[diff_params];\n double counter = 0;\n {\n double dist[nr_of_points];\n DistancesToAllPoints(points, nr_of_points, a, b, c, d, alph, bet, dist);\n square_err = 0;\n for (int i = 0; i < nr_of_points; i++)\n {\n square_err += (dist[i] * dist[i]);\n }\n }\n\n for (int i = 0; i < 200; i++)\n {\n\n Jacobian(points, nr_of_points, a, b, c, d, alph, bet, jacobian);\n\n Transpose(jacobian, nr_of_points, diff_params, tjacobian);\n\n MatrixMultiply(tjacobian, jacobian, diff_params, nr_of_points, diff_params, tjj);\n\n double diag[diff_params * diff_params];\n DiagOfSquareM(tjj, diff_params, diag);\n\n double identity[diff_params * diff_params];\n ScalarMultiply(diag, diff_params, diff_params, lambda, identity);\n double left_side[diff_params * diff_params];\n AddMatrices(tjj, identity, diff_params, diff_params, left_side);\n double dist[nr_of_points];\n DistancesToAllPoints(points, nr_of_points, a, b, c, d, alph, bet, dist);\n double 
right_side[diff_params * 1];\n MatrixMultiply(tjacobian, dist, diff_params, nr_of_points, 1, right_side);\n ScalarMultiply(right_side, 1, diff_params, -1, right_side);\n\n // left side is 6x6, right side is 6x1, so h is 6x1.\n double forward_elim[diff_params * diff_params];\n double unchanged_rs[diff_params];\n CopyMatrix(right_side, diff_params, unchanged_rs);\n ForwardElim(left_side, diff_params, right_side, forward_elim);\n BackSub(forward_elim, diff_params, right_side, results);\n CheckSolution(left_side, diff_params, unchanged_rs, results);\n double old_square_err = square_err;\n lambda = Lambda(points, nr_of_points, a, b, c, d, alph, bet, lambda, square_err, results);\n if (int(square_err) == int(old_square_err) && counter > 10 && square_err < old_square_err)\n break;\n else if (int(square_err) == int(old_square_err))\n counter++;\n else\n counter = 0;\n old_square_err = square_err;\n\n // std::cerr << \"New params: \" << a << \" \" << b << \" \" << c << \" \" << d << \" \" << alph << \" \" << bet << \" \";\n // std::cerr << \"lambda: \" << lambda << \" squares distance: \" << square_err << std::endl;\n }\n b = true_b;\n Points(nr_of_points, a, b, c, d, alph, bet);\n}","metadata":{"trusted":true},"outputs":[],"execution_count":34},{"id":"0a5b72fc-c12a-413a-bb6e-16594f1d906a","cell_type":"code","source":" int nr_of_points = 200;\n // double points[nr_of_points * 3];\n double points[200 * 3];\n double a = 5.2122, b = 2, c = 10.835, d = 17.07055, alph = -3.60384, bet = 1.13255;\n GenerateFlawedPoints(nr_of_points, a, b, c, d, alph, bet, points);\n LevenbergMarquardt(points, nr_of_points, a, b, c, d, alph, bet);\n for (int i = 0; i < nr_of_points; i++)\n {\n outfile << points[i * 3 + 0] << \" \" << points[i * 3 + 1] << \" \" << points[i * 3 + 2] << \"\\n\";\n }\n outfile << \"end\\n\";\n outfile.close();","metadata":{"trusted":true},"outputs":[],"execution_count":35},{"id":"5bde7cea-91e8-4ecc-89ef-b67ab77da0e1","cell_type":"markdown","source":"In the end, one can 
produce a graph by running **Graph_from_file.ipynb**. The way it is currently implemented, the graph shows the original generated points and the best-fit helix produced by **LevenbergMarquardt**. Note that parameter **b**, used for the fitted helix here, is given by the user, not the algorithm.\n\nOverall, one can say that the parameters produced by the minimisation are quite close to the expected results and the function seems to work well when using data with no added randomness. However, added noise sometimes skews the results and the fit ends up being visibly inaccurate. ","metadata":{}},{"id":"5a713ed4-c642-4a62-abf1-28bc651653f8","cell_type":"markdown","source":"## Appendix\n\n**(1)** Perhaps a better way to showcase Clad (but not necessarily a better way to approximate a helix) would be to use the gradient descent method, since it is simpler. However, the implementation found in **fitter.h** gets stuck in a local minimum that is very far from the expected results.\n\n**(2)** That is because **b** is the parameter controlling the pitch of the helix and our distance function unfortunately has a local minimum at **b ≈ 0**. This occurs because for very small values of **b**, the helix practically turns into a cylinder. A cylinder will always pass through points that would otherwise be the minima on a helix with the correct **b** value. The way our function is written, it has no way of escaping this local minimum. Making **b** a constant and leaving it out of the subsequent calculations also seems to lead to highly inaccurate answers.\n\n**(3)** Another improvement would be to use **clad::jacobian**; however, we ran into a problem where Clad’s execute method doesn’t support arguments whose length is specified by a variable (a template argument may not reference a variable-length array type). 
Const variables do not solve the problem.\n\n**(4)** Unfortunately, due to the added randomness in GenerateFlawedPoints, sometimes the end result is not as expected.\n\n\n## References\n\n[1] https://github.com/vgvassilev/clad\n\n[2] https://cmsexperiment.web.cern.ch/detector/identifying-tracks\n\n[3] https://people.duke.edu/~hpgavin/lm.pdf\n\n[4] https://math.stackexchange.com/questions/13341/shortest-distance-between-a-point-and-a-helix","metadata":{}}]} \ No newline at end of file diff --git a/demos/helix-example/helix-example/README.md b/demos/helix-example/helix-example/README.md new file mode 100644 index 000000000..191aefca7 --- /dev/null +++ b/demos/helix-example/helix-example/README.md @@ -0,0 +1,30 @@ +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/compiler-research/helix-example/HEAD) +# Helix fitter example + +## Goal + +Particle tracking is an important part of the processing and analysis of data received from particle detectors, such as the Compact Muon Solenoid (CMS). Tracking is the step that determines the momentum of charged particles escaping from the collision point. It identifies individual particles by reconstructing their trajectories from points where charged particle “hits” were measured by the detector and interpreting them.[1] Due to the Lorentz force, charged particles move in a helical motion when affected by the magnetic field (neglecting other effects due to material interactions, etc). This means we can figure out a specific particle trajectory through the detector by fitting a helix function to data points in such a way that the distance from the data points and the helix would be minimized. In mathematical terms, we need to find optimal helix parameters by minimizing a loss function composed of the sum of least squared distances, thus giving the best estimation of these parameters. For this purpose we can use Clad to efficiently minimize the loss function. 
/// Squared Euclidean distance between the points (x1, y1, z1) and (x2, y2, z2).
inline double DistanceSquare(double x1, double y1, double z1, double x2, double y2, double z2)
{
    double dx = x1 - x2;
    double dy = y1 - y2;
    double dz = z1 - z2;
    return dx * dx + dy * dy + dz * dz;
}

/// Euclidean distance between the points (x1, y1, z1) and (x2, y2, z2).
inline double Distance(double x1, double y1, double z1, double x2, double y2, double z2)
{
    double squared = DistanceSquare(x1, y1, z1, x2, y2, z2);
    return std::sqrt(squared);
}

/// Squared Euclidean distance between the point stored in v[0..2] and (x2, y2, z2).
inline double DistanceSquareA(double v[3], double x2, double y2, double z2)
{
    double dx = v[0] - x2;
    double dy = v[1] - y2;
    double dz = v[2] - z2;
    return dx * dx + dy * dy + dz * dz;
}

/// Euclidean distance between the point stored in v[0..2] and (x2, y2, z2).
inline double DistanceA(double v[3], double x2, double y2, double z2)
{
    return std::sqrt(DistanceSquareA(v, x2, y2, z2));
}
/// Value of pi shared by the fitter code (same digits as before).
/// Declared `inline constexpr` so that including this header from several
/// translation units does not produce multiple definitions.
inline constexpr double MY_PI = 3.14159265359;

/**
 * Evaluate f(x) = A * sin(x + B) + C * x + D.
 *
 * When this expression is equal to zero, the distance between the point and
 * the helix is the shortest.
 */
inline double EvaluateSinPlusLin(double A, double B, double C, double D, double x)
{
  return A * std::sin(x + B) + C * x + D;
}

/**
 * Bisection search for an x in [mi, ma] with EvaluateSinPlusLin(A, B, C, D, x) == 0.
 *
 * At most 100 halvings are performed. The search stops early as soon as no
 * strict sign change is found in either half of the current interval (this
 * includes the case where one of the sampled values is exactly zero).
 *
 * @return the midpoint of the final interval.
 */
inline double SolveSinPlusLin(double A, double B, double C, double D, double mi, double ma)
{
  for (int i = 0; i < 100; i++)
  {
    double mid = (mi + ma) / 2;
    double vmi = EvaluateSinPlusLin(A, B, C, D, mi);
    double vmid = EvaluateSinPlusLin(A, B, C, D, mid);
    double vma = EvaluateSinPlusLin(A, B, C, D, ma);

    // The order of the checks matters when both halves contain a sign change.
    if (vmi < 0 && 0 < vmid)
    {
      ma = mid; // rising through zero in the lower half
    }
    else if (vmid < 0 && 0 < vma)
    {
      mi = mid; // rising through zero in the upper half
    }
    else if (vmid < 0 && 0 < vmi)
    {
      ma = mid; // falling through zero in the lower half
    }
    else if (vma < 0 && 0 < vmid)
    {
      mi = mid; // falling through zero in the upper half
    }
    else
    {
      break; // no strict sign change left to follow
    }
  }

  return (mi + ma) / 2;
}

/**
 * Find the next value of the form 2 * PI * k + offs (k an integer) that is
 * strictly greater than x.
 *
 * Only k values within two periods of floor(x / (2 * PI)) are tried, so offs
 * is expected to be within a few periods of zero; if none of the candidates
 * exceeds x the sentinel 1000000000 is returned.
 */
inline double NextValPiK(double offs, double x)
{
  if (x < 0)
  {
    // Mirror the problem onto the non-negative axis, then step one period
    // forward; a second step is needed when the mirrored value lands on or
    // before x.
    double v = -NextValPiK(-offs, -x) + 2 * MY_PI;
    return v > x ? v : v + 2 * MY_PI;
  }

  double kie = std::floor(x / 2 / MY_PI);

  for (int i = -2; i <= 2; i++)
  {
    double v = (kie + i) * 2 * MY_PI + offs;

    if (v > x)
    {
      return v;
    }
  }

  return 1000000000;
}

/**
 * Find the next solution greater than x of A * cos(x + B) + C = 0.
 *
 * A * cos(x + B) + C is the derivative of A * sin(x + B) + C * x + D, so the
 * returned value is the next stationary point (local extremum) of
 * EvaluateSinPlusLin -- not an inflection point, despite the function name.
 *
 * @return the next solution, or the sentinel 1000000000 when -C / A is outside
 *         [-1, 1] (this is also the outcome for A == 0, where the quotient is
 *         not finite).
 */
inline double NextSinPlusInflection(double A, double B, double C, double x)
{
  // cos(x + B) = -C / A
  double ratio = -C / A;
  if (ratio >= -1 && ratio <= 1)
  {
    double inv = std::acos(ratio);
    return std::min(NextValPiK(inv - B, x), NextValPiK(-inv - B, x));
  }
  else
  {
    return 1000000000;
  }
}
*/ + double dist; + double square_err = 0; + for (int i = 0; i < nr_of_points; i++) + { + double x = points[i * 3]; + double y = points[i * 3 + 1]; + double z = points[i * 3 + 2]; + dist = DistanceToPoint(a, b, c, d, alph, bet, x, y, z); + square_err += (dist * dist); + } + return square_err; +} + +void Points(int nr_of_points, double a, double b, double c, double d, double alph, double bet) +{ + /*Generate and print out points on a helix with given params. */ + double t = 0; + for (int i = 0; i < nr_of_points; i++) + { + t += 0.1; + double output[3]; + HelixPoint(a, b, c, d, alph, bet, t, output); + double x = output[0], y = output[1], z = output[2]; + std::cout << x << " " << y << " " << z << "\n"; + } + std::cout << "end\n"; +} + +void GenerateFlawedPoints(int nr_of_points, double a, double b, double c, double d, double alph, double bet, double *points) +{ + /*Generate points on a helix with given params but add noise. */ + auto seed = time(nullptr); + std::mt19937_64 rng(seed); + std::uniform_real_distribution uniform(-2 * MY_PI, 2 * MY_PI); + double output[3]; + double t = 0; + for (int i = 0; i < nr_of_points; i++) + { + t += 0.1; + HelixPoint(a, b, c, d, alph, bet, t, output); + points[i * 3] = output[0] + uniform(rng) / 10; + points[i * 3 + 1] = output[1] + uniform(rng) / 10; + points[i * 3 + 2] = output[2] + uniform(rng) / 10; + } +} + +void DistancesToAllPoints(double *points, int nr_of_points, double a, double b, double c, double d, double alph, double bet, double *dist) +{ + /*Calculate the distances to all points. 
*/ + int n = 0; + for (int i = 0; i < nr_of_points; i++) + { + double x = points[i * 3]; + double y = points[i * 3 + 1]; + double z = points[i * 3 + 2]; + dist[n] = DistanceToPoint(a, b, c, d, alph, bet, x, y, z); + n++; + } +} +void Jacobian(double *points, int nr_of_points, double a, double b, double c, double d, double alph, double bet, double *Jacobian) +{ + /*Construct the nr_of_points x 6 Jacobian.*/ + auto dist_grad = clad::gradient(DistanceToPoint, "a, b, c, d, alph, bet"); + for (int i = 0; i < nr_of_points; i++) + { + double x = points[i * 3]; + double y = points[i * 3 + 1]; + double z = points[i * 3 + 2]; + double output[3]; + double da = 0, db = 0, dc = 0, dd = 0, dalph = 0, dbet = 0; + dist_grad.execute(a, b, c, d, alph, bet, x, y, z, &da, &db, &dc, &dd, &dalph, &dbet); + Jacobian[i * 6] = da; + Jacobian[i * 6 + 1] = db; + Jacobian[i * 6 + 2] = dc; + Jacobian[i * 6 + 3] = dd; + Jacobian[i * 6 + 4] = dalph; + Jacobian[i * 6 + 5] = dbet; + } +} + +double Lambda(double *points, int nr_of_points, double &a, double &b, double &c, double &d, double &alph, double &bet, double lambda, double &square_err, double *results) +{ + /*Calculate the damping coefficient lambda for the next iteration of the LevenbergMarquardt function.*/ + double new_lambda; + double new_square_err = SquareErr(points, nr_of_points, a + results[0], b + results[1], c + results[2], d + results[3], alph + results[4], bet + results[5]); + // std::cerr << "SQUARE ERR " << new_square_err << std::endl; + if ((new_square_err >= square_err) && (lambda < 1000)) + new_lambda = lambda * 10; + else + { + // std::cerr << "IMPROVEMENTS!"; + a += results[0]; + b += results[1]; + c += results[2]; + d += results[3]; + alph += results[4]; + bet += results[5]; + new_lambda = lambda / 10; + square_err = new_square_err; + } + return new_lambda; +} + +void LevenbergMarquardt(double *points, int nr_of_points, double true_b, double &a, double &b, double &c, double &d, double &alph, double &bet) +/*Use the 
Levenberg-Marquardt algorithm to fit a helix on a given set of points. Currently produces all of the parameters of the helix, except b.*/ +{ + a = 6.2122, b = 0.1, c = 1.9835, d = 1.707055, alph = -3.60384, bet = 1.13255; // currently breaks if the parameters are exact as the ones used for (noise free) generated points + + int diff_params = 6; + double lambda = 1; + double lambda_change = 1; + double square_err; + double jacobian[nr_of_points * diff_params]; + double tjacobian[diff_params * nr_of_points]; + double tjj[diff_params * diff_params]; + double results[diff_params]; + double counter = 0; + { + double dist[nr_of_points]; + DistancesToAllPoints(points, nr_of_points, a, b, c, d, alph, bet, dist); + square_err = 0; + for (int i = 0; i < nr_of_points; i++) + { + square_err += (dist[i] * dist[i]); + } + } + + for (int i = 0; i < 200; i++) + { + + Jacobian(points, nr_of_points, a, b, c, d, alph, bet, jacobian); + + Transpose(jacobian, nr_of_points, diff_params, tjacobian); + + MatrixMultiply(tjacobian, jacobian, diff_params, nr_of_points, diff_params, tjj); + + double diag[diff_params * diff_params]; + DiagOfSquareM(tjj, diff_params, diag); + + double identity[diff_params * diff_params]; + ScalarMultiply(diag, diff_params, diff_params, lambda, identity); + double left_side[diff_params * diff_params]; + AddMatrices(tjj, identity, diff_params, diff_params, left_side); + double dist[nr_of_points]; + DistancesToAllPoints(points, nr_of_points, a, b, c, d, alph, bet, dist); + double right_side[diff_params * 1]; + MatrixMultiply(tjacobian, dist, diff_params, nr_of_points, 1, right_side); + ScalarMultiply(right_side, 1, diff_params, -1, right_side); + + // left side is 6x6, right side is 6x1, so h is 6x1. 
+ double forward_elim[diff_params * diff_params]; + double unchanged_rs[diff_params]; + CopyMatrix(right_side, diff_params, unchanged_rs); + ForwardElim(left_side, diff_params, right_side, forward_elim); + BackSub(forward_elim, diff_params, right_side, results); + CheckSolution(left_side, diff_params, unchanged_rs, results); + double old_square_err = square_err; + lambda = Lambda(points, nr_of_points, a, b, c, d, alph, bet, lambda, square_err, results); + if (int(square_err) == int(old_square_err) && counter > 10 && square_err < old_square_err) + break; + else if (int(square_err) == int(old_square_err)) + counter++; + else + counter = 0; + old_square_err = square_err; + + // std::cerr << "New params: " << a << " " << b << " " << c << " " << d << " " << alph << " " << bet << " "; + // std::cerr << "lambda: " << lambda << " squares distance: " << square_err << std::endl; + } + b = true_b; + Points(nr_of_points, a, b, c, d, alph, bet); +} + +void GradientDescent(double *points, int nr_of_points) +{ + /*Implementation of the gradient descent algorithm. 
Gets stuck in a local minimum.*/ + double a = 5.2122, b = 0.1, c = 0.9835, d = 1.707055, alph = -3.60384, bet = 1.13255; + double lambda = 0.00001; + double jacobian[nr_of_points * 6]; + double tjacobian[6 * nr_of_points]; + double dist[nr_of_points]; + double square_err = SquareErr(points, nr_of_points, a, b, c, d, alph, bet); + double params[6] = {0}; + double prev_square_er = SquareErr(points, nr_of_points, a, b, c, d, alph, bet); + std::cerr << square_err << std::endl; + for (int i = 0; i < 2000; i++) + { + DistancesToAllPoints(points, nr_of_points, a, b, c, d, alph, bet, dist); + Jacobian(points, nr_of_points, a, b, c, d, alph, bet, jacobian); + Transpose(jacobian, nr_of_points, 6, tjacobian); + + double y_dist[nr_of_points]; + ScalarMultiply(dist, nr_of_points, 1, -1, y_dist); + double h[6]; + MatrixMultiply(tjacobian, y_dist, 6, nr_of_points, 1, h); + ScalarMultiply(h, 6, 1, lambda, h); + double new_square_err = SquareErr(points, nr_of_points, a + h[0], b + h[1], c + h[2], d + h[3], alph + h[4], bet + h[5]); + + if (new_square_err < prev_square_er) + { + lambda = lambda * 10; + } + else + { + lambda = lambda / 10; + continue; + } + a += h[0]; + b += h[1]; + c += h[2]; + d += h[3]; + alph += h[4]; + bet += h[5]; + if (new_square_err < square_err) + { + square_err = new_square_err; + params[0] = a; + params[1] = b; + params[2] = c; + params[3] = d; + params[4] = alph; + params[5] = bet; + } + prev_square_er = new_square_err; + // std::cerr << "New params: " << a << " " << b << " " << c << " " << d << " " << alph << " " << bet << " "; + // std::cerr << "lambda: " << lambda << " squares distance: " << new_square_err << std::endl; + } + + double t = -nr_of_points / 2; + for (int i = 0; i < 10 * nr_of_points; i++) + { + t += 0.1; + double output[3]; + HelixPoint(params[0], params[1], params[2], params[3], params[4], params[5], t, output); + double x = output[0], y = output[1], z = output[2]; + + std::cout << x << " " << y << " " << z << "\n"; + } + std::cout << 
"end\n"; +} \ No newline at end of file diff --git a/demos/helix-example/helix-example/graph.py b/demos/helix-example/helix-example/graph.py new file mode 100644 index 000000000..639fc02ae --- /dev/null +++ b/demos/helix-example/helix-example/graph.py @@ -0,0 +1,25 @@ + +import matplotlib.pyplot as plt +import sys + +fig = plt.figure() +ax = plt.axes(projection="3d") + +x, y, z = [], [], [] + +for line in sys.stdin: + if line.strip() == "end": + ax.plot3D(x, y, z) + x.clear() + y.clear() + z.clear() + else: + a, b, c = [float(i) for i in line.split()] + x.append(a) + y.append(b) + z.append(c) + +ax.set_box_aspect([1,1,1]) +plt.show() + + diff --git a/demos/helix-example/helix-example/helix.h b/demos/helix-example/helix-example/helix.h new file mode 100644 index 000000000..74e8b6421 --- /dev/null +++ b/demos/helix-example/helix-example/helix.h @@ -0,0 +1,66 @@ +#pragma once + +#include +#include "rotations.h" +#include "equations.h" +#include "distance.h" + +inline void HelixPoint(double a, double b, double c, double d, double alph, double bet, double t, double output[3]) +{ + /*Describe a point on a helix in the Cartesian coordinate system.*/ + double x = a * (c + std::cos(t)); + double y = a * (d + std::sin(t)); + double z = a * b * t; + output[0] = x; + output[1] = y; + output[2] = z; + Rotate(x, y, z, alph, bet, output); +} + +inline double HelixClosestTime(double a, double b, double c, double d, double alph, double bet, double x, double y, double z) +{ + /*Calculate t, during which a helix with given params is the closest to a given point.*/ + auto MY_PI = 3.14159265359; + double point[3]; + UnRotate(x, y, z, alph, bet, point); + point[0] /= a; + point[1] /= a; + point[2] /= a; + point[0] -= c; + point[1] -= d; + double A = std::sqrt(point[0] * point[0] + point[1] * point[1]); + double B = std::atan2(-point[1], point[0]); + double C = b * b; + double D = -point[2] * b; + + double mi = point[2] / b - MY_PI; + double ma = point[2] / b + MY_PI; + double t1 = 
SolveSinPlusLin(A, B, C, D, mi, ma); + + double ans = t1; + HelixPoint(a, b, c, d, alph, bet, ans, point); + double dist = DistanceSquareA(point, x, y, z); + + for (double t = mi; t < ma; t = t) + { + double ttt = NextSinPlusInflection(A, B, C, t); + + if (ttt == t) + { + break; + } + + double cur = SolveSinPlusLin(A, B, C, D, t, ttt); + t = ttt; + HelixPoint(a, b, c, d, alph, bet, cur, point); + double dist2 = DistanceSquareA(point, x, y, z); + + if (dist2 < dist) + { + dist = dist2; + ans = cur; + } + } + + return ans; +} diff --git a/demos/helix-example/helix-example/main.cc b/demos/helix-example/helix-example/main.cc new file mode 100644 index 000000000..61e536af4 --- /dev/null +++ b/demos/helix-example/helix-example/main.cc @@ -0,0 +1,21 @@ +#include +#include +#include "clad/Differentiator/Differentiator.h" + +#include "fitter.h" + +int main() +{ + int nr_of_points = 200; + double points[nr_of_points * 3]; + double a = 5.2122, b = 2, c = 10.835, d = 17.07055, alph = -3.60384, bet = 1.13255; + GenerateFlawedPoints(nr_of_points, a, b, c, d, alph, bet, points); + LevenbergMarquardt(points, nr_of_points, b, a, b, c, d, alph, bet); + // GradientDescent(points, nr_of_points); + for (int i = 0; i < nr_of_points; i++) + { + std::cout << points[i * 3 + 0] << " " << points[i * 3 + 1] << " " << points[i * 3 + 2] << "\n"; + } + std::cout << "end\n"; + std::cerr << "Results: " << a << " " << b << " " << c << " " << d << " " << alph << " " << bet << std::endl; +} diff --git a/demos/helix-example/helix-example/matrices.h b/demos/helix-example/helix-example/matrices.h new file mode 100644 index 000000000..f08a00f18 --- /dev/null +++ b/demos/helix-example/helix-example/matrices.h @@ -0,0 +1,217 @@ +#pragma once +#include +#include +#include + +// All the matrices are written as 1D arrays! 
// All the matrices are written as 1D arrays (row-major: element (i, j) of a
// matrix with `cols` columns lives at index i * cols + j)!

/// output (arows x bcols) = a (arows x acols) * b (acols x bcols).
/// output must not overlap a or b.
inline void MatrixMultiply(double *a, double *b, int arows, int acols, int bcols, double *output)
{
  for (int i = 0; i < arows; i++)
  {
    for (int j = 0; j < bcols; j++)
    {
      double sum = 0;
      for (int k = 0; k < acols; k++)
        sum = sum + a[i * acols + k] * b[k * bcols + j];
      output[i * bcols + j] = sum;
    }
  }
}

/// output (cols x rows) = transpose of input (rows x cols).
inline void Transpose(double *input, int rows, int cols, double *output)
{
  for (int i = 0; i < rows; ++i)
  {
    for (int j = 0; j < cols; ++j)
    {
      output[j * rows + i] = input[i * cols + j];
    }
  }
}

/// diag = height x height matrix holding only the main diagonal of input.
/// Works for square matrices only.
inline void DiagOfSquareM(double *input, int height, double *diag)
{
  for (int i = 0; i < height * height; i++)
  {
    diag[i] = 0;
  }
  for (int i = 0; i < height; i++)
  {
    diag[i * height + i] = input[i * height + i];
  }
}

/// output = number * matrix, element by element; output may be the same array as matrix.
inline void ScalarMultiply(double *matrix, int rows, int cols, double number, double *output)
{
  for (int i = 0; i < rows; i++)
  {
    for (int j = 0; j < cols; j++)
    {
      output[i * cols + j] = number * matrix[i * cols + j];
    }
  }
}

/// Euclidean length of a vector with `size` components.
inline double VectorLen(double *vector, int size)
{
  double length = 0;
  for (int i = 0; i < size; i++)
  {
    length += vector[i] * vector[i];
  }
  return std::sqrt(length);
}

/// Copy the first `size` elements of matrix into output.
inline void CopyMatrix(double *matrix, int size, double *output)
{
  for (int i = 0; i < size; i++)
  {
    output[i] = matrix[i];
  }
}

/// output = a + b for two rows x cols matrices.
inline void AddMatrices(double *a, double *b, int rows, int cols, double *output)
{
  for (int i = 0; i < rows; i++)
  {
    for (int j = 0; j < cols; j++)
    {
      output[i * cols + j] = a[i * cols + j] + b[i * cols + j];
    }
  }
}

/// Swap rows i and j of a size x size matrix in place.
inline void swap_row(double *matrix, int size, int i, int j)
{
  for (int k = 0; k < size; k++)
  {
    double temp = matrix[i * size + k];
    matrix[i * size + k] = matrix[j * size + k];
    matrix[j * size + k] = temp;
  }
}

/**
 * Forward elimination with partial pivoting (first half of Gaussian elimination).
 *
 * @param input   size x size coefficient matrix; left untouched.
 * @param size    number of rows/columns.
 * @param res     right-hand side with `size` entries; modified in place so that
 *                it matches the eliminated system.
 * @param output  receives the resulting upper-triangular matrix.
 *
 * Entries whose magnitude drops to 1e-15 or below are snapped to zero. If a
 * pivot is exactly zero the matrix is reported on std::cerr and elimination
 * continues regardless, so the results are then not finite.
 */
inline void ForwardElim(double *input, int size, double *res, double *output)
{
  for (int i = 0; i < size * size; i++)
  {
    output[i] = input[i];
  }

  for (int i = 0; i < size; i++)
  {
    // Pick the row (at or below i) with the largest absolute value in column i.
    int i_max = i;
    double v_max = output[i_max * size + i];

    for (int j = i + 1; j < size; j++)
    {
      if (std::abs(output[j * size + i]) > std::abs(v_max))
      {
        v_max = output[j * size + i];
        i_max = j;
      }
    }
    if (i_max != i)
    {
      swap_row(output, size, i, i_max);
      double temp = res[i];
      res[i] = res[i_max];
      res[i_max] = temp;
    }

    if (output[i * size + i] == 0.0)
    {
      std::cerr << "Mathematical Error!";
      std::cerr << "Input that caused the error is:";
      for (int row = 0; row < size; row++)
      {
        for (int col = 0; col < size; col++)
        {
          std::cerr << output[row * size + col] << " ";
        }
        std::cerr << std::endl;
      }
    }

    // Eliminate column i from every row below the pivot row.
    for (int j = i + 1; j < size; j++)
    {
      double ratio = output[j * size + i] / output[i * size + i];

      for (int k = 0; k < size; k++)
      {
        output[j * size + k] = output[j * size + k] - ratio * output[i * size + k];
        if (std::abs(output[j * size + k]) <= 1e-15)
        {
          output[j * size + k] = 0;
        }
      }
      res[j] = res[j] - ratio * res[i];
      if (std::abs(res[j]) <= 1e-15)
      {
        res[j] = 0;
      }
    }
  }
}

/**
 * Back substitution and the result of Gaussian elimination.
 *
 * @param input       upper-triangular size x size matrix (as produced by ForwardElim).
 * @param right_side  matching right-hand side.
 * @param results     receives the `size` solution values; magnitudes of 1e-15
 *                    or below are snapped to zero.
 */
inline void BackSub(double *input, int size, double *right_side, double *results)
{
  for (int i = (size - 1); i > -1; i--)
  {
    results[i] = right_side[i];
    for (int j = (size - 1); j > i; j--)
    {
      results[i] -= input[i * size + j] * results[j];
    }
    results[i] /= input[i * size + i];
    if (std::abs(results[i]) <= 1e-15)
    {
      results[i] = 0;
    }
  }
}

/**
 * Verify that input * results reproduces right_side.
 *
 * Aborts the program (after a message on std::cerr) if any component differs
 * by 1e-5 or more, or if the difference is not a number.
 */
inline void CheckSolution(double *input, int size, double *right_side, double *results)
{
  for (int i = 0; i < size; i++)
  {
    double sum = 0;
    for (int j = 0; j < size; j++)
    {
      sum += input[i * size + j] * results[j];
    }
    // Written as a negated "<" so that a NaN residual is rejected as well.
    if (!(std::abs(sum - right_side[i]) < 1e-5))
    {
      std::cerr << "Wrong solution " << sum << " " << right_side[i] << std::endl;
      std::abort();
    }
  }
}

/// Print `name` followed by the rows x cols matrix, one row per line, to std::cerr.
inline void PrintMatrix(const std::string &name, double *matrix, int rows, int cols)
{
  std::cerr << name << std::endl;
  for (int i = 0; i < rows; i++)
  {
    for (int j = 0; j < cols; j++)
    {
      std::cerr << matrix[i * cols + j] << " ";
    }
    std::cerr << std::endl;
  }
}
b/demos/helix-example/helix-example/tests.cc @@ -0,0 +1,210 @@ +#include +#include +#include "matrices.h" +#include "fitter.h" + +/* Tests for "matrices.h" */ +bool AreSame(double a, double b) +{ + return fabs(a - b) < 0.00009; +} +bool ArraysEqual(double *a, double *b, int N) +{ + for (int i = 0; i < N; i++) + { + if (AreSame(a[i], b[i])) + { + } + else + { + std::cerr << a[i] << " " << b[i] << std::endl; + return false; + } + } + return true; +} + +void TestMultiply() +{ + double a[2 * 3] = {1, 2, 3, 4, 5, 6}; + double b[3 * 4] = {11, 12, 13, 14, 14, 15, 16, 7, 17, 18, 19, 20}; + double output[2 * 4]; + MatrixMultiply(a, b, 2, 3, 4, output); + double expected_output[2 * 4] = {90, 96, 102, 88, 216, 231, 246, 211}; + for (int i = 0; i < 8; i++) + { + std::cout << output[i] << " "; + } + std::cout << std::endl; + + assert(ArraysEqual(output, expected_output, 8)); +} + +void TestTranspose() +{ + double input[2 * 3] = {1, 2, 3, 4, 5, 6}; + double output[3 * 2]; + double expected_output[3 * 2] = {1, 4, 2, 5, 3, 6}; + Transpose(input, 2, 3, output); + for (int i = 0; i < 6; i++) + { + std::cout << output[i] << " "; + } + std::cout << std::endl; + + assert(ArraysEqual(output, expected_output, 6)); +} + +void TestDiagOfSquareM() +{ + double input[3 * 3] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + double diag[3 * 3]; + double expected_diag[3 * 3] = {1, 0, 0, 0, 5, 0, 0, 0, 9}; + DiagOfSquareM(input, 3, diag); + for (int i = 0; i < 9; i++) + { + std::cout << diag[i] << " "; + } + std::cout << std::endl; + assert(ArraysEqual(diag, expected_diag, 9)); +} + +void TestScalarMultiply() +{ + double input[2 * 3] = {1, 2, 3, 4, 5, 6}; + double output[2 * 3]; + double expected_output[2 * 3] = {2, 4, 6, 8, 10, 12}; + + ScalarMultiply(input, 2, 3, 2, output); + for (int i = 0; i < 6; i++) + { + std::cout << output[i] << " "; + } + std::cout << std::endl; + + assert(ArraysEqual(output, expected_output, 6)); +} + +void TestAddMatrices() +{ + double a[2 * 3] = {1, 2, 3, 4, 5, 6}; + double b[2 * 3] 
= {2, 4, 6, 8, 10, 12}; + double output[2 * 3]; + double expected_output[2 * 3] = {3, 6, 9, 12, 15, 18}; + AddMatrices(a, b, 2, 3, output); + for (int i = 0; i < 6; i++) + { + std::cout << output[i] << " "; + } + std::cout << std::endl; + + assert(ArraysEqual(output, expected_output, 6)); +} + +void TestGaussianElim() +{ + double left_side[36] = { + + 2, 3, 1, 5, 7, 1, // + + 4, + 7, 2, 10, 14, 2, // + + 1, + 2, 2, 3, 5, 2, // + + 3, + 5, 4, 1, 6, 4, // + + 5, + 1, 3, 2, 1, 3, // + + 2, + 4, 6, 1, 3, 5}; + + double right_side[6] = {25, 53, 18, 31, 23, 40}; + double output[36]; + double results[6]; + ForwardElim(left_side, 6, right_side, output); + std::cout << "right side " << std::endl; + for (int i = 0; i < 6; i++) + { + std::cout << right_side[i] << " "; + } + std::cout << std::endl; + + BackSub(output, 6, right_side, results); + for (int i = 0; i < 6; i++) + { + std::cout << results[i] << " "; + } + std::cout << std::endl; +} + +void TestDistanceToPoint() +{ + double a = 5.2122, b = -4.79395, c = -26.40835, d = -4.207055, alph = -3.60384, bet = 1.13255; + double t = 0.1; + double output[3]; + HelixPoint(a, b, c, d, alph, bet, t, output); + std::cout << "Generated point " << output[0] << " " << output[1] << " " << output[2] << " " << std ::endl; + double x = output[0], y = output[1], z = output[2]; + double dist = DistanceToPoint(a, b, c, d, alph, bet, x, y, z); + std::cout << "Distance to point on helix is: " << dist << std ::endl; +} + +void TestDistancesToPoints() +{ + double a = 5.2122, b = -4.79395, c = -26.40835, d = -4.207055, alph = -3.60384, bet = 1.13255; + double t = 0; + double points[10][3]; + + std::cout << "Generated points: " << std::endl; + for (int i = 0; i < 10; i++) + { + t += 0.1; + double output[3]; + HelixPoint(a, b, c, d, alph, bet, t, output); + double x = output[0], y = output[1], z = output[2]; + points[i][0] = x; + points[i][1] = y; + points[i][2] = z; + std::cout << output[0] << " " << output[1] << " " << output[2] << " " << std 
::endl; + } + double points1D[10 * 3]; + for (int i = 0; i < 10; i++) + { + for (int j = 0; j < 3; j++) + { + points1D[i * 3 + j] = points[i][j]; + } + } + double dist[10 * 3]; + DistancesToAllPoints(points1D, 10, a, b, c, d, alph, bet, dist); + std::cout << "Distances to all points: " << std::endl; + for (int i = 0; i < 10; i++) + { + std::cout << dist[i] << std::endl; + } + std::cout << "done\n"; +} + +int main() +{ + std::cout << "TestMultiply results: " << std::endl; + TestMultiply(); + std::cout << "TestTranspose results: " << std::endl; + TestTranspose(); + std::cout << "TestDiagOfSquareM results: " << std::endl; + TestDiagOfSquareM(); + std::cout << "TestScalarMultiply results: " << std::endl; + TestScalarMultiply(); + std::cout << "TestAddMatrices results: " << std::endl; + TestAddMatrices(); + std::cout << "TestGaussianElim results: " << std::endl; + TestGaussianElim(); + std::cout << "TestDistanceToPoint results: " << std::endl; + TestDistanceToPoint(); + std::cout << "TestDistancesToAllPoint results: " << std::endl; + TestDistancesToPoints(); +} \ No newline at end of file