From b273b954f81cadcef66f5a174bccfcdd37952a20 Mon Sep 17 00:00:00 2001 From: Adwaith Rajesh Date: Tue, 24 Oct 2023 19:38:00 +0530 Subject: [PATCH] Deployed b37fdf7 with MkDocs version: 1.5.3 --- ml/logregress/index.html | 83 ++++++++++++++++++++++++++++++++++++++- search/search_index.json | 2 +- sitemap.xml.gz | Bin 127 -> 127 bytes 3 files changed, 83 insertions(+), 2 deletions(-) diff --git a/ml/logregress/index.html b/ml/logregress/index.html index 87eb1d2..eb511e8 100644 --- a/ml/logregress/index.html +++ b/ml/logregress/index.html @@ -660,7 +660,88 @@

The math

\]

When you have the final values from your derivative calculation, you can use it in the gradient descent equation and update the weights and bias.

The code

-

Coming soon

+

The data used here is the Breast Cancer Wisconsin (Diagnostic) Data Set which has been modified to look like this, where we +don't have IDs and M=0, and B=1

+
#define INCLUDE_MAT_CONVERSIONS
+#include "ds/mat.h"
+#include "ml/logisticregress.h"
+#include "model/metrics.h"
+#include "model/train_test_split.h"
+#include "parsers/csv.h"
+
+int main(void) {
+    CSV *csv_reader = csv_init(569, 31, ',');
+    csv_parse(csv_reader, "data/bcancer.csv");
+
+    Mat *X = csv_get_mat_slice(csv_reader, (Slice){1, 31});
+    Mat *Y = csv_get_mat_slice(csv_reader, (Slice){0, 1});
+    Mat *X_train, *X_test, *Y_train, *Y_test;
+
+    train_test_split(X, Y, &X_train, &X_test, &Y_train, &Y_test, 0.3, 101);
+
+    logregress_set_max_iter(2000);
+    LogisticRegressionModel *model = logregress_init();
+    logregress_fit(model, X_train, Y_train);
+
+    // printf("prediction: %lf\n", logregress_predict(model, (double[]){15.22, 30.62, 103.4, 716.9, ... , 0}, 30));
+    Array *preds = logregress_predict_many(model, X_test);
+    Array *true = mat_get_col_arr(Y_test, 0);
+
+    logregress_print(model);
+
+    printf("confusion matrix: \n");
+    Mat *conf_mat = model_confusion_matrix(true, preds);
+    mat_print(conf_mat);
+
+    arr_free(true);
+    arr_free(preds);
+    logregress_free(model);
+    mat_free_many(7, X, Y, X_test, X_train, Y_test, Y_train, conf_mat);
+    csv_free(csv_reader);
+}
+
+
LogisticRegressionModel(bias: 0.5159147, loss: -12.4263621, weights: 0x5556e8a732c0)
+weights:
+1546.6922009
+1139.6829595
+8552.1648900
+2522.0044946
+11.8724211
+-19.3345598
+-44.9646156
+-18.4984994
+23.8378678
+10.1676564
+0.2338315
+103.3839701
+-139.7864354
+-4498.8563443
+0.2662770
+-6.5798244
+-8.6158697
+-1.6938180
+1.6508702
+-0.3857419
+1650.7843571
+1445.0283208
+8312.7672485
+-4024.9280673
+13.2972726
+-72.4527931
+-111.8298475
+-26.6204266
+28.0612275
+5.4099162
+confusion matrix:
+  57.00   10.00
+  2.00   101.00
+
+

Now, what does the confusion matrix generated by sklearn look like.

+
array([[ 59,   7],
+       [  3, 102]])
+
+

we are pretty close... +check out the Python implementation here

diff --git a/search/search_index.json b/search/search_index.json index c1f047c..31ddc09 100644 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"LinearML","text":"

A learning project where I try to build a ML library from scratch in C.

"},{"location":"#building-linearml","title":"Building LinearML","text":"

LinearML uses BuildMe as it's build system. It's a build system that I made. It is still WIP.

Install the BuildMe with the command

pip3 install buildme\n

Clone the repo

git clone --depth=1 -b master https://github.com/Adwaith-Rajesh/LinearML.git\n

Compile the liblinearml.so

cd ./LinearML\nchmod +x buildme\n
./buildme all --gslpath=/path/to/your/gsl/installation\n

This will create a build folder where the shared object will be present in the build/lib folder

"},{"location":"#testing-the-install","title":"Testing the install","text":"

Let's create a simple Array and print it to check whether everything works

touch test_lm.c\n
// test_lm.c\n\n#define INCLUDE_ARRAY_STATS\n#include \"ds/array.h\"\n\nint main(void) {\nArray *arr = arr_create(3);\n\nARR_AT(arr, 0) = 1;\nARR_AT(arr, 1) = 4;\nARR_AT(arr, 2) = 5;\n\narr_print(arr);\narr_free(arr);\n\nreturn 0;\n}\n

Compile

gcc -o test_lm test_lm.c -I./include -L./build/lib -llinearml -lm -lgsl -lgslcblas\n

Run

$ LD_LIBRARY_PATH=./build/lib ./test_lm\n[ 1.00 4.00 5.00 ]\n
"},{"location":"ds/array/","title":"Array","text":""},{"location":"ds/array/#structure","title":"Structure","text":"
typedef struct {\nsize_t size;\nfloat *arr;\n} Array;\n

Array is LinearML way of storing both the C array and the number of elements at the same time.

"},{"location":"ds/array/#examples","title":"Examples","text":""},{"location":"ds/array/#1-creating-a-simple-array","title":"1. Creating a simple Array","text":"
#include \"ds/array.h\"\n\nint main(void) {\n// create an array with 3 values\nArray *arr = arr_create(3);\n\nARR_AT(arr, 0) = 1;  // set arr[0] = 1\nARR_AT(arr, 1) = 4;  // set arr[1] = 4\nARR_AT(arr, 2) = 5;  // set arr[2] = 5\n\narr_print(arr);\n\n// free Array crated using arr_crate\narr_free(arr);\n\nreturn 0;\n}\n
[ 1.00 4.00 5.00 ]\n
"},{"location":"ds/array/#2-create-an-array-using-existing-c-array","title":"2. Create an Array using existing C array","text":"
#include \"ds/array.h\"\n\nint main(void) {\nfloat arr_vals[] = {1, 2, 3, 4};\n\n// use an existing C array with 4 values\nArray *arr = arr_init(arr_vals, 4);\n\nARR_AT(arr, 0) = 5;  // set arr[0] = 5\narr_print(arr);\n\n// free Array crated using arr_init\narr_init_free(arr);\nreturn 0;\n}\n
[ 5.00 2.00 3.00 4.00 ]\n
"},{"location":"ds/array/#3-changing-the-float-precision-during-printing","title":"3. Changing the float precision during printing","text":"
#include \"ds/array.h\"\n\nint main(void) {\n// create an array with 3 values\nArray *arr = arr_create(3);\n\nARR_AT(arr, 0) = 1;  // set arr[0] = 1\nARR_AT(arr, 1) = 4;  // set arr[1] = 4\nARR_AT(arr, 2) = 5;  // set arr[2] = 5\n\n// use arr_printp instead of arr_print\narr_printp(arr, 0);\n\n// free Array crated using arr_crate\narr_free(arr);\n\nreturn 0;\n}\n
[ 1 4 5 ]\n
"},{"location":"ds/array/#4-map-a-function-over-the-array","title":"4. Map a function over the Array","text":"
#include \"ds/array.h\"\n\nfloat add_three(float val) {\nreturn val + 3;\n}\n\nint main(void) {\n// create an array with 3 values\nArray *arr = arr_create(3);\n\nARR_AT(arr, 0) = 1;  // set arr[0] = 1\nARR_AT(arr, 1) = 4;  // set arr[1] = 4\nARR_AT(arr, 2) = 5;  // set arr[2] = 5\n\n// the function will return the same array back for\n// convenience reasons\narr_print(arr_map(arr, add_three));\n\n// free Array crated using arr_crate\narr_free(arr);\n\nreturn 0;\n}\n
[ 4.00 7.00 8.00 ]\n
"},{"location":"ds/array/#5-array-stats","title":"5. Array stats","text":"
#include <stdio.h>\n\n#define INCLUDE_ARRAY_STATS\n#include \"ds/array.h\"\n\n\nint main(void) {\n// create an array with 3 values\nArray *arr = arr_create(3);\n\nARR_AT(arr, 0) = 1;  // set arr[0] = 1\nARR_AT(arr, 1) = 4;  // set arr[1] = 4\nARR_AT(arr, 2) = 5;  // set arr[2] = 5\n\narr_print(arr);\n\nprintf(\"Mean: %.2f\\n\", arr_mean(arr));\nprintf(\"Max: %.2f\\n\", arr_max(arr));\nprintf(\"Min: %.2f\\n\", arr_min(arr));\n\n// free Array crated using arr_crate\narr_free(arr);\n\nreturn 0;\n}\n
[ 1.00 4.00 5.00 ]\nMean: 3.33\nMax: 5.00\nMin: 1.00\n
"},{"location":"ds/mat/","title":"Mat","text":""},{"location":"ds/mat/#structure","title":"Structure","text":"
typedef struct {\ngsl_matrix *mat;\nsize_t rows;\nsize_t cols;\n} Mat;\n

Internally the Matrix is 1D array where the values are stored in row major order.

/*\nCreate a rows x cols matrix\n*/\nMat *mat_create(size_t rows, size_t cols);\n\n/*\nCreate a Mat from an existing array\n*/\nMat *mat_create_from_array(double *arr, size_t rows, size_t cols);\n\n/*\nCreate a zero matrix of size rows x cols\n*/\nMat *mat_create_zeros(size_t rows, size_t cols);\n\n/*\nCreate a size x size identity matrix\n*/\nMat *mat_identity(size_t size);\n\n/*\nget value at mat[row, col]\n*/\ndouble mat_get(Mat *mat, size_t row, size_t col);\n\n/*\nSet mat[row, col] = val, and return the mat\n*/\nMat *mat_set(Mat *mat, size_t row, size_t col, double val);\n\n/*\nDisplay the given matrix\n*/\nvoid mat_printp(Mat *mat, int print_prec);\n\n/*\nFree a Mat\n*/\nvoid mat_free(Mat *mat);\n\n/*\nAdd two Mat of the same dimension\n\nAdds mat2 to mat1 and returns mat1\n*/\nMat *mat_add(Mat *mat1, Mat *mat2);\n\n/*\nSubtract two Mat of the same dimension\n\nSubtract mat2 from mat1 and returns mat1\n*/\nMat *mat_sub(Mat *mat1, Mat *mat2);\n\n/*\nMultiplies mat1 and mat2 and returns a new Mat\n\nyou need to free the new Mat\n*/\nMat *mat_mul(Mat *mat1, Mat *mat2);\n\n/*\nMultiplies a scalar value to the Mat and returns the given mat\n*/\nMat *mat_scalar_mul(Mat *mat, double val);\n\n/*\nReturns a new matrix that is the transpose of the given mat\n\nyou need to free the new matrix\n*/\nMat *mat_transpose(Mat *mat);\n\n/*\nFind the determinant of a matrix\n*/\ndouble mat_det(Mat *mat);\n\n/*\nFind the inverse of the given mat using LU decomposition\n\nYou need to free the returned Mat\n*/\nMat *mat_inverse(Mat *mat);\n\n/*\nreturns and inverse of MxN matrix using SVD\n*/\nMat *mat_invert_svd(Mat *mat);\n\n/*\nReturns a copy of the given matrix\n*/\nMat *mat_cpy(Mat *mat);\n

Here is a simple example on finding the inverse of a matrix

#include <stdio.h>\n\n#include \"ds/mat.h\"\n\nint main(void) {\nMat *mat = mat_create(2, 2);\n\nmat_set(mat, 0, 0, 4);\nmat_set(mat, 0, 1, 7);\n\nmat_set(mat, 1, 0, 2);\nmat_set(mat, 1, 1, 6);\n\nmat_printp(mat, 5);\nprintf(\"\\n\");\n\nMat *inv = mat_inverse(mat);\nmat_printp(inv, 7);\n\nmat_free(mat);\nmat_free(inv);\n\nreturn 0;\n}\n
  4.00000   7.00000\n  2.00000   6.00000\n\n  0.6000000   -0.7000000\n  -0.2000000   0.4000000\n

If you have any queries feel free to contact me through any of the socials given below

"},{"location":"ds/mat/#1-available-functions","title":"1. Available functions.","text":""},{"location":"ds/mat/#2-examples","title":"2. Examples.","text":""},{"location":"ds/vec/","title":"vec","text":""},{"location":"ds/vec/#this-is-the-vec-file","title":"this is the vec file","text":""},{"location":"ml/linregress/","title":"Linear Regression","text":""},{"location":"ml/linregress/#the-math","title":"The math","text":"

Well, what is Linear Regression or Simple Linear Regression?

Acco.To Wiki

\"linear regression is a linear approach for modelling the relationship between a scalar response and one or more explanatory variables (also known as dependent and independent variables)\"

or in other words use one variable to predict the value of another.

Now, how do you do that?

A simple linear regression model defines the relationship between two variables \\(x\\) and \\(y\\) using a line defined by an equation in the following form:

\\[ y = \\alpha + \\beta x \\]

In order to determine the optimal estimates of \\(\\alpha\\) and \\(\\beta\\), an estimation method known as Ordinary Least Squares is used.

Now after a lot of derivation and other math stuff, which I won't go over here, we will end up with two nice formulas for finding \\(\\alpha\\) and \\(\\beta\\)

\\[ \\beta = { Cov(x, y) \\over Var(x)} \\] \\[ \\alpha = \\overline{y} - \\beta \\overline{x} \\]

Where

\\[ Cov(x, y) = {1 \\over n - 1} \\sum_{i=1}^{n} (x_i - \\overline{x}) \\ (y_i - \\overline{y}) \\] \\[ Var(x) = {1 \\over n - 1} \\sum_{i=1}^{n} (x_i - \\overline{x})^2 \\] "},{"location":"ml/linregress/#the-code","title":"The Code","text":"

This is a really stupid example.. but it shows it's working.

#include <stdio.h>\n\n#include \"ds/array.h\"\n#include \"ml/linregress.h\"\n\nint main(void) {\nfloat x_vals[] = {1, 2, 3, 4, 5};\nfloat y_vals[] = {2, 4, 6, 8, 10};\n\nLinearRegressionModel *model = linregress_init();\nlinregress_fit(model, x_vals, y_vals, 5);\n\nprintf(\"Slope: %f\\n\", model->slope);\nprintf(\"Intercept: %f\\n\", model->intercept);\nprintf(\"R value: %f\\n\", model->rvalue);\n\nprintf(\"prediction (20): %f\\n\", linregress_predict(model, 20));\n\nlinregress_free(model);\n\nreturn 0;\n}\n

Run the code, instructions here

Slope: 2.000000\nIntercept: 0.000000\nR value: 1.000000\nprediction (20): 40.000000\n
"},{"location":"ml/linregress/#list-of-functions","title":"List of functions","text":"

The struct

typedef struct {\n// y = ax + b\ndouble slope;\ndouble intercept;\ndouble rvalue;  // corelation value\n} LinearRegressionModel;\n
/*\nInitialize the simple linear regression model\n*/\nLinearRegressionModel *linregress_init();\n\n/*\nFree the linear regression model\n*/\nvoid linregress_free(LinearRegressionModel *model);\n\n/*\nFit the linear regression model with the values\n*/\nLinearRegressionModel *linregress_fit(LinearRegressionModel *model, double *x, double *y, size_t len);\n\n/*\nPredict new values with the linear regression model\n*/\ndouble linregress_predict(LinearRegressionModel *model, double x);\n\n/*\nScore/test the linear regression model based on known x and y values\n*/\ndouble linregress_score(LinearRegressionModel *model, double *x_test, double *y_test, size_t len);\n

If you have any issues, queries feel free to contact me through the socials given below

"},{"location":"ml/logregress/","title":"Logistic Regression","text":""},{"location":"ml/logregress/#the-math","title":"The math","text":"

The references that is used

The logistic regression that LinearML performs is a Binary Logistic Regression. i.e we can classify two values

To perform a prediction, we use a bunch of values that include the weights(w), inputs(x) and bias(b). which can be written using neural-network like formula.

\\[ z = \\left(\\sum_{i=1}^n w_i x_i \\right) \\]

We need an activation function to get our predictions, in case of binary logistic regression, it's called the sigmoid which is usually denoted by \\(\\sigma\\) of \\(\\hat y\\), and is defined as follows.

\\[ \\hat y = \\sigma(z) = \\begin{cases} \\frac{1}{1 + exp(-z)}, & \\text{if $z$ $\\geq$ 0} \\\\[2ex] \\frac{exp(z)}{1 + exp(z)}, & \\text{otherwise} \\end{cases} \\]

In order to compute the loss we can use the following function.

\\[ Loss(\\hat y, y) = -\\frac{1}{m} \\sum_{i=1}^{m} y \\log(\\hat y) + (1 - y) \\log(1 - \\hat y) \\]

Now, to calculate the gradients to optimize your weights using gradient descent, you must calculate the derivative of your loss function

\\[ \\frac{\\partial Loss(\\hat y, y)}{\\partial w} = \\frac{1}{m}(\\hat y - y)x_i^T \\] \\[ \\frac{\\partial Loss(\\hat y, y)}{\\partial b} = \\frac{1}{m}(\\hat y - y) \\]

When you have the final values from your derivative calculation, you can use it in the gradient descent equation and update the weights and bias.

"},{"location":"ml/logregress/#the-code","title":"The code","text":"

Coming soon

"},{"location":"ml/multiregress/","title":"Multiple Linear Regression","text":""},{"location":"ml/multiregress/#the-math","title":"The Math","text":"

Definition can be found here.

In simple terms MLR can described as

\"The case of one explanatory variable is called simple linear regression; for more than one, the process is called multiple linear regression.\"

The formula / model for MLR is

\\[ y = \\beta_0 + \\beta_1x_1 + \\dots + \\beta_nx_n + \\epsilon \\]

The same formula in Matrix form can be represented as

\\[ Y = \\beta X + \\epsilon \\] \\[ \\begin{bmatrix} y_1 \\\\ y_2 \\\\ \\vdots \\\\ y_N \\end{bmatrix} = \\begin{bmatrix} x_{11} & x_{12} & \\cdots & x_{1k} \\\\ x_{21} & x_{22} & \\cdots & x_{2k} \\\\ \\vdots & \\ddots & \\ddots & \\vdots \\\\ x_{N1} & x_{N2} & \\cdots & x_{Nk} \\\\ \\end{bmatrix} * \\begin{bmatrix} \\beta_0 \\\\ \\beta_1 \\\\ \\vdots \\\\ \\beta_k \\end{bmatrix} + \\begin{bmatrix} \\epsilon_1 \\\\ \\epsilon_2 \\\\ \\vdots \\\\ \\epsilon_N \\\\ \\end{bmatrix} \\]

Our goal is to find \\(\\beta\\) (coefficients / parameters) and to minimize \\(\\epsilon^2\\). The methods used to do so is called the Least Squared Method.

Now, the least squared estimator for the given model is

\\[ \\beta = {(X^TX)}^{-1}(X^TY) \\]

The proof for this derivation can be found here.

"},{"location":"ml/multiregress/#the-code","title":"The Code","text":"

Here is an example of training the Multiple Linear Regression Model with an example data.

Data

\\[ \\begin{array}{|c|c|c|} \\hline \\text{${x_1}$} & \\text{${x_2}$} & \\text{${y}$} \\\\ \\hline 1 & 1 & 3.25 \\\\ \\hline 1 & 2 & 6.5 \\\\ \\hline 2 & 2 & 3.5 \\\\ \\hline 0 & 1 & 5.0 \\\\ \\hline \\end{array} \\]

When fitting the MLinearRegressionModel using the function mlinregress_fit, the \\(X\\) matrix must be

\\[ X = \\begin{bmatrix} 1 & 1 \\\\ 1 & 2 \\\\ 2 & 2 \\\\ 0 & 1 \\\\ \\end{bmatrix} \\]

The \\(1^{st}\\) column are the values of \\(x_1\\) and \\(2^{nd}\\) columns are the values of \\(x_2\\).

The \\(Y\\) matrix will look like the following

\\[ Y = \\begin{bmatrix} 3.25 \\\\ 6.5 \\\\ 3.5 \\\\ 5.0 \\\\ \\end{bmatrix} \\]

In code

#include <stdio.h>\n\n#include \"ds/array.h\"\n#include \"ml/multiregress.h\"\n\nint main(void) {\nfloat x_vals[] = {\n1, 1,\n1, 2,\n2, 2,\n0, 1};\n\nfloat y_vals[] = {\n3.25,\n6.5,\n3.5,\n5.0};\n\nMat *x_mat = mat_create_from_array(x_vals, 4, 2);\nMat *y_mat = mat_create_from_array(y_vals, 4, 1);\n\nMLinearRegressionModel *model = mlinregress_init();\nmlinregress_fit(model, x_mat, y_mat);\n\nprintf(\"coefficients:\\n\");\nmat_print(model->coefs);\nprintf(\"intercept: %f\\n\", model->intercept);\n\n// Array *arr = arr_create(2);\n// ARR_AT(arr, 0) = 4;\n// ARR_AT(arr, 1) = 3;\n\nprintf(\"predicted: %f\\n\", mlinregress_predict(model, (float[]){4, 3}, 2));\n// printf(\"predicted: %f\\n\", mlinregress_predict_arr(model, arr));\n\nmat_free_no_array(x_mat);\nmat_free_no_array(y_mat);\nmlinregress_free(model);\n}\n

Run the code, instructions here

coefficients:\n  -2.38\n  3.25\nintercept: 2.062500\npredicted: 2.312500\n
"},{"location":"parsers/csv/","title":"CSV","text":"

LinearML has it's own simple CSV parser.

Here is an example code on how to use it. Will be working on the code of Multiple Linear Regression mentioned here.

Here, instead of the data being explicitly passed using Arrays, we will use CSV files.

"},{"location":"parsers/csv/#example","title":"Example","text":"

data.csv

1 1 3.25 1 2 6.5 2 2 3.5 0 1 5.0
#include <stdio.h>\n\n#include \"ds/mat.h\"\n#include \"ml/multiregress.h\"\n#include \"parsers/csv.h\"\n\nint main(void) {\n// initialize the reader for 4 row 3 col CSV file,\n// using ',' as the delimiter\nCSV *csv_reader = csv_init(4, 3, ',');\n\n// parse the CSV file\ncsv_parse(csv_reader, \"data.csv\");\n\n// select the cols needed for X and Y values\nMat *X = csv_get_mat(csv_reader, (int[]){0, 1}, 2);\nMat *Y = csv_get_mat(csv_reader, (int[]){2}, 1);\n\n// initialize the model\nMLinearRegressionModel *model = mlinregress_init();\n\n// fit/train the model\nmlinregress_fit(model, X, Y);\n\n// print the properties.\nmat_printp(model->coefs, 7);\nprintf(\"Intercept: %f\\n\", model->intercept);\n\n// free everything.\nmlinregress_free(model);\nmat_free(X);\nmat_free(Y);\ncsv_free(csv_reader);\n}\n
"},{"location":"parsers/csv/#limitations-of-the-parser","title":"Limitations of the parser","text":""}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"LinearML","text":"

A learning project where I try to build a ML library from scratch in C.

"},{"location":"#building-linearml","title":"Building LinearML","text":"

LinearML uses BuildMe as its build system. It's a build system that I made. It is still WIP.

Install the BuildMe with the command

pip3 install buildme\n

Clone the repo

git clone --depth=1 -b master https://github.com/Adwaith-Rajesh/LinearML.git\n

Compile the liblinearml.so

cd ./LinearML\nchmod +x buildme\n
./buildme all --gslpath=/path/to/your/gsl/installation\n

This will create a build folder where the shared object will be present in the build/lib folder

"},{"location":"#testing-the-install","title":"Testing the install","text":"

Let's create a simple Array and print it to check whether everything works

touch test_lm.c\n
// test_lm.c\n\n#define INCLUDE_ARRAY_STATS\n#include \"ds/array.h\"\n\nint main(void) {\nArray *arr = arr_create(3);\n\nARR_AT(arr, 0) = 1;\nARR_AT(arr, 1) = 4;\nARR_AT(arr, 2) = 5;\n\narr_print(arr);\narr_free(arr);\n\nreturn 0;\n}\n

Compile

gcc -o test_lm test_lm.c -I./include -L./build/lib -llinearml -lm -lgsl -lgslcblas\n

Run

$ LD_LIBRARY_PATH=./build/lib ./test_lm\n[ 1.00 4.00 5.00 ]\n
"},{"location":"ds/array/","title":"Array","text":""},{"location":"ds/array/#structure","title":"Structure","text":"
typedef struct {\nsize_t size;\nfloat *arr;\n} Array;\n

Array is LinearML way of storing both the C array and the number of elements at the same time.

"},{"location":"ds/array/#examples","title":"Examples","text":""},{"location":"ds/array/#1-creating-a-simple-array","title":"1. Creating a simple Array","text":"
#include \"ds/array.h\"\n\nint main(void) {\n// create an array with 3 values\nArray *arr = arr_create(3);\n\nARR_AT(arr, 0) = 1;  // set arr[0] = 1\nARR_AT(arr, 1) = 4;  // set arr[1] = 4\nARR_AT(arr, 2) = 5;  // set arr[2] = 5\n\narr_print(arr);\n\n// free Array crated using arr_crate\narr_free(arr);\n\nreturn 0;\n}\n
[ 1.00 4.00 5.00 ]\n
"},{"location":"ds/array/#2-create-an-array-using-existing-c-array","title":"2. Create an Array using existing C array","text":"
#include \"ds/array.h\"\n\nint main(void) {\nfloat arr_vals[] = {1, 2, 3, 4};\n\n// use an existing C array with 4 values\nArray *arr = arr_init(arr_vals, 4);\n\nARR_AT(arr, 0) = 5;  // set arr[0] = 5\narr_print(arr);\n\n// free Array crated using arr_init\narr_init_free(arr);\nreturn 0;\n}\n
[ 5.00 2.00 3.00 4.00 ]\n
"},{"location":"ds/array/#3-changing-the-float-precision-during-printing","title":"3. Changing the float precision during printing","text":"
#include \"ds/array.h\"\n\nint main(void) {\n// create an array with 3 values\nArray *arr = arr_create(3);\n\nARR_AT(arr, 0) = 1;  // set arr[0] = 1\nARR_AT(arr, 1) = 4;  // set arr[1] = 4\nARR_AT(arr, 2) = 5;  // set arr[2] = 5\n\n// use arr_printp instead of arr_print\narr_printp(arr, 0);\n\n// free Array crated using arr_crate\narr_free(arr);\n\nreturn 0;\n}\n
[ 1 4 5 ]\n
"},{"location":"ds/array/#4-map-a-function-over-the-array","title":"4. Map a function over the Array","text":"
#include \"ds/array.h\"\n\nfloat add_three(float val) {\nreturn val + 3;\n}\n\nint main(void) {\n// create an array with 3 values\nArray *arr = arr_create(3);\n\nARR_AT(arr, 0) = 1;  // set arr[0] = 1\nARR_AT(arr, 1) = 4;  // set arr[1] = 4\nARR_AT(arr, 2) = 5;  // set arr[2] = 5\n\n// the function will return the same array back for\n// convenience reasons\narr_print(arr_map(arr, add_three));\n\n// free Array crated using arr_crate\narr_free(arr);\n\nreturn 0;\n}\n
[ 4.00 7.00 8.00 ]\n
"},{"location":"ds/array/#5-array-stats","title":"5. Array stats","text":"
#include <stdio.h>\n\n#define INCLUDE_ARRAY_STATS\n#include \"ds/array.h\"\n\n\nint main(void) {\n// create an array with 3 values\nArray *arr = arr_create(3);\n\nARR_AT(arr, 0) = 1;  // set arr[0] = 1\nARR_AT(arr, 1) = 4;  // set arr[1] = 4\nARR_AT(arr, 2) = 5;  // set arr[2] = 5\n\narr_print(arr);\n\nprintf(\"Mean: %.2f\\n\", arr_mean(arr));\nprintf(\"Max: %.2f\\n\", arr_max(arr));\nprintf(\"Min: %.2f\\n\", arr_min(arr));\n\n// free Array crated using arr_crate\narr_free(arr);\n\nreturn 0;\n}\n
[ 1.00 4.00 5.00 ]\nMean: 3.33\nMax: 5.00\nMin: 1.00\n
"},{"location":"ds/mat/","title":"Mat","text":""},{"location":"ds/mat/#structure","title":"Structure","text":"
typedef struct {\ngsl_matrix *mat;\nsize_t rows;\nsize_t cols;\n} Mat;\n

Internally the Matrix is 1D array where the values are stored in row major order.

/*\nCreate a rows x cols matrix\n*/\nMat *mat_create(size_t rows, size_t cols);\n\n/*\nCreate a Mat from an existing array\n*/\nMat *mat_create_from_array(double *arr, size_t rows, size_t cols);\n\n/*\nCreate a zero matrix of size rows x cols\n*/\nMat *mat_create_zeros(size_t rows, size_t cols);\n\n/*\nCreate a size x size identity matrix\n*/\nMat *mat_identity(size_t size);\n\n/*\nget value at mat[row, col]\n*/\ndouble mat_get(Mat *mat, size_t row, size_t col);\n\n/*\nSet mat[row, col] = val, and return the mat\n*/\nMat *mat_set(Mat *mat, size_t row, size_t col, double val);\n\n/*\nDisplay the given matrix\n*/\nvoid mat_printp(Mat *mat, int print_prec);\n\n/*\nFree a Mat\n*/\nvoid mat_free(Mat *mat);\n\n/*\nAdd two Mat of the same dimension\n\nAdds mat2 to mat1 and returns mat1\n*/\nMat *mat_add(Mat *mat1, Mat *mat2);\n\n/*\nSubtract two Mat of the same dimension\n\nSubtract mat2 from mat1 and returns mat1\n*/\nMat *mat_sub(Mat *mat1, Mat *mat2);\n\n/*\nMultiplies mat1 and mat2 and returns a new Mat\n\nyou need to free the new Mat\n*/\nMat *mat_mul(Mat *mat1, Mat *mat2);\n\n/*\nMultiplies a scalar value to the Mat and returns the given mat\n*/\nMat *mat_scalar_mul(Mat *mat, double val);\n\n/*\nReturns a new matrix that is the transpose of the given mat\n\nyou need to free the new matrix\n*/\nMat *mat_transpose(Mat *mat);\n\n/*\nFind the determinant of a matrix\n*/\ndouble mat_det(Mat *mat);\n\n/*\nFind the inverse of the given mat using LU decomposition\n\nYou need to free the returned Mat\n*/\nMat *mat_inverse(Mat *mat);\n\n/*\nreturns and inverse of MxN matrix using SVD\n*/\nMat *mat_invert_svd(Mat *mat);\n\n/*\nReturns a copy of the given matrix\n*/\nMat *mat_cpy(Mat *mat);\n

Here is a simple example on finding the inverse of a matrix

#include <stdio.h>\n\n#include \"ds/mat.h\"\n\nint main(void) {\nMat *mat = mat_create(2, 2);\n\nmat_set(mat, 0, 0, 4);\nmat_set(mat, 0, 1, 7);\n\nmat_set(mat, 1, 0, 2);\nmat_set(mat, 1, 1, 6);\n\nmat_printp(mat, 5);\nprintf(\"\\n\");\n\nMat *inv = mat_inverse(mat);\nmat_printp(inv, 7);\n\nmat_free(mat);\nmat_free(inv);\n\nreturn 0;\n}\n
  4.00000   7.00000\n  2.00000   6.00000\n\n  0.6000000   -0.7000000\n  -0.2000000   0.4000000\n

If you have any queries feel free to contact me through any of the socials given below

"},{"location":"ds/mat/#1-available-functions","title":"1. Available functions.","text":""},{"location":"ds/mat/#2-examples","title":"2. Examples.","text":""},{"location":"ds/vec/","title":"vec","text":""},{"location":"ds/vec/#this-is-the-vec-file","title":"this is the vec file","text":""},{"location":"ml/linregress/","title":"Linear Regression","text":""},{"location":"ml/linregress/#the-math","title":"The math","text":"

Well, what is Linear Regression or Simple Linear Regression?

According to Wikipedia

\"linear regression is a linear approach for modelling the relationship between a scalar response and one or more explanatory variables (also known as dependent and independent variables)\"

or in other words use one variable to predict the value of another.

Now, how do you do that?

A simple linear regression model defines the relationship between two variables \\(x\\) and \\(y\\) using a line defined by an equation in the following form:

\\[ y = \\alpha + \\beta x \\]

In order to determine the optimal estimates of \\(\\alpha\\) and \\(\\beta\\), an estimation method known as Ordinary Least Squares is used.

Now after a lot of derivation and other math stuff, which I won't go over here, we will end up with two nice formulas for finding \\(\\alpha\\) and \\(\\beta\\)

\\[ \\beta = { Cov(x, y) \\over Var(x)} \\] \\[ \\alpha = \\overline{y} - \\beta \\overline{x} \\]

Where

\\[ Cov(x, y) = {1 \\over n - 1} \\sum_{i=1}^{n} (x_i - \\overline{x}) \\ (y_i - \\overline{y}) \\] \\[ Var(x) = {1 \\over n - 1} \\sum_{i=1}^{n} (x_i - \\overline{x})^2 \\] "},{"location":"ml/linregress/#the-code","title":"The Code","text":"

This is a really stupid example.. but it shows it's working.

#include <stdio.h>\n\n#include \"ds/array.h\"\n#include \"ml/linregress.h\"\n\nint main(void) {\nfloat x_vals[] = {1, 2, 3, 4, 5};\nfloat y_vals[] = {2, 4, 6, 8, 10};\n\nLinearRegressionModel *model = linregress_init();\nlinregress_fit(model, x_vals, y_vals, 5);\n\nprintf(\"Slope: %f\\n\", model->slope);\nprintf(\"Intercept: %f\\n\", model->intercept);\nprintf(\"R value: %f\\n\", model->rvalue);\n\nprintf(\"prediction (20): %f\\n\", linregress_predict(model, 20));\n\nlinregress_free(model);\n\nreturn 0;\n}\n

Run the code, instructions here

Slope: 2.000000\nIntercept: 0.000000\nR value: 1.000000\nprediction (20): 40.000000\n
"},{"location":"ml/linregress/#list-of-functions","title":"List of functions","text":"

The struct

typedef struct {\n// y = ax + b\ndouble slope;\ndouble intercept;\ndouble rvalue;  // corelation value\n} LinearRegressionModel;\n
/*\nInitialize the simple linear regression model\n*/\nLinearRegressionModel *linregress_init();\n\n/*\nFree the linear regression model\n*/\nvoid linregress_free(LinearRegressionModel *model);\n\n/*\nFit the linear regression model with the values\n*/\nLinearRegressionModel *linregress_fit(LinearRegressionModel *model, double *x, double *y, size_t len);\n\n/*\nPredict new values with the linear regression model\n*/\ndouble linregress_predict(LinearRegressionModel *model, double x);\n\n/*\nScore/test the linear regression model based on known x and y values\n*/\ndouble linregress_score(LinearRegressionModel *model, double *x_test, double *y_test, size_t len);\n

If you have any issues, queries feel free to contact me through the socials given below

"},{"location":"ml/logregress/","title":"Logistic Regression","text":""},{"location":"ml/logregress/#the-math","title":"The math","text":"

The references that are used

The logistic regression that LinearML performs is a Binary Logistic Regression, i.e. we can classify between two values

To perform a prediction, we use a bunch of values that include the weights(w), inputs(x) and bias(b). which can be written using neural-network like formula.

\\[ z = \\left(\\sum_{i=1}^n w_i x_i \\right) \\]

We need an activation function to get our predictions, in case of binary logistic regression, it's called the sigmoid which is usually denoted by \\(\\sigma\\) of \\(\\hat y\\), and is defined as follows.

\\[ \\hat y = \\sigma(z) = \\begin{cases} \\frac{1}{1 + exp(-z)}, & \\text{if $z$ $\\geq$ 0} \\\\[2ex] \\frac{exp(z)}{1 + exp(z)}, & \\text{otherwise} \\end{cases} \\]

In order to compute the loss we can use the following function.

\\[ Loss(\\hat y, y) = -\\frac{1}{m} \\sum_{i=1}^{m} y \\log(\\hat y) + (1 - y) \\log(1 - \\hat y) \\]

Now, to calculate the gradients to optimize your weights using gradient descent, you must calculate the derivative of your loss function

\\[ \\frac{\\partial Loss(\\hat y, y)}{\\partial w} = \\frac{1}{m}(\\hat y - y)x_i^T \\] \\[ \\frac{\\partial Loss(\\hat y, y)}{\\partial b} = \\frac{1}{m}(\\hat y - y) \\]

When you have the final values from your derivative calculation, you can use it in the gradient descent equation and update the weights and bias.

"},{"location":"ml/logregress/#the-code","title":"The code","text":"

The data used here is the Breast Cancer Wisconsin (Diagnostic) Data Set which has been modified to look like this, where we don't have IDs and M=0, and B=1

#define INCLUDE_MAT_CONVERSIONS\n#include \"ds/mat.h\"\n#include \"ml/logisticregress.h\"\n#include \"model/metrics.h\"\n#include \"model/train_test_split.h\"\n#include \"parsers/csv.h\"\n\nint main(void) {\nCSV *csv_reader = csv_init(569, 31, ',');\ncsv_parse(csv_reader, \"data/bcancer.csv\");\n\nMat *X = csv_get_mat_slice(csv_reader, (Slice){1, 31});\nMat *Y = csv_get_mat_slice(csv_reader, (Slice){0, 1});\nMat *X_train, *X_test, *Y_train, *Y_test;\n\ntrain_test_split(X, Y, &X_train, &X_test, &Y_train, &Y_test, 0.3, 101);\n\nlogregress_set_max_iter(2000);\nLogisticRegressionModel *model = logregress_init();\nlogregress_fit(model, X_train, Y_train);\n\n// printf(\"prediction: %lf\\n\", logregress_predict(model, (double[]){15.22, 30.62, 103.4, 716.9, ... , 0}, 30));\nArray *preds = logregress_predict_many(model, X_test);\nArray *true = mat_get_col_arr(Y_test, 0);\n\nlogregress_print(model);\n\nprintf(\"confusion matrix: \\n\");\nMat *conf_mat = model_confusion_matrix(true, preds);\nmat_print(conf_mat);\n\narr_free(true);\narr_free(preds);\nlogregress_free(model);\nmat_free_many(7, X, Y, X_test, X_train, Y_test, Y_train, conf_mat);\ncsv_free(csv_reader);\n}\n
LogisticRegressionModel(bias: 0.5159147, loss: -12.4263621, weights: 0x5556e8a732c0)\nweights:\n1546.6922009\n1139.6829595\n8552.1648900\n2522.0044946\n11.8724211\n-19.3345598\n-44.9646156\n-18.4984994\n23.8378678\n10.1676564\n0.2338315\n103.3839701\n-139.7864354\n-4498.8563443\n0.2662770\n-6.5798244\n-8.6158697\n-1.6938180\n1.6508702\n-0.3857419\n1650.7843571\n1445.0283208\n8312.7672485\n-4024.9280673\n13.2972726\n-72.4527931\n-111.8298475\n-26.6204266\n28.0612275\n5.4099162\nconfusion matrix:\n  57.00   10.00\n  2.00   101.00\n

Now, what does the confusion matrix generated by sklearn look like.

array([[ 59,   7],\n       [  3, 102]])\n

we are pretty close... check out the Python implementation here

"},{"location":"ml/multiregress/","title":"Multiple Linear Regression","text":""},{"location":"ml/multiregress/#the-math","title":"The Math","text":"

Definition can be found here.

In simple terms MLR can be described as

\"The case of one explanatory variable is called simple linear regression; for more than one, the process is called multiple linear regression.\"

The formula / model for MLR is

\\[ y = \\beta_0 + \\beta_1x_1 + \\dots + \\beta_nx_n + \\epsilon \\]

The same formula in Matrix form can be represented as

\\[ Y = X\\beta + \\epsilon \\] \\[ \\begin{bmatrix} y_1 \\\\ y_2 \\\\ \\vdots \\\\ y_N \\end{bmatrix} = \\begin{bmatrix} x_{11} & x_{12} & \\cdots & x_{1k} \\\\ x_{21} & x_{22} & \\cdots & x_{2k} \\\\ \\vdots & \\ddots & \\ddots & \\vdots \\\\ x_{N1} & x_{N2} & \\cdots & x_{Nk} \\\\ \\end{bmatrix} * \\begin{bmatrix} \\beta_0 \\\\ \\beta_1 \\\\ \\vdots \\\\ \\beta_k \\end{bmatrix} + \\begin{bmatrix} \\epsilon_1 \\\\ \\epsilon_2 \\\\ \\vdots \\\\ \\epsilon_N \\\\ \\end{bmatrix} \\]

Our goal is to find \\(\\beta\\) (coefficients / parameters) and to minimize \\(\\epsilon^2\\). The method used to do so is called the Least Squares Method.

Now, the least squares estimator for the given model is

\\[ \\beta = {(X^TX)}^{-1}(X^TY) \\]

The proof for this derivation can be found here.

"},{"location":"ml/multiregress/#the-code","title":"The Code","text":"

Here is an example of training the Multiple Linear Regression Model with an example data.

Data

\\[ \\begin{array}{|c|c|c|} \\hline \\text{${x_1}$} & \\text{${x_2}$} & \\text{${y}$} \\\\ \\hline 1 & 1 & 3.25 \\\\ \\hline 1 & 2 & 6.5 \\\\ \\hline 2 & 2 & 3.5 \\\\ \\hline 0 & 1 & 5.0 \\\\ \\hline \\end{array} \\]

When fitting the MLinearRegressionModel using the function mlinregress_fit, the \\(X\\) matrix must be

\\[ X = \\begin{bmatrix} 1 & 1 \\\\ 1 & 2 \\\\ 2 & 2 \\\\ 0 & 1 \\\\ \\end{bmatrix} \\]

The \\(1^{st}\\) column contains the values of \\(x_1\\) and the \\(2^{nd}\\) column contains the values of \\(x_2\\).

The \\(Y\\) matrix will look like the following

\\[ Y = \\begin{bmatrix} 3.25 \\\\ 6.5 \\\\ 3.5 \\\\ 5.0 \\\\ \\end{bmatrix} \\]

In code

#include <stdio.h>\n\n#include \"ds/array.h\"\n#include \"ml/multiregress.h\"\n\nint main(void) {\nfloat x_vals[] = {\n1, 1,\n1, 2,\n2, 2,\n0, 1};\n\nfloat y_vals[] = {\n3.25,\n6.5,\n3.5,\n5.0};\n\nMat *x_mat = mat_create_from_array(x_vals, 4, 2);\nMat *y_mat = mat_create_from_array(y_vals, 4, 1);\n\nMLinearRegressionModel *model = mlinregress_init();\nmlinregress_fit(model, x_mat, y_mat);\n\nprintf(\"coefficients:\\n\");\nmat_print(model->coefs);\nprintf(\"intercept: %f\\n\", model->intercept);\n\n// Array *arr = arr_create(2);\n// ARR_AT(arr, 0) = 4;\n// ARR_AT(arr, 1) = 3;\n\nprintf(\"predicted: %f\\n\", mlinregress_predict(model, (float[]){4, 3}, 2));\n// printf(\"predicted: %f\\n\", mlinregress_predict_arr(model, arr));\n\nmat_free_no_array(x_mat);\nmat_free_no_array(y_mat);\nmlinregress_free(model);\n}\n

Run the code, instructions here

coefficients:\n  -2.38\n  3.25\nintercept: 2.062500\npredicted: 2.312500\n
"},{"location":"parsers/csv/","title":"CSV","text":"

LinearML has its own simple CSV parser.

Here is an example code on how to use it. We will be working on the Multiple Linear Regression code mentioned here.

Here, instead of the data being explicitly passed using Arrays, we will use CSV files.

"},{"location":"parsers/csv/#example","title":"Example","text":"

data.csv

1,1,3.25\n1,2,6.5\n2,2,3.5\n0,1,5.0\n
#include <stdio.h>\n\n#include \"ds/mat.h\"\n#include \"ml/multiregress.h\"\n#include \"parsers/csv.h\"\n\nint main(void) {\n// initialize the reader for 4 row 3 col CSV file,\n// using ',' as the delimiter\nCSV *csv_reader = csv_init(4, 3, ',');\n\n// parse the CSV file\ncsv_parse(csv_reader, \"data.csv\");\n\n// select the cols needed for X and Y values\nMat *X = csv_get_mat(csv_reader, (int[]){0, 1}, 2);\nMat *Y = csv_get_mat(csv_reader, (int[]){2}, 1);\n\n// initialize the model\nMLinearRegressionModel *model = mlinregress_init();\n\n// fit/train the model\nmlinregress_fit(model, X, Y);\n\n// print the properties.\nmat_printp(model->coefs, 7);\nprintf(\"Intercept: %f\\n\", model->intercept);\n\n// free everything.\nmlinregress_free(model);\nmat_free(X);\nmat_free(Y);\ncsv_free(csv_reader);\n}\n
"},{"location":"parsers/csv/#limitations-of-the-parser","title":"Limitations of the parser","text":""}]} \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index a1898679cdbe7b4f0925993555c9245356b46e9a..ffd9766bb0c533753e73b140a22651da9efee243 100644 GIT binary patch delta 13 Ucmb=gXP58h;5cyJd?I@V03PxLmH+?% delta 13 Ucmb=gXP58h;0W7eHj%vo02~tp7ytkO