-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmat_mult_gap8.c
171 lines (142 loc) · 4.18 KB
/
mat_mult_gap8.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
/* File:
* mat_mult_gap8.c (adapted from the Pthreads program pth_mat_vect.c)
*
* Purpose:
* Computes a parallel matrix-vector product on the GAP8 cluster
* cores. The matrix is distributed by block rows. Vectors are
* distributed by blocks.
*
* Input:
* m, n: order of matrix
* A, x: the matrix and the vector to be multiplied
*
* Output:
* y: the product vector
*
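* Compile (original Pthreads version):
* gcc -g -Wall -o pth_mat_vect pth_mat_vect.c -lpthread
* Usage (original Pthreads version):
* pth_mat_vect <thread_count>
* In this file there is no command-line argument: thread_count is
* fixed to CORE_NUMBER, and m and n are set in main().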
*
* Notes:
* 1. Local storage for A, x, y is dynamically allocated.
* 2. Number of threads (thread_count) should evenly divide both
* m and n. The program doesn't check for this.
* 3. We use a 1-dimensional array for A and compute subscripts
* using the formula A[i][j] = A[i*n + j]
* 4. Distribution of A, x, and y is logical: all three are
* globally shared.
*
* IPP: Section 4.3 (pp. 159 and ff.). Also Section 4.10 (pp. 191 and
* ff.)
*/
#include "cmsis.h"
#include "gap_common.h"
#include "mbed_wait_api.h"
// FEATURE_CLUSTER
#include "gap_cluster.h"
#include "gap_dmamchan.h"
#include <stdlib.h>
#include <time.h>
/* Frequency passed to FLL_SetFrequency() for uFLL_SOC in main()
 * (presumably in Hz, since main() divides by F_DIV to print MHz). */
#define FC_FREQ (300000000)
/* Frequency passed to FLL_SetFrequency() for uFLL_CLUSTER in main(). */
#define CLUSTER_FREQ (200000000)
/* Divisor applied to FLL_GetFrequency() results before they are printed as MHz. */
#define F_DIV (1000000)
/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define NPOINTS (10000000)
//#define NUM_THREADS (8)
/* Core count passed to CLUSTER_Start(); also the initial value of thread_count. */
#define CORE_NUMBER (8)
/* Modulus applied to rand() in generate_data(): generated entries lie in [0, DATA_MAX). */
#define DATA_MAX (10)
/* Global variables */
/* Number of row blocks the matrix is split into; one block per core rank. */
int thread_count = CORE_NUMBER;
/* Matrix dimensions: A is m x n, x has n entries, y has m entries. Set in main(). */
int m, n;
/* Matrix stored row by row in one array: entry (i, j) is A[i*n + j]. Allocated in main(). */
int* A;
/* Input vector, n entries. Allocated in main(), filled by generate_data(). */
int* x;
/* Product vector, m entries. Allocated in main(), written by pth_mat_vect(). */
int* y;
/*------------------------------------------------------------------
 * Function: pth_mat_vect
 * Purpose: Multiply an mxn matrix by an nx1 column vector. Each
 * calling core computes one contiguous block of rows of y.
 * In arg: none (the rank of the caller is read from __core_ID())
 * Global in vars: A, x, m, n, thread_count
 * Global out var: y
 *
 * Row distribution: every rank gets m/thread_count rows and the
 * first m%thread_count ranks get one extra row, so all m rows are
 * covered even when thread_count does not divide m. When it does
 * divide m, the ranges are exactly those of a plain block split.
 */
void pth_mat_vect() {
    int my_rank = __core_ID();
    int base_rows = m / thread_count;
    int extra_rows = m % thread_count;
    /* Ranks below extra_rows own one more row, which shifts every later block. */
    int rows_before_me = my_rank * base_rows
                         + (my_rank < extra_rows ? my_rank : extra_rows);
    int my_row_count = base_rows + (my_rank < extra_rows ? 1 : 0);
    int my_first_row = rows_before_me;
    int my_last_row = my_first_row + my_row_count - 1;

    for (int i = my_first_row; i <= my_last_row; i++) {
        /* Accumulate locally and store once instead of updating y[i] n times. */
        int sum = 0;
        for (int j = 0; j < n; j++) {
            sum += A[i*n + j] * x[j];
        }
        y[i] = sum;
    }

    printf("Core %d - FirstRow: %d - LastRow: %d\n", my_rank, my_first_row, my_last_row);
} /* pth_mat_vect */
/*------------------------------------------------------------------
 * Function: print_matrix
 * Purpose: Write the title on its own line, then the m x n matrix
 * stored row by row in A, one matrix row per output line.
 * Every entry is followed by a single space.
 * In args: title, A, m, n
 */
void print_matrix( char* title, int A[], int m, int n) {
    int row = 0;

    printf("%s\n", title);
    while (row < m) {
        int col = 0;
        while (col < n) {
            /* Row-major layout: entry (row, col) lives at row*n + col. */
            printf("%d ", A[row*n + col]);
            col++;
        }
        printf("\n");
        row++;
    }
} /* print_matrix */
/*------------------------------------------------------------------
 * Function: print_vector
 * Purpose: Write the title on its own line, then the first m
 * entries of y on one line, each followed by a space.
 * In args: title, y, m
 */
void print_vector(char* title, int y[], int m) {
    int idx = 0;

    printf("%s\n", title);
    while (idx < m) {
        printf("%d ", y[idx]);
        idx++;
    }
    printf("\n");
} /* print_vector */
/*------------------------------------------------------------------
 * Function: current_voltage
 * Purpose: Return DCDC_TO_mV() applied to the PMU_State.DCDC_Settings
 * entry indexed by READ_PMU_REGULATOR_STATE(PMU_State.State).
 * main() prints the result with the unit "mV".
 * NOTE(review): PMU_State and both macros are not defined in this
 * file (presumably they come from the SDK headers) - confirm their
 * exact semantics there.
 */
uint32_t current_voltage(void)
{
return DCDC_TO_mV(PMU_State.DCDC_Settings[READ_PMU_REGULATOR_STATE(PMU_State.State)]);
}
/*------------------------------------------------------------------
 * Function: Master_Entry
 * Purpose: Task that main() hands to CLUSTER_SendTask(). It passes
 * pth_mat_vect to CLUSTER_CoresFork() with a NULL argument.
 * NOTE(review): presumably CLUSTER_CoresFork() runs pth_mat_vect on
 * every cluster core and returns once all of them are done -
 * confirm against the SDK documentation.
 */
void Master_Entry()
{
CLUSTER_CoresFork(pth_mat_vect, NULL);
}
/*------------------------------------------------------------------
 * Function: generate_data
 * Purpose: Fill x and A with pseudo-random values in [0, DATA_MAX)
 * Global in vars: m, n
 * Global out vars: A, x
 * Note: values are drawn from rand() column by column: first x[j],
 * then entries (0, j) .. (m-1, j) of A. The order matters
 * because it fixes which rand() value lands in which entry.
 */
void generate_data(){
    int col = 0;

    while (col < n) {
        x[col] = rand() % DATA_MAX;

        int row = 0;
        while (row < m) {
            A[row*n + col] = rand() % DATA_MAX;
            row++;
        }
        col++;
    }
}
/*------------------------------------------------------------------
 * Function: main
 * Purpose: Set the clock frequencies, allocate and fill A and x,
 * run pth_mat_vect on the cluster through Master_Entry,
 * then print A, x and the product y.
 * Exit status: 0 on success, EXIT_FAILURE if an allocation fails.
 */
int main() {
    FLL_SetFrequency(uFLL_SOC, FC_FREQ, 0);

    m = 48;
    n = 27;

    A = malloc((size_t)m * n * sizeof *A);
    x = malloc(n * sizeof *x);
    y = malloc(m * sizeof *y);
    /* Stop before any array is touched if an allocation failed. */
    if (A == NULL || x == NULL || y == NULL) {
        printf("Error: could not allocate memory for A, x and y\n");
        free(A);
        free(x);
        free(y);
        exit(EXIT_FAILURE);
    }

    /* Fixed seed so every run generates the same data. */
    srand(10);
    generate_data();

    /* Cluster Start - Power on */
    CLUSTER_Start(0, CORE_NUMBER);

    /* A failed frequency change is reported but does not stop the run. */
    if (FLL_SetFrequency(uFLL_CLUSTER, CLUSTER_FREQ, 0) == -1) {
        printf("Error of changing frequency, check Voltage value!\n");
    }

    printf("FC Frequency: %d MHz - Cluster Frequency: %d MHz - Voltage: %lu mV\n",
           FLL_GetFrequency(uFLL_SOC)/F_DIV, FLL_GetFrequency(uFLL_CLUSTER)/F_DIV, current_voltage());

    /* Hand the work to the cluster, then wait before reading y. */
    CLUSTER_SendTask(0, Master_Entry, NULL, 0);
    printf("Waiting...\n");
    CLUSTER_Wait(0);

    print_matrix("The matrix is", A, m, n);
    print_vector("The vector is", x, n);
    print_vector("The product is", y, m);

    free(A);
    free(x);
    free(y);

    exit(EXIT_SUCCESS);
} /* main */