Robin Thoni 8 years ago
commit
e16e6466a7
2 changed files with 330 additions and 0 deletions
  1. Makefile: +23 -0
  2. main.c: +307 -0

+ 23
- 0
Makefile View file

@@ -0,0 +1,23 @@
+CFLAGS = -Wall -Wextra -Werror -pedantic -std=c99
+LDLIBS = -lpthread
+CC = gcc
+SOURCES = main.c
+OBJS = $(SOURCES:.c=.o)
+OUT = multithread-matrix-mult
+
+all: release
+
+debug: CFLAGS += -g3 -ggdb3
+debug: $(OUT)
+
+release: CFLAGS += -O3
+release: $(OUT)
+
+# LINK.c and OUTPUT_OPTION are GNU make built-ins (the C compile/link command and "-o $@")
+$(OUT): $(OBJS)
+	$(LINK.c) $(OUTPUT_OPTION) $(OBJS) $(LDLIBS)
+
+clean:
+	rm -f *.o
+
+distclean: clean
+	rm -f *.a $(OUT)

+ 307
- 0
main.c View file

@@ -0,0 +1,307 @@
+#define _POSIX_C_SOURCE 199309L
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+
+struct matrix
+{
+  unsigned m;
+  unsigned n;
+  int** scalars;
+};
+typedef struct matrix s_matrix;
+
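+/* Work descriptor handed to each worker thread: the two operand
+ * matrices, the result matrix, and the contiguous row-major range of
+ * result cells (scalar_start, scalars_count) this thread computes. */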
+struct matrix_mult_thread_data
+{
+  s_matrix mat1;
+  s_matrix mat2;
+  s_matrix mat;
+  unsigned scalar_start;
+  unsigned scalars_count;
+  unsigned thread_number;
+};
+typedef struct matrix_mult_thread_data s_matrix_mult_thread_data;
+
+unsigned get_cpu_count(void)
+{
+  return (unsigned)sysconf(_SC_NPROCESSORS_ONLN);
+}
+
+s_matrix matrix_generate(unsigned m, unsigned n, unsigned rmax)
+{
+  s_matrix mat;
+  mat.m = m;
+  mat.n = n;
+
+  mat.scalars = malloc(mat.m * sizeof(int*));
+  for (unsigned i = 0; i < mat.m; ++i) {
+    mat.scalars[i] = malloc(mat.n * sizeof(int));
+    for (unsigned j = 0; j < mat.n; ++j) {
+      mat.scalars[i][j] = (rmax == 0 ? 0 : (rand() % rmax));
+    }
+  }
+
+  return mat;
+}
+
+void matrix_free(s_matrix mat)
+{
+  for (unsigned i = 0; i < mat.m; ++i) {
+    free(mat.scalars[i]);
+  }
+  free(mat.scalars);
+}
+
+void matrix_print(s_matrix mat)
+{
+  for (unsigned i = 0; i < mat.m; ++i) {
+    printf("|");
+    for (unsigned j = 0; j < mat.n; ++j) {
+      printf("%5d|", mat.scalars[i][j]);
+    }
+    printf("\n");
+  }
+}
+
+int matrix_equals(s_matrix mat1, s_matrix mat2)
+{
+  if (mat1.n != mat2.n || mat1.m != mat2.m) {
+    return 0;
+  }
+  for (unsigned i = 0; i < mat1.m; ++i) {
+    for (unsigned j = 0; j < mat1.n; ++j) {
+      if (mat1.scalars[i][j] != mat2.scalars[i][j]) {
+        return 0;
+      }
+    }
+  }
+  return 1;
+}
+
+unsigned matrix_mult_scalar(s_matrix mat1, s_matrix mat2, unsigned i, unsigned j)
+{
+  unsigned a = 0;
+  for (unsigned k = 0; k < mat1.n; ++k) {
+    a += mat1.scalars[i][k] * mat2.scalars[k][j];
+  }
+  return a;
+}
+
+s_matrix matrix_mult_sequential(s_matrix mat1, s_matrix mat2)
+{
+  s_matrix mat;
+  if (mat1.n != mat2.m) {
+    mat.n = 0;
+    mat.m = 0;
+    mat.scalars = 0;
+  }
+  else {
+    mat = matrix_generate(mat1.m, mat2.n, 0);
+    for (unsigned i = 0; i < mat.m; ++i) {
+      for (unsigned j = 0; j < mat.n; ++j) {
+        mat.scalars[i][j] = matrix_mult_scalar(mat1, mat2, i, j);
+      }
+    }
+  }
+  return mat;
+}
+
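+/* Splits scalars_count result cells across thread_count threads: each
+ * active thread gets scalars_count / thread_count cells (at least 1),
+ * the last active thread absorbs the remainder, and any thread left
+ * without work gets a count of 0. */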
+void matrix_get_thread_scalars_distribution(unsigned scalars_count, unsigned thread_count, unsigned* distribution)
+{
+  unsigned scalars_per_thread = scalars_count / thread_count;
+  unsigned scalars_not_distributed = scalars_count;
+  if (scalars_per_thread == 0) {
+    scalars_per_thread = 1;
+  }
+  unsigned thread_number = 0;
+  for (; thread_number < thread_count && scalars_not_distributed > 0; ++thread_number) {
+    distribution[thread_number] = (thread_number == thread_count - 1 ? scalars_not_distributed : scalars_per_thread);
+    scalars_not_distributed -= distribution[thread_number];
+  }
+  for (; thread_number < thread_count; ++thread_number) {
+    distribution[thread_number] = 0;
+  }
+}
+
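+/* Worker body: walks the result matrix in row-major order starting at
+ * scalar_start, fills scalars_count cells, then frees its argument. */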
+void* matrix_mult_parallel_thread(void* arg)
+{
+  s_matrix_mult_thread_data* data = (s_matrix_mult_thread_data*)arg;
+
+  /* Row-major mapping: row = index / columns, column = index % columns. */
+  unsigned j = data->scalar_start % data->mat.n;
+
+  for (unsigned i = data->scalar_start / data->mat.n; i < data->mat.m && data->scalars_count > 0; ++i) {
+    for (; j < data->mat.n && data->scalars_count > 0; ++j) {
+      data->mat.scalars[i][j] = matrix_mult_scalar(data->mat1, data->mat2, i, j);
+      --data->scalars_count;
+    }
+    j = 0;
+  }
+
+  free(data);
+  return 0;
+}
+
+pthread_t matrix_mult_parallel_launch_thread(s_matrix mat1, s_matrix mat2, s_matrix mat, unsigned scalar_start,
+                          unsigned scalars_count, unsigned thread_number, int launch)
+{
+  s_matrix_mult_thread_data* data = (s_matrix_mult_thread_data*)malloc(sizeof(s_matrix_mult_thread_data));
+  data->mat1 = mat1;
+  data->mat2 = mat2;
+  data->mat = mat;
+  data->scalar_start = scalar_start;
+  data->scalars_count = scalars_count;
+  data->thread_number = thread_number;
+  pthread_t thread = 0;
+  if (launch) {
+    pthread_create(&thread, 0, matrix_mult_parallel_thread, data);
+  }
+  else {
+    matrix_mult_parallel_thread(data);
+  }
+  return thread;
+}
+
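+/* Parallel driver: allocates the result matrix, splits its cells into
+ * contiguous ranges, launches one thread per non-empty range, then
+ * joins them all before returning. */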
168
+s_matrix matrix_mult_parallel(s_matrix mat1, s_matrix mat2, unsigned thread_count)
169
+{
170
+  s_matrix mat;
171
+  if (mat1.n != mat2.m) {
172
+    mat.n = 0;
173
+    mat.m = 0;
174
+    mat.scalars = 0;
175
+  }
176
+  else {
177
+    mat = matrix_generate(mat1.m, mat2.n, 0);
178
+    unsigned scalars_count = mat1.m * mat2.n;
179
+    unsigned distribution[thread_count];
180
+    unsigned scalar_start = 0;
181
+    //unsigned scalar_start = distribution[0];
182
+    matrix_get_thread_scalars_distribution(scalars_count, thread_count, distribution);
183
+    pthread_t threads[thread_count];
184
+    //threads[0] = 0;
185
+
186
+    for (unsigned thread_number = 0; thread_number < thread_count; ++thread_number) {
187
+      unsigned scalars_count = distribution[thread_number];
188
+      if (scalars_count > 0) {
189
+        threads[thread_number] = matrix_mult_parallel_launch_thread(mat1, mat2, mat, scalar_start, scalars_count, thread_number, 1);
190
+        scalar_start += scalars_count;
191
+      }
192
+      else {
193
+        threads[thread_number] = 0;
194
+      }
195
+    }
196
+
197
+    //matrix_mult_parallel_launch_thread(mat1, mat2, mat, 0, distribution[0], 0, 0);
198
+
199
+    for (unsigned thread_number = 0; thread_number < thread_count; ++thread_number) {
200
+      pthread_t thread = threads[thread_number];
201
+      if (thread != 0) {
202
+        pthread_join(thread, 0);
203
+      }
204
+    }
205
+  }
206
+  return mat;
207
+}
208
+
209
+struct timespec get_time()
210
+{
211
+    struct timespec start_time;
212
+    clock_gettime(CLOCK_MONOTONIC, &start_time);
213
+    return start_time;
214
+}
215
+
216
+struct timespec time_diff(struct timespec* ts1, struct timespec* ts2)
217
+{
218
+  static struct timespec ts;
219
+  ts.tv_sec = ts1->tv_sec - ts2->tv_sec;
220
+  ts.tv_nsec = ts1->tv_nsec - ts2->tv_nsec;
221
+  if (ts.tv_nsec < 0) {
222
+    ts.tv_sec--;
223
+    ts.tv_nsec += 1000000000;
224
+  }
225
+  return ts;
226
+}
227
+
228
+struct timespec get_duration(struct timespec* ts)
229
+{
230
+  struct timespec time = get_time();
231
+  return time_diff(&time, ts);
232
+}
233
+
234
+void print_time(struct timespec* ts)
235
+{
236
+  long ns = ts->tv_nsec % 1000;
237
+  long us = (ts->tv_nsec / 1000) % 1000;
238
+  long ms = (ts->tv_nsec / 1000000) % 1000;
239
+  long s =  (ts->tv_nsec / 1000000000) % 1000 + ts->tv_sec;
240
+  printf("%3lds %3ldms %3ldus %3ldns", s, ms, us, ns);
241
+}
242
+
+void test(unsigned size, unsigned thread_count)
+{
+  s_matrix mat1 = matrix_generate(size, size, 100);
+
+  s_matrix mat;
+  struct timespec start = get_time();
+  if (thread_count == 0) {
+    mat = matrix_mult_sequential(mat1, mat1);
+  }
+  else {
+    mat = matrix_mult_parallel(mat1, mat1, thread_count);
+  }
+  struct timespec time = get_duration(&start);
+  printf("%3ucpu %3uthreads %4u*%-4u ", get_cpu_count(), thread_count, size, size);
+  print_time(&time);
+  printf("\n");
+  matrix_free(mat1);
+  matrix_free(mat);
+}
+
+void check(void)
+{
+  s_matrix mat = matrix_generate(3, 3, 0);
+  mat.scalars[0][0] = 25;
+  mat.scalars[0][1] = 26;
+  mat.scalars[0][2] = 90;
+  mat.scalars[1][0] = 14;
+  mat.scalars[1][1] = 36;
+  mat.scalars[1][2] = 1;
+  mat.scalars[2][0] = 3;
+  mat.scalars[2][1] = 9;
+  mat.scalars[2][2] = 6;
+
+  s_matrix mat1 = matrix_mult_sequential(mat, mat);
+  s_matrix mat2 = matrix_mult_parallel(mat, mat, 1);
+  s_matrix mat3 = matrix_mult_parallel(mat, mat, get_cpu_count());
+  if (!matrix_equals(mat1, mat2) || !matrix_equals(mat1, mat3)) {
+    matrix_print(mat1);
+    printf("\n");
+    matrix_print(mat2);
+    printf("\n");
+    matrix_print(mat3);
+    exit(1);
+  }
+  matrix_free(mat);
+  matrix_free(mat1);
+  matrix_free(mat2);
+  matrix_free(mat3);
+}
+
+int main(void)
+{
+  srand(time(0));
+
+  check();
+
+  unsigned sizes[] = {10, 100};
+  unsigned threads_count[] = {1, 4, 16, 64};
+
+  for (unsigned s = 0; s < sizeof(sizes) / sizeof(*sizes); ++s) {
+    unsigned size = sizes[s];
+    test(size, 0);
+    for (unsigned t = 0; t < sizeof(threads_count) / sizeof(*threads_count); ++t) {
+      test(size, threads_count[t]);
+    }
+  }
+
+  return 0;
+}
