You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

main.c 7.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. #define _POSIX_C_SOURCE 199309L
  2. #include <stdlib.h>
  3. #include <stdio.h>
  4. #include <time.h>
  5. #include <unistd.h>
  6. #include <string.h>
  7. #include <math.h>
  8. #include <pthread.h>
  9. struct matrix
  10. {
  11. unsigned m;
  12. unsigned n;
  13. int** scalars;
  14. };
  15. typedef struct matrix s_matrix;
  16. struct matrix_mult_thread_data
  17. {
  18. s_matrix mat1;
  19. s_matrix mat2;
  20. s_matrix mat;
  21. unsigned scalar_start;
  22. unsigned scalars_count;
  23. unsigned thread_number;
  24. };
  25. typedef struct matrix_mult_thread_data s_matrix_mult_thread_data;
  26. unsigned get_cpu_count(void)
  27. {
  28. return (unsigned)sysconf(_SC_NPROCESSORS_ONLN);
  29. }
  30. s_matrix matrix_generate(unsigned m, unsigned n, unsigned rmax)
  31. {
  32. s_matrix mat;
  33. mat.m = m;
  34. mat.n = n;
  35. mat.scalars = malloc(mat.m * sizeof(int*));
  36. for (unsigned i = 0; i < mat.m; ++i) {
  37. mat.scalars[i] = malloc(mat.n * sizeof(int));
  38. for (unsigned j = 0; j < mat.n; ++j) {
  39. mat.scalars[i][j] = (rmax == 0 ? 0 : (rand() % rmax));
  40. }
  41. }
  42. return mat;
  43. }
  44. void matrix_free(s_matrix mat)
  45. {
  46. for (unsigned i = 0; i < mat.m; ++i) {
  47. free(mat.scalars[i]);
  48. }
  49. free(mat.scalars);
  50. }
  51. void matrix_print(s_matrix mat)
  52. {
  53. for (unsigned i = 0; i < mat.m; ++i) {
  54. printf("|");
  55. for (unsigned j = 0; j < mat.n; ++j) {
  56. printf("%5d|", mat.scalars[i][j]);
  57. }
  58. printf("\n");
  59. }
  60. }
  61. int matrix_equals(s_matrix mat1, s_matrix mat2)
  62. {
  63. if (mat1.n != mat2.n || mat1.m != mat2.n) {
  64. return 0;
  65. }
  66. for (unsigned i = 0; i < mat1.n; ++i) {
  67. for (unsigned j = 0; j < mat1.m; ++j) {
  68. if (mat1.scalars[i][j] != mat2.scalars[i][j]) {
  69. return 0;
  70. }
  71. }
  72. }
  73. return 1;
  74. }
  75. unsigned matrix_mult_scalar(s_matrix mat1, s_matrix mat2, unsigned i, unsigned j)
  76. {
  77. unsigned a = 0;
  78. for (unsigned k = 0; k < mat1.n; ++k) {
  79. a += mat1.scalars[i][k] * mat2.scalars[k][j];
  80. }
  81. return a;
  82. }
  83. s_matrix matrix_mult_sequential(s_matrix mat1, s_matrix mat2)
  84. {
  85. s_matrix mat;
  86. if (mat1.n != mat2.m) {
  87. mat.n = 0;
  88. mat.m = 0;
  89. mat.scalars = 0;
  90. }
  91. else {
  92. mat = matrix_generate(mat1.m, mat2.n, 0);
  93. for (unsigned i = 0; i < mat.m; ++i) {
  94. for (unsigned j = 0; j < mat.n; ++j) {
  95. mat.scalars[i][j] = matrix_mult_scalar(mat1, mat2, i, j);
  96. }
  97. }
  98. }
  99. return mat;
  100. }
  101. void matrix_get_thread_scalars_distribution(unsigned scalars_count, unsigned thread_count, unsigned* distribution)
  102. {
  103. unsigned scalars_per_thread = scalars_count / thread_count;
  104. unsigned scalars_not_distributed = scalars_count;
  105. if (scalars_per_thread == 0) {
  106. scalars_per_thread = 1;
  107. }
  108. unsigned thread_number = 0;
  109. for (; thread_number < thread_count && scalars_not_distributed > 0; ++thread_number) {
  110. distribution[thread_number] = (thread_number == thread_count - 1 ? scalars_not_distributed : scalars_per_thread);
  111. scalars_not_distributed -= distribution[thread_number];
  112. }
  113. for (; thread_number < thread_count; ++thread_number) {
  114. distribution[thread_number] = 0;
  115. }
  116. }
  117. void* matrix_mult_parallel_thread(void* arg)
  118. {
  119. s_matrix_mult_thread_data* data = (s_matrix_mult_thread_data*)arg;
  120. unsigned j = data->scalar_start % data->mat.m;
  121. for (unsigned i = data->scalar_start / data->mat.m; i < data->mat.m && data->scalars_count > 0; ++i) {
  122. for (; j < data->mat.n && data->scalars_count > 0; ++j) {
  123. data->mat.scalars[i][j] = matrix_mult_scalar(data->mat1, data->mat2, i, j);
  124. --data->scalars_count;
  125. }
  126. j = 0;
  127. }
  128. free(data);
  129. return 0;
  130. }
  131. pthread_t matrix_mult_parallel_launch_thread(s_matrix mat1, s_matrix mat2, s_matrix mat, unsigned scalar_start,
  132. unsigned scalars_count, unsigned thread_number, int launch)
  133. {
  134. s_matrix_mult_thread_data* data = (s_matrix_mult_thread_data*)malloc(sizeof(s_matrix_mult_thread_data));
  135. data->mat1 = mat1;
  136. data->mat2 = mat2;
  137. data->mat = mat;
  138. data->scalar_start = scalar_start;
  139. data->scalars_count = scalars_count;
  140. data->thread_number = thread_number;
  141. pthread_t thread = 0;
  142. (void)launch;
  143. if (launch) {
  144. pthread_create(&thread, 0, matrix_mult_parallel_thread, data);
  145. }
  146. else {
  147. matrix_mult_parallel_thread(data);
  148. }
  149. return thread;
  150. }
  151. s_matrix matrix_mult_parallel(s_matrix mat1, s_matrix mat2, unsigned thread_count)
  152. {
  153. s_matrix mat;
  154. if (mat1.n != mat2.m) {
  155. mat.n = 0;
  156. mat.m = 0;
  157. mat.scalars = 0;
  158. }
  159. else {
  160. mat = matrix_generate(mat1.m, mat2.n, 0);
  161. unsigned scalars_count = mat1.m * mat2.n;
  162. unsigned distribution[thread_count];
  163. matrix_get_thread_scalars_distribution(scalars_count, thread_count, distribution);
  164. //unsigned scalar_start = 0;
  165. unsigned scalar_start = distribution[0];
  166. pthread_t threads[thread_count];
  167. threads[0] = 0;
  168. for (unsigned thread_number = 1; thread_number < thread_count; ++thread_number) {
  169. unsigned scalars_count = distribution[thread_number];
  170. if (scalars_count > 0) {
  171. threads[thread_number] = matrix_mult_parallel_launch_thread(mat1, mat2, mat, scalar_start, scalars_count, thread_number, 1);
  172. scalar_start += scalars_count;
  173. }
  174. else {
  175. threads[thread_number] = 0;
  176. }
  177. }
  178. matrix_mult_parallel_launch_thread(mat1, mat2, mat, 0, distribution[0], 0, 0);
  179. for (unsigned thread_number = 0; thread_number < thread_count; ++thread_number) {
  180. pthread_t thread = threads[thread_number];
  181. if (thread != 0) {
  182. pthread_join(thread, 0);
  183. }
  184. }
  185. }
  186. return mat;
  187. }
  188. struct timespec get_time()
  189. {
  190. struct timespec start_time;
  191. clock_gettime(CLOCK_MONOTONIC, &start_time);
  192. return start_time;
  193. }
  194. struct timespec time_diff(struct timespec* ts1, struct timespec* ts2)
  195. {
  196. static struct timespec ts;
  197. ts.tv_sec = ts1->tv_sec - ts2->tv_sec;
  198. ts.tv_nsec = ts1->tv_nsec - ts2->tv_nsec;
  199. if (ts.tv_nsec < 0) {
  200. ts.tv_sec--;
  201. ts.tv_nsec += 1000000000;
  202. }
  203. return ts;
  204. }
  205. struct timespec get_duration(struct timespec* ts)
  206. {
  207. struct timespec time = get_time();
  208. return time_diff(&time, ts);
  209. }
  210. void print_time(struct timespec* ts)
  211. {
  212. long ns = ts->tv_nsec % 1000;
  213. long us = (ts->tv_nsec / 1000) % 1000;
  214. long ms = (ts->tv_nsec / 1000000) % 1000;
  215. long s = (ts->tv_nsec / 1000000000) % 1000 + ts->tv_sec;
  216. long t = (s * 1000000000) + (ms * 1000000) + (us * 1000) + ns;
  217. printf("%3lds %3ldms %3ldus %3ldns %12ld", s, ms, us, ns, t);
  218. }
  219. void test(unsigned size, unsigned thread_count)
  220. {
  221. s_matrix mat1 = matrix_generate(size, size, 100);
  222. s_matrix mat;
  223. struct timespec start = get_time();
  224. if (thread_count == 0) {
  225. mat = matrix_mult_sequential(mat1, mat1);
  226. }
  227. else {
  228. mat = matrix_mult_parallel(mat1, mat1, thread_count);
  229. }
  230. struct timespec time = get_duration(&start);
  231. printf("%3dcpu %3dthreads %4d*%-4d ", get_cpu_count(), thread_count, size, size);
  232. print_time(&time);
  233. printf("\n");
  234. fflush(stdout);
  235. matrix_free(mat);
  236. }
  237. void check()
  238. {
  239. s_matrix mat = matrix_generate(3, 3, 0);
  240. mat.scalars[0][0] = 25;
  241. mat.scalars[0][1] = 26;
  242. mat.scalars[0][2] = 90;
  243. mat.scalars[1][0] = 14;
  244. mat.scalars[1][1] = 36;
  245. mat.scalars[1][2] = 1;
  246. mat.scalars[2][0] = 3;
  247. mat.scalars[2][1] = 9;
  248. mat.scalars[2][2] = 6;
  249. s_matrix mat1 = matrix_mult_sequential(mat, mat);
  250. s_matrix mat2 = matrix_mult_parallel(mat, mat, 1);
  251. s_matrix mat3 = matrix_mult_parallel(mat, mat, get_cpu_count());
  252. if (!matrix_equals(mat1, mat2) || !matrix_equals(mat1, mat3)) {
  253. matrix_print(mat1);
  254. printf("\n");
  255. matrix_print(mat2);
  256. printf("\n");
  257. matrix_print(mat3);
  258. exit(1);
  259. }
  260. }
  261. int main(void)
  262. {
  263. srand(time(0));
  264. check();
  265. unsigned sizes[] = {10, 100, 1000, 2000, 5000};
  266. unsigned threads_count = get_cpu_count();
  267. if (threads_count < 64) {
  268. threads_count = 64;
  269. }
  270. for (unsigned s = 0; s < sizeof(sizes) / sizeof(*sizes); ++s) {
  271. unsigned size = sizes[s];
  272. for (unsigned t = threads_count; t > 1; t /= 2) {
  273. test(size, t);
  274. }
  275. test(size, 0);
  276. }
  277. return 0;
  278. }