
main.c 7.6KB

#define _POSIX_C_SOURCE 199309L
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <string.h>
#include <pthread.h>
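/* Dense matrix of m rows and n columns; scalars[i][j] is the cell at row i, column j. */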
struct matrix
{
    unsigned m;
    unsigned n;
    int** scalars;
};
typedef struct matrix s_matrix;
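/* Work unit for one worker thread: compute scalars_count cells of mat = mat1 * mat2,
 * starting at the row-major cell index scalar_start. */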
struct matrix_mult_thread_data
{
    s_matrix mat1;
    s_matrix mat2;
    s_matrix mat;
    unsigned scalar_start;
    unsigned scalars_count;
    unsigned thread_number;
};
typedef struct matrix_mult_thread_data s_matrix_mult_thread_data;
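/* Number of processors currently online (_SC_NPROCESSORS_ONLN). */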
unsigned get_cpu_count(void)
{
    return (unsigned)sysconf(_SC_NPROCESSORS_ONLN);
}
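/* Allocate an m*n matrix; cells are random in [0, rmax), or 0 when rmax == 0. */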
s_matrix matrix_generate(unsigned m, unsigned n, unsigned rmax)
{
    s_matrix mat;
    mat.m = m;
    mat.n = n;
    mat.scalars = malloc(mat.m * sizeof(int*));
    for (unsigned i = 0; i < mat.m; ++i) {
        mat.scalars[i] = malloc(mat.n * sizeof(int));
        for (unsigned j = 0; j < mat.n; ++j) {
            mat.scalars[i][j] = (rmax == 0 ? 0 : (rand() % rmax));
        }
    }
    return mat;
}
void matrix_free(s_matrix mat)
{
    for (unsigned i = 0; i < mat.m; ++i) {
        free(mat.scalars[i]);
    }
    free(mat.scalars);
}
void matrix_print(s_matrix mat)
{
    for (unsigned i = 0; i < mat.m; ++i) {
        printf("|");
        for (unsigned j = 0; j < mat.n; ++j) {
            printf("%5d|", mat.scalars[i][j]);
        }
        printf("\n");
    }
}
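/* Return 1 when both matrices have the same dimensions and the same cells. */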
int matrix_equals(s_matrix mat1, s_matrix mat2)
{
    if (mat1.n != mat2.n || mat1.m != mat2.m) {
        return 0;
    }
    for (unsigned i = 0; i < mat1.m; ++i) {
        for (unsigned j = 0; j < mat1.n; ++j) {
            if (mat1.scalars[i][j] != mat2.scalars[i][j]) {
                return 0;
            }
        }
    }
    return 1;
}
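/* Dot product of row i of mat1 with column j of mat2: one cell of the product. */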
unsigned matrix_mult_scalar(s_matrix mat1, s_matrix mat2, unsigned i, unsigned j)
{
    unsigned a = 0;
    for (unsigned k = 0; k < mat1.n; ++k) {
        a += mat1.scalars[i][k] * mat2.scalars[k][j];
    }
    return a;
}
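/* Single-threaded reference multiplication; returns an empty matrix when the
 * dimensions are incompatible. */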
s_matrix matrix_mult_sequential(s_matrix mat1, s_matrix mat2)
{
    s_matrix mat;
    if (mat1.n != mat2.m) {
        mat.n = 0;
        mat.m = 0;
        mat.scalars = 0;
    }
    else {
        mat = matrix_generate(mat1.m, mat2.n, 0);
        for (unsigned i = 0; i < mat.m; ++i) {
            for (unsigned j = 0; j < mat.n; ++j) {
                mat.scalars[i][j] = matrix_mult_scalar(mat1, mat2, i, j);
            }
        }
    }
    return mat;
}
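/* Split scalars_count cells over thread_count threads: each thread gets
 * scalars_count / thread_count cells, the last thread absorbs the remainder,
 * and surplus threads get 0. */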
void matrix_get_thread_scalars_distribution(unsigned scalars_count, unsigned thread_count, unsigned* distribution)
{
    unsigned scalars_per_thread = scalars_count / thread_count;
    unsigned scalars_not_distributed = scalars_count;
    if (scalars_per_thread == 0) {
        scalars_per_thread = 1;
    }
    unsigned thread_number = 0;
    for (; thread_number < thread_count && scalars_not_distributed > 0; ++thread_number) {
        distribution[thread_number] = (thread_number == thread_count - 1 ? scalars_not_distributed : scalars_per_thread);
        scalars_not_distributed -= distribution[thread_number];
    }
    for (; thread_number < thread_count; ++thread_number) {
        distribution[thread_number] = 0;
    }
}
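/* Worker body: computes a contiguous row-major range of cells of the result,
 * then frees its own argument block. */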
void* matrix_mult_parallel_thread(void* arg)
{
    s_matrix_mult_thread_data* data = (s_matrix_mult_thread_data*)arg;
    /* A row-major cell index splits into row = index / columns, column = index % columns. */
    unsigned j = data->scalar_start % data->mat.n;
    for (unsigned i = data->scalar_start / data->mat.n; i < data->mat.m && data->scalars_count > 0; ++i) {
        for (; j < data->mat.n && data->scalars_count > 0; ++j) {
            data->mat.scalars[i][j] = matrix_mult_scalar(data->mat1, data->mat2, i, j);
            --data->scalars_count;
        }
        j = 0;
    }
    free(data);
    return 0;
}
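/* Package the thread arguments on the heap and either spawn a worker thread
 * (launch != 0) or run the work inline in the calling thread. */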
pthread_t matrix_mult_parallel_launch_thread(s_matrix mat1, s_matrix mat2, s_matrix mat, unsigned scalar_start,
                                             unsigned scalars_count, unsigned thread_number, int launch)
{
    s_matrix_mult_thread_data* data = (s_matrix_mult_thread_data*)malloc(sizeof(s_matrix_mult_thread_data));
    data->mat1 = mat1;
    data->mat2 = mat2;
    data->mat = mat;
    data->scalar_start = scalar_start;
    data->scalars_count = scalars_count;
    data->thread_number = thread_number;
    pthread_t thread = 0;
    if (launch) {
        pthread_create(&thread, 0, matrix_mult_parallel_thread, data);
    }
    else {
        matrix_mult_parallel_thread(data);
    }
    return thread;
}
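/* Multi-threaded multiplication: the cells of the result are split between
 * thread_count workers, which are joined before the result is returned. */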
s_matrix matrix_mult_parallel(s_matrix mat1, s_matrix mat2, unsigned thread_count)
{
    s_matrix mat;
    if (mat1.n != mat2.m) {
        mat.n = 0;
        mat.m = 0;
        mat.scalars = 0;
    }
    else {
        mat = matrix_generate(mat1.m, mat2.n, 0);
        unsigned scalars_count = mat1.m * mat2.n;
        unsigned distribution[thread_count];
        unsigned scalar_start = 0;
        matrix_get_thread_scalars_distribution(scalars_count, thread_count, distribution);
        pthread_t threads[thread_count];
        for (unsigned thread_number = 0; thread_number < thread_count; ++thread_number) {
            unsigned thread_scalars = distribution[thread_number];
            if (thread_scalars > 0) {
                threads[thread_number] = matrix_mult_parallel_launch_thread(mat1, mat2, mat, scalar_start, thread_scalars, thread_number, 1);
                scalar_start += thread_scalars;
            }
            else {
                threads[thread_number] = 0;
            }
        }
        for (unsigned thread_number = 0; thread_number < thread_count; ++thread_number) {
            pthread_t thread = threads[thread_number];
            if (thread != 0) {
                pthread_join(thread, 0);
            }
        }
    }
    return mat;
}
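/* Monotonic-clock timing helpers: capture a start time, compute an elapsed time, and print it. */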
struct timespec get_time(void)
{
    struct timespec start_time;
    clock_gettime(CLOCK_MONOTONIC, &start_time);
    return start_time;
}
struct timespec time_diff(struct timespec* ts1, struct timespec* ts2)
{
    struct timespec ts;
    ts.tv_sec = ts1->tv_sec - ts2->tv_sec;
    ts.tv_nsec = ts1->tv_nsec - ts2->tv_nsec;
    if (ts.tv_nsec < 0) {
        ts.tv_sec--;
        ts.tv_nsec += 1000000000;
    }
    return ts;
}
struct timespec get_duration(struct timespec* ts)
{
    struct timespec time = get_time();
    return time_diff(&time, ts);
}
void print_time(struct timespec* ts)
{
    long ns = ts->tv_nsec % 1000;
    long us = (ts->tv_nsec / 1000) % 1000;
    long ms = (ts->tv_nsec / 1000000) % 1000;
    long s = ts->tv_sec;
    printf("%3lds %3ldms %3ldus %3ldns", s, ms, us, ns);
}
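/* Time one size*size multiplication; thread_count == 0 selects the sequential path. */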
void test(unsigned size, unsigned thread_count)
{
    s_matrix mat1 = matrix_generate(size, size, 100);
    s_matrix mat;
    struct timespec start = get_time();
    if (thread_count == 0) {
        mat = matrix_mult_sequential(mat1, mat1);
    }
    else {
        mat = matrix_mult_parallel(mat1, mat1, thread_count);
    }
    struct timespec time = get_duration(&start);
    printf("%3ucpu %3uthreads %4u*%-4u ", get_cpu_count(), thread_count, size, size);
    print_time(&time);
    printf("\n");
    matrix_free(mat1);
    matrix_free(mat);
}
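/* Sanity check: the sequential and parallel products of a fixed 3*3 matrix must match. */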
void check(void)
{
    s_matrix mat = matrix_generate(3, 3, 0);
    mat.scalars[0][0] = 25;
    mat.scalars[0][1] = 26;
    mat.scalars[0][2] = 90;
    mat.scalars[1][0] = 14;
    mat.scalars[1][1] = 36;
    mat.scalars[1][2] = 1;
    mat.scalars[2][0] = 3;
    mat.scalars[2][1] = 9;
    mat.scalars[2][2] = 6;
    s_matrix mat1 = matrix_mult_sequential(mat, mat);
    s_matrix mat2 = matrix_mult_parallel(mat, mat, 1);
    s_matrix mat3 = matrix_mult_parallel(mat, mat, get_cpu_count());
    if (!matrix_equals(mat1, mat2) || !matrix_equals(mat1, mat3)) {
        matrix_print(mat1);
        printf("\n");
        matrix_print(mat2);
        printf("\n");
        matrix_print(mat3);
        exit(1);
    }
    matrix_free(mat);
    matrix_free(mat1);
    matrix_free(mat2);
    matrix_free(mat3);
}
int main(void)
{
    srand(time(0));
    check();
    unsigned sizes[] = {10, 100};
    unsigned threads_count[] = {1, 4, 16, 64};
    for (unsigned s = 0; s < sizeof(sizes) / sizeof(*sizes); ++s) {
        unsigned size = sizes[s];
        test(size, 0);
        for (unsigned t = 0; t < sizeof(threads_count) / sizeof(*threads_count); ++t) {
            test(size, threads_count[t]);
        }
    }
    return 0;
}