Você não pode selecionar mais de 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

infiniband.c 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. */
  18. #include <stdint.h>
  19. #include <stdlib.h>
  20. #include <stdio.h>
  21. #include <string.h>
  22. #include <unistd.h>
  23. #include <byteswap.h>
  24. #include <errno.h>
  25. #include <assert.h>
  26. #include <gpxe/list.h>
  27. #include <gpxe/if_arp.h>
  28. #include <gpxe/netdevice.h>
  29. #include <gpxe/iobuf.h>
  30. #include <gpxe/ipoib.h>
  31. #include <gpxe/process.h>
  32. #include <gpxe/infiniband.h>
  33. /** @file
  34. *
  35. * Infiniband protocol
  36. *
  37. */
  38. /** List of Infiniband devices; register_ibdev() appends to it and unregister_ibdev() removes from it */
  39. struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
  40. /**
  41. * Create completion queue
  42. *
  43. * @v ibdev Infiniband device
  44. * @v num_cqes Number of completion queue entries
  45. * @ret cq New completion queue
  46. */
  47. struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev,
  48. unsigned int num_cqes ) {
  49. struct ib_completion_queue *cq;
  50. int rc;
  51. DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
  52. /* Allocate and initialise data structure */
  53. cq = zalloc ( sizeof ( *cq ) );
  54. if ( ! cq )
  55. return NULL;
  56. cq->num_cqes = num_cqes;
  57. INIT_LIST_HEAD ( &cq->work_queues );
  58. /* Perform device-specific initialisation and get CQN */
  59. if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
  60. DBGC ( ibdev, "IBDEV %p could not initialise completion "
  61. "queue: %s\n", ibdev, strerror ( rc ) );
  62. free ( cq );
  63. return NULL;
  64. }
  65. DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
  66. "with CQN %#lx\n", ibdev, num_cqes, cq,
  67. ib_cq_get_drvdata ( cq ), cq->cqn );
  68. return cq;
  69. }
  70. /**
  71. * Destroy completion queue
  72. *
  73. * @v ibdev Infiniband device
  74. * @v cq Completion queue
  75. */
  76. void ib_destroy_cq ( struct ib_device *ibdev,
  77. struct ib_completion_queue *cq ) {
  78. DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
  79. ibdev, cq->cqn );
  80. assert ( list_empty ( &cq->work_queues ) );
  81. ibdev->op->destroy_cq ( ibdev, cq );
  82. free ( cq );
  83. }
  84. /**
  85. * Create queue pair
  86. *
  87. * @v ibdev Infiniband device
  88. * @v num_send_wqes Number of send work queue entries
  89. * @v send_cq Send completion queue
  90. * @v num_recv_wqes Number of receive work queue entries
  91. * @v recv_cq Receive completion queue
  92. * @v qkey Queue key
  93. * @ret qp Queue pair
  94. */
  95. struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
  96. unsigned int num_send_wqes,
  97. struct ib_completion_queue *send_cq,
  98. unsigned int num_recv_wqes,
  99. struct ib_completion_queue *recv_cq,
  100. unsigned long qkey ) {
  101. struct ib_queue_pair *qp;
  102. size_t total_size;
  103. int rc;
  104. DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
  105. /* Allocate and initialise data structure */
  106. total_size = ( sizeof ( *qp ) +
  107. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
  108. ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
  109. qp = zalloc ( total_size );
  110. if ( ! qp )
  111. return NULL;
  112. qp->qkey = qkey;
  113. qp->send.qp = qp;
  114. qp->send.is_send = 1;
  115. qp->send.cq = send_cq;
  116. list_add ( &qp->send.list, &send_cq->work_queues );
  117. qp->send.num_wqes = num_send_wqes;
  118. qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
  119. qp->recv.qp = qp;
  120. qp->recv.cq = recv_cq;
  121. list_add ( &qp->recv.list, &recv_cq->work_queues );
  122. qp->recv.num_wqes = num_recv_wqes;
  123. qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
  124. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
  125. /* Perform device-specific initialisation and get QPN */
  126. if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
  127. DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
  128. "%s\n", ibdev, strerror ( rc ) );
  129. list_del ( &qp->send.list );
  130. list_del ( &qp->recv.list );
  131. free ( qp );
  132. return NULL;
  133. }
  134. DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
  135. ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
  136. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
  137. ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
  138. qp->recv.iobufs );
  139. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
  140. ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
  141. ( ( ( void * ) qp ) + total_size ) );
  142. return qp;
  143. }
  144. /**
  145. * Modify queue pair
  146. *
  147. * @v ibdev Infiniband device
  148. * @v qp Queue pair
  149. * @v mod_list Modification list
  150. * @v qkey New queue key, if applicable
  151. * @ret rc Return status code
  152. */
  153. int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  154. unsigned long mod_list, unsigned long qkey ) {
  155. int rc;
  156. DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
  157. if ( mod_list & IB_MODIFY_QKEY )
  158. qp->qkey = qkey;
  159. if ( ( rc = ibdev->op->modify_qp ( ibdev, qp, mod_list ) ) != 0 ) {
  160. DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
  161. ibdev, qp->qpn, strerror ( rc ) );
  162. return rc;
  163. }
  164. return 0;
  165. }
  166. /**
  167. * Destroy queue pair
  168. *
  169. * @v ibdev Infiniband device
  170. * @v qp Queue pair
  171. */
  172. void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  173. DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
  174. ibdev, qp->qpn );
  175. ibdev->op->destroy_qp ( ibdev, qp );
  176. list_del ( &qp->send.list );
  177. list_del ( &qp->recv.list );
  178. free ( qp );
  179. }
  180. /**
  181. * Find work queue belonging to completion queue
  182. *
  183. * @v cq Completion queue
  184. * @v qpn Queue pair number
  185. * @v is_send Find send work queue (rather than receive)
  186. * @ret wq Work queue, or NULL if not found
  187. */
  188. struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
  189. unsigned long qpn, int is_send ) {
  190. struct ib_work_queue *wq;
  191. list_for_each_entry ( wq, &cq->work_queues, list ) {
  192. if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
  193. return wq;
  194. }
  195. return NULL;
  196. }
  197. /***************************************************************************
  198. *
  199. * Management datagram operations
  200. *
  201. ***************************************************************************
  202. */
  203. /**
  204. * Get port information
  205. *
  206. * @v ibdev Infiniband device
  207. * @v port_info Port information datagram to fill in
  208. * @ret rc Return status code
  209. */
  210. static int ib_get_port_info ( struct ib_device *ibdev,
  211. struct ib_mad_port_info *port_info ) {
  212. struct ib_mad_hdr *hdr = &port_info->mad_hdr;
  213. int rc;
  214. /* Construct MAD */
  215. memset ( port_info, 0, sizeof ( *port_info ) );
  216. hdr->base_version = IB_MGMT_BASE_VERSION;
  217. hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
  218. hdr->class_version = 1;
  219. hdr->method = IB_MGMT_METHOD_GET;
  220. hdr->attr_id = htons ( IB_SMP_ATTR_PORT_INFO );
  221. hdr->attr_mod = htonl ( ibdev->port );
  222. if ( ( rc = ib_mad ( ibdev, hdr, sizeof ( *port_info ) ) ) != 0 ) {
  223. DBGC ( ibdev, "IBDEV %p could not get port info: %s\n",
  224. ibdev, strerror ( rc ) );
  225. return rc;
  226. }
  227. return 0;
  228. }
  229. /**
  230. * Get GUID information
  231. *
  232. * @v ibdev Infiniband device
  233. * @v guid_info GUID information datagram to fill in
  234. * @ret rc Return status code
  235. */
  236. static int ib_get_guid_info ( struct ib_device *ibdev,
  237. struct ib_mad_guid_info *guid_info ) {
  238. struct ib_mad_hdr *hdr = &guid_info->mad_hdr;
  239. int rc;
  240. /* Construct MAD */
  241. memset ( guid_info, 0, sizeof ( *guid_info ) );
  242. hdr->base_version = IB_MGMT_BASE_VERSION;
  243. hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
  244. hdr->class_version = 1;
  245. hdr->method = IB_MGMT_METHOD_GET;
  246. hdr->attr_id = htons ( IB_SMP_ATTR_GUID_INFO );
  247. if ( ( rc = ib_mad ( ibdev, hdr, sizeof ( *guid_info ) ) ) != 0 ) {
  248. DBGC ( ibdev, "IBDEV %p could not get GUID info: %s\n",
  249. ibdev, strerror ( rc ) );
  250. return rc;
  251. }
  252. return 0;
  253. }
  254. /**
  255. * Get partition key table
  256. *
  257. * @v ibdev Infiniband device
  258. * @v guid_info Partition key table datagram to fill in
  259. * @ret rc Return status code
  260. */
  261. static int ib_get_pkey_table ( struct ib_device *ibdev,
  262. struct ib_mad_pkey_table *pkey_table ) {
  263. struct ib_mad_hdr *hdr = &pkey_table->mad_hdr;
  264. int rc;
  265. /* Construct MAD */
  266. memset ( pkey_table, 0, sizeof ( *pkey_table ) );
  267. hdr->base_version = IB_MGMT_BASE_VERSION;
  268. hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
  269. hdr->class_version = 1;
  270. hdr->method = IB_MGMT_METHOD_GET;
  271. hdr->attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE );
  272. if ( ( rc = ib_mad ( ibdev, hdr, sizeof ( *pkey_table ) ) ) != 0 ) {
  273. DBGC ( ibdev, "IBDEV %p could not get pkey table: %s\n",
  274. ibdev, strerror ( rc ) );
  275. return rc;
  276. }
  277. return 0;
  278. }
  279. /**
  280. * Get MAD parameters
  281. *
  282. * @v ibdev Infiniband device
  283. * @ret rc Return status code
  284. */
  285. static int ib_get_mad_params ( struct ib_device *ibdev ) {
  286. union {
  287. /* This union exists just to save stack space */
  288. struct ib_mad_port_info port_info;
  289. struct ib_mad_guid_info guid_info;
  290. struct ib_mad_pkey_table pkey_table;
  291. } u;
  292. int rc;
  293. /* Port info gives us the link state, the first half of the
  294. * port GID and the SM LID.
  295. */
  296. if ( ( rc = ib_get_port_info ( ibdev, &u.port_info ) ) != 0 )
  297. return rc;
  298. ibdev->link_up = ( ( u.port_info.port_state__link_speed_supported
  299. & 0xf ) == 4 );
  300. memcpy ( &ibdev->port_gid.u.bytes[0], u.port_info.gid_prefix, 8 );
  301. ibdev->sm_lid = ntohs ( u.port_info.mastersm_lid );
  302. /* GUID info gives us the second half of the port GID */
  303. if ( ( rc = ib_get_guid_info ( ibdev, &u.guid_info ) ) != 0 )
  304. return rc;
  305. memcpy ( &ibdev->port_gid.u.bytes[8], u.guid_info.gid_local, 8 );
  306. /* Get partition key */
  307. if ( ( rc = ib_get_pkey_table ( ibdev, &u.pkey_table ) ) != 0 )
  308. return rc;
  309. ibdev->pkey = ntohs ( u.pkey_table.pkey[0][0] );
  310. DBGC ( ibdev, "IBDEV %p port GID is %08lx:%08lx:%08lx:%08lx\n",
  311. ibdev, htonl ( ibdev->port_gid.u.dwords[0] ),
  312. htonl ( ibdev->port_gid.u.dwords[1] ),
  313. htonl ( ibdev->port_gid.u.dwords[2] ),
  314. htonl ( ibdev->port_gid.u.dwords[3] ) );
  315. return 0;
  316. }
  317. /***************************************************************************
  318. *
  319. * Event queues
  320. *
  321. ***************************************************************************
  322. */
  /**
   * Handle Infiniband link state change
   *
   * Re-reads the MAD parameters and, only if that succeeds, passes the
   * notification on to IPoIB.
   *
   * @v ibdev           Infiniband device
   */
  void ib_link_state_changed ( struct ib_device *ibdev ) {
      int rc;

      /* Refresh the cached MAD parameters */
      rc = ib_get_mad_params ( ibdev );
      if ( rc == 0 ) {
          /* Notify IPoIB of link state change */
          ipoib_link_state_changed ( ibdev );
      } else {
          DBGC ( ibdev, "IBDEV %p could not update MAD parameters: %s\n",
                 ibdev, strerror ( rc ) );
      }
  }
  339. /**
  340. * Single-step the Infiniband event queue
  341. *
  342. * @v process Infiniband event queue process
  343. */
  344. static void ib_step ( struct process *process __unused ) {
  345. struct ib_device *ibdev;
  346. list_for_each_entry ( ibdev, &ib_devices, list ) {
  347. ibdev->op->poll_eq ( ibdev );
  348. }
  349. }
  350. /** Infiniband event queue process; its step method is ib_step() */
  351. struct process ib_process __permanent_process = {
  352. .step = ib_step,
  353. };
  354. /***************************************************************************
  355. *
  356. * Infiniband device creation/destruction
  357. *
  358. ***************************************************************************
  359. */
  360. /**
  361. * Allocate Infiniband device
  362. *
  363. * @v priv_size Size of driver private data area
  364. * @ret ibdev Infiniband device, or NULL
  365. */
  366. struct ib_device * alloc_ibdev ( size_t priv_size ) {
  367. struct ib_device *ibdev;
  368. void *drv_priv;
  369. size_t total_len;
  370. total_len = ( sizeof ( *ibdev ) + priv_size );
  371. ibdev = zalloc ( total_len );
  372. if ( ibdev ) {
  373. drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
  374. ib_set_drvdata ( ibdev, drv_priv );
  375. }
  376. return ibdev;
  377. }
  378. /**
  379. * Register Infiniband device
  380. *
  381. * @v ibdev Infiniband device
  382. * @ret rc Return status code
  383. */
  384. int register_ibdev ( struct ib_device *ibdev ) {
  385. int rc;
  386. /* Add to device list */
  387. ibdev_get ( ibdev );
  388. list_add_tail ( &ibdev->list, &ib_devices );
  389. /* Open link */
  390. if ( ( rc = ib_open ( ibdev ) ) != 0 )
  391. goto err_open;
  392. /* Get MAD parameters */
  393. if ( ( rc = ib_get_mad_params ( ibdev ) ) != 0 )
  394. goto err_get_mad_params;
  395. /* Add IPoIB device */
  396. if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) {
  397. DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n",
  398. ibdev, strerror ( rc ) );
  399. goto err_ipoib_probe;
  400. }
  401. DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
  402. ibdev->dev->name );
  403. return 0;
  404. err_ipoib_probe:
  405. err_get_mad_params:
  406. ib_close ( ibdev );
  407. err_open:
  408. list_del ( &ibdev->list );
  409. ibdev_put ( ibdev );
  410. return rc;
  411. }
  412. /**
  413. * Unregister Infiniband device
  414. *
  415. * @v ibdev Infiniband device
  416. */
  417. void unregister_ibdev ( struct ib_device *ibdev ) {
  418. /* Close device */
  419. ipoib_remove ( ibdev );
  420. ib_close ( ibdev );
  421. /* Remove from device list */
  422. list_del ( &ibdev->list );
  423. ibdev_put ( ibdev );
  424. DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
  425. }