選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

myri10ge.c 35KB


  1. /************************************************* -*- linux-c -*-
  2. * Myricom 10Gb Network Interface Card Software
  3. * Copyright 2009, Myricom, Inc.
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License,
  7. * version 2, as published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17. ****************************************************************/
  18. FILE_LICENCE ( GPL2_ONLY );
  19. /*
  20. * Author: Glenn Brown <glenn@myri.com>
  21. */
  22. /*
  23. * General Theory of Operation
  24. *
  25. * This is a minimal Myricom 10 gigabit Ethernet driver for network
  26. * boot.
  27. *
  28. * Initialization
  29. *
  30. * myri10ge_pci_probe() is called by iPXE during initialization.
  31. * Minimal NIC initialization is performed to minimize resources
  32. * consumed when the driver is resident but unused.
  33. *
  34. * Network Boot
  35. *
  36. * myri10ge_net_open() is called by iPXE before attempting to network
  37. * boot from the card. Packet buffers are allocated and the NIC
  38. * interface is initialized.
  39. *
  40. * Transmit
  41. *
  42. * myri10ge_net_transmit() enqueues frames for transmission by writing
  43. * descriptors to the NIC's tx ring. For simplicity and to avoid
  44. * copies, we always have the NIC DMA up the packet. The sent I/O
  45. * buffer is released once the NIC signals myri10ge_interrupt_handler()
  46. * that the send has completed.
  47. *
  48. * Receive
  49. *
  50. * Receives are posted to the NIC's receive ring. The NIC fills a
  51. * DMAable receive_completion ring with completion notifications.
  52. * myri10ge_net_poll() polls for these receive notifications, posts
  53. * replacement receive buffers to the NIC, and passes received frames
  54. * to netdev_rx().
  55. *
  56. * NonVolatile Storage
  57. *
  58. * This driver supports NonVolatile Storage (nvs) in the NIC EEPROM.
  59. * If the last EEPROM block is not otherwise filled, we tell
  60. * iPXE it may store NonVolatile Options (nvo) there.
  61. */
  62. /*
  63. * Debugging levels:
  64. * - DBG() is for any errors, i.e. failed alloc_iob(), malloc_dma(),
  65. * TX overflow, corrupted packets, ...
  66. * - DBG2() is for successful events, like packet received,
  67. * packet transmitted, and other general notifications.
  68. * - DBGP() prints the name of each called function on entry
  69. */
  70. #include <stdint.h>
  71. #include <byteswap.h>
  72. #include <errno.h>
  73. #include <ipxe/ethernet.h>
  74. #include <ipxe/if_ether.h>
  75. #include <ipxe/iobuf.h>
  76. #include <ipxe/malloc.h>
  77. #include <ipxe/netdevice.h>
  78. #include <ipxe/nvo.h>
  79. #include <ipxe/nvs.h>
  80. #include <ipxe/pci.h>
  81. #include <ipxe/timer.h>
  82. #include "myri10ge_mcp.h"
  83. /****************************************************************
  84. * Forward declarations
  85. ****************************************************************/
  /* PCI driver entry points, declared here because they are defined
     further down in this file. */
  static int myri10ge_pci_probe ( struct pci_device *pci,
                                  const struct pci_device_id *id );
  static void myri10ge_pci_remove ( struct pci_device *pci );

  /* Network device operations; the probe routine installs these in its
     net_device_operations table. */
  static int myri10ge_net_open ( struct net_device *netdev );
  static void myri10ge_net_close ( struct net_device *netdev );
  static int myri10ge_net_transmit ( struct net_device *netdev,
                                     struct io_buffer *iob );
  static void myri10ge_net_poll ( struct net_device *netdev );
  static void myri10ge_net_irq ( struct net_device *netdev, int enable );
  96. /****************************************************************
  97. * Constants
  98. ****************************************************************/
  /*
   * Largest valid index of each host-side ring.  Ring counters are
   * reduced to an index with ( counter & WRAP ), so every value must be
   * of the form 2**N-1.
   */
  #define MYRI10GE_TRANSMIT_WRAP            0x01U
  #define MYRI10GE_RECEIVE_WRAP             0x07U
  #define MYRI10GE_RECEIVE_COMPLETION_WRAP  0x1FU
  103. /****************************************************************
  104. * Driver internal data types.
  105. ****************************************************************/
  106. /* Structure holding all DMA buffers for a NIC, which we will
  107. allocated as contiguous read/write DMAable memory when the NIC is
  108. initialized. */
  109. struct myri10ge_dma_buffers
  110. {
  111. /* The NIC DMAs receive completion notifications into this ring */
  112. mcp_slot_t receive_completion[1+MYRI10GE_RECEIVE_COMPLETION_WRAP];
  113. /* Interrupt details are DMAd here before interrupting. */
  114. mcp_irq_data_t irq_data; /* 64B */
  115. /* NIC command completion status is DMAd here. */
  116. mcp_cmd_response_t command_response; /* 8B */
  117. };
  118. struct myri10ge_private
  119. {
  120. /* Interrupt support */
  121. uint32 *irq_claim; /* in NIC SRAM */
  122. uint32 *irq_deassert; /* in NIC SRAM */
  123. /* DMA buffers. */
  124. struct myri10ge_dma_buffers *dma;
  125. /*
  126. * Transmit state.
  127. *
  128. * The counts here are uint32 for easy comparison with
  129. * priv->dma->irq_data.send_done_count and with each other.
  130. */
  131. mcp_kreq_ether_send_t *transmit_ring; /* in NIC SRAM */
  132. uint32 transmit_ring_wrap;
  133. uint32 transmits_posted;
  134. uint32 transmits_done;
  135. struct io_buffer *transmit_iob[1 + MYRI10GE_TRANSMIT_WRAP];
  136. /*
  137. * Receive state.
  138. */
  139. mcp_kreq_ether_recv_t *receive_post_ring; /* in NIC SRAM */
  140. unsigned int receive_post_ring_wrap;
  141. unsigned int receives_posted;
  142. unsigned int receives_done;
  143. struct io_buffer *receive_iob[1 + MYRI10GE_RECEIVE_WRAP];
  144. /* Address for writing commands to the firmware.
  145. BEWARE: the value must be written 32 bits at a time. */
  146. mcp_cmd_t *command;
  147. /*
  148. * Nonvolatile Storage for configuration options.
  149. */
  150. struct nvs_device nvs;
  151. struct nvo_fragment nvo_fragment[2];
  152. struct nvo_block nvo;
  153. /* Cached PCI capability locations. */
  154. uint8 pci_cap_vs;
  155. };
  156. /****************************************************************
  157. * Driver internal functions.
  158. ****************************************************************/
  /*
   * Debug aid: print the transmit and receive done/posted counters
   * together with the name of the calling function.  Only worth calling
   * right after one of the printed values has changed.
   */
  #define DBG2_RINGS( priv )                                        \
      DBG2 ( "tx %x/%x rx %x/%x in %s() \n",                        \
             ( priv )->transmits_done, ( priv )->transmits_posted,  \
             ( priv )->receives_done, ( priv )->receives_posted,    \
             __FUNCTION__ )
  166. /*
  167. * Return a pointer to the driver private data for a network device.
  168. *
  169. * @v netdev Network device created by this driver.
  170. * @ret priv The corresponding driver private data.
  171. */
  172. static inline struct myri10ge_private *myri10ge_priv ( struct net_device *nd )
  173. {
  174. /* Our private data always follows the network device in memory,
  175. since we use alloc_netdev() to allocate the storage. */
  176. return ( struct myri10ge_private * ) ( nd + 1 );
  177. }
  178. /*
  179. * Convert a Myri10ge driver private data pointer to a netdev pointer.
  180. *
  181. * @v p Myri10ge device private data.
  182. * @ret r The corresponding network device.
  183. */
  184. static inline struct net_device *myri10ge_netdev ( struct myri10ge_private *p )
  185. {
  186. return ( ( struct net_device * ) p ) - 1;
  187. }
  188. /*
  189. * Convert a network device pointer to a PCI device pointer.
  190. *
  191. * @v netdev A Network Device.
  192. * @ret r The corresponding PCI device.
  193. */
  194. static inline struct pci_device *myri10ge_pcidev ( struct net_device *netdev )
  195. {
  196. return container_of (netdev->dev, struct pci_device, dev);
  197. }
  198. /*
  199. * Pass a receive buffer to the NIC to be filled.
  200. *
  201. * @v priv The network device to receive the buffer.
  202. * @v iob The I/O buffer to fill.
  203. *
  204. * Receive buffers are filled in FIFO order.
  205. */
  206. static void myri10ge_post_receive ( struct myri10ge_private *priv,
  207. struct io_buffer *iob )
  208. {
  209. unsigned int receives_posted;
  210. mcp_kreq_ether_recv_t *request;
  211. /* Record the posted I/O buffer, to be passed to netdev_rx() on
  212. receive. */
  213. receives_posted = priv->receives_posted;
  214. priv->receive_iob[receives_posted & MYRI10GE_RECEIVE_WRAP] = iob;
  215. /* Post the receive. */
  216. request = &priv->receive_post_ring[receives_posted
  217. & priv->receive_post_ring_wrap];
  218. request->addr_high = 0;
  219. wmb();
  220. request->addr_low = htonl ( virt_to_bus ( iob->data ) );
  221. priv->receives_posted = ++receives_posted;
  222. }
  223. /*
  224. * Execute a command on the NIC.
  225. *
  226. * @v priv NIC to perform the command.
  227. * @v cmd The command to perform.
  228. * @v data I/O copy buffer for parameters/results
  229. * @ret rc 0 on success, else an error code.
  230. */
  231. static int myri10ge_command ( struct myri10ge_private *priv,
  232. uint32 cmd,
  233. uint32 data[3] )
  234. {
  235. int i;
  236. mcp_cmd_t *command;
  237. uint32 result;
  238. unsigned int slept_ms;
  239. volatile mcp_cmd_response_t *response;
  240. DBGP ( "myri10ge_command ( ,%d, ) \n", cmd );
  241. command = priv->command;
  242. response = &priv->dma->command_response;
  243. /* Mark the command as incomplete. */
  244. response->result = 0xFFFFFFFF;
  245. /* Pass the command to the NIC. */
  246. command->cmd = htonl ( cmd );
  247. command->data0 = htonl ( data[0] );
  248. command->data1 = htonl ( data[1] );
  249. command->data2 = htonl ( data[2] );
  250. command->response_addr.high = 0;
  251. command->response_addr.low
  252. = htonl ( virt_to_bus ( &priv->dma->command_response ) );
  253. for ( i=0; i<36; i+=4 )
  254. * ( uint32 * ) &command->pad[i] = 0;
  255. wmb();
  256. * ( uint32 * ) &command->pad[36] = 0;
  257. /* Wait up to 2 seconds for a response. */
  258. for ( slept_ms=0; slept_ms<2000; slept_ms++ ) {
  259. result = response->result;
  260. if ( result == 0 ) {
  261. data[0] = ntohl ( response->data );
  262. return 0;
  263. } else if ( result != 0xFFFFFFFF ) {
  264. DBG ( "cmd%d:0x%x\n",
  265. cmd,
  266. ntohl ( response->result ) );
  267. return -EIO;
  268. }
  269. udelay ( 1000 );
  270. rmb();
  271. }
  272. DBG ( "cmd%d:timed out\n", cmd );
  273. return -ETIMEDOUT;
  274. }
  275. /*
  276. * Handle any pending interrupt.
  277. *
  278. * @v netdev Device being polled for interrupts.
  279. *
  280. * This is called periodically to let the driver check for interrupts.
  281. */
  282. static void myri10ge_interrupt_handler ( struct net_device *netdev )
  283. {
  284. struct myri10ge_private *priv;
  285. mcp_irq_data_t *irq_data;
  286. uint8 valid;
  287. priv = myri10ge_priv ( netdev );
  288. irq_data = &priv->dma->irq_data;
  289. /* Return if there was no interrupt. */
  290. rmb();
  291. valid = irq_data->valid;
  292. if ( !valid )
  293. return;
  294. DBG2 ( "irq " );
  295. /* Tell the NIC to deassert the interrupt and clear
  296. irq_data->valid.*/
  297. *priv->irq_deassert = 0; /* any value is OK. */
  298. mb();
  299. /* Handle any new receives. */
  300. if ( valid & 1 ) {
  301. /* Pass the receive interrupt token back to the NIC. */
  302. DBG2 ( "rx " );
  303. *priv->irq_claim = htonl ( 3 );
  304. wmb();
  305. }
  306. /* Handle any sent packet by freeing its I/O buffer, now that
  307. we know it has been DMAd. */
  308. if ( valid & 2 ) {
  309. unsigned int nic_done_count;
  310. DBG2 ( "snt " );
  311. nic_done_count = ntohl ( priv->dma->irq_data.send_done_count );
  312. while ( priv->transmits_done != nic_done_count ) {
  313. struct io_buffer *iob;
  314. iob = priv->transmit_iob [priv->transmits_done
  315. & MYRI10GE_TRANSMIT_WRAP];
  316. DBG2 ( "%p ", iob );
  317. netdev_tx_complete ( netdev, iob );
  318. ++priv->transmits_done;
  319. }
  320. }
  321. /* Record any statistics update. */
  322. if ( irq_data->stats_updated ) {
  323. /* Update the link status. */
  324. DBG2 ( "stats " );
  325. if ( ntohl ( irq_data->link_up ) == MXGEFW_LINK_UP )
  326. netdev_link_up ( netdev );
  327. else
  328. netdev_link_down ( netdev );
  329. /* Ignore all error counters from the NIC. */
  330. }
  331. /* Wait for the interrupt to be deasserted, as indicated by
  332. irq_data->valid, which is set by the NIC after the deassert. */
  333. DBG2 ( "wait " );
  334. do {
  335. mb();
  336. } while ( irq_data->valid );
  337. /* Claim the interrupt to enable future interrupt generation. */
  338. DBG2 ( "claim\n" );
  339. * ( priv->irq_claim + 1 ) = htonl ( 3 );
  340. mb();
  341. }
  /*
   * Register offsets and mode values of the Myricom vendor-specific PCI
   * configuration space capability.  The register macros expect a
   * variable named `vs` (the capability offset) to be in scope.
   */

  /* Registers, in ascending offset order. */
  #define VS_EEPROM_READ_ADDR  ( vs + 0x04 )
  #define VS_EEPROM_READ_DATA  ( vs + 0x08 )
  #define VS_EEPROM_WRITE      ( vs + 0x0C )
  #define VS_MODE              ( vs + 0x10 )
  #define VS_DATA              ( vs + 0x14 )
  #define VS_ADDR              ( vs + 0x18 )

  /* Values written to VS_MODE. */
  #define VS_MODE_READ32               0x3
  #define VS_MODE_LOCATE               0x8
  #define VS_MODE_EEPROM_STREAM_WRITE  0xB

  /* Value written to VS_ADDR in locate mode to find the string specs. */
  #define VS_LOCATE_STRING_SPECS  0x3
  354. /*
  355. * Read MAC address from its 'string specs' via the vendor-specific
  356. * capability. (This capability allows NIC SRAM and ROM to be read
  357. * before it is mapped.)
  358. *
  359. * @v pci The device.
  360. * @v vs Offset of the PCI Vendor-Specific Capability.
  361. * @v mac Buffer to store the MAC address.
  362. * @ret rc Returns 0 on success, else an error code.
  363. */
  364. static int mac_address_from_string_specs ( struct pci_device *pci,
  365. unsigned int vs,
  366. uint8 mac[ETH_ALEN] )
  367. {
  368. char string_specs[256];
  369. char *ptr, *limit;
  370. char *to = string_specs;
  371. uint32 addr;
  372. uint32 len;
  373. int mac_set = 0;
  374. /* Locate the String specs in LANai SRAM. */
  375. pci_write_config_byte ( pci, VS_MODE, VS_MODE_LOCATE );
  376. pci_write_config_dword ( pci, VS_ADDR, VS_LOCATE_STRING_SPECS );
  377. pci_read_config_dword ( pci, VS_ADDR, &addr );
  378. pci_read_config_dword ( pci, VS_DATA, &len );
  379. DBG2 ( "ss@%x,%x\n", addr, len );
  380. /* Copy in the string specs. Use 32-bit reads for performance. */
  381. if ( len > sizeof ( string_specs ) || ( len & 3 ) ) {
  382. pci_write_config_byte ( pci, VS_MODE, 0 );
  383. DBG ( "SS too big\n" );
  384. return -ENOTSUP;
  385. }
  386. pci_write_config_byte ( pci, VS_MODE, VS_MODE_READ32 );
  387. while ( len >= 4 ) {
  388. uint32 tmp;
  389. pci_write_config_byte ( pci, VS_ADDR, addr );
  390. pci_read_config_dword ( pci, VS_DATA, &tmp );
  391. tmp = ntohl ( tmp );
  392. memcpy ( to, &tmp, 4 );
  393. to += 4;
  394. addr += 4;
  395. len -= 4;
  396. }
  397. pci_write_config_byte ( pci, VS_MODE, 0 );
  398. /* Parse the string specs. */
  399. DBG2 ( "STRING_SPECS:\n" );
  400. ptr = string_specs;
  401. limit = string_specs + sizeof ( string_specs );
  402. while ( *ptr != '\0' && ptr < limit ) {
  403. DBG2 ( "%s\n", ptr );
  404. if ( memcmp ( ptr, "MAC=", 4 ) == 0 ) {
  405. unsigned int i;
  406. ptr += 4;
  407. for ( i=0; i<6; i++ ) {
  408. if ( ( ptr + 2 ) > limit ) {
  409. DBG ( "bad MAC addr\n" );
  410. return -ENOTSUP;
  411. }
  412. mac[i] = strtoul ( ptr, &ptr, 16 );
  413. ptr += 1;
  414. }
  415. mac_set = 1;
  416. }
  417. else
  418. while ( ptr < limit && *ptr++ );
  419. }
  420. /* Verify we parsed all we need. */
  421. if ( !mac_set ) {
  422. DBG ( "no MAC addr\n" );
  423. return -ENOTSUP;
  424. }
  425. DBG2 ( "MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
  426. mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] );
  427. return 0;
  428. }
  429. /****************************************************************
  430. * NonVolatile Storage support
  431. ****************************************************************/
  432. /*
  433. * Fill a buffer with data read from nonvolatile storage.
  434. *
  435. * @v nvs The NonVolatile Storage device to be read.
  436. * @v addr The first NonVolatile Storage address to be read.
  437. * @v _buf Pointer to the data buffer to be filled.
  438. * @v len The number of bytes to copy.
  439. * @ret rc 0 on success, else nonzero.
  440. */
  441. static int myri10ge_nvs_read ( struct nvs_device *nvs,
  442. unsigned int addr,
  443. void *_buf,
  444. size_t len )
  445. {
  446. struct myri10ge_private *priv =
  447. container_of (nvs, struct myri10ge_private, nvs);
  448. struct pci_device *pci = myri10ge_pcidev ( myri10ge_netdev ( priv ) );
  449. unsigned int vs = priv->pci_cap_vs;
  450. unsigned char *buf = (unsigned char *) _buf;
  451. unsigned int data;
  452. unsigned int i, j;
  453. DBGP ( "myri10ge_nvs_read\n" );
  454. /* Issue the first read address. */
  455. pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 3, addr>>16 );
  456. pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 2, addr>>8 );
  457. pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 1, addr );
  458. addr++;
  459. /* Issue all the reads, and harvest the results every 4th issue. */
  460. for ( i=0; i<len; ++i,addr++ ) {
  461. /* Issue the next read address, updating only the
  462. bytes that need updating. We always update the
  463. LSB, which triggers the read. */
  464. if ( ( addr & 0xff ) == 0 ) {
  465. if ( ( addr & 0xffff ) == 0 ) {
  466. pci_write_config_byte ( pci,
  467. VS_EEPROM_READ_ADDR + 3,
  468. addr >> 16 );
  469. }
  470. pci_write_config_byte ( pci,
  471. VS_EEPROM_READ_ADDR + 2,
  472. addr >> 8 );
  473. }
  474. pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 1, addr );
  475. /* If 4 data bytes are available, read them with a single read. */
  476. if ( ( i & 3 ) == 3 ) {
  477. pci_read_config_dword ( pci,
  478. VS_EEPROM_READ_DATA,
  479. &data );
  480. for ( j=0; j<4; j++ ) {
  481. buf[i-j] = data;
  482. data >>= 8;
  483. }
  484. }
  485. }
  486. /* Harvest any remaining results. */
  487. if ( ( i & 3 ) != 0 ) {
  488. pci_read_config_dword ( pci, VS_EEPROM_READ_DATA, &data );
  489. for ( j=1; j<=(i&3); j++ ) {
  490. buf[i-j] = data;
  491. data >>= 8;
  492. }
  493. }
  494. DBGP_HDA ( addr - len, _buf, len );
  495. return 0;
  496. }
  497. /*
  498. * Write a buffer into nonvolatile storage.
  499. *
  500. * @v nvs The NonVolatile Storage device to be written.
  501. * @v address The NonVolatile Storage address to be written.
  502. * @v _buf Pointer to the data to be written.
  503. * @v len Length of the buffer to be written.
  504. * @ret rc 0 on success, else nonzero.
  505. */
  506. static int myri10ge_nvs_write ( struct nvs_device *nvs,
  507. unsigned int addr,
  508. const void *_buf,
  509. size_t len )
  510. {
  511. struct myri10ge_private *priv =
  512. container_of (nvs, struct myri10ge_private, nvs);
  513. struct pci_device *pci = myri10ge_pcidev ( myri10ge_netdev ( priv ) );
  514. unsigned int vs = priv->pci_cap_vs;
  515. const unsigned char *buf = (const unsigned char *)_buf;
  516. unsigned int i;
  517. uint8 verify;
  518. DBGP ( "nvs_write " );
  519. DBGP_HDA ( addr, _buf, len );
  520. /* Start erase of the NonVolatile Options block. */
  521. DBGP ( "erasing " );
  522. pci_write_config_dword ( pci, VS_EEPROM_WRITE, ( addr << 8 ) | 0xff );
  523. /* Wait for erase to complete. */
  524. DBGP ( "waiting " );
  525. pci_read_config_byte ( pci, VS_EEPROM_READ_DATA, &verify );
  526. while ( verify != 0xff ) {
  527. pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 1, addr );
  528. pci_read_config_byte ( pci, VS_EEPROM_READ_DATA, &verify );
  529. }
  530. /* Write the data one byte at a time. */
  531. DBGP ( "writing " );
  532. pci_write_config_byte ( pci, VS_MODE, VS_MODE_EEPROM_STREAM_WRITE );
  533. pci_write_config_dword ( pci, VS_ADDR, addr );
  534. for (i=0; i<len; i++, addr++)
  535. pci_write_config_byte ( pci, VS_DATA, buf[i] );
  536. pci_write_config_dword ( pci, VS_ADDR, 0xffffffff );
  537. pci_write_config_byte ( pci, VS_MODE, 0 );
  538. DBGP ( "done\n" );
  539. return 0;
  540. }
  541. /*
  542. * Initialize NonVolatile storage support for a device.
  543. *
  544. * @v priv Device private data for the device.
  545. * @ret rc 0 on success, else an error code.
  546. */
  547. static int myri10ge_nv_init ( struct myri10ge_private *priv )
  548. {
  549. int rc;
  550. struct myri10ge_eeprom_header
  551. {
  552. uint8 __jump[8];
  553. uint32 eeprom_len;
  554. uint32 eeprom_segment_len;
  555. uint32 mcp1_offset;
  556. uint32 mcp2_offset;
  557. uint32 version;
  558. } hdr;
  559. uint32 mcp2_len;
  560. unsigned int nvo_fragment_pos;
  561. DBGP ( "myri10ge_nv_init\n" );
  562. /* Read the EEPROM header, and byteswap the fields we will use.
  563. This is safe even though priv->nvs is not yet initialized. */
  564. rc = myri10ge_nvs_read ( &priv->nvs, 0, &hdr, sizeof ( hdr ) );
  565. if ( rc ) {
  566. DBG ( "EEPROM header unreadable\n" );
  567. return rc;
  568. }
  569. hdr.eeprom_len = ntohl ( hdr.eeprom_len );
  570. hdr.eeprom_segment_len = ntohl ( hdr.eeprom_segment_len );
  571. hdr.mcp2_offset = ntohl ( hdr.mcp2_offset );
  572. hdr.version = ntohl ( hdr.version );
  573. DBG2 ( "eelen:%xh seglen:%xh mcp2@%xh ver%d\n", hdr.eeprom_len,
  574. hdr.eeprom_segment_len, hdr.mcp2_offset, hdr.version );
  575. /* If the firmware does not support EEPROM writes, simply return. */
  576. if ( hdr.version < 1 ) {
  577. DBG ( "No EEPROM write support\n" );
  578. return 0;
  579. }
  580. /* Read the length of MCP2. */
  581. rc = myri10ge_nvs_read ( &priv->nvs, hdr.mcp2_offset, &mcp2_len, 4 );
  582. mcp2_len = ntohl ( mcp2_len );
  583. DBG2 ( "mcp2len:%xh\n", mcp2_len );
  584. /* Determine the position of the NonVolatile Options fragment and
  585. simply return if it overlaps other data. */
  586. nvo_fragment_pos = hdr.eeprom_len - hdr.eeprom_segment_len;
  587. if ( hdr.mcp2_offset + mcp2_len > nvo_fragment_pos ) {
  588. DBG ( "EEPROM full\n" );
  589. return 0;
  590. }
  591. /* Initilize NonVolatile Storage state. */
  592. priv->nvs.word_len_log2 = 0;
  593. priv->nvs.size = hdr.eeprom_len;
  594. priv->nvs.block_size = hdr.eeprom_segment_len;
  595. priv->nvs.read = myri10ge_nvs_read;
  596. priv->nvs.write = myri10ge_nvs_write;
  597. /* Build the NonVolatile storage fragment list. We would like
  598. to use the whole last EEPROM block for this, but we must
  599. reduce the block size lest malloc fail in
  600. src/core/nvo.o. */
  601. priv->nvo_fragment[0].address = nvo_fragment_pos;
  602. priv->nvo_fragment[0].len = 0x200;
  603. /* Register the NonVolatile Options storage. */
  604. nvo_init ( &priv->nvo,
  605. &priv->nvs,
  606. priv->nvo_fragment,
  607. & myri10ge_netdev (priv) -> refcnt );
  608. rc = register_nvo ( &priv->nvo,
  609. netdev_settings ( myri10ge_netdev ( priv ) ) );
  610. if ( rc ) {
  611. DBG ("register_nvo failed");
  612. priv->nvo_fragment[0].len = 0;
  613. return rc;
  614. }
  615. DBG2 ( "NVO supported\n" );
  616. return 0;
  617. }
  618. void
  619. myri10ge_nv_fini ( struct myri10ge_private *priv )
  620. {
  621. /* Simply return if nonvolatile access is not supported. */
  622. if ( 0 == priv->nvo_fragment[0].len )
  623. return;
  624. unregister_nvo ( &priv->nvo );
  625. }
  626. /****************************************************************
  627. * iPXE PCI Device Driver API functions
  628. ****************************************************************/
  629. /*
  630. * Initialize the PCI device.
  631. *
  632. * @v pci The device's associated pci_device structure.
  633. * @v id The PCI device + vendor id.
  634. * @ret rc Returns zero if successfully initialized.
  635. *
  636. * This function is called very early on, while iPXE is initializing.
  637. * This is a iPXE PCI Device Driver API function.
  638. */
  639. static int myri10ge_pci_probe ( struct pci_device *pci,
  640. const struct pci_device_id *id __unused )
  641. {
  642. static struct net_device_operations myri10ge_operations = {
  643. .open = myri10ge_net_open,
  644. .close = myri10ge_net_close,
  645. .transmit = myri10ge_net_transmit,
  646. .poll = myri10ge_net_poll,
  647. .irq = myri10ge_net_irq
  648. };
  649. const char *dbg;
  650. int rc;
  651. struct net_device *netdev;
  652. struct myri10ge_private *priv;
  653. DBGP ( "myri10ge_pci_probe: " );
  654. netdev = alloc_etherdev ( sizeof ( *priv ) );
  655. if ( !netdev ) {
  656. rc = -ENOMEM;
  657. dbg = "alloc_etherdev";
  658. goto abort_with_nothing;
  659. }
  660. netdev_init ( netdev, &myri10ge_operations );
  661. priv = myri10ge_priv ( netdev );
  662. pci_set_drvdata ( pci, netdev );
  663. netdev->dev = &pci->dev;
  664. /* Make sure interrupts are disabled. */
  665. myri10ge_net_irq ( netdev, 0 );
  666. /* Find the PCI Vendor-Specific capability. */
  667. priv->pci_cap_vs = pci_find_capability ( pci , PCI_CAP_ID_VNDR );
  668. if ( 0 == priv->pci_cap_vs ) {
  669. rc = -ENOTSUP;
  670. dbg = "no_vs";
  671. goto abort_with_netdev_init;
  672. }
  673. /* Read the NIC HW address. */
  674. rc = mac_address_from_string_specs ( pci,
  675. priv->pci_cap_vs,
  676. netdev->hw_addr );
  677. if ( rc ) {
  678. dbg = "mac_from_ss";
  679. goto abort_with_netdev_init;
  680. }
  681. DBGP ( "mac " );
  682. /* Enable bus master, etc. */
  683. adjust_pci_device ( pci );
  684. DBGP ( "pci " );
  685. /* Register the initialized network device. */
  686. rc = register_netdev ( netdev );
  687. if ( rc ) {
  688. dbg = "register_netdev";
  689. goto abort_with_netdev_init;
  690. }
  691. /* Initialize NonVolatile Storage support. */
  692. rc = myri10ge_nv_init ( priv );
  693. if ( rc ) {
  694. dbg = "myri10ge_nv_init";
  695. goto abort_with_registered_netdev;
  696. }
  697. DBGP ( "done\n" );
  698. return 0;
  699. abort_with_registered_netdev:
  700. unregister_netdev ( netdev );
  701. abort_with_netdev_init:
  702. netdev_nullify ( netdev );
  703. netdev_put ( netdev );
  704. abort_with_nothing:
  705. DBG ( "%s:%s\n", dbg, strerror ( rc ) );
  706. return rc;
  707. }
  /*
   * Remove a device from the PCI device list.
   *
   * Undoes myri10ge_pci_probe(): shuts down NonVolatile Options
   * support, unregisters the network device and drops the reference
   * held on it.
   *
   * @v pci  PCI device to remove.
   *
   * This is a PCI Device Driver API function.
   */
  static void myri10ge_pci_remove ( struct pci_device *pci )
  {
      struct myri10ge_private *priv;
      struct net_device *netdev;

      DBGP ( "myri10ge_pci_remove\n" );

      /* The probe routine stored the net_device as PCI driver data. */
      netdev = pci_get_drvdata ( pci );
      priv = myri10ge_priv ( netdev );

      myri10ge_nv_fini ( priv );
      unregister_netdev ( netdev );
      netdev_nullify ( netdev );
      netdev_put ( netdev );
  }
  725. /****************************************************************
  726. * iPXE Network Device Driver Operations
  727. ****************************************************************/
  728. /*
  729. * Close a network device.
  730. *
  731. * @v netdev Device to close.
  732. *
  733. * This is a iPXE Network Device Driver API function.
  734. */
  735. static void myri10ge_net_close ( struct net_device *netdev )
  736. {
  737. struct myri10ge_private *priv;
  738. uint32 data[3];
  739. DBGP ( "myri10ge_net_close\n" );
  740. priv = myri10ge_priv ( netdev );
  741. /* disable interrupts */
  742. myri10ge_net_irq ( netdev, 0 );
  743. /* Reset the NIC interface, so we won't get any more events from
  744. the NIC. */
  745. myri10ge_command ( priv, MXGEFW_CMD_RESET, data );
  746. /* Free receive buffers that were never filled. */
  747. while ( priv->receives_done != priv->receives_posted ) {
  748. free_iob ( priv->receive_iob[priv->receives_done
  749. & MYRI10GE_RECEIVE_WRAP] );
  750. ++priv->receives_done;
  751. }
  752. /* Release DMAable memory. */
  753. free_dma ( priv->dma, sizeof ( *priv->dma ) );
  754. /* Erase all state from the open. */
  755. memset ( priv, 0, sizeof ( *priv ) );
  756. DBG2_RINGS ( priv );
  757. }
  758. /*
  759. * Enable or disable IRQ masking.
  760. *
  761. * @v netdev Device to control.
  762. * @v enable Zero to mask off IRQ, non-zero to enable IRQ.
  763. *
  764. * This is a iPXE Network Driver API function.
  765. */
  766. static void myri10ge_net_irq ( struct net_device *netdev, int enable )
  767. {
  768. struct pci_device *pci_dev;
  769. uint16 val;
  770. DBGP ( "myri10ge_net_irq\n" );
  771. pci_dev = ( struct pci_device * ) netdev->dev;
  772. /* Adjust the Interrupt Disable bit in the Command register of the
  773. PCI Device. */
  774. pci_read_config_word ( pci_dev, PCI_COMMAND, &val );
  775. if ( enable )
  776. val &= ~PCI_COMMAND_INTX_DISABLE;
  777. else
  778. val |= PCI_COMMAND_INTX_DISABLE;
  779. pci_write_config_word ( pci_dev, PCI_COMMAND, val );
  780. }
  781. /*
  782. * Opens a network device.
  783. *
  784. * @v netdev Device to be opened.
  785. * @ret rc Non-zero if failed to open.
  786. *
  787. * This enables tx and rx on the device.
  788. * This is a iPXE Network Device Driver API function.
  789. */
  790. static int myri10ge_net_open ( struct net_device *netdev )
  791. {
  792. const char *dbg; /* printed upon error return */
  793. int rc;
  794. struct io_buffer *iob;
  795. struct myri10ge_private *priv;
  796. uint32 data[3];
  797. struct pci_device *pci_dev;
  798. void *membase;
  799. DBGP ( "myri10ge_net_open\n" );
  800. priv = myri10ge_priv ( netdev );
  801. pci_dev = ( struct pci_device * ) netdev->dev;
  802. membase = phys_to_virt ( pci_dev->membase );
  803. /* Compute address for passing commands to the firmware. */
  804. priv->command = membase + MXGEFW_ETH_CMD;
  805. /* Ensure interrupts are disabled. */
  806. myri10ge_net_irq ( netdev, 0 );
  807. /* Allocate cleared DMAable buffers. */
  808. priv->dma = malloc_dma ( sizeof ( *priv->dma ) , 128 );
  809. if ( !priv->dma ) {
  810. rc = -ENOMEM;
  811. dbg = "DMA";
  812. goto abort_with_nothing;
  813. }
  814. memset ( priv->dma, 0, sizeof ( *priv->dma ) );
  815. /* Simplify following code. */
  816. #define TRY( prefix, base, suffix ) do { \
  817. rc = myri10ge_command ( priv, \
  818. MXGEFW_ \
  819. ## prefix \
  820. ## base \
  821. ## suffix, \
  822. data ); \
  823. if ( rc ) { \
  824. dbg = #base; \
  825. goto abort_with_dma; \
  826. } \
  827. } while ( 0 )
  828. /* Send a reset command to the card to see if it is alive,
  829. and to reset its queue state. */
  830. TRY ( CMD_, RESET , );
  831. /* Set the interrupt queue size. */
  832. data[0] = ( (uint32_t)( sizeof ( priv->dma->receive_completion ) )
  833. | MXGEFW_CMD_SET_INTRQ_SIZE_FLAG_NO_STRICT_SIZE_CHECK );
  834. TRY ( CMD_SET_ , INTRQ_SIZE , );
  835. /* Set the interrupt queue DMA address. */
  836. data[0] = virt_to_bus ( &priv->dma->receive_completion );
  837. data[1] = 0;
  838. TRY ( CMD_SET_, INTRQ_DMA, );
  839. /* Get the NIC interrupt claim address. */
  840. TRY ( CMD_GET_, IRQ_ACK, _OFFSET );
  841. priv->irq_claim = membase + data[0];
  842. /* Get the NIC interrupt assert address. */
  843. TRY ( CMD_GET_, IRQ_DEASSERT, _OFFSET );
  844. priv->irq_deassert = membase + data[0];
  845. /* Disable interrupt coalescing, which is inappropriate for the
  846. minimal buffering we provide. */
  847. TRY ( CMD_GET_, INTR_COAL, _DELAY_OFFSET );
  848. * ( ( uint32 * ) ( membase + data[0] ) ) = 0;
  849. /* Set the NIC mac address. */
  850. data[0] = ( netdev->ll_addr[0] << 24
  851. | netdev->ll_addr[1] << 16
  852. | netdev->ll_addr[2] << 8
  853. | netdev->ll_addr[3] );
  854. data[1] = ( ( netdev->ll_addr[4] << 8 )
  855. | netdev->ll_addr[5] );
  856. TRY ( SET_ , MAC_ADDRESS , );
  857. /* Enable multicast receives, because some iPXE clients don't work
  858. without multicast. */
  859. TRY ( ENABLE_ , ALLMULTI , );
  860. /* Disable Ethernet flow control, so the NIC cannot deadlock the
  861. network under any circumstances. */
  862. TRY ( DISABLE_ , FLOW , _CONTROL );
  863. /* Compute transmit ring sizes. */
  864. data[0] = 0; /* slice 0 */
  865. TRY ( CMD_GET_, SEND_RING, _SIZE );
  866. priv->transmit_ring_wrap
  867. = data[0] / sizeof ( mcp_kreq_ether_send_t ) - 1;
  868. if ( priv->transmit_ring_wrap
  869. & ( priv->transmit_ring_wrap + 1 ) ) {
  870. rc = -EPROTO;
  871. dbg = "TX_RING";
  872. goto abort_with_dma;
  873. }
  874. /* Compute receive ring sizes. */
  875. data[0] = 0; /* slice 0 */
  876. TRY ( CMD_GET_ , RX_RING , _SIZE );
  877. priv->receive_post_ring_wrap = data[0] / sizeof ( mcp_dma_addr_t ) - 1;
  878. if ( priv->receive_post_ring_wrap
  879. & ( priv->receive_post_ring_wrap + 1 ) ) {
  880. rc = -EPROTO;
  881. dbg = "RX_RING";
  882. goto abort_with_dma;
  883. }
  884. /* Get NIC transmit ring address. */
  885. data[0] = 0; /* slice 0. */
  886. TRY ( CMD_GET_, SEND, _OFFSET );
  887. priv->transmit_ring = membase + data[0];
  888. /* Get the NIC receive ring address. */
  889. data[0] = 0; /* slice 0. */
  890. TRY ( CMD_GET_, SMALL_RX, _OFFSET );
  891. priv->receive_post_ring = membase + data[0];
  892. /* Set the Nic MTU. */
  893. data[0] = ETH_FRAME_LEN;
  894. TRY ( CMD_SET_, MTU, );
  895. /* Tell the NIC our buffer sizes. ( We use only small buffers, so we
  896. set both buffer sizes to the same value, which will force all
  897. received frames to use small buffers. ) */
  898. data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
  899. TRY ( CMD_SET_, SMALL_BUFFER, _SIZE );
  900. data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
  901. TRY ( CMD_SET_, BIG_BUFFER, _SIZE );
  902. /* Tell firmware where to DMA IRQ data */
  903. data[0] = virt_to_bus ( &priv->dma->irq_data );
  904. data[1] = 0;
  905. data[2] = sizeof ( priv->dma->irq_data );
  906. TRY ( CMD_SET_, STATS_DMA_V2, );
  907. /* Post receives. */
  908. while ( priv->receives_posted <= MYRI10GE_RECEIVE_WRAP ) {
  909. /* Reserve 2 extra bytes at the start of packets, since
  910. the firmware always skips the first 2 bytes of the buffer
  911. so TCP headers will be aligned. */
  912. iob = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
  913. if ( !iob ) {
  914. rc = -ENOMEM;
  915. dbg = "alloc_iob";
  916. goto abort_with_receives_posted;
  917. }
  918. iob_reserve ( iob, MXGEFW_PAD );
  919. myri10ge_post_receive ( priv, iob );
  920. }
  921. /* Bring up the link. */
  922. TRY ( CMD_, ETHERNET_UP, );
  923. DBG2_RINGS ( priv );
  924. return 0;
  925. abort_with_receives_posted:
  926. while ( priv->receives_posted-- )
  927. free_iob ( priv->receive_iob[priv->receives_posted] );
  928. abort_with_dma:
  929. /* Because the link is not up, we don't have to reset the NIC here. */
  930. free_dma ( priv->dma, sizeof ( *priv->dma ) );
  931. abort_with_nothing:
  932. /* Erase all signs of the failed open. */
  933. memset ( priv, 0, sizeof ( *priv ) );
  934. DBG ( "%s: %s\n", dbg, strerror ( rc ) );
  935. return ( rc );
  936. }
  937. /*
  938. * This function allows a driver to process events during operation.
  939. *
  940. * @v netdev Device being polled.
  941. *
  942. * This is called periodically by iPXE to let the driver check the status of
  943. * transmitted packets and to allow the driver to check for received packets.
  944. * This is a iPXE Network Device Driver API function.
  945. */
  946. static void myri10ge_net_poll ( struct net_device *netdev )
  947. {
  948. struct io_buffer *iob;
  949. struct io_buffer *replacement;
  950. struct myri10ge_dma_buffers *dma;
  951. struct myri10ge_private *priv;
  952. unsigned int length;
  953. unsigned int orig_receives_posted;
  954. DBGP ( "myri10ge_net_poll\n" );
  955. priv = myri10ge_priv ( netdev );
  956. dma = priv->dma;
  957. /* Process any pending interrupt. */
  958. myri10ge_interrupt_handler ( netdev );
  959. /* Pass up received frames, but limit ourselves to receives posted
  960. before this function was called, so we cannot livelock if
  961. receives are arriving faster than we process them. */
  962. orig_receives_posted = priv->receives_posted;
  963. while ( priv->receives_done != orig_receives_posted ) {
  964. /* Stop if there is no pending receive. */
  965. length = ntohs ( dma->receive_completion
  966. [priv->receives_done
  967. & MYRI10GE_RECEIVE_COMPLETION_WRAP]
  968. .length );
  969. if ( length == 0 )
  970. break;
  971. /* Allocate a replacement buffer. If none is available,
  972. stop passing up packets until a buffer is available.
  973. Reserve 2 extra bytes at the start of packets, since
  974. the firmware always skips the first 2 bytes of the buffer
  975. so TCP headers will be aligned. */
  976. replacement = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
  977. if ( !replacement ) {
  978. DBG ( "NO RX BUF\n" );
  979. break;
  980. }
  981. iob_reserve ( replacement, MXGEFW_PAD );
  982. /* Pass up the received frame. */
  983. iob = priv->receive_iob[priv->receives_done
  984. & MYRI10GE_RECEIVE_WRAP];
  985. iob_put ( iob, length );
  986. netdev_rx ( netdev, iob );
  987. /* We have consumed the packet, so clear the receive
  988. notification. */
  989. dma->receive_completion [priv->receives_done
  990. & MYRI10GE_RECEIVE_COMPLETION_WRAP]
  991. .length = 0;
  992. wmb();
  993. /* Replace the passed-up I/O buffer. */
  994. myri10ge_post_receive ( priv, replacement );
  995. ++priv->receives_done;
  996. DBG2_RINGS ( priv );
  997. }
  998. }
  999. /*
  1000. * This transmits a packet.
  1001. *
  1002. * @v netdev Device to transmit from.
  1003. * @v iobuf Data to transmit.
  1004. * @ret rc Non-zero if failed to transmit.
  1005. *
  1006. * This is a iPXE Network Driver API function.
  1007. */
  1008. static int myri10ge_net_transmit ( struct net_device *netdev,
  1009. struct io_buffer *iobuf )
  1010. {
  1011. mcp_kreq_ether_send_t *kreq;
  1012. size_t len;
  1013. struct myri10ge_private *priv;
  1014. uint32 transmits_posted;
  1015. DBGP ( "myri10ge_net_transmit\n" );
  1016. priv = myri10ge_priv ( netdev );
  1017. /* Confirm space in the send ring. */
  1018. transmits_posted = priv->transmits_posted;
  1019. if ( transmits_posted - priv->transmits_done
  1020. > MYRI10GE_TRANSMIT_WRAP ) {
  1021. DBG ( "TX ring full\n" );
  1022. return -ENOBUFS;
  1023. }
  1024. DBG2 ( "TX %p+%zd ", iobuf->data, iob_len ( iobuf ) );
  1025. DBG2_HD ( iobuf->data, 14 );
  1026. /* Record the packet being transmitted, so we can later report
  1027. send completion. */
  1028. priv->transmit_iob[transmits_posted & MYRI10GE_TRANSMIT_WRAP] = iobuf;
  1029. /* Copy and pad undersized frames, because the NIC does not pad,
  1030. and we would rather copy small frames than do a gather. */
  1031. len = iob_len ( iobuf );
  1032. if ( len < ETH_ZLEN ) {
  1033. iob_pad ( iobuf, ETH_ZLEN );
  1034. len = ETH_ZLEN;
  1035. }
  1036. /* Enqueue the packet by writing a descriptor to the NIC.
  1037. This is a bit tricky because the HW requires 32-bit writes,
  1038. but the structure has smaller fields. */
  1039. kreq = &priv->transmit_ring[transmits_posted
  1040. & priv->transmit_ring_wrap];
  1041. kreq->addr_high = 0;
  1042. kreq->addr_low = htonl ( virt_to_bus ( iobuf->data ) );
  1043. ( ( uint32 * ) kreq ) [2] = htonl (
  1044. 0x0000 << 16 /* pseudo_header_offset */
  1045. | ( len & 0xFFFF ) /* length */
  1046. );
  1047. wmb();
  1048. ( ( uint32 * ) kreq ) [3] = htonl (
  1049. 0x00 << 24 /* pad */
  1050. | 0x01 << 16 /* rdma_count */
  1051. | 0x00 << 8 /* cksum_offset */
  1052. | ( MXGEFW_FLAGS_SMALL
  1053. | MXGEFW_FLAGS_FIRST
  1054. | MXGEFW_FLAGS_NO_TSO ) /* flags */
  1055. );
  1056. wmb();
  1057. /* Mark the slot as consumed and return. */
  1058. priv->transmits_posted = ++transmits_posted;
  1059. DBG2_RINGS ( priv );
  1060. return 0;
  1061. }
  1062. static struct pci_device_id myri10ge_nics[] = {
  1063. /* Each of these macros must be a single line to satisfy a script. */
  1064. PCI_ROM ( 0x14c1, 0x0008, "myri10ge", "Myricom 10Gb Ethernet Adapter", 0 ) ,
  1065. };
  1066. struct pci_driver myri10ge_driver __pci_driver = {
  1067. .ids = myri10ge_nics,
  1068. .id_count = ( sizeof ( myri10ge_nics ) / sizeof ( myri10ge_nics[0] ) ) ,
  1069. .probe = myri10ge_pci_probe,
  1070. .remove = myri10ge_pci_remove
  1071. };
  1072. /*
  1073. * Local variables:
  1074. * c-basic-offset: 8
  1075. * c-indent-level: 8
  1076. * tab-width: 8
  1077. * End:
  1078. */