Sfoglia il codice sorgente

Added an almost obscene amount of debugging and assertion code while
tracking down a bug that turned out to be a free_iob() used where I
needed a netdev_tx_complete().  This left the freed I/O buffer on the
net device's TX list, with bad, bad consequences later.

Also fixed the bug in question.
tags/v0.9.3
Michael Brown 16 anni fa
parent
commit
267a4483ab
3 file modificati con 64 aggiunte e 16 eliminazioni
  1. 15
    7
      src/drivers/net/ipoib.c
  2. 34
    2
      src/drivers/net/mlx_ipoib/mt25218.c
  3. 15
    7
      src/net/infiniband.c

+ 15
- 7
src/drivers/net/ipoib.c Vedi File

@@ -47,22 +47,22 @@ extern struct ib_address_vector hack_ipoib_bcast_av;
47 47
 #define IPOIB_MTU 2048
48 48
 
49 49
 /** Number of IPoIB data send work queue entries */
50
-#define IPOIB_DATA_NUM_SEND_WQES 4
50
+#define IPOIB_DATA_NUM_SEND_WQES 2
51 51
 
52 52
 /** Number of IPoIB data receive work queue entries */
53
-#define IPOIB_DATA_NUM_RECV_WQES 4
53
+#define IPOIB_DATA_NUM_RECV_WQES 2
54 54
 
55 55
 /** Number of IPoIB data completion entries */
56
-#define IPOIB_DATA_NUM_CQES 8
56
+#define IPOIB_DATA_NUM_CQES 32
57 57
 
58 58
 /** Number of IPoIB metadata send work queue entries */
59
-#define IPOIB_META_NUM_SEND_WQES 4
59
+#define IPOIB_META_NUM_SEND_WQES 2
60 60
 
61 61
 /** Number of IPoIB metadata receive work queue entries */
62
-#define IPOIB_META_NUM_RECV_WQES 4
62
+#define IPOIB_META_NUM_RECV_WQES 2
63 63
 
64 64
 /** Number of IPoIB metadata completion entries */
65
-#define IPOIB_META_NUM_CQES 8
65
+#define IPOIB_META_NUM_CQES 32
66 66
 
67 67
 /** An IPoIB queue set */
68 68
 struct ipoib_queue_set {
@@ -205,6 +205,14 @@ static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) {
205 205
 	}
206 206
 
207 207
 	/* Strip off IPoIB header */
208
+	int len = iob_len ( iobuf );
209
+	DBG ( "WTF iob_len = %zd\n", len );
210
+	if ( len < 0 ) {
211
+		DBG_HD ( iobuf, sizeof ( *iobuf ) );
212
+		DBG ( "locking\n" );
213
+		while ( 1 ) {}
214
+	}
215
+
208 216
 	iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
209 217
 
210 218
 	/* Hand off to network-layer protocol */
@@ -492,7 +500,7 @@ static int ipoib_transmit ( struct net_device *netdev,
492 500
 			/* No path entry - get path record */
493 501
 			rc = ipoib_get_path_record ( ipoib,
494 502
 						     &ipoib_pshdr->peer.gid );
495
-			free_iob ( iobuf );
503
+			netdev_tx_complete ( netdev, iobuf );
496 504
 			return rc;
497 505
 		}
498 506
 		av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn );

+ 34
- 2
src/drivers/net/mlx_ipoib/mt25218.c Vedi File

@@ -398,6 +398,9 @@ static int arbel_create_cq ( struct ib_device *ibdev,
398 398
 		goto err_sw2hw_cq;
399 399
 	}
400 400
 
401
+	DBGC ( arbel, "Arbel %p CQN %#lx ring at [%p,%p)\n",
402
+	       arbel, cq->cqn, arbel_cq->cqe,
403
+	       ( ( ( void * ) arbel_cq->cqe ) + arbel_cq->cqe_size ) );
401 404
 	cq->dev_priv = arbel_cq;
402 405
 	return 0;
403 406
 
@@ -650,6 +653,12 @@ static int arbel_create_qp ( struct ib_device *ibdev,
650 653
 		goto err_rtr2rts_qpee;
651 654
 	}
652 655
 
656
+	DBGC ( arbel, "Arbel %p QPN %#lx send ring at [%p,%p)\n",
657
+	       arbel, qp->qpn, arbel_qp->send.wqe,
658
+	       ( ( (void *) arbel_qp->send.wqe ) + arbel_qp->send.wqe_size ) );
659
+	DBGC ( arbel, "Arbel %p QPN %#lx receive ring at [%p,%p)\n",
660
+	       arbel, qp->qpn, arbel_qp->recv.wqe,
661
+	       ( ( (void *) arbel_qp->recv.wqe ) + arbel_qp->recv.wqe_size ) );
653 662
 	qp->dev_priv = arbel_qp;
654 663
 	return 0;
655 664
 
@@ -904,6 +913,7 @@ static int arbel_complete ( struct ib_device *ibdev,
904 913
 	struct arbel_queue_pair *arbel_qp;
905 914
 	struct arbel_send_work_queue *arbel_send_wq;
906 915
 	struct arbel_recv_work_queue *arbel_recv_wq;
916
+	struct arbelprm_recv_wqe *recv_wqe;
907 917
 	struct io_buffer *iobuf;
908 918
 	ib_completer_t complete;
909 919
 	unsigned int opcode;
@@ -915,7 +925,6 @@ static int arbel_complete ( struct ib_device *ibdev,
915 925
 
916 926
 	/* Parse completion */
917 927
 	memset ( &completion, 0, sizeof ( completion ) );
918
-	completion.len = MLX_GET ( &cqe->normal, byte_cnt );
919 928
 	qpn = MLX_GET ( &cqe->normal, my_qpn );
920 929
 	is_send = MLX_GET ( &cqe->normal, s );
921 930
 	wqe_adr = ( MLX_GET ( &cqe->normal, wqe_adr ) << 6 );
@@ -946,10 +955,12 @@ static int arbel_complete ( struct ib_device *ibdev,
946 955
 		arbel_send_wq = &arbel_qp->send;
947 956
 		wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) /
948 957
 			    sizeof ( arbel_send_wq->wqe[0] ) );
958
+		assert ( wqe_idx < qp->send.num_wqes );
949 959
 	} else {
950 960
 		arbel_recv_wq = &arbel_qp->recv;
951 961
 		wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) /
952 962
 			    sizeof ( arbel_recv_wq->wqe[0] ) );
963
+		assert ( wqe_idx < qp->recv.num_wqes );
953 964
 	}
954 965
 
955 966
 	/* Identify I/O buffer */
@@ -961,6 +972,27 @@ static int arbel_complete ( struct ib_device *ibdev,
961 972
 	}
962 973
 	wq->iobufs[wqe_idx] = NULL;
963 974
 
975
+	/* Fill in length for received packets */
976
+	if ( ! is_send ) {
977
+		completion.len = MLX_GET ( &cqe->normal, byte_cnt );
978
+		recv_wqe = &arbel_recv_wq->wqe[wqe_idx].recv;
979
+		assert ( MLX_GET ( &recv_wqe->data[0], local_address_l ) ==
980
+			 virt_to_bus ( iobuf->data ) );
981
+		assert ( MLX_GET ( &recv_wqe->data[0], byte_count ) ==
982
+			 iob_tailroom ( iobuf ) );
983
+		DBG ( "CPQ %lx QPN %lx WQE %x\n", cq->cqn, qp->qpn, wqe_idx );
984
+		//		DBG_HD ( iobuf, sizeof ( *iobuf ) );
985
+		MLX_FILL_1 ( &recv_wqe->data[0], 0, byte_count, 0 );
986
+		MLX_FILL_1 ( &recv_wqe->data[0], 1,
987
+			     l_key, ARBEL_INVALID_LKEY );
988
+		if ( completion.len > iob_tailroom ( iobuf ) ) {
989
+			DBGC ( arbel, "Arbel %p CQN %lx QPN %lx IDX %x "
990
+			       "overlength received packet length %zd\n",
991
+			       arbel, cq->cqn, qpn, wqe_idx, completion.len );
992
+			return -EIO;
993
+		}
994
+	}
995
+
964 996
 	/* Pass off to caller's completion handler */
965 997
 	complete = ( is_send ? complete_send : complete_recv );
966 998
 	complete ( ibdev, qp, &completion, iobuf );
@@ -1252,7 +1284,7 @@ static int arbel_get_sm_lid ( struct arbel *arbel,
1252 1284
 	return 0;
1253 1285
 }
1254 1286
 
1255
-static int arbel_get_pkey ( struct arbel *arbel, unsigned long *pkey ) {
1287
+static int arbel_get_pkey ( struct arbel *arbel, unsigned int *pkey ) {
1256 1288
 	struct ib_mad_pkey_table pkey_table;
1257 1289
 	int rc;
1258 1290
 

+ 15
- 7
src/net/infiniband.c Vedi File

@@ -64,8 +64,8 @@ struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev,
64 64
 		return NULL;
65 65
 	}
66 66
 
67
-	DBGC ( ibdev, "IBDEV %p created completion queue %#lx\n",
68
-	       ibdev, cq->cqn );
67
+	DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
68
+	       "with CQN %#lx\n", ibdev, num_cqes, cq, cq->dev_priv, cq->cqn );
69 69
 	return cq;
70 70
 }
71 71
 
@@ -102,14 +102,16 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
102 102
 				      struct ib_completion_queue *recv_cq,
103 103
 				      unsigned long qkey ) {
104 104
 	struct ib_queue_pair *qp;
105
+	size_t total_size;
105 106
 	int rc;
106 107
 
107 108
 	DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
108 109
 
109 110
 	/* Allocate and initialise data structure */
110
-	qp = zalloc ( sizeof ( *qp ) +
111
-		      ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
112
-		      ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
111
+	total_size = ( sizeof ( *qp ) +
112
+		       ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
113
+		       ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
114
+	qp = zalloc ( total_size );
113 115
 	if ( ! qp )
114 116
 		return NULL;
115 117
 	qp->qkey = qkey;
@@ -134,8 +136,14 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
134 136
 		return NULL;
135 137
 	}
136 138
 
137
-	DBGC ( ibdev, "IBDEV %p created queue pair %#lx\n",
138
-	       ibdev, qp->qpn );
139
+	DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
140
+	       ibdev, qp, qp->dev_priv, qp->qpn );
141
+	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
142
+	       ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
143
+	       qp->recv.iobufs );
144
+	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
145
+	       ibdev, qp->qpn, num_send_wqes, qp->recv.iobufs,
146
+	       ( ( ( void * ) qp ) + total_size ) );
139 147
 	return qp;
140 148
 }
141 149
 

Loading…
Annulla
Salva