浏览代码

Added an almost obscene amount of debugging and assertion code while
tracking down a bug that turned out to be a free_iob() used where I
needed a netdev_tx_complete().  This left the freed I/O buffer on the
net device's TX list, with bad, bad consequences later.

Also fixed the bug in question.
tags/v0.9.3
Michael Brown 18 年前
父节点
当前提交
267a4483ab
共有 3 个文件被更改,包括 64 次插入和 16 次删除
  1. 15
    7
      src/drivers/net/ipoib.c
  2. 34
    2
      src/drivers/net/mlx_ipoib/mt25218.c
  3. 15
    7
      src/net/infiniband.c

+ 15
- 7
src/drivers/net/ipoib.c 查看文件

47
 #define IPOIB_MTU 2048
47
 #define IPOIB_MTU 2048
48
 
48
 
49
 /** Number of IPoIB data send work queue entries */
49
 /** Number of IPoIB data send work queue entries */
50
-#define IPOIB_DATA_NUM_SEND_WQES 4
50
+#define IPOIB_DATA_NUM_SEND_WQES 2
51
 
51
 
52
 /** Number of IPoIB data receive work queue entries */
52
 /** Number of IPoIB data receive work queue entries */
53
-#define IPOIB_DATA_NUM_RECV_WQES 4
53
+#define IPOIB_DATA_NUM_RECV_WQES 2
54
 
54
 
55
 /** Number of IPoIB data completion entries */
55
 /** Number of IPoIB data completion entries */
56
-#define IPOIB_DATA_NUM_CQES 8
56
+#define IPOIB_DATA_NUM_CQES 32
57
 
57
 
58
 /** Number of IPoIB metadata send work queue entries */
58
 /** Number of IPoIB metadata send work queue entries */
59
-#define IPOIB_META_NUM_SEND_WQES 4
59
+#define IPOIB_META_NUM_SEND_WQES 2
60
 
60
 
61
 /** Number of IPoIB metadata receive work queue entries */
61
 /** Number of IPoIB metadata receive work queue entries */
62
-#define IPOIB_META_NUM_RECV_WQES 4
62
+#define IPOIB_META_NUM_RECV_WQES 2
63
 
63
 
64
 /** Number of IPoIB metadata completion entries */
64
 /** Number of IPoIB metadata completion entries */
65
-#define IPOIB_META_NUM_CQES 8
65
+#define IPOIB_META_NUM_CQES 32
66
 
66
 
67
 /** An IPoIB queue set */
67
 /** An IPoIB queue set */
68
 struct ipoib_queue_set {
68
 struct ipoib_queue_set {
205
 	}
205
 	}
206
 
206
 
207
 	/* Strip off IPoIB header */
207
 	/* Strip off IPoIB header */
208
+	int len = iob_len ( iobuf );
209
+	DBG ( "WTF iob_len = %zd\n", len );
210
+	if ( len < 0 ) {
211
+		DBG_HD ( iobuf, sizeof ( *iobuf ) );
212
+		DBG ( "locking\n" );
213
+		while ( 1 ) {}
214
+	}
215
+
208
 	iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
216
 	iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
209
 
217
 
210
 	/* Hand off to network-layer protocol */
218
 	/* Hand off to network-layer protocol */
492
 			/* No path entry - get path record */
500
 			/* No path entry - get path record */
493
 			rc = ipoib_get_path_record ( ipoib,
501
 			rc = ipoib_get_path_record ( ipoib,
494
 						     &ipoib_pshdr->peer.gid );
502
 						     &ipoib_pshdr->peer.gid );
495
-			free_iob ( iobuf );
503
+			netdev_tx_complete ( netdev, iobuf );
496
 			return rc;
504
 			return rc;
497
 		}
505
 		}
498
 		av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn );
506
 		av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn );

+ 34
- 2
src/drivers/net/mlx_ipoib/mt25218.c 查看文件

398
 		goto err_sw2hw_cq;
398
 		goto err_sw2hw_cq;
399
 	}
399
 	}
400
 
400
 
401
+	DBGC ( arbel, "Arbel %p CQN %#lx ring at [%p,%p)\n",
402
+	       arbel, cq->cqn, arbel_cq->cqe,
403
+	       ( ( ( void * ) arbel_cq->cqe ) + arbel_cq->cqe_size ) );
401
 	cq->dev_priv = arbel_cq;
404
 	cq->dev_priv = arbel_cq;
402
 	return 0;
405
 	return 0;
403
 
406
 
650
 		goto err_rtr2rts_qpee;
653
 		goto err_rtr2rts_qpee;
651
 	}
654
 	}
652
 
655
 
656
+	DBGC ( arbel, "Arbel %p QPN %#lx send ring at [%p,%p)\n",
657
+	       arbel, qp->qpn, arbel_qp->send.wqe,
658
+	       ( ( (void *) arbel_qp->send.wqe ) + arbel_qp->send.wqe_size ) );
659
+	DBGC ( arbel, "Arbel %p QPN %#lx receive ring at [%p,%p)\n",
660
+	       arbel, qp->qpn, arbel_qp->recv.wqe,
661
+	       ( ( (void *) arbel_qp->recv.wqe ) + arbel_qp->recv.wqe_size ) );
653
 	qp->dev_priv = arbel_qp;
662
 	qp->dev_priv = arbel_qp;
654
 	return 0;
663
 	return 0;
655
 
664
 
904
 	struct arbel_queue_pair *arbel_qp;
913
 	struct arbel_queue_pair *arbel_qp;
905
 	struct arbel_send_work_queue *arbel_send_wq;
914
 	struct arbel_send_work_queue *arbel_send_wq;
906
 	struct arbel_recv_work_queue *arbel_recv_wq;
915
 	struct arbel_recv_work_queue *arbel_recv_wq;
916
+	struct arbelprm_recv_wqe *recv_wqe;
907
 	struct io_buffer *iobuf;
917
 	struct io_buffer *iobuf;
908
 	ib_completer_t complete;
918
 	ib_completer_t complete;
909
 	unsigned int opcode;
919
 	unsigned int opcode;
915
 
925
 
916
 	/* Parse completion */
926
 	/* Parse completion */
917
 	memset ( &completion, 0, sizeof ( completion ) );
927
 	memset ( &completion, 0, sizeof ( completion ) );
918
-	completion.len = MLX_GET ( &cqe->normal, byte_cnt );
919
 	qpn = MLX_GET ( &cqe->normal, my_qpn );
928
 	qpn = MLX_GET ( &cqe->normal, my_qpn );
920
 	is_send = MLX_GET ( &cqe->normal, s );
929
 	is_send = MLX_GET ( &cqe->normal, s );
921
 	wqe_adr = ( MLX_GET ( &cqe->normal, wqe_adr ) << 6 );
930
 	wqe_adr = ( MLX_GET ( &cqe->normal, wqe_adr ) << 6 );
946
 		arbel_send_wq = &arbel_qp->send;
955
 		arbel_send_wq = &arbel_qp->send;
947
 		wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) /
956
 		wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) /
948
 			    sizeof ( arbel_send_wq->wqe[0] ) );
957
 			    sizeof ( arbel_send_wq->wqe[0] ) );
958
+		assert ( wqe_idx < qp->send.num_wqes );
949
 	} else {
959
 	} else {
950
 		arbel_recv_wq = &arbel_qp->recv;
960
 		arbel_recv_wq = &arbel_qp->recv;
951
 		wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) /
961
 		wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) /
952
 			    sizeof ( arbel_recv_wq->wqe[0] ) );
962
 			    sizeof ( arbel_recv_wq->wqe[0] ) );
963
+		assert ( wqe_idx < qp->recv.num_wqes );
953
 	}
964
 	}
954
 
965
 
955
 	/* Identify I/O buffer */
966
 	/* Identify I/O buffer */
961
 	}
972
 	}
962
 	wq->iobufs[wqe_idx] = NULL;
973
 	wq->iobufs[wqe_idx] = NULL;
963
 
974
 
975
+	/* Fill in length for received packets */
976
+	if ( ! is_send ) {
977
+		completion.len = MLX_GET ( &cqe->normal, byte_cnt );
978
+		recv_wqe = &arbel_recv_wq->wqe[wqe_idx].recv;
979
+		assert ( MLX_GET ( &recv_wqe->data[0], local_address_l ) ==
980
+			 virt_to_bus ( iobuf->data ) );
981
+		assert ( MLX_GET ( &recv_wqe->data[0], byte_count ) ==
982
+			 iob_tailroom ( iobuf ) );
983
+		DBG ( "CPQ %lx QPN %lx WQE %x\n", cq->cqn, qp->qpn, wqe_idx );
984
+		//		DBG_HD ( iobuf, sizeof ( *iobuf ) );
985
+		MLX_FILL_1 ( &recv_wqe->data[0], 0, byte_count, 0 );
986
+		MLX_FILL_1 ( &recv_wqe->data[0], 1,
987
+			     l_key, ARBEL_INVALID_LKEY );
988
+		if ( completion.len > iob_tailroom ( iobuf ) ) {
989
+			DBGC ( arbel, "Arbel %p CQN %lx QPN %lx IDX %x "
990
+			       "overlength received packet length %zd\n",
991
+			       arbel, cq->cqn, qpn, wqe_idx, completion.len );
992
+			return -EIO;
993
+		}
994
+	}
995
+
964
 	/* Pass off to caller's completion handler */
996
 	/* Pass off to caller's completion handler */
965
 	complete = ( is_send ? complete_send : complete_recv );
997
 	complete = ( is_send ? complete_send : complete_recv );
966
 	complete ( ibdev, qp, &completion, iobuf );
998
 	complete ( ibdev, qp, &completion, iobuf );
1252
 	return 0;
1284
 	return 0;
1253
 }
1285
 }
1254
 
1286
 
1255
-static int arbel_get_pkey ( struct arbel *arbel, unsigned long *pkey ) {
1287
+static int arbel_get_pkey ( struct arbel *arbel, unsigned int *pkey ) {
1256
 	struct ib_mad_pkey_table pkey_table;
1288
 	struct ib_mad_pkey_table pkey_table;
1257
 	int rc;
1289
 	int rc;
1258
 
1290
 

+ 15
- 7
src/net/infiniband.c 查看文件

64
 		return NULL;
64
 		return NULL;
65
 	}
65
 	}
66
 
66
 
67
-	DBGC ( ibdev, "IBDEV %p created completion queue %#lx\n",
68
-	       ibdev, cq->cqn );
67
+	DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
68
+	       "with CQN %#lx\n", ibdev, num_cqes, cq, cq->dev_priv, cq->cqn );
69
 	return cq;
69
 	return cq;
70
 }
70
 }
71
 
71
 
102
 				      struct ib_completion_queue *recv_cq,
102
 				      struct ib_completion_queue *recv_cq,
103
 				      unsigned long qkey ) {
103
 				      unsigned long qkey ) {
104
 	struct ib_queue_pair *qp;
104
 	struct ib_queue_pair *qp;
105
+	size_t total_size;
105
 	int rc;
106
 	int rc;
106
 
107
 
107
 	DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
108
 	DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
108
 
109
 
109
 	/* Allocate and initialise data structure */
110
 	/* Allocate and initialise data structure */
110
-	qp = zalloc ( sizeof ( *qp ) +
111
-		      ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
112
-		      ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
111
+	total_size = ( sizeof ( *qp ) +
112
+		       ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
113
+		       ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
114
+	qp = zalloc ( total_size );
113
 	if ( ! qp )
115
 	if ( ! qp )
114
 		return NULL;
116
 		return NULL;
115
 	qp->qkey = qkey;
117
 	qp->qkey = qkey;
134
 		return NULL;
136
 		return NULL;
135
 	}
137
 	}
136
 
138
 
137
-	DBGC ( ibdev, "IBDEV %p created queue pair %#lx\n",
138
-	       ibdev, qp->qpn );
139
+	DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
140
+	       ibdev, qp, qp->dev_priv, qp->qpn );
141
+	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
142
+	       ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
143
+	       qp->recv.iobufs );
144
+	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
145
+	       ibdev, qp->qpn, num_send_wqes, qp->recv.iobufs,
146
+	       ( ( ( void * ) qp ) + total_size ) );
139
 	return qp;
147
 	return qp;
140
 }
148
 }
141
 
149
 

正在加载...
取消
保存