Переглянути джерело

[hermon] Use correct alignment for doorbell records

Doorbell records are currently embedded within the completion queue
and receive work queue structures, which are allocated using zalloc()
and so have an alignment guarantee of only sizeof(void*), i.e. four
bytes.  This is sufficient for the receive work queue, but not for the
completion queue, which requires an alignment guarantee of eight
bytes.

Though not guaranteed, it so happens that zalloc() will always return
a pointer that is exactly four bytes above a sixteen-byte boundary.
The completion queue doorbell record is therefore always misaligned,
and the value passed to the hardware via SW2HW_CQ is actually always
pointing to the page_offset value within the MTT descriptor (which
directly precedes the inline doorbell record).  Provided that the page
offset is greater than 0x100, this looks to the hardware like an
update_ci value of greater than 0x010000 (taking into account
endianness differences), and so the hardware will happily deliver more
than 0x010000 completions before stopping.  Hence this problem is
rarely observable.

Fix by allocating the doorbell records separately and using the
correct alignment constraints.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
tags/v1.20.1
Michael Brown 14 роки тому
джерело
коміт
46c7f99c66
2 змінених файлів з 37 додано та 11 видалено
  1. 33
    7
      src/drivers/infiniband/hermon.c
  2. 4
    4
      src/drivers/infiniband/hermon.h

+ 33
- 7
src/drivers/infiniband/hermon.c Переглянути файл

@@ -770,6 +770,15 @@ static int hermon_create_cq ( struct ib_device *ibdev,
770 770
 		goto err_hermon_cq;
771 771
 	}
772 772
 
773
+	/* Allocate doorbell */
774
+	hermon_cq->doorbell = malloc_dma ( sizeof ( hermon_cq->doorbell[0] ),
775
+					   sizeof ( hermon_cq->doorbell[0] ) );
776
+	if ( ! hermon_cq->doorbell ) {
777
+		rc = -ENOMEM;
778
+		goto err_doorbell;
779
+	}
780
+	memset ( hermon_cq->doorbell, 0, sizeof ( hermon_cq->doorbell[0] ) );
781
+
773 782
 	/* Allocate completion queue itself */
774 783
 	hermon_cq->cqe_size = ( cq->num_cqes * sizeof ( hermon_cq->cqe[0] ) );
775 784
 	hermon_cq->cqe = malloc_dma ( hermon_cq->cqe_size,
@@ -802,7 +811,7 @@ static int hermon_create_cq ( struct ib_device *ibdev,
802 811
 	MLX_FILL_1 ( &cqctx, 7, mtt_base_addr_l,
803 812
 		     ( hermon_cq->mtt.mtt_base_addr >> 3 ) );
804 813
 	MLX_FILL_1 ( &cqctx, 15, db_record_addr_l,
805
-		     ( virt_to_phys ( &hermon_cq->doorbell ) >> 3 ) );
814
+		     ( virt_to_phys ( hermon_cq->doorbell ) >> 3 ) );
806 815
 	if ( ( rc = hermon_cmd_sw2hw_cq ( hermon, cq->cqn, &cqctx ) ) != 0 ) {
807 816
 		DBGC ( hermon, "Hermon %p CQN %#lx SW2HW_CQ failed: %s\n",
808 817
 		       hermon, cq->cqn, strerror ( rc ) );
@@ -812,7 +821,7 @@ static int hermon_create_cq ( struct ib_device *ibdev,
812 821
 	DBGC ( hermon, "Hermon %p CQN %#lx ring [%08lx,%08lx), doorbell "
813 822
 	       "%08lx\n", hermon, cq->cqn, virt_to_phys ( hermon_cq->cqe ),
814 823
 	       ( virt_to_phys ( hermon_cq->cqe ) + hermon_cq->cqe_size ),
815
-	       virt_to_phys ( &hermon_cq->doorbell ) );
824
+	       virt_to_phys ( hermon_cq->doorbell ) );
816 825
 	ib_cq_set_drvdata ( cq, hermon_cq );
817 826
 	return 0;
818 827
 
@@ -821,6 +830,8 @@ static int hermon_create_cq ( struct ib_device *ibdev,
821 830
  err_alloc_mtt:
822 831
 	free_dma ( hermon_cq->cqe, hermon_cq->cqe_size );
823 832
  err_cqe:
833
+	free_dma ( hermon_cq->doorbell, sizeof ( hermon_cq->doorbell[0] ) );
834
+ err_doorbell:
824 835
 	free ( hermon_cq );
825 836
  err_hermon_cq:
826 837
 	hermon_bitmask_free ( hermon->cq_inuse, cqn_offset, 1 );
@@ -855,6 +866,7 @@ static void hermon_destroy_cq ( struct ib_device *ibdev,
855 866
 
856 867
 	/* Free memory */
857 868
 	free_dma ( hermon_cq->cqe, hermon_cq->cqe_size );
869
+	free_dma ( hermon_cq->doorbell, sizeof ( hermon_cq->doorbell[0] ) );
858 870
 	free ( hermon_cq );
859 871
 
860 872
 	/* Mark queue number as free */
@@ -1013,7 +1025,16 @@ static int hermon_create_qp ( struct ib_device *ibdev,
1013 1025
 		goto err_hermon_qp;
1014 1026
 	}
1015 1027
 
1016
-	/* Calculate doorbell address */
1028
+	/* Allocate doorbells */
1029
+	hermon_qp->recv.doorbell =
1030
+		malloc_dma ( sizeof ( hermon_qp->recv.doorbell[0] ),
1031
+			     sizeof ( hermon_qp->recv.doorbell[0] ) );
1032
+	if ( ! hermon_qp->recv.doorbell ) {
1033
+		rc = -ENOMEM;
1034
+		goto err_recv_doorbell;
1035
+	}
1036
+	memset ( hermon_qp->recv.doorbell, 0,
1037
+		 sizeof ( hermon_qp->recv.doorbell[0] ) );
1017 1038
 	hermon_qp->send.doorbell =
1018 1039
 		( hermon->uar + HERMON_UAR_NON_EQ_PAGE * HERMON_PAGE_SIZE +
1019 1040
 		  HERMON_DB_POST_SND_OFFSET );
@@ -1072,7 +1093,7 @@ static int hermon_create_qp ( struct ib_device *ibdev,
1072 1093
 		     ( hermon_qp->mtt.page_offset >> 6 ) );
1073 1094
 	MLX_FILL_1 ( &qpctx, 41, qpc_eec_data.cqn_rcv, qp->recv.cq->cqn );
1074 1095
 	MLX_FILL_1 ( &qpctx, 43, qpc_eec_data.db_record_addr_l,
1075
-		     ( virt_to_phys ( &hermon_qp->recv.doorbell ) >> 2 ) );
1096
+		     ( virt_to_phys ( hermon_qp->recv.doorbell ) >> 2 ) );
1076 1097
 	MLX_FILL_1 ( &qpctx, 53, qpc_eec_data.mtt_base_addr_l,
1077 1098
 		     ( hermon_qp->mtt.mtt_base_addr >> 3 ) );
1078 1099
 	if ( ( rc = hermon_cmd_rst2init_qp ( hermon, qp->qpn,
@@ -1094,7 +1115,7 @@ static int hermon_create_qp ( struct ib_device *ibdev,
1094 1115
 	       virt_to_phys ( hermon_qp->recv.wqe ),
1095 1116
 	       ( virt_to_phys ( hermon_qp->recv.wqe ) +
1096 1117
 		 hermon_qp->recv.wqe_size ),
1097
-	       virt_to_phys ( &hermon_qp->recv.doorbell ) );
1118
+	       virt_to_phys ( hermon_qp->recv.doorbell ) );
1098 1119
 	DBGC ( hermon, "Hermon %p QPN %#lx send CQN %#lx receive CQN %#lx\n",
1099 1120
 	       hermon, qp->qpn, qp->send.cq->cqn, qp->recv.cq->cqn );
1100 1121
 	ib_qp_set_drvdata ( qp, hermon_qp );
@@ -1106,6 +1127,9 @@ static int hermon_create_qp ( struct ib_device *ibdev,
1106 1127
  err_alloc_mtt:
1107 1128
 	free_dma ( hermon_qp->wqe, hermon_qp->wqe_size );
1108 1129
  err_alloc_wqe:
1130
+	free_dma ( hermon_qp->recv.doorbell,
1131
+		   sizeof ( hermon_qp->recv.doorbell[0] ) );
1132
+ err_recv_doorbell:
1109 1133
 	free ( hermon_qp );
1110 1134
  err_hermon_qp:
1111 1135
 	hermon_free_qpn ( ibdev, qp );
@@ -1215,6 +1239,8 @@ static void hermon_destroy_qp ( struct ib_device *ibdev,
1215 1239
 
1216 1240
 	/* Free memory */
1217 1241
 	free_dma ( hermon_qp->wqe, hermon_qp->wqe_size );
1242
+	free_dma ( hermon_qp->recv.doorbell,
1243
+		   sizeof ( hermon_qp->recv.doorbell[0] ) );
1218 1244
 	free ( hermon_qp );
1219 1245
 
1220 1246
 	/* Mark queue number as free */
@@ -1482,7 +1508,7 @@ static int hermon_post_recv ( struct ib_device *ibdev,
1482 1508
 
1483 1509
 	/* Update doorbell record */
1484 1510
 	barrier();
1485
-	MLX_FILL_1 ( &hermon_recv_wq->doorbell, 0, receive_wqe_counter,
1511
+	MLX_FILL_1 ( hermon_recv_wq->doorbell, 0, receive_wqe_counter,
1486 1512
 		     ( wq->next_idx & 0xffff ) );
1487 1513
 
1488 1514
 	return 0;
@@ -1631,7 +1657,7 @@ static void hermon_poll_cq ( struct ib_device *ibdev,
1631 1657
 		cq->next_idx++;
1632 1658
 
1633 1659
 		/* Update doorbell record */
1634
-		MLX_FILL_1 ( &hermon_cq->doorbell, 0, update_ci,
1660
+		MLX_FILL_1 ( hermon_cq->doorbell, 0, update_ci,
1635 1661
 			     ( cq->next_idx & 0x00ffffffUL ) );
1636 1662
 	}
1637 1663
 }

+ 4
- 4
src/drivers/infiniband/hermon.h Переглянути файл

@@ -408,8 +408,8 @@ struct hermon_recv_work_queue {
408 408
 	union hermon_recv_wqe *wqe;
409 409
 	/** Size of work queue */
410 410
 	size_t wqe_size;
411
-	/** Doorbell */
412
-	struct hermonprm_qp_db_record doorbell __attribute__ (( aligned (4) ));
411
+	/** Doorbell record */
412
+	struct hermonprm_qp_db_record *doorbell;
413 413
 };
414 414
 
415 415
 /** Number of special queue pairs */
@@ -469,8 +469,8 @@ struct hermon_completion_queue {
469 469
 	size_t cqe_size;
470 470
 	/** MTT descriptor */
471 471
 	struct hermon_mtt mtt;
472
-	/** Doorbell */
473
-	struct hermonprm_cq_db_record doorbell __attribute__ (( aligned (8) ));
472
+	/** Doorbell record */
473
+	struct hermonprm_cq_db_record *doorbell;
474 474
 };
475 475
 
476 476
 /** Maximum number of allocatable event queues

Завантаження…
Відмінити
Зберегти