Browse Source

[arbel] Optimise ICM layout to reduce overall memory usage

Reduce the amount of ICM space required by ordering the various
allocations in approximately descending order of alignment
requirement, which minimises the padding needed between them.

This saves approximately 512kB of host memory.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
tags/v1.20.1
Michael Brown 13 years ago
parent
commit
144a23a852
2 changed files with 136 additions and 75 deletions
  1. 132
    73
      src/drivers/infiniband/arbel.c
  2. 4
    2
      src/drivers/infiniband/arbel.h

+ 132
- 73
src/drivers/infiniband/arbel.c View File

@@ -2076,7 +2076,6 @@ static int arbel_get_limits ( struct arbel *arbel ) {
2076 2076
 	arbel->limits.reserved_cqs =
2077 2077
 		( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) );
2078 2078
 	arbel->limits.cqc_entry_size = MLX_GET ( &dev_lim, cqc_entry_sz );
2079
-	arbel->limits.reserved_eqs = MLX_GET ( &dev_lim, num_rsvd_eqs );
2080 2079
 	arbel->limits.reserved_mtts =
2081 2080
 		( 1 << MLX_GET ( &dev_lim, log2_rsvd_mtts ) );
2082 2081
 	arbel->limits.mtt_entry_size = MLX_GET ( &dev_lim, mtt_entry_sz );
@@ -2085,8 +2084,33 @@ static int arbel_get_limits ( struct arbel *arbel ) {
2085 2084
 	arbel->limits.mpt_entry_size = MLX_GET ( &dev_lim, mpt_entry_sz );
2086 2085
 	arbel->limits.reserved_rdbs =
2087 2086
 		( 1 << MLX_GET ( &dev_lim, log2_rsvd_rdbs ) );
2087
+	arbel->limits.reserved_eqs = MLX_GET ( &dev_lim, num_rsvd_eqs );
2088 2088
 	arbel->limits.eqc_entry_size = MLX_GET ( &dev_lim, eqc_entry_sz );
2089 2089
 	arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars );
2090
+	arbel->limits.uar_scratch_entry_size =
2091
+		MLX_GET ( &dev_lim, uar_scratch_entry_sz );
2092
+
2093
+	DBGC ( arbel, "Arbel %p reserves %d x %#zx QPC, %d x %#zx EQPC, "
2094
+	       "%d x %#zx SRQC\n", arbel,
2095
+	       arbel->limits.reserved_qps, arbel->limits.qpc_entry_size,
2096
+	       arbel->limits.reserved_qps, arbel->limits.eqpc_entry_size,
2097
+	       arbel->limits.reserved_srqs, arbel->limits.srqc_entry_size );
2098
+	DBGC ( arbel, "Arbel %p reserves %d x %#zx EEC, %d x %#zx EEEC, "
2099
+	       "%d x %#zx CQC\n", arbel,
2100
+	       arbel->limits.reserved_ees, arbel->limits.eec_entry_size,
2101
+	       arbel->limits.reserved_ees, arbel->limits.eeec_entry_size,
2102
+	       arbel->limits.reserved_cqs, arbel->limits.cqc_entry_size );
2103
+	DBGC ( arbel, "Arbel %p reserves %d x %#zx EQC, %d x %#zx MTT, "
2104
+	       "%d x %#zx MPT\n", arbel,
2105
+	       arbel->limits.reserved_eqs, arbel->limits.eqc_entry_size,
2106
+	       arbel->limits.reserved_mtts, arbel->limits.mtt_entry_size,
2107
+	       arbel->limits.reserved_mrws, arbel->limits.mpt_entry_size );
2108
+	DBGC ( arbel, "Arbel %p reserves %d x %#zx RDB, %d x %#zx UAR, "
2109
+	       "%d x %#zx UAR scratchpad\n", arbel,
2110
+	       arbel->limits.reserved_rdbs, ARBEL_RDB_ENTRY_SIZE,
2111
+	       arbel->limits.reserved_uars, ARBEL_PAGE_SIZE,
2112
+	       arbel->limits.reserved_uars,
2113
+	       arbel->limits.uar_scratch_entry_size );
2090 2114
 
2091 2115
 	return 0;
2092 2116
 }
@@ -2120,14 +2144,13 @@ static int arbel_alloc_icm ( struct arbel *arbel,
2120 2144
 	struct arbelprm_virtual_physical_mapping map_icm;
2121 2145
 	union arbelprm_doorbell_record *db_rec;
2122 2146
 	size_t icm_offset = 0;
2123
-	unsigned int log_num_qps, log_num_srqs, log_num_ees, log_num_cqs;
2124
-	unsigned int log_num_mtts, log_num_mpts, log_num_rdbs, log_num_eqs;
2125
-	unsigned int log_num_mcs;
2147
+	unsigned int log_num_uars, log_num_qps, log_num_srqs, log_num_ees;
2148
+	unsigned int log_num_cqs, log_num_mtts, log_num_mpts, log_num_rdbs;
2149
+	unsigned int log_num_eqs, log_num_mcs;
2150
+	size_t db_rec_offset;
2126 2151
 	size_t len;
2127 2152
 	int rc;
2128 2153
 
2129
-	icm_offset = ( ( arbel->limits.reserved_uars + 1 ) << 12 );
2130
-
2131 2154
 	/* Queue pair contexts */
2132 2155
 	log_num_qps = fls ( arbel->limits.reserved_qps +
2133 2156
 			    ARBEL_RSVD_SPECIAL_QPS + ARBEL_MAX_QPS - 1 );
@@ -2138,8 +2161,9 @@ static int arbel_alloc_icm ( struct arbel *arbel,
2138 2161
 		     ( icm_offset >> 7 ),
2139 2162
 		     qpc_eec_cqc_eqc_rdb_parameters.log_num_of_qp,
2140 2163
 		     log_num_qps );
2141
-	DBGC ( arbel, "Arbel %p ICM QPC at [%zx,%zx)\n",
2142
-	       arbel, icm_offset, ( icm_offset + len ) );
2164
+	DBGC ( arbel, "Arbel %p ICM QPC is %d x %#zx at [%zx,%zx)\n",
2165
+	       arbel, ( 1 << log_num_qps ), arbel->limits.qpc_entry_size,
2166
+	       icm_offset, ( icm_offset + len ) );
2143 2167
 	icm_offset += len;
2144 2168
 
2145 2169
 	/* Extended queue pair contexts */
@@ -2148,21 +2172,53 @@ static int arbel_alloc_icm ( struct arbel *arbel,
2148 2172
 	MLX_FILL_1 ( init_hca, 25,
2149 2173
 		     qpc_eec_cqc_eqc_rdb_parameters.eqpc_base_addr_l,
2150 2174
 		     icm_offset );
2151
-	DBGC ( arbel, "Arbel %p ICM EQPC at [%zx,%zx)\n",
2152
-	       arbel, icm_offset, ( icm_offset + len ) );
2175
+	DBGC ( arbel, "Arbel %p ICM EQPC is %d x %#zx at [%zx,%zx)\n",
2176
+	       arbel, ( 1 << log_num_qps ), arbel->limits.eqpc_entry_size,
2177
+	       icm_offset, ( icm_offset + len ) );
2153 2178
 	icm_offset += len;
2154 2179
 
2155
-	/* Shared receive queue contexts */
2156
-	log_num_srqs = fls ( arbel->limits.reserved_srqs - 1 );
2157
-	len = ( ( 1 << log_num_srqs ) * arbel->limits.srqc_entry_size );
2180
+	/* Completion queue contexts */
2181
+	log_num_cqs = fls ( arbel->limits.reserved_cqs + ARBEL_MAX_CQS - 1 );
2182
+	len = ( ( 1 << log_num_cqs ) * arbel->limits.cqc_entry_size );
2158 2183
 	icm_offset = icm_align ( icm_offset, len );
2159
-	MLX_FILL_2 ( init_hca, 19,
2160
-		     qpc_eec_cqc_eqc_rdb_parameters.srqc_base_addr_l,
2161
-		     ( icm_offset >> 5 ),
2162
-		     qpc_eec_cqc_eqc_rdb_parameters.log_num_of_srq,
2163
-		     log_num_srqs );
2164
-	DBGC ( arbel, "Arbel %p ICM SRQC at [%zx,%zx)\n",
2165
-	       arbel, icm_offset, ( icm_offset + len ) );
2184
+	MLX_FILL_2 ( init_hca, 21,
2185
+		     qpc_eec_cqc_eqc_rdb_parameters.cqc_base_addr_l,
2186
+		     ( icm_offset >> 6 ),
2187
+		     qpc_eec_cqc_eqc_rdb_parameters.log_num_of_cq,
2188
+		     log_num_cqs );
2189
+	DBGC ( arbel, "Arbel %p ICM CQC is %d x %#zx at [%zx,%zx)\n",
2190
+	       arbel, ( 1 << log_num_cqs ), arbel->limits.cqc_entry_size,
2191
+	       icm_offset, ( icm_offset + len ) );
2192
+	icm_offset += len;
2193
+
2194
+	/* User access region contexts */
2195
+	log_num_uars = fls ( arbel->limits.reserved_uars +
2196
+			     1 /* single UAR used */ - 1 );
2197
+	len = ( ( 1 << log_num_uars ) * ARBEL_PAGE_SIZE );
2198
+	icm_offset = icm_align ( icm_offset, len );
2199
+	MLX_FILL_1 ( init_hca, 74, uar_parameters.log_max_uars, log_num_uars );
2200
+	MLX_FILL_1 ( init_hca, 79,
2201
+		     uar_parameters.uar_context_base_addr_l, icm_offset );
2202
+	db_rec_offset = ( icm_offset +
2203
+			  ( arbel->limits.reserved_uars * ARBEL_PAGE_SIZE ) );
2204
+	DBGC ( arbel, "Arbel %p UAR is %d x %#zx at [%zx,%zx), doorbells "
2205
+	       "[%zx,%zx)\n", arbel, ( 1 << log_num_uars ), ARBEL_PAGE_SIZE,
2206
+	       icm_offset, ( icm_offset + len ), db_rec_offset,
2207
+	       ( db_rec_offset + ARBEL_PAGE_SIZE ) );
2208
+	icm_offset += len;
2209
+
2210
+	/* Event queue contexts */
2211
+	log_num_eqs = fls ( arbel->limits.reserved_eqs + ARBEL_MAX_EQS - 1 );
2212
+	len = ( ( 1 << log_num_eqs ) * arbel->limits.eqc_entry_size );
2213
+	icm_offset = icm_align ( icm_offset, len );
2214
+	MLX_FILL_2 ( init_hca, 33,
2215
+		     qpc_eec_cqc_eqc_rdb_parameters.eqc_base_addr_l,
2216
+		     ( icm_offset >> 6 ),
2217
+		     qpc_eec_cqc_eqc_rdb_parameters.log_num_eq,
2218
+		     log_num_eqs );
2219
+	DBGC ( arbel, "Arbel %p ICM EQC is %d x %#zx at [%zx,%zx)\n",
2220
+	       arbel, ( 1 << log_num_eqs ), arbel->limits.eqc_entry_size,
2221
+	       icm_offset, ( icm_offset + len ) );
2166 2222
 	icm_offset += len;
2167 2223
 
2168 2224
 	/* End-to-end contexts */
@@ -2174,41 +2230,23 @@ static int arbel_alloc_icm ( struct arbel *arbel,
2174 2230
 		     ( icm_offset >> 7 ),
2175 2231
 		     qpc_eec_cqc_eqc_rdb_parameters.log_num_of_ee,
2176 2232
 		     log_num_ees );
2177
-	DBGC ( arbel, "Arbel %p ICM EEC at [%zx,%zx)\n",
2178
-	       arbel, icm_offset, ( icm_offset + len ) );
2179
-	icm_offset += len;
2180
-
2181
-	/* Extended end-to-end contexts */
2182
-	len = ( ( 1 << log_num_ees ) * arbel->limits.eeec_entry_size );
2183
-	icm_offset = icm_align ( icm_offset, len );
2184
-	MLX_FILL_1 ( init_hca, 29,
2185
-		     qpc_eec_cqc_eqc_rdb_parameters.eeec_base_addr_l,
2186
-		     icm_offset );
2187
-	DBGC ( arbel, "Arbel %p ICM EEEC at [%zx,%zx)\n",
2188
-	       arbel, icm_offset, ( icm_offset + len ) );
2189
-	icm_offset += len;
2190
-
2191
-	/* Completion queue contexts */
2192
-	log_num_cqs = fls ( arbel->limits.reserved_cqs + ARBEL_MAX_CQS - 1 );
2193
-	len = ( ( 1 << log_num_cqs ) * arbel->limits.cqc_entry_size );
2194
-	icm_offset = icm_align ( icm_offset, len );
2195
-	MLX_FILL_2 ( init_hca, 21,
2196
-		     qpc_eec_cqc_eqc_rdb_parameters.cqc_base_addr_l,
2197
-		     ( icm_offset >> 6 ),
2198
-		     qpc_eec_cqc_eqc_rdb_parameters.log_num_of_cq,
2199
-		     log_num_cqs );
2200
-	DBGC ( arbel, "Arbel %p ICM CQC at [%zx,%zx)\n",
2201
-	       arbel, icm_offset, ( icm_offset + len ) );
2233
+	DBGC ( arbel, "Arbel %p ICM EEC is %d x %#zx at [%zx,%zx)\n",
2234
+	       arbel, ( 1 << log_num_ees ), arbel->limits.eec_entry_size,
2235
+	       icm_offset, ( icm_offset + len ) );
2202 2236
 	icm_offset += len;
2203 2237
 
2204
-	/* Memory translation table */
2205
-	log_num_mtts = fls ( arbel->limits.reserved_mtts - 1 );
2206
-	len = ( ( 1 << log_num_mtts ) * arbel->limits.mtt_entry_size );
2238
+	/* Shared receive queue contexts */
2239
+	log_num_srqs = fls ( arbel->limits.reserved_srqs - 1 );
2240
+	len = ( ( 1 << log_num_srqs ) * arbel->limits.srqc_entry_size );
2207 2241
 	icm_offset = icm_align ( icm_offset, len );
2208
-	MLX_FILL_1 ( init_hca, 65,
2209
-		     tpt_parameters.mtt_base_addr_l, icm_offset );
2210
-	DBGC ( arbel, "Arbel %p ICM MTT at [%zx,%zx)\n",
2211
-	       arbel, icm_offset, ( icm_offset + len ) );
2242
+	MLX_FILL_2 ( init_hca, 19,
2243
+		     qpc_eec_cqc_eqc_rdb_parameters.srqc_base_addr_l,
2244
+		     ( icm_offset >> 5 ),
2245
+		     qpc_eec_cqc_eqc_rdb_parameters.log_num_of_srq,
2246
+		     log_num_srqs );
2247
+	DBGC ( arbel, "Arbel %p ICM SRQC is %d x %#zx at [%zx,%zx)\n",
2248
+	       arbel, ( 1 << log_num_srqs ), arbel->limits.srqc_entry_size,
2249
+	       icm_offset, ( icm_offset + len ) );
2212 2250
 	icm_offset += len;
2213 2251
 
2214 2252
 	/* Memory protection table */
@@ -2219,8 +2257,9 @@ static int arbel_alloc_icm ( struct arbel *arbel,
2219 2257
 		     tpt_parameters.mpt_base_adr_l, icm_offset );
2220 2258
 	MLX_FILL_1 ( init_hca, 62,
2221 2259
 		     tpt_parameters.log_mpt_sz, log_num_mpts );
2222
-	DBGC ( arbel, "Arbel %p ICM MTT at [%zx,%zx)\n",
2223
-	       arbel, icm_offset, ( icm_offset + len ) );
2260
+	DBGC ( arbel, "Arbel %p ICM MPT is %d x %#zx at [%zx,%zx)\n",
2261
+	       arbel, ( 1 << log_num_mpts ), arbel->limits.mpt_entry_size,
2262
+	       icm_offset, ( icm_offset + len ) );
2224 2263
 	icm_offset += len;
2225 2264
 
2226 2265
 	/* Remote read data base table */
@@ -2230,21 +2269,20 @@ static int arbel_alloc_icm ( struct arbel *arbel,
2230 2269
 	MLX_FILL_1 ( init_hca, 37,
2231 2270
 		     qpc_eec_cqc_eqc_rdb_parameters.rdb_base_addr_l,
2232 2271
 		     icm_offset );
2233
-	DBGC ( arbel, "Arbel %p ICM RDB at [%zx,%zx)\n",
2234
-	       arbel, icm_offset, ( icm_offset + len ) );
2272
+	DBGC ( arbel, "Arbel %p ICM RDB is %d x %#zx at [%zx,%zx)\n",
2273
+	       arbel, ( 1 << log_num_rdbs ), ARBEL_RDB_ENTRY_SIZE,
2274
+	       icm_offset, ( icm_offset + len ) );
2235 2275
 	icm_offset += len;
2236 2276
 
2237
-	/* Event queue contexts */
2238
-	log_num_eqs = fls ( arbel->limits.reserved_eqs + ARBEL_MAX_EQS - 1 );
2239
-	len = ( ( 1 << log_num_eqs ) * arbel->limits.eqc_entry_size );
2277
+	/* Extended end-to-end contexts */
2278
+	len = ( ( 1 << log_num_ees ) * arbel->limits.eeec_entry_size );
2240 2279
 	icm_offset = icm_align ( icm_offset, len );
2241
-	MLX_FILL_2 ( init_hca, 33,
2242
-		     qpc_eec_cqc_eqc_rdb_parameters.eqc_base_addr_l,
2243
-		     ( icm_offset >> 6 ),
2244
-		     qpc_eec_cqc_eqc_rdb_parameters.log_num_eq,
2245
-		     log_num_eqs );
2246
-	DBGC ( arbel, "Arbel %p ICM EQ at [%zx,%zx)\n",
2247
-	       arbel, icm_offset, ( icm_offset + len ) );
2280
+	MLX_FILL_1 ( init_hca, 29,
2281
+		     qpc_eec_cqc_eqc_rdb_parameters.eeec_base_addr_l,
2282
+		     icm_offset );
2283
+	DBGC ( arbel, "Arbel %p ICM EEEC is %d x %#zx at [%zx,%zx)\n",
2284
+	       arbel, ( 1 << log_num_ees ), arbel->limits.eeec_entry_size,
2285
+	       icm_offset, ( icm_offset + len ) );
2248 2286
 	icm_offset += len;
2249 2287
 
2250 2288
 	/* Multicast table */
@@ -2262,8 +2300,31 @@ static int arbel_alloc_icm ( struct arbel *arbel,
2262 2300
 	MLX_FILL_1 ( init_hca, 54,
2263 2301
 		     multicast_parameters.log_mc_table_sz,
2264 2302
 		     log_num_mcs /* Only one entry per hash */ );
2265
-	DBGC ( arbel, "Arbel %p ICM MC at [%zx,%zx)\n",
2266
-	       arbel, icm_offset, ( icm_offset + len ) );
2303
+	DBGC ( arbel, "Arbel %p ICM MC is %d x %#zx at [%zx,%zx)\n", arbel,
2304
+	       ( 1 << log_num_mcs ), sizeof ( struct arbelprm_mgm_entry ),
2305
+	       icm_offset, ( icm_offset + len ) );
2306
+	icm_offset += len;
2307
+
2308
+	/* Memory translation table */
2309
+	log_num_mtts = fls ( arbel->limits.reserved_mtts - 1 );
2310
+	len = ( ( 1 << log_num_mtts ) * arbel->limits.mtt_entry_size );
2311
+	icm_offset = icm_align ( icm_offset, len );
2312
+	MLX_FILL_1 ( init_hca, 65,
2313
+		     tpt_parameters.mtt_base_addr_l, icm_offset );
2314
+	DBGC ( arbel, "Arbel %p ICM MTT is %d x %#zx at [%zx,%zx)\n",
2315
+	       arbel, ( 1 << log_num_mtts ), arbel->limits.mtt_entry_size,
2316
+	       icm_offset, ( icm_offset + len ) );
2317
+	icm_offset += len;
2318
+
2319
+	/* User access region scratchpads */
2320
+	len = ( ( 1 << log_num_uars ) * arbel->limits.uar_scratch_entry_size );
2321
+	icm_offset = icm_align ( icm_offset, len );
2322
+	MLX_FILL_1 ( init_hca, 77,
2323
+		     uar_parameters.uar_scratch_base_addr_l, icm_offset );
2324
+	DBGC ( arbel, "Arbel %p UAR scratchpad is %d x %#zx at [%zx,%zx)\n",
2325
+	       arbel, ( 1 << log_num_uars ),
2326
+	       arbel->limits.uar_scratch_entry_size,
2327
+	       icm_offset, ( icm_offset + len ) );
2267 2328
 	icm_offset += len;
2268 2329
 
2269 2330
 	/* Round up to a whole number of pages */
@@ -2316,10 +2377,9 @@ static int arbel_alloc_icm ( struct arbel *arbel,
2316 2377
 		goto err_map_icm;
2317 2378
 	}
2318 2379
 
2319
-	/* Initialise UAR context */
2320
-	arbel->db_rec = phys_to_virt ( user_to_phys ( arbel->icm, 0 ) +
2321
-				       ( arbel->limits.reserved_uars *
2322
-					 ARBEL_PAGE_SIZE ) );
2380
+	/* Initialise doorbell records */
2381
+	arbel->db_rec =
2382
+		phys_to_virt ( user_to_phys ( arbel->icm, db_rec_offset ) );
2323 2383
 	memset ( arbel->db_rec, 0, ARBEL_PAGE_SIZE );
2324 2384
 	db_rec = &arbel->db_rec[ARBEL_GROUP_SEPARATOR_DOORBELL];
2325 2385
 	MLX_FILL_1 ( &db_rec->qp, 1, res, ARBEL_UAR_RES_GROUP_SEP );
@@ -2502,7 +2562,6 @@ static int arbel_probe ( struct pci_device *pci,
2502 2562
 		goto err_alloc_icm;
2503 2563
 
2504 2564
 	/* Initialise HCA */
2505
-	MLX_FILL_1 ( &init_hca, 74, uar_parameters.log_max_uars, 1 );
2506 2565
 	if ( ( rc = arbel_cmd_init_hca ( arbel, &init_hca ) ) != 0 ) {
2507 2566
 		DBGC ( arbel, "Arbel %p could not initialise HCA: %s\n",
2508 2567
 		       arbel, strerror ( rc ) );

+ 4
- 2
src/drivers/infiniband/arbel.h View File

@@ -91,9 +91,9 @@ FILE_LICENCE ( GPL2_OR_LATER );
91 91
 
92 92
 #define ARBEL_INVALID_LKEY		0x00000100UL
93 93
 
94
-#define ARBEL_PAGE_SIZE			4096
94
+#define ARBEL_PAGE_SIZE			( ( size_t ) 4096 )
95 95
 
96
-#define ARBEL_RDB_ENTRY_SIZE		32
96
+#define ARBEL_RDB_ENTRY_SIZE		( ( size_t ) 32 )
97 97
 
98 98
 #define ARBEL_DB_POST_SND_OFFSET	0x10
99 99
 #define ARBEL_DB_EQ_OFFSET(_eqn)	( 0x08 * (_eqn) )
@@ -308,6 +308,8 @@ struct arbel_dev_limits {
308 308
 	size_t eqc_entry_size;
309 309
 	/** Number of reserved UARs */
310 310
 	unsigned int reserved_uars;
311
+	/** UAR scratchpad entry size */
312
+	size_t uar_scratch_entry_size;
311 313
 };
312 314
 
313 315
 /** Alignment of Arbel send work queue entries */

Loading…
Cancel
Save