Skip to content

Commit ec7f9b5

Browse files
committed
Added ext_umad files to libibnetdisc
ext_umad ("extended umad") provides an interface and utilities for handling planarized HCAs. A planarized HCA appears as 2 separate devices in the umad_get_cas output; the ext_umad interface allows grouping these devices together and finding matching devices. Signed-off-by: Amir Nir <anir@nvidia.com>
1 parent 0c6807a commit ec7f9b5

File tree

6 files changed

+423
-1
lines changed

6 files changed

+423
-1
lines changed

debian/libibnetdisc5.symbols

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,5 @@ libibnetdisc.so.5 libibnetdisc5 #MINVER#
2727
ibnd_dump_agg_linkspeedext@IBNETDISC_1.1 1.6.1
2828
ibnd_dump_agg_linkspeedexten@IBNETDISC_1.1 1.6.1
2929
ibnd_dump_agg_linkspeedextsup@IBNETDISC_1.1 1.6.1
30+
ibnd_ext_umad_get_cas@IBNETDISC_1.2 1.6.1
31+
ibnd_ext_umad_get_ca_by_name@IBNETDISC_1.2 1.6.1

infiniband-diags/ibdiag_common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include <infiniband/mad.h>
4646
#include <util/iba_types.h>
4747
#include <infiniband/ibnetdisc.h>
48+
#include <infiniband/ibnetdisc_ext_umad.h>
4849
#include <linux/types.h>
4950

5051
extern int ibverbose;

libibnetdisc/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
publish_headers(infiniband
2+
ibnetdisc_ext_umad.h
23
ibnetdisc.h
34
ibnetdisc_osd.h
45
)
56

67
rdma_library(ibnetdisc libibnetdisc.map
78
# See Documentation/versioning.md
8-
5 5.1.${PACKAGE_VERSION}
9+
5 5.2.${PACKAGE_VERSION}
10+
ibnetdisc_ext_umad.c
911
chassis.c
1012
ibnetdisc.c
1113
ibnetdisc_cache.c

libibnetdisc/ibnetdisc_ext_umad.c

Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,331 @@
1+
/*
2+
* Copyright (c) 2023-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
*
4+
*
5+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
6+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
7+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
8+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
9+
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
10+
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
11+
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
12+
* SOFTWARE.
13+
*
14+
*/
15+
16+
17+
#include <infiniband/ibnetdisc_ext_umad.h>
18+
#include <stdbool.h>
19+
#include <stdint.h>
20+
#include <stdio.h>
21+
#include <string.h>
22+
23+
/*
 * Bit tested by is_smi_disabled() in a port's capability mask, after the
 * mask has been converted with be32toh().
 * NOTE(review): presumably the IsSMDisabled bit of the PortInfo
 * CapabilityMask - confirm against the InfiniBand specification.
 */
#define CAPMASK_IS_SM_DISABLED 0x400
24+
25+
/*
 * GUID type used throughout this file. It is derived with typeof from the
 * port_guid field of struct umad_port, so it always has that field's type.
 */
typedef typeof(((struct umad_port *)0)->port_guid) umad_guid_t;
26+
27+
/**
 * @brief Pairs a port GUID with the number of ports that report it.
 *
 * port_guid - the GUID being counted. In the 'counts' arrays used in this
 *             file a zero value marks an unused entry.
 * count     - number of ports seen with this GUID.
 */
30+
struct port_guid_port_count {
31+
umad_guid_t port_guid;
32+
uint8_t count;
33+
};
34+
35+
/**
 * @brief Maps a port GUID to the extended ca that holds the ports with this GUID.
 * Used to look up the correct extended ca for a given port.
 *
 * port_guid - the GUID used as the lookup key.
 * ext_ca    - the extended ca associated with that GUID; in this file it
 *             points into the caller-supplied output array.
 */
39+
struct guid_ext_ca_mapping {
40+
umad_guid_t port_guid;
41+
ext_umad_ca_t *ext_ca;
42+
};
43+
44+
/**
45+
* @brief search the 'counts' array for a struct with a given GUID / the first
46+
* empty struct if GUID was not found
47+
*
48+
* @param counts[in]
49+
* @param max - size of counts
50+
* @param port_guid
51+
* @param index[out]
52+
* @return true - a struct was found, 'index' contains it's index
53+
* @return false - a struct was not found, 'index' contains the
54+
first unused index in counts / the last index if counts is full.
55+
*/
56+
static bool find_port_guid_count(struct port_guid_port_count counts[], size_t max,
57+
umad_guid_t port_guid, size_t *index)
58+
{
59+
for (size_t i = 0; i < max; ++i) {
60+
if (counts[i].port_guid == 0) {
61+
*index = i;
62+
return false;
63+
}
64+
if (counts[i].port_guid == port_guid) {
65+
*index = i;
66+
return true;
67+
}
68+
}
69+
70+
*index = max;
71+
return false;
72+
}
73+
74+
/**
75+
* @brief count the number of ports that hold each GUID.
76+
*
77+
* @param legacy_ca_names[in] - ca names given by umad_get_cas_names
78+
* @param num_cas - number of cas returned by umad_get_cas_names
79+
* @param counts[out] - each entry in this array contains a port guid and
80+
the number of ports with that guid.
81+
* @param max - maximum output array size. new GUIDs will be
82+
ignored after the maximum amount was added.
83+
* @return number of guids counted (output array length)
84+
*/
85+
static int count_ports_by_guid(char legacy_ca_names[][UMAD_CA_NAME_LEN], size_t num_cas,
86+
struct port_guid_port_count counts[], size_t max)
87+
{
88+
// how many unique port GUIDs were added
89+
size_t num_of_guid = 0;
90+
91+
memset(counts, 0, max * sizeof(struct port_guid_port_count));
92+
for (size_t c_idx = 0; c_idx < num_cas; ++c_idx) {
93+
umad_ca_t curr_ca;
94+
95+
if (umad_get_ca(legacy_ca_names[c_idx], &curr_ca) < 0)
96+
continue;
97+
98+
for (size_t p_idx = 1; p_idx < (size_t)curr_ca.numports + 1; ++p_idx) {
99+
umad_port_t *p_port = curr_ca.ports[p_idx];
100+
size_t count_idx = 0;
101+
102+
if (!p_port)
103+
continue;
104+
105+
if (find_port_guid_count(counts, max, p_port->port_guid, &count_idx)) {
106+
// port GUID already has a count struct
107+
++counts[count_idx].count;
108+
} else {
109+
// add a new count struct for this GUID.
110+
// if the maximum amount was already added, do nothing.
111+
if (count_idx != max) {
112+
counts[count_idx].port_guid = p_port->port_guid;
113+
counts[count_idx].count = 1;
114+
++num_of_guid;
115+
}
116+
}
117+
}
118+
119+
umad_release_ca(&curr_ca);
120+
}
121+
122+
return num_of_guid;
123+
}
124+
125+
/**
126+
* @brief return the amount of ports with the same port GUID as the one given.
127+
* simply searches the counts array for the correct GUID.
128+
*
129+
* @param guid
130+
* @param counts[in] - an array holding each guid and it's count.
131+
* @param max_guids - maximum amount of entries in 'counts' array.
132+
* @return size_t
133+
*/
134+
static uint8_t get_port_guid_count(umad_guid_t guid, const struct port_guid_port_count counts[],
135+
size_t max_guids)
136+
{
137+
for (size_t i = 0; i < max_guids; ++i) {
138+
if (counts[i].port_guid == guid)
139+
return counts[i].count;
140+
}
141+
142+
return 0;
143+
}
144+
145+
/**
 * @brief Check whether the CAPMASK_IS_SM_DISABLED bit is set for a port.
 *
 * The port's capmask is converted with be32toh() before the bit is tested.
 *
 * @param p_port[in] - port whose capmask is examined
 * @return true if the bit is set, false otherwise
 */
static bool is_smi_disabled(umad_port_t *p_port)
{
	const uint32_t host_capmask = be32toh(p_port->capmask);

	return (host_capmask & CAPMASK_IS_SM_DISABLED) != 0;
}
149+
150+
/**
151+
* @brief Get a pointer to the device in which a planarized port
152+
* with 'port_guid' should be inserted.
153+
*
154+
* Search the mapping array for the given port_guid.
155+
* if found, return the result pointer.
156+
* if not found, return the first non-initialized 'dev'
157+
* array index (or NULL if the array is full),
158+
* add a new mapping for the given port, and advance 'added' counters.
159+
*
160+
* @param port_guid
161+
* @param mapping[input, output] - search this array for the given port GUID.
162+
* @param map_max - maximum size of the mapping array
163+
* @param map_added - amount of mappings in 'mapping'.
164+
* will be increased if a new mapping is added.
165+
* @param devs[input] - the array from which the index will be returned
166+
* @param devs_max - maximum size of 'devs' array
167+
* @param devs_added - amount of initialized devices in 'devs' array.
168+
* will be changed if a new device is added.
169+
* @return address of the device that corresponds to the given GUID.
170+
* NULL if not found and 'devs' is full.
171+
*/
172+
static ext_umad_ca_t *get_ext_ca_from_arr_by_guid(umad_guid_t port_guid,
173+
struct guid_ext_ca_mapping mapping[],
174+
size_t map_max, size_t *map_added,
175+
ext_umad_ca_t devs[],
176+
size_t devs_max, size_t *devs_added)
177+
{
178+
ext_umad_ca_t *dev = NULL;
179+
// attempt to find the port guid in the mapping
180+
for (size_t i = 0; i < *map_added; ++i) {
181+
if (mapping[i].port_guid == port_guid)
182+
return mapping[i].ext_ca;
183+
}
184+
185+
// attempt to add a new mapping/device
186+
if (*map_added >= map_max || *devs_added >= devs_max)
187+
return NULL;
188+
189+
dev = &devs[*devs_added];
190+
mapping[*map_added].port_guid = port_guid;
191+
mapping[*map_added].ext_ca = dev;
192+
(*devs_added)++;
193+
(*map_added)++;
194+
195+
return dev;
196+
}
197+
198+
/**
199+
* @brief add a new port to a device's port numbers array (zero terminated).
200+
* set the device's name if it doesn't have one.
201+
*
202+
* @param dev[output] - devices the port number should be added to.
203+
* @param p_port[input] - the port whose number will be added to the
204+
* list (and potentially ca name)
205+
*/
206+
static void add_new_port(ext_umad_device_t *dev, umad_port_t *p_port)
207+
{
208+
for (size_t i = 0; i < UMAD_CA_MAX_PORTS; ++i) {
209+
if (dev->ports[i] == 0) {
210+
dev->ports[i] = p_port->portnum;
211+
break;
212+
}
213+
}
214+
if (!dev->name[0])
215+
memcpy(dev->name, p_port->ca_name, UMAD_CA_NAME_LEN);
216+
}
217+
218+
/**
 * @brief Fill 'cas' with extended cas built from the devices listed by
 *        umad_get_cas_names().
 *
 * Ports are grouped by port GUID: all ports sharing a GUID end up in the
 * same extended ca.
 *  - A GUID seen on more than one port is treated as planarized; each such
 *    port is added to the gsi device when is_smi_disabled() is true for
 *    it, and to the smi device otherwise.
 *  - A GUID seen on exactly one port is treated as legacy; the port is
 *    always added to the gsi device, and also to the smi device unless
 *    is_smi_disabled() is true for it.
 * Devices that cannot be opened with umad_get_ca() are skipped, as are
 * ports for which no extended ca slot is available.
 *
 * @param cas[out] - output array; zeroed before being filled
 * @param max      - number of entries in 'cas'
 * @return number of extended cas written to 'cas';
 *         0 if umad_get_cas_names() fails;
 *         -1 if a port's GUID has no entry in the GUID count table
 *         ('cas' may then be partially filled).
 */
int ibnd_ext_umad_get_cas(ext_umad_ca_t cas[], size_t max)
{
	size_t added_devices = 0, added_mappings = 0;
	char legacy_ca_names[UMAD_MAX_DEVICES][UMAD_CA_NAME_LEN] = {};
	struct port_guid_port_count counts[UMAD_MAX_PORTS] = {};
	struct guid_ext_ca_mapping mapping[UMAD_MAX_PORTS] = {};

	memset(cas, 0, sizeof(ext_umad_ca_t) * max);
	int cas_found = umad_get_cas_names(legacy_ca_names, UMAD_MAX_DEVICES);

	if (cas_found < 0)
		return 0;

	// first pass: learn how many ports share each GUID
	count_ports_by_guid(legacy_ca_names, cas_found, counts, UMAD_MAX_PORTS);

	// second pass: distribute the ports among the extended cas
	for (size_t c_idx = 0; c_idx < (size_t)cas_found; ++c_idx) {
		umad_ca_t curr_ca;

		if (umad_get_ca(legacy_ca_names[c_idx], &curr_ca) < 0)
			continue;

		for (size_t p_idx = 1; p_idx < (size_t)curr_ca.numports + 1; ++p_idx) {
			umad_port_t *p_port = curr_ca.ports[p_idx];
			uint8_t guid_count = 0;

			if (!p_port)
				continue;

			guid_count = get_port_guid_count(p_port->port_guid,
							 counts, UMAD_MAX_PORTS);
			ext_umad_ca_t *dev = get_ext_ca_from_arr_by_guid(p_port->port_guid,
									 mapping, UMAD_MAX_PORTS,
									 &added_mappings, cas,
									 max, &added_devices);
			if (!dev)
				continue;
			if (guid_count > 1) {
				// planarized port
				add_new_port(is_smi_disabled(p_port) ?
						&dev->gsi : &dev->smi, p_port);
			} else if (guid_count == 1) {
				if (!is_smi_disabled(p_port))
					add_new_port(&dev->smi, p_port);

				// all ports are GSI ports in legacy HCAs
				add_new_port(&dev->gsi, p_port);
			} else {
				// the ca was obtained with umad_get_ca() above and
				// must be released on this early exit as well
				umad_release_ca(&curr_ca);
				return -1;
			}
		}

		umad_release_ca(&curr_ca);
	}

	return added_devices;
}
274+
275+
/**
 * @brief Find the extended ca that matches a device name and/or port number.
 *
 * The extended cas are enumerated with ibnd_ext_umad_get_cas(). Only
 * entries that have both a gsi and an smi device, each with a name and at
 * least one port, are considered. The first entry that passes every
 * requested filter is selected:
 *  - 'devname', when not NULL, must equal the gsi or the smi device name.
 *  - 'portnum', when not 0, must appear in the port list of the device
 *    named by 'devname' (the smi device if 'devname' is NULL or names the
 *    smi device only).
 *
 * @param devname     - device name to match, or NULL to accept any name
 * @param portnum     - port number to match, or 0 to accept any port
 * @param out[output] - receives a copy of the selected extended ca; may be NULL
 * @return 0 if a matching extended ca was found, 1 if none matched.
 *         If ibnd_ext_umad_get_cas() returns a value <= 0, that value is
 *         returned as is.
 *         NOTE(review): this means 0 is also returned when no cas were
 *         enumerated, with 'out' left untouched - confirm callers expect this.
 */
int ibnd_ext_umad_get_ca_by_name(const char *devname, uint8_t portnum, ext_umad_ca_t *out)
{
	ext_umad_ca_t ext_cas[UMAD_MAX_PORTS] = {};
	const int num_cas = ibnd_ext_umad_get_cas(ext_cas, UMAD_MAX_PORTS);

	if (num_cas <= 0)
		return num_cas;

	for (size_t ca_idx = 0; ca_idx < (size_t)num_cas; ++ca_idx) {
		ext_umad_ca_t *cand = &ext_cas[ca_idx];
		bool devname_is_gsi = false;

		// a candidate needs a named gsi and smi device with ports on both
		if (!(cand->gsi.name[0] && cand->smi.name[0] &&
		      cand->gsi.ports[0] && cand->smi.ports[0]))
			continue;

		if (devname) {
			devname_is_gsi = strncmp(cand->gsi.name, devname,
						 UMAD_CA_NAME_LEN) == 0;

			// name matches neither device - keep searching
			if (!devname_is_gsi &&
			    strncmp(cand->smi.name, devname, UMAD_CA_NAME_LEN) != 0)
				continue;
		}

		if (portnum) {
			// look for "portnum" on the device named by "devname"
			// (without a gsi name match, the smi device is used)
			const ext_umad_device_t *named_dev = devname_is_gsi ?
							     &cand->gsi : &cand->smi;
			size_t p = 0;

			// the port list ends at the first zero entry
			while (p < UMAD_CA_MAX_PORTS && named_dev->ports[p] &&
			       named_dev->ports[p] != portnum)
				++p;

			// ran off the list without seeing portnum - keep searching
			if (p == UMAD_CA_MAX_PORTS || !named_dev->ports[p])
				continue;
		}

		if (out)
			*out = *cand;
		return 0;
	}

	return 1;
}

0 commit comments

Comments
 (0)