Skip to content

Commit af2c8b9

Browse files
committed
Added ext_umad files to libinetdisc
ext_umad ("extended umad") provides an interface and utilities for handling planarized HCAs. A planarized HCA appears as 2 separate devices in the umad_get_cas output; the ext_umad interface allows grouping these devices together and finding matching devices. Signed-off-by: Amir Nir <anir@nvidia.com>
1 parent ec1310b commit af2c8b9

File tree

5 files changed

+391
-0
lines changed

5 files changed

+391
-0
lines changed

infiniband-diags/ibdiag_common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include <infiniband/mad.h>
4646
#include <util/iba_types.h>
4747
#include <infiniband/ibnetdisc.h>
48+
#include <infiniband/ext_umad.h>
4849
#include <linux/types.h>
4950

5051
extern int ibverbose;

libibnetdisc/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
publish_headers(infiniband
2+
ext_umad.h
23
ibnetdisc.h
34
ibnetdisc_osd.h
45
)
56

67
rdma_library(ibnetdisc libibnetdisc.map
78
# See Documentation/versioning.md
89
5 5.0.${PACKAGE_VERSION}
10+
ext_umad.c
911
chassis.c
1012
ibnetdisc.c
1113
ibnetdisc_cache.c

libibnetdisc/ext_umad.c

Lines changed: 315 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
/*
2+
* Copyright (c) 2023-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
*
4+
*
5+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
6+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
7+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
8+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
9+
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
10+
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
11+
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
12+
* SOFTWARE.
13+
*
14+
*/
15+
16+
17+
#include <infiniband/ext_umad.h>
18+
#include <stdbool.h>
19+
#include <stdint.h>
20+
#include <stdio.h>
21+
#include <string.h>
22+
23+
/*
 * Bit mask (bit 10) that is_smi_disabled() tests against a port's capability
 * mask after converting it to host byte order.
 * NOTE(review): presumably the IsSMDisabled bit of the IB PortInfo
 * CapabilityMask - confirm against the IBA specification.
 */
#define CAPMASK_IS_SM_DISABLED 0x400
24+
25+
/*
 * Alias for whatever type 'struct umad_port' declares for its 'port_guid'
 * member. typeof only inspects the type of the expression, so the null
 * pointer is never dereferenced.
 * NOTE(review): not referenced anywhere in the visible part of this file.
 */
typedef typeof(((struct umad_port *)0)->port_guid) umad_guid_t;
26+
27+
/**
 * @brief One entry of the "ports per GUID" table.
 *
 * An entry whose port_guid is 0 is treated as unused by the lookup helpers
 * in this file.
 */
struct port_guid_port_count {
	uint64_t port_guid;	/* port GUID this entry describes */
	uint8_t count;		/* number of ports seen with that GUID */
};
34+
35+
/**
36+
* @brief A mapping between a port GUID, and the extended ca that has ports with this GUID.
37+
* Used to search the correct extended ca for a given port.
38+
*/
39+
struct guid_ext_ca_mapping {
40+
uint64_t port_guid;
41+
ext_umad_ca_t* ext_ca;
42+
};
43+
44+
/**
45+
* @brief search the 'counts' array for a struct with a given GUID / the first
46+
empty struct if GUID was not found
47+
*
48+
* @param counts[in]
49+
* @param max - size of counts
50+
* @param port_guid
51+
* @param index[out]
52+
* @return true - a struct was found, 'index' contains it's index
53+
* @return false - a struct was not found, 'index' contains the
54+
first unused index in counts / the last index if counts is full.
55+
*/
56+
static bool find_port_guid_count(struct port_guid_port_count counts[], size_t max,
57+
uint64_t port_guid, size_t *index)
58+
{
59+
for (size_t i = 0; i < max; ++i) {
60+
if (counts[i].port_guid == 0) {
61+
*index = i;
62+
return false;
63+
}
64+
if (counts[i].port_guid == port_guid) {
65+
*index = i;
66+
return true;
67+
}
68+
}
69+
70+
*index = max;
71+
return false;
72+
}
73+
74+
/**
75+
* @brief count the number of ports that hold each GUID.
76+
*
77+
* @param legacy_ca_names[in] - ca names given by umad_get_cas_names
78+
* @param num_cas - number of cas returned by umad_get_cas_names
79+
* @param counts[out] - each entry in this array contains a port guid and
80+
the number of ports with that guid.
81+
* @param max - maximum output array size. new GUIDs will be
82+
ignored after the maximum amount was added.
83+
* @return number of guids counted (output array length)
84+
*/
85+
static int count_ports_by_guid(char legacy_ca_names[][UMAD_CA_NAME_LEN], size_t num_cas,
86+
struct port_guid_port_count counts[], size_t max)
87+
{
88+
// how many unique port GUIDs were added
89+
size_t num_of_guid = 0;
90+
memset(counts, 0, max * sizeof(struct port_guid_port_count));
91+
92+
for (size_t c_idx = 0; c_idx < num_cas; ++c_idx) {
93+
umad_ca_t curr_ca;
94+
95+
if (umad_get_ca(legacy_ca_names[c_idx], &curr_ca) < 0)
96+
continue;
97+
98+
for (size_t p_idx = 1; p_idx < (size_t)curr_ca.numports + 1; ++p_idx) {
99+
umad_port_t *p_port = curr_ca.ports[p_idx];
100+
size_t count_idx = 0;
101+
if (!p_port)
102+
continue;
103+
104+
if (find_port_guid_count(counts, max, p_port->port_guid, &count_idx)) {
105+
// port GUID already has a count struct
106+
++counts[count_idx].count;
107+
} else {
108+
// add a new count struct for this GUID. if the maximum amount was already added, do nothing.
109+
if (count_idx != max) {
110+
counts[count_idx].port_guid = p_port->port_guid;
111+
counts[count_idx].count = 1;
112+
++num_of_guid;
113+
}
114+
}
115+
}
116+
117+
umad_release_ca(&curr_ca);
118+
}
119+
120+
return num_of_guid;
121+
}
122+
123+
/**
124+
* @brief return the amount of ports with the same port GUID as the one given.
125+
* simply searches the counts array for the correct GUID.
126+
*
127+
* @param guid
128+
* @param counts[in] - an array holding each guid and it's count.
129+
* @param max_guids - maximum amount of entries in 'counts' array.
130+
* @return size_t
131+
*/
132+
static uint8_t get_port_guid_count(uint64_t guid, const struct port_guid_port_count counts[],
133+
size_t max_guids)
134+
{
135+
for (size_t i = 0; i < max_guids; ++i) {
136+
if (counts[i].port_guid == guid)
137+
return counts[i].count;
138+
}
139+
140+
return 0;
141+
}
142+
143+
/**
 * @brief Check whether CAPMASK_IS_SM_DISABLED is set in a port's capability mask.
 *
 * The mask is converted with be32toh() before the bit is tested.
 *
 * @param p_port[in] - port to inspect
 * @return true if the bit is set, false otherwise
 */
static bool is_smi_disabled(umad_port_t *p_port)
{
	const uint32_t host_capmask = be32toh(p_port->capmask);

	return (host_capmask & CAPMASK_IS_SM_DISABLED) != 0;
}
147+
148+
/**
149+
* @brief Get a pointer to the device in which a planarized port with 'port_guid' should be inserted.
150+
*
151+
* Search the mapping array for the given port_guid. if found, return the the result pointer.
152+
* if not found, return the first non-initialized 'dev' array index (or NULL if the array is full),
153+
* add a new mapping for the given port, and advance 'added' counters.
154+
*
155+
* @param port_guid
156+
* @param mapping[input, output] - search this array for the given port GUID.
157+
* @param map_max - maximum size of the mapping array
158+
* @param map_added - amount of mappings in 'mapping'. will be increased if a new mapping is added.
159+
* @param devs[input] - the array from which the index will be returned
160+
* @param devs_max - maximum size of 'devs' array
161+
* @param devs_added - amount of initialized devices in 'devs' array. will be changed if a new device is added.
162+
* @return address of the device that corresponds to the given GUID. NULL if not found and 'devs' is full.
163+
*/
164+
static ext_umad_ca_t* get_ext_ca_from_arr_by_guid(uint64_t port_guid,
165+
struct guid_ext_ca_mapping mapping[],
166+
size_t map_max, size_t* map_added,
167+
ext_umad_ca_t devs[],
168+
size_t devs_max, size_t* devs_added)
169+
{
170+
ext_umad_ca_t* dev = NULL;
171+
// attempt to find the port guid in the mapping
172+
for (size_t i = 0; i < *map_added; ++i) {
173+
if (mapping[i].port_guid == port_guid)
174+
return mapping[i].ext_ca;
175+
}
176+
177+
// attempt to add a new mapping/device
178+
if (*map_added >= map_max || *devs_added >= devs_max)
179+
return NULL;
180+
181+
dev = &devs[*devs_added];
182+
mapping[*map_added].port_guid = port_guid;
183+
mapping[*map_added].ext_ca = dev;
184+
(*devs_added)++;
185+
(*map_added)++;
186+
187+
return dev;
188+
}
189+
190+
/**
191+
* @brief add a new port to a device's port numbers array (zero terminated).
192+
* set the device's name if it doesn't have one.
193+
*
194+
* @param dev[output] - devices the port number should be added to.
195+
* @param p_port[input] - the port whose number will be added to the list (and potentially ca name)
196+
*/
197+
static void add_new_port(ext_umad_device_t *dev, umad_port_t *p_port)
198+
{
199+
for (size_t i = 0; i < UMAD_CA_MAX_PORTS; ++i) {
200+
if (dev->ports[i] == 0) {
201+
dev->ports[i] = p_port->portnum;
202+
break;
203+
}
204+
}
205+
if (!dev->name[0])
206+
memcpy(dev->name, p_port->ca_name, UMAD_CA_NAME_LEN);
207+
}
208+
209+
/**
 * @brief Fill 'cas' with extended cas built from the legacy umad devices.
 *
 * 'cas' is zeroed, the legacy ca names are fetched with umad_get_cas_names(),
 * and the number of ports carrying each port GUID is counted. Every port is
 * then attached to the extended ca assigned to its GUID:
 *  - GUID carried by more than one port (planarized): the port goes to the
 *    'gsi' device if is_smi_disabled() is true for it, otherwise to 'smi'.
 *  - GUID carried by exactly one port (legacy): the port always goes to
 *    'gsi', and also to 'smi' unless is_smi_disabled() is true for it.
 * Cas that cannot be opened are skipped. Ports whose GUID cannot get an
 * extended ca because 'cas' or the internal mapping table is full are
 * skipped as well.
 *
 * @param cas[out] - array receiving the extended cas
 * @param max      - number of elements in 'cas'
 * @return number of elements of 'cas' that were assigned to a GUID;
 *         0 if umad_get_cas_names() fails;
 *         -1 if a port's GUID has no entry in the count table.
 */
int ext_umad_get_cas(ext_umad_ca_t cas[], size_t max)
{
	size_t added_devices = 0, added_mappings = 0;
	char legacy_ca_names[UMAD_MAX_DEVICES][UMAD_CA_NAME_LEN] = {};
	struct port_guid_port_count counts[UMAD_MAX_PORTS] = {};
	struct guid_ext_ca_mapping mapping[UMAD_MAX_PORTS] = {};

	memset(cas, 0, sizeof(ext_umad_ca_t) * max);
	int cas_found = umad_get_cas_names(legacy_ca_names, UMAD_MAX_DEVICES);
	if (cas_found < 0)
		return 0;

	count_ports_by_guid(legacy_ca_names, cas_found, counts, UMAD_MAX_PORTS);

	for (size_t c_idx = 0; c_idx < (size_t)cas_found; ++c_idx) {
		umad_ca_t curr_ca;

		if (umad_get_ca(legacy_ca_names[c_idx], &curr_ca) < 0)
			continue;

		// port slots are visited starting from 1
		for (size_t p_idx = 1; p_idx < (size_t)curr_ca.numports + 1; ++p_idx) {
			umad_port_t *p_port = curr_ca.ports[p_idx];
			uint8_t guid_count = 0;
			if (!p_port)
				continue;

			guid_count = get_port_guid_count(p_port->port_guid,
							 counts, UMAD_MAX_PORTS);
			ext_umad_ca_t *dev = get_ext_ca_from_arr_by_guid(p_port->port_guid, mapping,
									 UMAD_MAX_PORTS, &added_mappings,
									 cas, max, &added_devices);
			if (!dev)
				continue;
			if (guid_count > 1) {
				// planarized port
				add_new_port(is_smi_disabled(p_port) ? &dev->gsi : &dev->smi, p_port);
			} else if (guid_count == 1) {
				if (!is_smi_disabled(p_port))
					add_new_port(&dev->smi, p_port);

				// all ports are GSI ports in legacy HCAs
				add_new_port(&dev->gsi, p_port);
			} else {
				// the GUID is missing from the count table; give the ca
				// back before bailing out so it is not leaked
				umad_release_ca(&curr_ca);
				return -1;
			}
		}

		umad_release_ca(&curr_ca);
	}

	return added_devices;
}
261+
262+
/**
 * @brief Find the first extended ca that matches a device name and/or a
 *        port number.
 *
 * Only extended cas that have both an SMI and a GSI device, each with a name
 * and at least one port, are considered.
 *
 * @param devname  name to match against the SMI or the GSI device name of
 *                 the extended ca; NULL matches any name.
 * @param portnum  port number that must appear in the port list of the
 *                 device named by 'devname' (the GSI device if 'devname'
 *                 equals its name, the SMI device otherwise, including when
 *                 'devname' is NULL); 0 matches any port.
 * @param out      receives a copy of the matching extended ca; may be NULL
 *                 when only the result code is of interest.
 * @return 0 if a matching extended ca was found;
 *         1 if none matched, including when no extended ca exists at all;
 *         a negative value if ext_umad_get_cas() reported an error.
 */
int ext_umad_get_ca_by_name(const char *devname, uint8_t portnum, ext_umad_ca_t *out)
{
	ext_umad_ca_t ext_cas[UMAD_MAX_PORTS] = {};
	int num_cas = ext_umad_get_cas(ext_cas, UMAD_MAX_PORTS);

	if (num_cas < 0)
		return num_cas;

	// 0 is this function's "found" code, so an empty list must not be
	// passed through as-is: nothing was found and 'out' stays untouched
	if (num_cas == 0)
		return 1;

	for (size_t i = 0; i < (size_t)num_cas; ++i) {
		const ext_umad_ca_t *ca = &ext_cas[i];

		if (!ca->gsi.name[0] || !ca->smi.name[0] ||
		    !ca->gsi.ports[0] || !ca->smi.ports[0])
			continue;

		// name doesn't match either device - keep searching
		if (devname &&
		    strncmp(ca->gsi.name, devname, UMAD_CA_NAME_LEN) &&
		    strncmp(ca->smi.name, devname, UMAD_CA_NAME_LEN))
			continue;

		if (portnum) {
			// check that the device given by "devname" has a port number "portnum"
			// (if devname doesn't exist, assume smi port is given)
			bool is_devname_gsi = devname &&
				!strncmp(devname, ca->gsi.name, UMAD_CA_NAME_LEN);
			const ext_umad_device_t *devname_dev = is_devname_gsi ? &ca->gsi : &ca->smi;
			bool found_port = false;

			for (size_t port_index = 0; port_index < UMAD_CA_MAX_PORTS; ++port_index) {
				// a zero entry ends the list of valid port numbers
				if (!devname_dev->ports[port_index])
					break;
				if (devname_dev->ports[port_index] == portnum) {
					found_port = true;
					break;
				}
			}

			// couldn't find portnum - keep searching
			if (!found_port)
				continue;
		}

		if (out)
			*out = *ca;
		return 0;
	}

	return 1;
}

0 commit comments

Comments
 (0)