forked from Mellanox/rdma-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrdma.kernel-init
248 lines (222 loc) · 6.72 KB
/
rdma.kernel-init
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
#!/bin/bash
#
# Kernel RDMA stack bring-up.
#
# Normally not started by hand: a hardware activation event in udev starts
# the rdma.service unit under systemd, and that unit runs this script.
#
shopt -s nullglob

# Defaults.  The config file sourced below runs after these assignments and
# can therefore replace any of them.
CONFIG=/etc/rdma/rdma.conf
MTRR_SCRIPT=/usr/libexec/rdma-fixup-mtrr.awk
LOAD_ULP_MODULES=""
LOAD_CORE_USER_MODULES="ib_umad ib_uverbs ib_ucm rdma_ucm"
LOAD_CORE_CM_MODULES="iw_cm ib_cm rdma_cm"
LOAD_CORE_MODULES="ib_core ib_mad ib_sa ib_addr"

if [ ! -f $CONFIG ]; then
    # No config file: IPoIB is the only upper-layer protocol loaded.
    LOAD_ULP_MODULES="ib_ipoib"
else
    . $CONFIG

    # Asking for RDS also turns IPoIB on.
    [[ ${RDS_LOAD} == yes ]] && IPOIB_LOAD=yes
    [[ ${IPOIB_LOAD} == yes ]] && LOAD_ULP_MODULES="ib_ipoib"

    # RDS modules are queued only when the running kernel ships the
    # corresponding .ko files.
    if [[ ${RDS_LOAD} == yes && -f /lib/modules/$(uname -r)/kernel/net/rds/rds.ko ]]; then
        LOAD_ULP_MODULES+=" rds"
        [[ -f /lib/modules/$(uname -r)/kernel/net/rds/rds_tcp.ko ]] &&
            LOAD_ULP_MODULES+=" rds_tcp"
        [[ -f /lib/modules/$(uname -r)/kernel/net/rds/rds_rdma.ko ]] &&
            LOAD_ULP_MODULES+=" rds_rdma"
    fi

    # Remaining upper-layer protocols: one "yes" knob per module.
    [[ ${SRP_LOAD} == yes ]] && LOAD_ULP_MODULES+=" ib_srp"
    [[ ${SRPT_LOAD} == yes ]] && LOAD_ULP_MODULES+=" ib_srpt"
    [[ ${ISER_LOAD} == yes ]] && LOAD_ULP_MODULES+=" ib_iser"
    [[ ${ISERT_LOAD} == yes ]] && LOAD_ULP_MODULES+=" ib_isert"
    [[ ${XPRTRDMA_LOAD} == yes ]] && LOAD_ULP_MODULES+=" xprtrdma"
    [[ ${SVCRDMA_LOAD} == yes ]] && LOAD_ULP_MODULES+=" svcrdma"
fi
# Report whether kernel module $1 is currently loaded.
#
# Arguments: $1 - module name; dashes are folded to underscores, the form
#                 in which loaded modules are listed
# Returns:   0 if lsmod lists the module in its first (Module) column,
#            1 otherwise (also for an empty name or when lsmod prints
#            nothing)
#
# Only the name column is compared, and it is compared literally, so the
# header line, the size/use-count numbers and regex metacharacters in $1
# cannot produce a false positive.
is_module()
{
    local name=${1//-/_}

    /sbin/lsmod |
        awk -v mod="$name" 'NR > 1 && $1 == mod { found = 1 } END { exit !found }'
}
# Load every named kernel module that is not already loaded.
#
# Arguments: $* - module names (a single space-separated word also works,
#                 because the list is deliberately word-split)
# Outputs:   a blank line and "Failed to load module NAME" on stdout for
#            each module modprobe could not load
# Returns:   the sum of the non-zero modprobe exit statuses; 0 when every
#            module was already present or loaded cleanly
load_modules()
{
    # All working variables are local so the caller's variables of the same
    # name are never clobbered.
    local rc=0 status module

    # Intentionally unquoted: callers pass whitespace-separated module lists.
    for module in $*; do
        is_module "$module" && continue

        /sbin/modprobe "$module"
        status=$?
        if [ "$status" -ne 0 ]; then
            rc=$(( rc + status ))
            echo
            echo "Failed to load module $module"
        fi
    done
    return "$rc"
}
# This function is a horrible hack to work around BIOS authors that should
# be shot. Specifically, certain BIOSes will map the entire 4GB address
# space as write-back cacheable when the machine has 4GB or more of RAM, and
# then they will exclude the reserved PCI I/O addresses from that 4GB
# cacheable mapping by making an overlapping uncacheable mapping. However,
# once you do that, it is then impossible to set *any* of the PCI I/O
# address space as write-combining. This is an absolute death-knell to
# certain IB hardware. So, we unroll this mapping here. Instead of
# punching a hole in a single 4GB mapping, we redo the base 4GB mapping as
# a series of discrete mappings that effectively are the same as the 4GB
# mapping minus the hole, and then we delete the uncacheable mappings that
# are used to punch the hole. This then leaves the PCI I/O address space
# unregistered (which defaults it to uncacheable), but available for
# write-combining mappings where needed.
# Run the MTRR fix-up awk script against /proc/mtrr and, when the script
# reports success, unload ib_ipath if it is loaded.
#
# Globals:   MTRR_SCRIPT (read) - path of the awk script
# Returns:   1 when /proc/mtrr or the script file is missing; the awk exit
#            status when awk exits non-zero; otherwise 0 if ib_ipath is not
#            loaded, else the rmmod exit status
check_mtrr_registers()
{
    # Two separate, quoted tests: an empty path or one containing spaces
    # simply counts as "no script" instead of making test(1) error out.
    if [ ! -f /proc/mtrr ] || [ ! -f "$MTRR_SCRIPT" ]; then
        return 1
    fi

    # If we actually change the mtrr registers, then the awk script will
    # return true, and we need to unload the ib_ipath module if it's already
    # loaded.  The udevtrigger in load_hardware_modules will immediately
    # reload the ib_ipath module for us, so there shouldn't be a problem.
    awk -f "$MTRR_SCRIPT" /proc/mtrr 2>/dev/null || return

    if is_module ib_ipath; then
        /sbin/rmmod ib_ipath
    fi
}
# Trigger driver loading for candidate PCI devices, then load the RDMA
# companion module of every low-level driver that ended up loaded.
#
# Globals:   FIXUP_MTRR_REGS (read) - "yes" runs check_mtrr_registers first
# Returns:   the sum of the load_modules exit statuses (0 if nothing failed)
load_hardware_modules()
{
    local rc=0
    local pair driver companion

    [ "$FIXUP_MTRR_REGS" = "yes" ] && check_mtrr_registers

    # We match both class NETWORK and class INFINIBAND devices since our
    # iWARP hardware is listed under class NETWORK.  The side effect of
    # this is that we might cause a non-iWARP network driver to be loaded.
    udevadm trigger --subsystem-match=pci --attr-nomatch=driver --attr-match=class=0x020000 --attr-match=class=0x0c0600
    udevadm settle

    # An entry containing "lhca" in the device tree calls for ib_ehca.
    if [ -r /proc/device-tree ]; then
        if ls /proc/device-tree | grep -q lhca; then
            if ! is_module ib_ehca; then
                load_modules ib_ehca
                rc=$(( rc + $? ))
            fi
        fi
    fi

    # driver:companion pairs.  The two checks are joined with "&&"; writing
    # "is_module X -a ! is_module Y" would only pass the extra words to
    # is_module as ignored arguments and never run the second check.
    for pair in \
        mlx4_core:mlx4_ib \
        mlx4_core:mlx4_en \
        mlx5_core:mlx5_ib \
        cxgb3:iw_cxgb3 \
        cxgb4:iw_cxgb4 \
        be2net:ocrdma \
        enic:usnic_verbs
    do
        driver=${pair%%:*}
        companion=${pair#*:}
        if is_module "$driver" && ! is_module "$companion"; then
            load_modules "$companion"
            rc=$(( rc + $? ))
        fi
    done

    return "$rc"
}
# Work around AMD chipset Errata #58 (the failure message names the chipset
# as AMD-8131).
#
# Does nothing unless /sbin/lspci and /sbin/setpci are executable and lspci
# reports all three PCI IDs 1022:1100, 1022:7450 and 15b3:5a46.  Register 69
# of the 1022:1100 devices is then read, and for each value that differs
# from c0 the register is rewritten to c0 until one write succeeds.
#
# Globals:   CURVAL, val (written)
# Outputs:   a failure message on stdout each time the setpci write fails
errata_58()
{
    local pci_id

    test -x /sbin/lspci && test -x /sbin/setpci || return 0

    # Every one of these devices has to be present; grep looks for the
    # device half of the vendor:device ID in the lspci output.
    for pci_id in 1022:1100 1022:7450 15b3:5a46; do
        /sbin/lspci -nd $pci_id | grep "${pci_id#*:}" > /dev/null || return 0
    done

    CURVAL=$(/sbin/setpci -d 1022:1100 69)
    for val in $CURVAL; do
        [ "${val}" != "c0" ] || continue
        # Stop after the first write that succeeds.
        if /sbin/setpci -d 1022:1100 69=c0; then
            break
        fi
        echo "Failed to apply AMD-8131 Errata #58 workaround"
    done
}
# Apply the workaround for AMD chipset Errata #56 (the failure message below
# names the chipset as AMD-8131).
#
# Does nothing unless /sbin/lspci and /sbin/setpci are executable and lspci
# reports all three PCI IDs 1022:1100, 1022:7450 and 15b3:5a46.
# Globals written: bus, dev, rev, device, num (none are declared local).
# Outputs: failure and warning messages on stdout.
errata_56()
{
# Check AMD chipset issue Errata #56
if test -x /sbin/lspci && test -x /sbin/setpci; then
# All three device IDs must be present before anything is touched.
if ( /sbin/lspci -nd 1022:1100 | grep "1100" > /dev/null ) &&
( /sbin/lspci -nd 1022:7450 | grep "7450" > /dev/null ) &&
( /sbin/lspci -nd 15b3:5a46 | grep "5a46" > /dev/null ); then
bus=""
# Look for AMD-8131 devices (PCI ID 1022:7450).  Only the first two
# colon-separated fields of each verbose setpci output line are kept and
# used as the device selector below.
for dev in `/sbin/setpci -v -f -d 1022:7450 19 | cut -d':' -f1,2`
do
# Read registers 19 and 8 of that device.
# NOTE(review): presumably 19 is the secondary bus number and 8 the
# revision ID -- confirm against the PCI config-space layout.
bus=`/sbin/setpci -s $dev 19`
rev=`/sbin/setpci -s $dev 8`
# Look for a Tavor (15b3:5a46) attached to the secondary bus of this device
for device in `/sbin/setpci -f -s $bus: -d 15b3:5a46 19`
do
# NOTE(review): $rev is compared as a decimal integer; if setpci reports a
# value containing hex digits a-f the test errors out and the else branch
# (continue) is taken.  Confirm whether 13 is meant as decimal or hex.
if [ $rev -lt 13 ]; then
# NOTE(review): the write targets 15b3:5a44 while the scan above matched
# 15b3:5a46 -- confirm this is intentional.
/sbin/setpci -d 15b3:5a44 72=14
if [ $? -eq 0 ]; then
# Workaround applied; stop scanning this bus.
break
else
echo
echo "Failed to apply AMD-8131 Errata #56 workaround"
fi
else
continue
fi
# Reached only when the setpci write above failed (success breaks out of
# the loop, rev >= 13 continues).
# If more than one device is on the bus then issue a warning.  num is the
# number of lines setpci prints for register 0 on that bus, with spaces
# stripped from the wc output.
num=`/sbin/setpci -f -s $bus: 0 | wc -l | sed 's/\ *//g'`
if [ $num -gt 1 ]; then
echo "Warning: your current PCI-X configuration might be incorrect."
echo "see AMD-8131 Errata 56 for more details."
fi
done
done
fi
fi
}
# Main sequence: hardware drivers first, then the core, CM, user-space and
# upper-layer protocol module groups.  Every failure status is added to RC,
# which becomes the script's exit status.
#
# RC starts from an explicit 0 so that a value inherited from the
# environment or set by the sourced config file cannot skew the result.
RC=0

load_hardware_modules
RC=$(( RC + $? ))

# The module lists are deliberately unquoted: each word is one module name.
load_modules $LOAD_CORE_MODULES
RC=$(( RC + $? ))
load_modules $LOAD_CORE_CM_MODULES
RC=$(( RC + $? ))
load_modules $LOAD_CORE_USER_MODULES
RC=$(( RC + $? ))
load_modules $LOAD_ULP_MODULES
RC=$(( RC + $? ))

# Chipset workarounds and SR-IOV setup; their statuses do not affect RC.
errata_58
errata_56
/usr/libexec/rdma-set-sriov-vf

# Exit statuses are taken modulo 256; cap the sum so that accumulated
# failures can never wrap around to 0 ("success").
if [ "$RC" -gt 255 ]; then
    RC=255
fi
exit "$RC"