Newer
Older
// SPDX-License-Identifier: GPL-2.0-only
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
* AF_SMC protocol family socket handler keeping the AF_INET sock address type
* applies to SOCK_STREAM sockets only
* offers an alternative communication option for TCP-protocol sockets
* applicable with RoCE-cards only
*
* Initial restrictions:
* - support for alternate links postponed
*
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
* based on prototype from Frank Blaschka
*/
#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/sched/signal.h>
#include <linux/rcupdate_wait.h>
#include <asm/ioctls.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include "smc_netns.h"
#include "smc_netlink.h"
static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
* creation on server
*/
static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group
* creation on client
struct workqueue_struct *smc_hs_wq; /* wq for handshake work */
struct workqueue_struct *smc_close_wq; /* wq for close work */
static void smc_tcp_listen_work(struct work_struct *);
static void smc_connect_work(struct work_struct *);
static void smc_set_keepalive(struct sock *sk, int val)
{
struct smc_sock *smc = smc_sk(sk);
smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}
static struct smc_hashinfo smc_v4_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};
static struct smc_hashinfo smc_v6_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};
int smc_hash_sk(struct sock *sk)
{
struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
struct hlist_head *head;
head = &h->ht;
write_lock_bh(&h->lock);
sk_add_node(sk, head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&h->lock);
return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);
void smc_unhash_sk(struct sock *sk)
{
struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
write_lock_bh(&h->lock);
if (sk_del_node_init(sk))
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);
struct proto smc_proto = {
.name = "SMC",
.owner = THIS_MODULE,
.keepalive = smc_set_keepalive,
.hash = smc_hash_sk,
.unhash = smc_unhash_sk,
.slab_flags = SLAB_TYPESAFE_BY_RCU,
struct proto smc_proto6 = {
.name = "SMC6",
.owner = THIS_MODULE,
.keepalive = smc_set_keepalive,
.hash = smc_hash_sk,
.unhash = smc_unhash_sk,
.obj_size = sizeof(struct smc_sock),
.h.smc_hash = &smc_v6_hashinfo,
.slab_flags = SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);
static void smc_restore_fallback_changes(struct smc_sock *smc)
{
if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
smc->clcsock->file->private_data = smc->sk.sk_socket;
smc->clcsock->file = NULL;
}
static int __smc_release(struct smc_sock *smc)
struct sock *sk = &smc->sk;
if (!smc->use_fallback) {
rc = smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
if (sk->sk_state != SMC_CLOSED) {
if (sk->sk_state != SMC_LISTEN &&
sk->sk_state != SMC_INIT)
sock_put(sk); /* passive closing */
if (sk->sk_state == SMC_LISTEN) {
/* wake up clcsock accept */
rc = kernel_sock_shutdown(smc->clcsock,
SHUT_RDWR);
}
sk->sk_state = SMC_CLOSED;
sk->sk_state_change(sk);
smc_restore_fallback_changes(smc);
sk->sk_prot->unhash(sk);
if (sk->sk_state == SMC_CLOSED) {
if (smc->clcsock) {
release_sock(sk);
smc_clcsock_release(smc);
lock_sock(sk);
}
if (!smc->use_fallback)
smc_conn_free(&smc->conn);
}
return rc;
}
static int smc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
int rc = 0;
if (!sk)
goto out;
smc = smc_sk(sk);
/* cleanup for a dangling non-blocking connect */
if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
tcp_abort(smc->clcsock->sk, ECONNABORTED);
flush_work(&smc->connect_work);
if (sk->sk_state == SMC_LISTEN)
/* smc_close_non_accepted() is called and acquires
* sock lock for child sockets again
*/
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
else
lock_sock(sk);
rc = __smc_release(smc);
/* detach socket */
sock_orphan(sk);
sock->sk = NULL;
release_sock(sk);
sock_put(sk); /* final sock_put */
}
static void smc_destruct(struct sock *sk)
{
if (sk->sk_state != SMC_CLOSED)
return;
if (!sock_flag(sk, SOCK_DEAD))
return;
sk_refcnt_debug_dec(sk);
}
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
int protocol)
prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
if (!sk)
return NULL;
sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_WORK(&smc->connect_work, smc_connect_work);
INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
INIT_LIST_HEAD(&smc->accept_q);
spin_lock_init(&smc->accept_q_lock);
spin_lock_init(&smc->conn.send_lock);
mutex_init(&smc->clcsock_release_lock);
return sk;
}
static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
int addr_len)
{
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct sock *sk = sock->sk;
struct smc_sock *smc;
int rc;
smc = smc_sk(sk);
/* replicate tests from inet_bind(), to be safe wrt. future changes */
rc = -EINVAL;
if (addr_len < sizeof(struct sockaddr_in))
goto out;
rc = -EAFNOSUPPORT;
if (addr->sin_family != AF_INET &&
addr->sin_family != AF_INET6 &&
addr->sin_family != AF_UNSPEC)
goto out;
/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
if (addr->sin_family == AF_UNSPEC &&
addr->sin_addr.s_addr != htonl(INADDR_ANY))
goto out;
lock_sock(sk);
/* Check if socket is already active */
rc = -EINVAL;
if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
goto out_rel;
smc->clcsock->sk->sk_reuse = sk->sk_reuse;
rc = kernel_bind(smc->clcsock, uaddr, addr_len);
out_rel:
release_sock(sk);
out:
return rc;
}
static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
unsigned long mask)
{
/* options we don't get control via setsockopt for */
nsk->sk_type = osk->sk_type;
nsk->sk_sndbuf = osk->sk_sndbuf;
nsk->sk_rcvbuf = osk->sk_rcvbuf;
nsk->sk_sndtimeo = osk->sk_sndtimeo;
nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
nsk->sk_mark = osk->sk_mark;
nsk->sk_priority = osk->sk_priority;
nsk->sk_rcvlowat = osk->sk_rcvlowat;
nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
nsk->sk_err = osk->sk_err;
nsk->sk_flags &= ~mask;
nsk->sk_flags |= osk->sk_flags & mask;
}
#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
(1UL << SOCK_KEEPOPEN) | \
(1UL << SOCK_LINGER) | \
(1UL << SOCK_BROADCAST) | \
(1UL << SOCK_TIMESTAMP) | \
(1UL << SOCK_DBG) | \
(1UL << SOCK_RCVTSTAMP) | \
(1UL << SOCK_RCVTSTAMPNS) | \
(1UL << SOCK_LOCALROUTE) | \
(1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
(1UL << SOCK_RXQ_OVFL) | \
(1UL << SOCK_WIFI_STATUS) | \
(1UL << SOCK_NOFCS) | \
(1UL << SOCK_FILTER_LOCKED) | \
(1UL << SOCK_TSTAMP_NEW))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
* clc socket (since smc is not called for these options from net/core)
*/
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}
#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
(1UL << SOCK_KEEPOPEN) | \
(1UL << SOCK_LINGER) | \
(1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
/* register the new rmb on all links */
static int smcr_lgr_reg_rmbs(struct smc_link *link,
struct smc_buf_desc *rmb_desc)
{
struct smc_link_group *lgr = link->lgr;
int i, rc = 0;
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (rc)
return rc;
/* protect against parallel smc_llc_cli_rkey_exchange() and
* parallel smcr_link_reg_rmb()
*/
mutex_lock(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&lgr->lnk[i]))
continue;
rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
if (rc)
/* exchange confirm_rkey msg with peer */
rc = smc_llc_do_confirm_rkey(link, rmb_desc);
if (rc) {
rc = -EFAULT;
goto out;
}
rmb_desc->is_conf_rkey = true;
out:
mutex_unlock(&lgr->llc_conf_mutex);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
static int smcr_clnt_conf_first_link(struct smc_sock *smc)
struct smc_link *link = smc->conn.lnk;
struct smc_llc_qentry *qentry;
int rc;
/* receive CONFIRM LINK request from server over RoCE fabric */
qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
SMC_LLC_CONFIRM_LINK);
if (!qentry) {
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
if (rc)
return SMC_CLC_DECL_RMBE_EC;
rc = smc_ib_modify_qp_rts(link);
if (rc)
if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
/* confirm_rkey is implicit on 1st contact */
smc->conn.rmb_desc->is_conf_rkey = true;
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
smc_llc_link_active(link);
smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
/* optional 2nd link, receive ADD LINK request from server */
qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
SMC_LLC_ADD_LINK);
if (!qentry) {
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
if (rc == -EAGAIN)
rc = 0; /* no DECLINE received, go with one link */
return rc;
smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
smc_llc_cli_add_link(link, qentry);
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
static bool smc_isascii(char *hostname)
{
int i;
for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
if (!isascii(hostname[i]))
return false;
return true;
}
static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)clc;
struct smc_clc_first_contact_ext *fce;
int clc_v2_len;
if (clc->hdr.version == SMC_V1 ||
!(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
return;
if (smc->conn.lgr->is_smcd) {
memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid,
SMC_MAX_EID_LEN);
clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
d1);
} else {
memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid,
SMC_MAX_EID_LEN);
clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
r1);
}
fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len);
smc->conn.lgr->peer_os = fce->os_type;
smc->conn.lgr->peer_smc_release = fce->release;
if (smc_isascii(fce->hostname))
memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
SMC_MAX_HOSTNAME_LEN);
}
static void smcr_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
int bufsize = smc_uncompress_bufsize(clc->r0.rmbe_size);
smc->conn.peer_rmbe_idx = clc->r0.rmbe_idx;
smc->conn.local_tx_ctrl.token = ntohl(clc->r0.rmbe_alert_token);
smc->conn.peer_rmbe_size = bufsize;
atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
static void smcd_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size);
smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx;
smc->conn.peer_token = clc->d0.token;
/* msg header takes up space in the buffer */
smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
}
static void smc_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
if (smc->conn.lgr->is_smcd)
smcd_conn_save_peer_info(smc, clc);
else
smcr_conn_save_peer_info(smc, clc);
smc_conn_save_peer_info_fce(smc, clc);
static void smc_link_save_peer_info(struct smc_link *link,
struct smc_clc_msg_accept_confirm *clc,
struct smc_init_info *ini)
link->peer_qpn = ntoh24(clc->r0.qpn);
memcpy(link->peer_gid, ini->peer_gid, SMC_GID_SIZE);
memcpy(link->peer_mac, ini->peer_mac, sizeof(link->peer_mac));
link->peer_psn = ntoh24(clc->r0.psn);
link->peer_mtu = clc->r0.qp_mtu;
static void smc_stat_inc_fback_rsn_cnt(struct smc_sock *smc,
struct smc_stats_fback *fback_arr)
{
int cnt;
for (cnt = 0; cnt < SMC_MAX_FBACK_RSN_CNT; cnt++) {
if (fback_arr[cnt].fback_code == smc->fallback_rsn) {
fback_arr[cnt].count++;
break;
}
if (!fback_arr[cnt].fback_code) {
fback_arr[cnt].fback_code = smc->fallback_rsn;
fback_arr[cnt].count++;
break;
}
}
}
static void smc_stat_fallback(struct smc_sock *smc)
{
struct net *net = sock_net(&smc->sk);
mutex_lock(&net->smc.mutex_fback_rsn);
smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->srv);
net->smc.fback_rsn->srv_fback_cnt++;
smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->clnt);
net->smc.fback_rsn->clnt_fback_cnt++;
mutex_unlock(&net->smc.mutex_fback_rsn);
}
static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
{
smc->use_fallback = true;
smc->fallback_rsn = reason_code;
smc_stat_fallback(smc);
trace_smc_switch_to_fallback(smc, reason_code);
if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
smc->clcsock->file = smc->sk.sk_socket->file;
smc->clcsock->file->private_data = smc->clcsock;
smc->clcsock->wq.fasync_list =
smc->sk.sk_socket->wq.fasync_list;
/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
smc_switch_to_fallback(smc, reason_code);
smc_copy_sock_settings_to_clc(smc);
if (smc->sk.sk_state == SMC_INIT)
smc->sk.sk_state = SMC_ACTIVE;
return 0;
}
/* decline and fall back during connect */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
u8 version)
struct net *net = sock_net(&smc->sk);
if (reason_code < 0) { /* error, fallback is not possible */
this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
if (smc->sk.sk_state == SMC_INIT)
sock_put(&smc->sk); /* passive closing */
if (reason_code != SMC_CLC_DECL_PEERDECL) {
rc = smc_clc_send_decline(smc, reason_code, version);
this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
if (smc->sk.sk_state == SMC_INIT)
sock_put(&smc->sk); /* passive closing */
return smc_connect_fallback(smc, reason_code);
static void smc_conn_abort(struct smc_sock *smc, int local_first)
smc_lgr_cleanup_early(&smc->conn);
else
smc_conn_free(&smc->conn);
}
/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
/* PNET table look up: search active ib_device and port
* within same PNETID that also contains the ethernet device
* used for the internal TCP socket
*/
smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
if (!ini->check_smcrv2 && !ini->ib_dev)
return SMC_CLC_DECL_NOSMCRDEV;
if (ini->check_smcrv2 && !ini->smcrv2.ib_dev_v2)
return SMC_CLC_DECL_NOSMCRDEV;
/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
{
/* Find ISM device with same PNETID as connecting interface */
smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
return SMC_CLC_DECL_NOSMCDDEV;
else
ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]);
/* is chid unique for the ism devices that are already determined? */
static bool smc_find_ism_v2_is_unique_chid(u16 chid, struct smc_init_info *ini,
int cnt)
{
int i = (!ini->ism_dev[0]) ? 1 : 0;
for (; i < cnt; i++)
if (ini->ism_chid[i] == chid)
return false;
return true;
}
/* determine possible V2 ISM devices (either without PNETID or with PNETID plus
* PNETID matching net_device)
*/
static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
struct smc_init_info *ini)
{
int rc = SMC_CLC_DECL_NOSMCDDEV;
struct smcd_dev *smcd;
int i = 1;
if (smcd_indicated(ini->smc_type_v1))
rc = 0; /* already initialized for V1 */
mutex_lock(&smcd_dev_list.mutex);
list_for_each_entry(smcd, &smcd_dev_list.list, list) {
if (smcd->going_away || smcd == ini->ism_dev[0])
continue;
chid = smc_ism_get_chid(smcd);
if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
continue;
if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
ini->ism_dev[i] = smcd;
ini->ism_chid[i] = chid;
ini->is_smcd = true;
rc = 0;
i++;
if (i > SMC_MAX_ISM_DEVS)
break;
}
}
mutex_unlock(&smcd_dev_list.mutex);
ini->ism_offered_cnt = i - 1;
if (!ini->ism_dev[0] && !ini->ism_dev[1])
ini->smcd_version = 0;
return rc;
}
/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id))
return SMC_CLC_DECL_ISMVLANERR;
static int smc_find_proposal_devices(struct smc_sock *smc,
struct smc_init_info *ini)
{
int rc = 0;
/* check if there is an ism device available */
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
if (!(ini->smcd_version & SMC_V1) ||
smc_find_ism_device(smc, ini) ||
smc_connect_ism_vlan_setup(smc, ini))
ini->smcd_version &= ~SMC_V1;
/* else ISM V1 is supported for this connection */
/* check if there is an rdma device available */
if (!(ini->smcr_version & SMC_V1) ||
smc_find_rdma_device(smc, ini))
ini->smcr_version &= ~SMC_V1;
/* else RDMA is supported for this connection */
ini->smc_type_v1 = smc_indicated_type(ini->smcd_version & SMC_V1,
ini->smcr_version & SMC_V1);
/* check if there is an ism v2 device available */
if (!(ini->smcd_version & SMC_V2) ||
!smc_ism_is_v2_capable() ||
smc_find_ism_v2_device_clnt(smc, ini))
ini->smcd_version &= ~SMC_V2;
/* check if there is an rdma v2 device available */
ini->check_smcrv2 = true;
ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
if (!(ini->smcr_version & SMC_V2) ||
smc->clcsock->sk->sk_family != AF_INET ||
!smc_clc_ueid_count() ||
smc_find_rdma_device(smc, ini))
ini->smcr_version &= ~SMC_V2;
ini->check_smcrv2 = false;
ini->smc_type_v2 = smc_indicated_type(ini->smcd_version & SMC_V2,
ini->smcr_version & SMC_V2);
/* if neither ISM nor RDMA are supported, fallback */
if (ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
rc = SMC_CLC_DECL_NOSMCDEV;
return rc;
}
/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
* used, the VLAN ID will be registered again during the connection setup.
*/
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
if (!smcd_indicated(ini->smc_type_v1))
if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev[0], ini->vlan_id))
return SMC_CLC_DECL_CNFERR;
return 0;
}
#define SMC_CLC_MAX_ACCEPT_LEN \
(sizeof(struct smc_clc_msg_accept_confirm_v2) + \
sizeof(struct smc_clc_first_contact_ext) + \
/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm_v2 *aclc2,
/* do inband token exchange */
rc = smc_clc_send_proposal(smc, ini);
if (rc)
return rc;
/* receive SMC Accept CLC message */
return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN,
SMC_CLC_ACCEPT, CLC_WAIT_TIME);
void smc_fill_gid_list(struct smc_link_group *lgr,
struct smc_gidlist *gidlist,
struct smc_ib_device *known_dev, u8 *known_gid)
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
{
struct smc_init_info *alt_ini = NULL;
memset(gidlist, 0, sizeof(*gidlist));
memcpy(gidlist->list[gidlist->len++], known_gid, SMC_GID_SIZE);
alt_ini = kzalloc(sizeof(*alt_ini), GFP_KERNEL);
if (!alt_ini)
goto out;
alt_ini->vlan_id = lgr->vlan_id;
alt_ini->check_smcrv2 = true;
alt_ini->smcrv2.saddr = lgr->saddr;
smc_pnet_find_alt_roce(lgr, alt_ini, known_dev);
if (!alt_ini->smcrv2.ib_dev_v2)
goto out;
memcpy(gidlist->list[gidlist->len++], alt_ini->smcrv2.ib_gid_v2,
SMC_GID_SIZE);
out:
kfree(alt_ini);
}
static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
struct smc_init_info *ini)
{
struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
struct smc_clc_first_contact_ext *fce =
(struct smc_clc_first_contact_ext *)
(((u8 *)clc_v2) + sizeof(*clc_v2));
if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
return 0;
if (fce->v2_direct) {
memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
ini->smcrv2.uses_gateway = false;
} else {
if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
ini->smcrv2.nexthop_mac,
&ini->smcrv2.uses_gateway))
return SMC_CLC_DECL_NOROUTE;
if (!ini->smcrv2.uses_gateway) {
/* mismatch: peer claims indirect, but its direct */
return SMC_CLC_DECL_NOINDIRECT;
}
}
return 0;
}
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
int i, reason_code = 0;
struct smc_link *link;
ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN);
memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE);
memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN);
reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini);
if (reason_code)
return reason_code;
mutex_lock(&smc_client_lgr_pending);
reason_code = smc_conn_create(smc, ini);
if (reason_code) {
mutex_unlock(&smc_client_lgr_pending);
return reason_code;
smc_conn_save_peer_info(smc, aclc);
link = smc->conn.lnk;
} else {
/* set link that was assigned by server */
link = NULL;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *l = &smc->conn.lgr->lnk[i];
if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
!memcmp(l->peer_gid, &aclc->r0.lcl.gid,
SMC_GID_SIZE) &&
(aclc->hdr.version > SMC_V1 ||
!memcmp(l->peer_mac, &aclc->r0.lcl.mac,
sizeof(l->peer_mac)))) {
link = l;
break;
}
}
if (!link) {
reason_code = SMC_CLC_DECL_NOSRVLINK;
goto connect_abort;
}
smc_switch_link_and_count(&smc->conn, link);
/* create send buffer and rmb */
if (smc_buf_create(smc, false)) {
reason_code = SMC_CLC_DECL_MEM;
goto connect_abort;
}
smc_link_save_peer_info(link, aclc, ini);
if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) {
reason_code = SMC_CLC_DECL_ERR_RTOK;
goto connect_abort;
}
smc_close_init(smc);
smc_rx_init(smc);
if (smc_ib_ready_link(link)) {
reason_code = SMC_CLC_DECL_ERR_RDYLNK;
goto connect_abort;
}
if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) {
reason_code = SMC_CLC_DECL_ERR_REGRMB;
goto connect_abort;
}
smc_rmb_sync_sg_for_device(&smc->conn);
if (aclc->hdr.version > SMC_V1) {
struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
eid = clc_v2->r1.eid;
if (ini->first_contact_local)
smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist,
link->smcibdev, link->gid);
}
reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
aclc->hdr.version, eid, ini);
smc_tx_init(smc);
smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
reason_code = smcr_clnt_conf_first_link(smc);
smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
mutex_unlock(&smc_client_lgr_pending);
smc_copy_sock_settings_to_clc(smc);
if (smc->sk.sk_state == SMC_INIT)
smc->sk.sk_state = SMC_ACTIVE;
smc_conn_abort(smc, ini->first_contact_local);
mutex_unlock(&smc_client_lgr_pending);
smc->connect_nonblock = 0;
return reason_code;
/* The server has chosen one of the proposed ISM devices for the communication.
* Determine from the CHID of the received CLC ACCEPT the ISM device chosen.
*/
static int
smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc,
struct smc_init_info *ini)
{
int i;
for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
ini->ism_selected = i;
return 0;
}
}
return -EPROTO;
}
/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,