Date: Mon, 27 Oct 2014 19:39:49 +0800 From: James Pan <jiaming.pan@yahoo.com> To: "freebsd-infiniband@freebsd.org" <freebsd-infiniband@freebsd.org> Subject: Infiniband loopback fails to connect Message-ID: <E93F1DC5-FEBE-4E52-BDEF-20641A593F64@yahoo.com>
next in thread | raw e-mail | index | archive | help
Hi, I've configured InfiniBand on two FreeBSD machines (rd1 and rd2). The IB interface appears as ib0 on both machines; I assigned 10.9.0.1 to ib0 on rd1 and 10.9.0.2 to ib0 on rd2. Then I downloaded some sample code from http://thegeekinthecorner.wordpress.com and modified it a little; the code is attached below. The problem I met is: if I run the server on rd2 and then run the client from rd1, the program works as expected. If I run both the server and the client on the same host, the client fails to connect to the server: root@rd2:~/the-geek-in-the-corner/01_basic-client-server # ./server listening on port 21277. root@rd2:~/the-geek-in-the-corner/01_basic-client-server # ./client 10.9.0.2 21277 event 1, status -60 on_event: unknown event. It looks like rdma_resolve_addr() has failed. I did some debugging on the driver and the cause seems to be: in the InfiniBand driver, addr_resolve() (in file /usr/src/sys/ofed/drivers/infiniband/core/addr.c) depends on arpresolve() to resolve the address, but unfortunately the ifp passed to arpresolve() is lo0, because the ifp is taken from the route table, and the route table on rd2 is: root@rd2:~/the-geek-in-the-corner/01_basic-client-server # netstat -rn Routing tables Internet: Destination Gateway Flags Refs Use Netif Expire default 192.168.1.1 UGS 0 0 ix0 10.9.0.0/24 link#5 U 0 1 ib0 10.9.0.2 link#5 UHS 0 0 lo0 <----------------- 127.0.0.1 link#4 UH 0 0 lo0 As the destination IP address is 10.9.0.2, lo0 is found and passed to arpresolve(); lo0 doesn't have an address, so it fails. I made some changes to the driver to pass the correct ifp to addr_resolve(); this time addr_resolve() passed but rdma_resolve_route() failed. Could anyone who has experience with InfiniBand programming help take a look? Your help is very much appreciated, thanks a lot! 
------------------------------ the sample code = -------------------------------- root@rd2:~/the-geek-in-the-corner/01_basic-client-server # cat server.c #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <rdma/rdma_cma.h> #define TEST_NZ(x) do { if ( (x)) die("error: " #x " failed (returned = non-zero)." ); } while (0) #define TEST_Z(x) do { if (!(x)) die("error: " #x " failed (returned = zero/null)."); } while (0) const int BUFFER_SIZE =3D 1024; struct context { struct ibv_context *ctx; struct ibv_pd *pd; struct ibv_cq *cq; struct ibv_comp_channel *comp_channel; pthread_t cq_poller_thread; }; struct connection { struct ibv_qp *qp; struct ibv_mr *recv_mr; struct ibv_mr *send_mr; char *recv_region; char *send_region; }; static void die(const char *reason); static void build_context(struct ibv_context *verbs); static void build_qp_attr(struct ibv_qp_init_attr *qp_attr); static void * poll_cq(void *); static void post_receives(struct connection *conn); static void register_memory(struct connection *conn); static void on_completion(struct ibv_wc *wc); static int on_connect_request(struct rdma_cm_id *id); static int on_connection(void *context); static int on_disconnect(struct rdma_cm_id *id); static int on_event(struct rdma_cm_event *event); static struct context *s_ctx =3D NULL; int main(int argc, char **argv) { struct sockaddr_in addr; struct rdma_cm_event *event =3D NULL; struct rdma_cm_id *listener =3D NULL; struct rdma_event_channel *ec =3D NULL; uint16_t port =3D 0; memset(&addr, 0, sizeof(addr)); addr.sin_family =3D AF_INET; addr.sin_len =3D sizeof addr; TEST_Z(ec =3D rdma_create_event_channel()); TEST_NZ(rdma_create_id(ec, &listener, NULL, RDMA_PS_TCP)); return 0; } int on_disconnect(struct rdma_cm_id *id) { struct connection *conn =3D (struct connection *)id->context; printf("peer disconnected.\n"); rdma_destroy_qp(id); ibv_dereg_mr(conn->send_mr); ibv_dereg_mr(conn->recv_mr); free(conn->send_region); 
free(conn->recv_region); free(conn); rdma_destroy_id(id); return 0; } int on_event(struct rdma_cm_event *event) { int r =3D 0; if (event->event =3D=3D RDMA_CM_EVENT_CONNECT_REQUEST) r =3D on_connect_request(event->id); else if (event->event =3D=3D RDMA_CM_EVENT_ESTABLISHED) r =3D on_connection(event->id->context); else if (event->event =3D=3D RDMA_CM_EVENT_DISCONNECTED) r =3D on_disconnect(event->id); else die("on_event: unknown event."); return r; } root@rd2:~/the-geek-in-the-corner/01_basic-client-server # cat client.c #include <netdb.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <rdma/rdma_cma.h> #define TEST_NZ(x) do { if ( (x)) die("error: " #x " failed (returned = non-zero)." ); } while (0) #define TEST_Z(x) do { if (!(x)) die("error: " #x " failed (returned = zero/null)."); } while (0) const int BUFFER_SIZE =3D 1024; const int TIMEOUT_IN_MS =3D 500; /* ms */ struct context { struct ibv_context *ctx; struct ibv_pd *pd; struct ibv_cq *cq; struct ibv_comp_channel *comp_channel; pthread_t cq_poller_thread; }; struct connection { struct rdma_cm_id *id; struct ibv_qp *qp; struct ibv_mr *recv_mr; struct ibv_mr *send_mr; char *recv_region; char *send_region; int num_completions; }; static void die(const char *reason); static void build_context(struct ibv_context *verbs); static void build_qp_attr(struct ibv_qp_init_attr *qp_attr); static void * poll_cq(void *); static void post_receives(struct connection *conn); static void register_memory(struct connection *conn); static int on_addr_resolved(struct rdma_cm_id *id); static void on_completion(struct ibv_wc *wc); static int on_connection(void *context); static int on_disconnect(struct rdma_cm_id *id); static int on_event(struct rdma_cm_event *event); static int on_route_resolved(struct rdma_cm_id *id); static struct context *s_ctx =3D NULL; int main(int argc, char **argv) { struct addrinfo *addr; struct rdma_cm_event *event =3D NULL; struct rdma_cm_id *conn=3D NULL; struct 
rdma_event_channel *ec =3D NULL; if (argc !=3D 3) die("usage: client <server-address> <server-port>"); TEST_NZ(getaddrinfo(argv[1], argv[2], NULL, &addr)); TEST_Z(ec =3D rdma_create_event_channel()); TEST_NZ(rdma_create_id(ec, &conn, NULL, RDMA_PS_TCP)); TEST_NZ(rdma_resolve_addr(conn, NULL, addr->ai_addr, TIMEOUT_IN_MS)); freeaddrinfo(addr); while (rdma_get_cm_event(ec, &event) =3D=3D 0) { struct rdma_cm_event event_copy; memcpy(&event_copy, event, sizeof(*event)); rdma_ack_cm_event(event); if (on_event(&event_copy)) break; } free(conn->recv_region); free(conn); rdma_destroy_id(id); return 1; /* exit event loop */ } int on_event(struct rdma_cm_event *event) { int r =3D 0; if (event->event =3D=3D RDMA_CM_EVENT_ADDR_RESOLVED) r =3D on_addr_resolved(event->id); else if (event->event =3D=3D RDMA_CM_EVENT_ROUTE_RESOLVED) r =3D on_route_resolved(event->id); else if (event->event =3D=3D RDMA_CM_EVENT_ESTABLISHED) r =3D on_connection(event->id->context); else if (event->event =3D=3D RDMA_CM_EVENT_DISCONNECTED) r =3D on_disconnect(event->id); else { printf("event %d, status %d\n", event->event, event->status); die("on_event: unknown event."); } return r; } int on_route_resolved(struct rdma_cm_id *id) { struct rdma_conn_param cm_params; printf("route resolved.\n"); memset(&cm_params, 0, sizeof(cm_params)); TEST_NZ(rdma_connect(id, &cm_params)); return 0; } =20=
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?E93F1DC5-FEBE-4E52-BDEF-20641A593F64>