From owner-svn-src-projects@freebsd.org  Mon Nov 13 10:49:20 2017
From: Hans Petter Selasky <hselasky@FreeBSD.org>
Date: Mon, 13 Nov 2017 10:49:19 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r325760 - in projects/bsd_rdma_4_9/sys: conf dev/mlx4/mlx4_ib modules/mlx4ib
Message-Id: <201711131049.vADAnJff074526@repo.freebsd.org>

Author: hselasky
Date: Mon Nov 13 10:49:18 2017
New Revision: 325760
URL: https://svnweb.freebsd.org/changeset/base/325760

Log:
  Update mlx4ib(4) to Linux 4.9.
  Sponsored by:	Mellanox Technologies

Deleted:
  projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_exp.h
  projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_exp.c
  projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/user.h
Modified:
  projects/bsd_rdma_4_9/sys/conf/files
  projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib.h
  projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_ah.c
  projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_alias_GUID.c
  projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_cm.c
  projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_cq.c
  projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_mad.c
  projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c
  projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_mcg.c
  projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_mr.c
  projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_qp.c
  projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_srq.c
  projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_sysfs.c
  projects/bsd_rdma_4_9/sys/modules/mlx4ib/Makefile

Modified: projects/bsd_rdma_4_9/sys/conf/files
==============================================================================
--- projects/bsd_rdma_4_9/sys/conf/files	Mon Nov 13 10:45:31 2017	(r325759)
+++ projects/bsd_rdma_4_9/sys/conf/files	Mon Nov 13 10:49:18 2017	(r325760)
@@ -4550,8 +4550,6 @@ dev/mlx4/mlx4_ib/mlx4_ib_mad.c	optional mlx4ib pci o
 	compile-with "${OFED_C}"
 dev/mlx4/mlx4_ib/mlx4_ib_main.c		optional mlx4ib pci ofed \
 	compile-with "${OFED_C}"
-dev/mlx4/mlx4_ib/mlx4_ib_exp.c		optional mlx4ib pci ofed \
-	compile-with "${OFED_C}"
 dev/mlx4/mlx4_ib/mlx4_ib_mr.c		optional mlx4ib pci ofed \
 	compile-with "${OFED_C}"
 dev/mlx4/mlx4_ib/mlx4_ib_qp.c		optional mlx4ib pci ofed \

Modified: projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib.h
==============================================================================
--- projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib.h	Mon Nov 13 10:45:31 2017	(r325759)
+++ projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib.h	Mon Nov 13 10:49:18 2017	(r325760)
@@ -41,6 +41,8 @@
 #include
 #include
 
+#include
+
 #include
 #include
 #include
@@ -59,9 +61,6 @@
 #define mlx4_ib_warn(ibdev, format, arg...)					\
 	dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
 
-#define mlx4_ib_info(ibdev, format, arg...)					\
-	dev_info((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
-
 enum {
 	MLX4_IB_SQ_MIN_WQE_SHIFT = 6,
 	MLX4_IB_MAX_HEADROOM	 = 2048
@@ -75,17 +74,25 @@ extern int mlx4_ib_sm_guid_assign;
 extern struct proc_dir_entry *mlx4_mrs_dir_entry;
 
 #define MLX4_IB_UC_STEER_QPN_ALIGN 1
-#define MLX4_IB_UC_MAX_NUM_QPS (256 * 1024)
+#define MLX4_IB_UC_MAX_NUM_QPS 256
 
+enum hw_bar_type {
+	HW_BAR_BF,
+	HW_BAR_DB,
+	HW_BAR_CLOCK,
+	HW_BAR_COUNT
+};
+
-#define MLX4_IB_MMAP_CMD_MASK 0xFF
-#define MLX4_IB_MMAP_CMD_BITS 8
+struct mlx4_ib_vma_private_data {
+	struct vm_area_struct *vma;
+};
 
 struct mlx4_ib_ucontext {
 	struct ib_ucontext	ibucontext;
 	struct mlx4_uar		uar;
 	struct list_head	db_page_list;
 	struct mutex		db_page_mutex;
+	struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT];
 };
 
 struct mlx4_ib_pd {
@@ -111,11 +118,6 @@ struct mlx4_ib_cq_resize {
 	int			cqe;
 };
 
-struct mlx4_shared_mr_info {
-	int mr_id;
-	struct ib_umem *umem;
-};
-
 struct mlx4_ib_cq {
@@ -127,15 +129,22 @@ struct mlx4_ib_cq {
 	struct ib_umem	       *umem;
 	struct ib_umem	       *resize_umem;
 	int			create_flags;
+	/* List of qps that it serves.*/
+	struct list_head		send_qp_list;
+	struct list_head		recv_qp_list;
 };
 
+#define MLX4_MR_PAGES_ALIGN 0x40
+
 struct mlx4_ib_mr {
 	struct ib_mr		ibmr;
+	__be64			*pages;
+	dma_addr_t		page_map;
+	u32			npages;
+	u32			max_pages;
 	struct mlx4_mr		mmr;
 	struct ib_umem	       *umem;
-	struct mlx4_shared_mr_info *smr_info;
-	atomic_t		invalidated;
-	struct completion	invalidation_comp;
+	size_t			page_map_size;
 };
 
 struct mlx4_ib_mw {
@@ -143,21 +152,22 @@ struct mlx4_ib_mw {
 	struct mlx4_mw		mmw;
 };
 
-struct mlx4_ib_fast_reg_page_list {
-	struct ib_fast_reg_page_list	ibfrpl;
-	__be64			       *mapped_page_list;
-	dma_addr_t			map;
-};
-
 struct mlx4_ib_fmr {
 	struct ib_fmr           ibfmr;
 	struct mlx4_fmr         mfmr;
 };
 
+#define MAX_REGS_PER_FLOW 2
+
+struct mlx4_flow_reg_id {
+	u64 id;
+	u64 mirror;
+};
+
 struct mlx4_ib_flow {
 	struct ib_flow ibflow;
 	/* translating DMFS verbs sniffer rule to FW API requires two reg IDs */
-	u64 reg_id[2];
+	struct mlx4_flow_reg_id reg_id[MAX_REGS_PER_FLOW];
 };
 
 struct mlx4_ib_wq {
@@ -172,13 +182,18 @@ struct mlx4_ib_wq {
 	unsigned		tail;
 };
 
+enum {
+	MLX4_IB_QP_CREATE_ROCE_V2_GSI = IB_QP_CREATE_RESERVED_START
+};
+
 enum mlx4_ib_qp_flags {
 	MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
 	MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
-	MLX4_IB_QP_CAP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL,
-	MLX4_IB_QP_CAP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND,
-	MLX4_IB_QP_CAP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV,
 	MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
+	MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
+
+	/* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
+	MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
 	MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
 	MLX4_IB_SRIOV_SQP = 1 << 31,
};
@@ -190,13 +205,6 @@ struct mlx4_ib_gid_entry {
 	u8			port;
 };
 
-enum mlx4_ib_mmap_cmd {
-	MLX4_IB_MMAP_UAR_PAGE		= 0,
-	MLX4_IB_MMAP_BLUE_FLAME_PAGE	= 1,
-	MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES	= 2,
-	MLX4_IB_MMAP_GET_HW_CLOCK	= 3,
-};
-
 enum mlx4_ib_qp_type {
 	/*
 	 * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
@@ -287,17 +295,6 @@ struct mlx4_roce_smac_vlan_info {
 	int  update_vid;
 };
 
-struct mlx4_ib_qpg_data {
-	unsigned long *tss_bitmap;
-	unsigned long *rss_bitmap;
-	struct mlx4_ib_qp *qpg_parent;
-	int tss_qpn_base;
-	int rss_qpn_base;
-	u32 tss_child_count;
-	u32 rss_child_count;
-	u32 qpg_tss_mask_sz;
-};
-
 struct mlx4_ib_qp {
 	struct ib_qp		ibqp;
 	struct mlx4_qp		mqp;
@@ -327,22 +324,16 @@ struct mlx4_ib_qp {
 	u8			sq_no_prefetch;
 	u8			state;
 	int			mlx_type;
-	enum ib_qpg_type	qpg_type;
-	struct mlx4_ib_qpg_data *qpg_data;
 	struct list_head	gid_list;
 	struct list_head	steering_rules;
 	struct mlx4_ib_buf	*sqp_proxy_rcv;
 	struct mlx4_roce_smac_vlan_info pri;
 	struct mlx4_roce_smac_vlan_info alt;
-	struct list_head	rules_list;
 	u64			reg_id;
-	int			max_inline_data;
-	struct mlx4_bf		bf;
-
-	/*
-	 * Experimental data
-	 */
-	int			max_inlr_data;
+	struct list_head	qps_list;
+	struct list_head	cq_recv_list;
+	struct list_head	cq_send_list;
+	struct counter_index  *counter_index;
 };
 
 struct mlx4_ib_srq {
@@ -376,23 +367,12 @@ struct mlx4_ib_ah {
 #define MLX4_NOT_SET_GUID		(0x00LL)
 #define MLX4_GUID_FOR_DELETE_VAL	(~(0x00LL))
 
-/****************************************/
-/* ioctl codes */
-/****************************************/
-#define MLX4_IOC_MAGIC 'm'
-#define MLX4_IOCHWCLOCKOFFSET	_IOR(MLX4_IOC_MAGIC, 1, int)
-
 enum mlx4_guid_alias_rec_status {
 	MLX4_GUID_INFO_STATUS_IDLE,
 	MLX4_GUID_INFO_STATUS_SET,
-	MLX4_GUID_INFO_STATUS_PENDING,
 };
 
-enum mlx4_guid_alias_rec_ownership {
-	MLX4_GUID_DRIVER_ASSIGN,
-	MLX4_GUID_SYSADMIN_ASSIGN,
-	MLX4_GUID_NONE_ASSIGN,	/*init state of each record*/
-};
+#define GUID_STATE_NEED_PORT_INIT 0x01
 
 enum mlx4_guid_alias_rec_method {
 	MLX4_GUID_INFO_RECORD_SET	= IB_MGMT_METHOD_SET,
@@ -403,8 +383,8 @@ struct mlx4_sriov_alias_guid_info_rec_det {
 	u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
 	ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/
 	enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/
-	u8 method; /*set or delete*/
-	enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assign that alias_guid record*/
+	unsigned int guids_retry_schedule[NUM_ALIAS_GUID_IN_REC];
+	u64 time_to_run;
 };
 
 struct mlx4_sriov_alias_guid_port_rec_det {
@@ -412,6 +392,7 @@ struct mlx4_sriov_alias_guid_port_rec_det {
 	struct workqueue_struct *wq;
 	struct delayed_work	alias_guid_work;
 	u8			port;
+	u32			state_flags;
 	struct mlx4_sriov_alias_guid *parent;
 	struct list_head	cb_list;
 };
@@ -461,7 +442,6 @@ struct mlx4_ib_demux_pv_ctx {
 	struct ib_device *ib_dev;
 	struct ib_cq *cq;
 	struct ib_pd *pd;
-	struct ib_mr *mr;
 	struct work_struct work;
 	struct workqueue_struct *wq;
 	struct mlx4_ib_demux_pv_qp qp[2];
@@ -473,7 +453,7 @@ struct mlx4_ib_demux_ctx {
 	struct workqueue_struct *wq;
 	struct workqueue_struct *ud_wq;
 	spinlock_t ud_lock;
-	__be64 subnet_prefix;
+	atomic64_t subnet_prefix;
 	__be64 guid_cache[128];
 	struct mlx4_ib_dev *dev;
 	/* the following lock protects both mcg_table and mcg_mgid0_list */
@@ -503,13 +483,27 @@ struct mlx4_ib_sriov {
 	struct idr pv_id_table;
 };
 
+struct gid_cache_context {
+	int real_index;
+	int refcount;
+};
+
+struct gid_entry {
+	union ib_gid	gid;
+	enum ib_gid_type gid_type;
+	struct gid_cache_context *ctx;
+};
+
+struct mlx4_port_gid_table {
+	struct gid_entry gids[MLX4_MAX_PORT_GIDS];
+};
+
 struct mlx4_ib_iboe {
 	spinlock_t		lock;
 	struct net_device      *netdevs[MLX4_MAX_PORTS];
-	struct net_device      *masters[MLX4_MAX_PORTS];
-	struct notifier_block	nb;
-	struct notifier_block	nb_inet;
-	union ib_gid		gid_table[MLX4_MAX_PORTS][128];
+	atomic64_t		mac[MLX4_MAX_PORTS];
+	struct notifier_block	nb;
+	struct mlx4_port_gid_table gids[MLX4_MAX_PORTS];
 };
 
 struct pkey_mgt {
@@ -548,15 +542,32 @@ struct mlx4_ib_iov_port {
 	struct mlx4_ib_iov_sysfs_attr mcg_dentry;
 };
 
-struct mlx4_ib_counter {
-	int counter_index;
-	int status;
+struct counter_index {
+	struct list_head	list;
+	u32			index;
+	u8			allocated;
 };
 
+struct mlx4_ib_counters {
+	struct list_head	counters_list;
+	struct mutex		mutex; /* mutex for accessing counters list */
+	u32			default_counter;
+};
+
+#define MLX4_DIAG_COUNTERS_TYPES 2
+
+struct mlx4_ib_diag_counters {
+	const char **name;
+	u32 *offset;
+	u32 num_counters;
+};
+
 struct mlx4_ib_dev {
 	struct ib_device	ib_dev;
 	struct mlx4_dev	       *dev;
 	int			num_ports;
+	void __iomem	       *uar_map;
+
 	struct mlx4_uar		priv_uar;
 	u32			priv_pdn;
 	MLX4_DECLARE_DOORBELL_LOCK(uar_lock);
@@ -564,14 +575,14 @@ struct mlx4_ib_dev {
 	struct ib_mad_agent    *send_agent[MLX4_MAX_PORTS][2];
 	struct ib_ah	       *sm_ah[MLX4_MAX_PORTS];
 	spinlock_t		sm_lock;
+	atomic64_t		sl2vl[MLX4_MAX_PORTS];
 	struct mlx4_ib_sriov	sriov;
 
 	struct mutex		cap_mask_mutex;
 	bool			ib_active;
 	struct mlx4_ib_iboe	iboe;
-	struct mlx4_ib_counter	counters[MLX4_MAX_PORTS];
+	struct mlx4_ib_counters counters_table[MLX4_MAX_PORTS];
 	int		       *eq_table;
-	int			eq_added;
 	struct kobject	       *iov_parent;
 	struct kobject	       *ports_parent;
 	struct kobject	       *dev_ports_parent[MLX4_MFUNC_MAX];
@@ -580,12 +591,22 @@ struct mlx4_ib_dev {
 	unsigned long *ib_uc_qpns_bitmap;
 	int steer_qpn_count;
 	int steer_qpn_base;
+	int steering_support;
+	struct mlx4_ib_qp      *qp1_proxy[MLX4_MAX_PORTS];
+	/* lock when destroying qp1_proxy and getting netdev events */
+	struct mutex		qp1_proxy_lock[MLX4_MAX_PORTS];
+	u8			bond_next_port;
+	/* protect resources needed as part of reset flow */
+	spinlock_t		reset_flow_resource_lock;
+	struct list_head		qp_list;
+	struct mlx4_ib_diag_counters diag_counters[MLX4_DIAG_COUNTERS_TYPES];
 };
 
 struct ib_event_work {
 	struct work_struct	work;
 	struct mlx4_ib_dev	*ib_dev;
 	struct mlx4_eqe		ib_eqe;
+	int			port;
 };
 
 struct mlx4_ib_qp_tunnel_init_attr {
@@ -595,6 +616,21 @@ struct mlx4_ib_qp_tunnel_init_attr {
 	u8 port;
 };
 
+struct mlx4_uverbs_ex_query_device {
+	__u32 comp_mask;
+	__u32 reserved;
+};
+
+enum query_device_resp_mask {
+	QUERY_DEVICE_RESP_MASK_TIMESTAMP = 1UL << 0,
+};
+
+struct mlx4_uverbs_ex_query_device_resp {
+	__u32 comp_mask;
+	__u32 response_length;
+	__u64 hca_core_clock_offset;
+};
+
 static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
 {
 	return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
@@ -635,11 +671,6 @@ static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *
 	return container_of(ibmw, struct mlx4_ib_mw, ibmw);
 }
 
-static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
-{
-	return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
-}
-
 static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
 {
 	return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
@@ -675,6 +706,13 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *
 	return container_of(ibah, struct mlx4_ib_ah, ibah);
 }
 
+static inline u8 mlx4_ib_bond_next_port(struct mlx4_ib_dev *dev)
+{
+	dev->bond_next_port = (dev->bond_next_port + 1) % dev->num_ports;
+
+	return dev->bond_next_port + 1;
+}
+
 int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
 void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
@@ -685,30 +723,22 @@ void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *co
 struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc);
 int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
 			   struct ib_umem *umem);
-int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
-				       u64 start_va,
-				       int *num_of_mtts);
 struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
-				  struct ib_udata *udata, int mr_id);
+				  struct ib_udata *udata);
 int mlx4_ib_dereg_mr(struct ib_mr *mr);
-struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
-		    struct ib_mw_bind *mw_bind);
+struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
+			       struct ib_udata *udata);
 int mlx4_ib_dealloc_mw(struct ib_mw *mw);
-struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
-					int max_page_list_len);
-struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
-							       int page_list_len);
-void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
-
-int mlx4_ib_modify_cq(struct ib_cq *cq,
-		      struct ib_cq_attr *cq_attr,
-		      int cq_attr_mask);
+struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
+			       enum ib_mr_type mr_type,
+			       u32 max_num_sg);
+int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+		      unsigned int *sg_offset);
+int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
 int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
-int mlx4_ib_ignore_overrun_cq(struct ib_cq *ibcq);
 struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
-				struct ib_cq_init_attr *attr,
+				const struct ib_cq_init_attr *attr,
 				struct ib_ucontext *context,
 				struct ib_udata *udata);
 int mlx4_ib_destroy_cq(struct ib_cq *cq);
@@ -746,11 +776,13 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_re
 		      struct ib_recv_wr **bad_wr);
 
 int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
-		 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
-		 void *in_mad, void *response_mad);
+		 int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+		 const void *in_mad, void *response_mad);
 int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-			struct ib_wc *in_wc, struct ib_grh *in_grh,
-			struct ib_mad *in_mad, struct ib_mad *out_mad);
+			const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+			const struct ib_mad_hdr *in, size_t in_mad_size,
+			struct ib_mad_hdr *out, size_t *out_mad_size,
+			u16 *out_mad_pkey_index);
 int mlx4_ib_mad_init(struct mlx4_ib_dev *dev);
 void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev);
@@ -768,29 +800,16 @@ int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 p
 int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
 			union ib_gid *gid, int netw_view);
 
-int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
-			u8 *mac, int *is_mcast, u8 port);
-
-int mlx4_ib_query_if_stat(struct mlx4_ib_dev *dev, u32 counter_index,
-			  union mlx4_counter *counter, u8 clear);
-
-static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
+static inline bool mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
 {
 	u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
 
 	if (rdma_port_get_link_layer(ah->ibah.device, port) ==
 	    IB_LINK_LAYER_ETHERNET)
-		return 1;
+		return true;
 
 	return !!(ah->av.ib.g_slid & 0x80);
 }
 
-static inline int mlx4_ib_qp_has_rq(struct ib_qp_init_attr *attr)
-{
-	if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
-		return 0;
-	return !attr->srq;
-}
-
 int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx);
 void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq);
 void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave);
@@ -815,13 +834,16 @@ void mlx4_ib_tunnels_update_work(struct work_struct *w
 int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 			  enum ib_qp_type qpt, struct ib_wc *wc,
 			  struct ib_grh *grh, struct ib_mad *mad);
+
 int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 			 enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
-			 u32 qkey, struct ib_ah_attr *attr, u8 *s_mac, struct ib_mad *mad);
+			 u32 qkey, struct ib_ah_attr *attr, u8 *s_mac,
+			 u16 vlan_id, struct ib_mad *mad);
+
 __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
 
 int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
-			     struct ib_mad *mad, int is_eth);
+			     struct ib_mad *mad);
 int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port,
 				 int slave_id, struct ib_mad *mad);
@@ -848,6 +870,8 @@ int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device
 void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device,
 			     int port_num, struct attribute *attr);
 ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
+void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
+				    int port, int slave_init);
 
 int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ;
@@ -859,7 +883,16 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, in
 void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
 int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
 			 int is_attach);
-int mlx4_ib_query_device(struct ib_device *ibdev,
-			 struct ib_device_attr *props);
+int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
+			  u64 start, u64 length, u64 virt_addr,
+			  int mr_access_flags, struct ib_pd *pd,
+			  struct ib_udata *udata);
+int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
+				    u8 port_num, int index);
+
+void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
+				     int port);
+
+void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port);
 
 #endif /* MLX4_IB_H */

Modified: projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_ah.c
==============================================================================
--- projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_ah.c	Mon Nov 13 10:45:31 2017	(r325759)
+++ projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_ah.c	Mon Nov 13 10:49:18 2017	(r325760)
@@ -38,28 +38,10 @@
 #include
 #include
+#include
 
 #include "mlx4_ib.h"
 
-int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
-			u8 *mac, int *is_mcast, u8 port)
-{
-	struct in6_addr in6;
-
-	*is_mcast = 0;
-
-	memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6);
-	if (rdma_link_local_addr(&in6))
-		rdma_get_ll_mac(&in6, mac);
-	else if (rdma_is_multicast_addr(&in6)) {
-		rdma_get_mcast_mac(&in6, mac);
-		*is_mcast = 1;
-	} else
-		return -EINVAL;
-
-	return 0;
-}
-
 static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
 				  struct mlx4_ib_ah *ah)
 {
@@ -67,6 +49,7 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, st
 
 	ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
 	ah->av.ib.g_slid  = ah_attr->src_path_bits;
+	ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		ah->av.ib.g_slid   |= 0x80;
 		ah->av.ib.gid_index = ah_attr->grh.sgid_index;
@@ -84,7 +67,6 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, st
 		       !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
 			--ah->av.ib.stat_rate;
 	}
-	ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
 
 	return &ah->ibah;
 }
@@ -96,21 +78,38 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd,
 	struct mlx4_dev *dev = ibdev->dev;
 	int is_mcast = 0;
 	struct in6_addr in6;
-	u16 vlan_tag;
+	u16 vlan_tag = 0xffff;
+	union ib_gid sgid;
+	struct ib_gid_attr gid_attr;
+	int ret;
 
 	memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
 	if (rdma_is_multicast_addr(&in6)) {
 		is_mcast = 1;
-		resolve_mcast_mac(&in6, ah->av.eth.mac);
+		rdma_get_mcast_mac(&in6, ah->av.eth.mac);
 	} else {
-		memcpy(ah->av.eth.mac, ah_attr->dmac, 6);
+		memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN);
 	}
-	vlan_tag = ah_attr->vlan_id;
+	ret = ib_get_cached_gid(pd->device, ah_attr->port_num,
+				ah_attr->grh.sgid_index, &sgid, &gid_attr);
+	if (ret)
+		return ERR_PTR(ret);
+	eth_zero_addr(ah->av.eth.s_mac);
+	if (gid_attr.ndev) {
+		if (is_vlan_dev(gid_attr.ndev))
+			vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
+		memcpy(ah->av.eth.s_mac, IF_LLADDR(gid_attr.ndev), ETH_ALEN);
+		dev_put(gid_attr.ndev);
+	}
 	if (vlan_tag < 0x1000)
 		vlan_tag |= (ah_attr->sl & 7) << 13;
 	ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
-	ah->av.eth.gid_index = ah_attr->grh.sgid_index;
+	ret = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	ah->av.eth.gid_index = ret;
 	ah->av.eth.vlan = cpu_to_be16(vlan_tag);
+	ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
 	if (ah_attr->static_rate) {
 		ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
 		while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
@@ -168,9 +167,13 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_
 	enum rdma_link_layer ll;
 
 	memset(ah_attr, 0, sizeof *ah_attr);
-	ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
 	ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
 	ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
+	if (ll == IB_LINK_LAYER_ETHERNET)
+		ah_attr->sl = be32_to_cpu(ah->av.eth.sl_tclass_flowlabel) >> 29;
+	else
+		ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
+
 	ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0;
 	if (ah->av.ib.stat_rate)
 		ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;

Modified: projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_alias_GUID.c
==============================================================================
--- projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_alias_GUID.c	Mon Nov 13 10:45:31 2017	(r325759)
+++ projects/bsd_rdma_4_9/sys/dev/mlx4/mlx4_ib/mlx4_ib_alias_GUID.c	Mon Nov 13 10:49:18 2017	(r325760)
@@ -42,6 +42,8 @@
 #include
 #include
 #include
+#include
+#include
 
 #include "mlx4_ib.h"
 
 /*
@@ -57,15 +59,19 @@ struct mlx4_alias_guid_work_context {
 	int query_id;
 	struct list_head list;
 	int block_num;
+	ib_sa_comp_mask guid_indexes;
 	u8 method;
 };
 
 struct mlx4_next_alias_guid_work {
 	u8 port;
 	u8 block_num;
+	u8 method;
 	struct mlx4_sriov_alias_guid_info_rec_det rec_det;
 };
 
+static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
+				     int *resched_delay_sec);
+
 void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
					 u8 port_num, u8 *p_data)
@@ -119,6 +125,57 @@ ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(in
 	return IB_SA_COMP_MASK(4 + index);
 }
 
+void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
+				    int port, int slave_init)
+{
+	__be64 curr_guid, required_guid;
+	int record_num = slave / 8;
+	int index = slave % 8;
+	int port_index = port - 1;
+	unsigned long flags;
+	int do_work = 0;
+
+	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+	if (dev->sriov.alias_guid.ports_guid[port_index].state_flags &
+	    GUID_STATE_NEED_PORT_INIT)
+		goto unlock;
+	if (!slave_init) {
+		curr_guid = *(__be64 *)&dev->sriov.
+			alias_guid.ports_guid[port_index].
+			all_rec_per_port[record_num].
+			all_recs[GUID_REC_SIZE * index];
+		if (curr_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL) ||
+		    !curr_guid)
+			goto unlock;
+		required_guid = cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
+	} else {
+		required_guid = mlx4_get_admin_guid(dev->dev, slave, port);
+		if (required_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+			goto unlock;
+	}
+	*(__be64 *)&dev->sriov.alias_guid.ports_guid[port_index].
+		all_rec_per_port[record_num].
+		all_recs[GUID_REC_SIZE * index] = required_guid;
+	dev->sriov.alias_guid.ports_guid[port_index].
+		all_rec_per_port[record_num].guid_indexes
+		|= mlx4_ib_get_aguid_comp_mask_from_ix(index);
+	dev->sriov.alias_guid.ports_guid[port_index].
+		all_rec_per_port[record_num].status
+		= MLX4_GUID_INFO_STATUS_IDLE;
+	/* set to run immediately */
+	dev->sriov.alias_guid.ports_guid[port_index].
+		all_rec_per_port[record_num].time_to_run = 0;
+	dev->sriov.alias_guid.ports_guid[port_index].
+		all_rec_per_port[record_num].
+		guids_retry_schedule[index] = 0;
+	do_work = 1;
+unlock:
+	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+
+	if (do_work)
+		mlx4_ib_init_alias_guid_work(dev, port_index);
+}
+
 /*
  * Whenever new GUID is set/unset (guid table change) create event and
  * notify the relevant slave (master also should be notified).
@@ -134,15 +191,20 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_
 {
 	int i;
 	u64 guid_indexes;
-	int slave_id;
+	int slave_id, slave_port;
 	enum slave_port_state new_state;
 	enum slave_port_state prev_state;
 	__be64 tmp_cur_ag, form_cache_ag;
 	enum slave_port_gen_event gen_event;
+	struct mlx4_sriov_alias_guid_info_rec_det *rec;
+	unsigned long flags;
+	__be64 required_value;
 
 	if (!mlx4_is_master(dev->dev))
 		return;
 
+	rec = &dev->sriov.alias_guid.ports_guid[port_num - 1].
+			all_rec_per_port[block_num];
 	guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
 				   ports_guid[port_num - 1].
 				   all_rec_per_port[block_num].guid_indexes);
@@ -156,8 +218,13 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_
 			continue;
 
 		slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
-		if (slave_id >= dev->dev->num_slaves)
+		if (slave_id >= dev->dev->persist->num_vfs + 1)
 			return;
+
+		slave_port = mlx4_phys_to_slave_port(dev->dev, slave_id, port_num);
+		if (slave_port < 0) /* this port isn't available for the VF */
+			continue;
+
 		tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
 		form_cache_ag = get_cached_alias_guid(dev, port_num,
 					(NUM_ALIAS_GUID_IN_REC * block_num) + i);
@@ -168,8 +235,27 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_
 		 */
 		if (tmp_cur_ag != form_cache_ag)
 			continue;
-		mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
 
+		spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+		required_value = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
+
+		if (required_value == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+			required_value = 0;
+
+		if (tmp_cur_ag == required_value) {
+			rec->guid_indexes = rec->guid_indexes &
+			       ~mlx4_ib_get_aguid_comp_mask_from_ix(i);
+		} else {
+			/* may notify port down if value is 0 */
+			if (tmp_cur_ag != MLX4_NOT_SET_GUID) {
+				spin_unlock_irqrestore(&dev->sriov.
+					alias_guid.ag_work_lock, flags);
+				continue;
+			}
+		}
+		spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock,
+				       flags);
+		mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
 		/*2 cases: Valid GUID, and Invalid Guid*/
 		if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
@@ -190,10 +276,14 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_
 			set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
 						      MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
 						      &gen_event);
-			pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
-				 slave_id, port_num);
-			mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
-						       MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+			if (gen_event == SLAVE_PORT_GEN_EVENT_DOWN) {
+				pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
+					 slave_id, port_num);
+				mlx4_gen_port_state_change_eqe(dev->dev,
+							       slave_id,
+							       port_num,
+							       MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+			}
 		}
 	}
 }
@@ -208,6 +298,9 @@ static void aliasguid_query_handler(int status,
 	int i;
 	struct mlx4_sriov_alias_guid_info_rec_det *rec;
 	unsigned long flags, flags1;
+	ib_sa_comp_mask declined_guid_indexes = 0;
+	ib_sa_comp_mask applied_guid_indexes = 0;
+	unsigned int resched_delay_sec = 0;
 
 	if (!context)
 		return;
@@ -218,9 +311,9 @@ static void aliasguid_query_handler(int status,
 			all_rec_per_port[cb_ctx->block_num];
 
 	if (status) {
-		rec->status = MLX4_GUID_INFO_STATUS_IDLE;
 		pr_debug("(port: %d) failed: status = %d\n",
 			 cb_ctx->port, status);
+		rec->time_to_run = ktime_get_ns() + 1 * NSEC_PER_SEC;
 		goto out;
 	}
@@ -237,69 +330,101 @@ static void aliasguid_query_handler(int status,
 	rec = &dev->sriov.alias_guid.ports_guid[port_index].
 		all_rec_per_port[guid_rec->block_num];
 
-	rec->status = MLX4_GUID_INFO_STATUS_SET;
-	rec->method = MLX4_GUID_INFO_RECORD_SET;
-
+	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
 	for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
-		__be64 tmp_cur_ag;
-		tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
-		if ((cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE)
-		    && (MLX4_NOT_SET_GUID == tmp_cur_ag)) {
-			pr_debug("%s:Record num %d in block_num:%d "
-				 "was deleted by SM,ownership by %d "
-				 "(0 = driver, 1=sysAdmin, 2=None)\n",
-				 __func__, i, guid_rec->block_num,
-				 rec->ownership);
-			rec->guid_indexes = rec->guid_indexes &
-				~mlx4_ib_get_aguid_comp_mask_from_ix(i);
+		__be64 sm_response, required_val;
+
+		if (!(cb_ctx->guid_indexes &
+			mlx4_ib_get_aguid_comp_mask_from_ix(i)))
 			continue;
+		sm_response = *(__be64 *)&guid_rec->guid_info_list
+				[i * GUID_REC_SIZE];
+		required_val = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
+		if (cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE) {
+			if (required_val ==
+			    cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+				goto next_entry;
+
+			/* A new value was set till we got the response */
+			pr_debug("need to set new value %llx, record num %d, block_num:%d\n",
+				 (long long)be64_to_cpu(required_val),
+				 i, guid_rec->block_num);
+			goto entry_declined;
 		}
 
 		/* check if the SM didn't assign one of the records.
-		 * if it didn't, if it was not sysadmin request:
-		 * ask the SM to give a new GUID, (instead of the driver request).
+		 * if it didn't, re-ask for.
 		 */
-		if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
-			mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
-				     "block_num: %d was declined by SM, "
-				     "ownership by %d (0 = driver, 1=sysAdmin,"
-				     " 2=None)\n", __func__, i,
-				     guid_rec->block_num, rec->ownership);
-			if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
-				/* if it is driver assign, asks for new GUID from SM*/
-				*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
-					MLX4_NOT_SET_GUID;
-
-				/* Mark the record as not assigned, and let it
-				 * be sent again in the next work sched.*/
-				rec->status = MLX4_GUID_INFO_STATUS_IDLE;
-				rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
-			}
+		if (sm_response == MLX4_NOT_SET_GUID) {
+			if (rec->guids_retry_schedule[i] == 0)
+				mlx4_ib_warn(&dev->ib_dev,
+					     "%s:Record num %d in block_num: %d was declined by SM\n",
+					     __func__, i,
+					     guid_rec->block_num);
+			goto entry_declined;
 		} else {
 			/* properly assigned record. */
 			/* We save the GUID we just got from the SM in the
 			 * admin_guid in order to be persistent, and in the
 			 * request from the sm the process will ask for the same GUID */
-			if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
-			    tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
-				/* the sysadmin assignment failed.*/
-				mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
-					     " admin guid after SysAdmin "
-					     "configuration. "
-					     "Record num %d in block_num:%d "
-					     "was declined by SM, "
-					     "new val(0x%llx) was kept\n",
-					     __func__, i,
-					     guid_rec->block_num,
-					     (long long)be64_to_cpu(*(__be64 *) &
							 rec->all_recs[i * GUID_REC_SIZE]));
+			if (required_val &&
+			    sm_response != required_val) {
+				/* Warn only on first retry */
+				if (rec->guids_retry_schedule[i] == 0)
+					mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
+						     " admin guid after SysAdmin "
+						     "configuration. "
+						     "Record num %d in block_num:%d "
+						     "was declined by SM, "
+						     "new val(0x%llx) was kept, SM returned (0x%llx)\n",
+						     __func__, i,
+						     guid_rec->block_num,
+						     (long long)be64_to_cpu(required_val),
+						     (long long)be64_to_cpu(sm_response));
+				goto entry_declined;
 			} else {
-				memcpy(&rec->all_recs[i * GUID_REC_SIZE],
-				       &guid_rec->guid_info_list[i * GUID_REC_SIZE],
-				       GUID_REC_SIZE);
+				*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
+					sm_response;
+				if (required_val == 0)
+					mlx4_set_admin_guid(dev->dev,
+							    sm_response,
+							    (guid_rec->block_num
+							    * NUM_ALIAS_GUID_IN_REC) + i,
+							    cb_ctx->port);
+				goto next_entry;
 			}
 		}
+entry_declined:
+		declined_guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
+		rec->guids_retry_schedule[i] =
+			(rec->guids_retry_schedule[i] == 0) ? 1 :
+			min((unsigned int)60,
+			    rec->guids_retry_schedule[i] * 2);
+		/* using the minimum value among all entries in that record */
+		resched_delay_sec = (resched_delay_sec == 0) ?
+				rec->guids_retry_schedule[i] :
+				min(resched_delay_sec,
+				    rec->guids_retry_schedule[i]);
+		continue;
+
+next_entry:
+		rec->guids_retry_schedule[i] = 0;
 	}
 
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
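
A few of the changes above lend themselves to small stand-alone sketches.
These are illustrative user-space C only; the structs and main() below are
hypothetical stand-ins, not driver code.

The new mlx4_ib_bond_next_port() inline in mlx4_ib.h cycles a 0-based
cursor over the physical ports and returns a 1-based port number, so
consecutive calls on a 2-port device alternate between ports 2 and 1.
A minimal sketch of that round-robin, assuming only the two fields the
helper actually touches:

	#include <stdio.h>

	/* Stand-in for the driver state; only the fields used here. */
	struct bond_state {
		unsigned char	bond_next_port;	/* 0-based cursor */
		int		num_ports;	/* physical ports, e.g. 2 */
	};

	/* Mirrors mlx4_ib_bond_next_port(): advance the cursor modulo
	 * num_ports, then return a 1-based port number. */
	static unsigned char
	bond_next_port(struct bond_state *s)
	{
		s->bond_next_port = (s->bond_next_port + 1) % s->num_ports;
		return (s->bond_next_port + 1);
	}

	int
	main(void)
	{
		struct bond_state s = { 0, 2 };

		for (int i = 0; i < 5; i++)
			printf("%u ", bond_next_port(&s));
		printf("\n");	/* prints: 2 1 2 1 2 */
		return (0);
	}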
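In create_iboe_ah(), the VLAN tag now defaults to 0xffff ("no VLAN") and is
only taken from the GID entry's net device when that device is a vlan
interface; when a valid 12-bit VLAN ID is present, the 3-bit service level
is folded into the 802.1Q priority field (bits 15:13). A hedged sketch of
just that packing step, mirroring the "vlan_tag |= (ah_attr->sl & 7) << 13"
line above:

	#include <stdio.h>
	#include <stdint.h>

	/* Pack the 3-bit SL into bits 15:13 of the VLAN tag; the
	 * 0xffff "no VLAN" sentinel is left untouched. */
	static uint16_t
	pack_sl_into_vlan(uint16_t vlan_tag, uint8_t sl)
	{
		if (vlan_tag < 0x1000)	/* valid 12-bit VLAN ID */
			vlan_tag |= (uint16_t)(sl & 7) << 13;
		return (vlan_tag);
	}

	int
	main(void)
	{
		printf("0x%04x\n", pack_sl_into_vlan(0x0064, 5)); /* 0xa064 */
		printf("0x%04x\n", pack_sl_into_vlan(0xffff, 5)); /* 0xffff */
		return (0);
	}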
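mlx4_ib_query_ah() now recovers the SL per link layer: on Ethernet (RoCE)
the SL sits in the top 3 bits of sl_tclass_flowlabel (shift by 29), on
InfiniBand in the top 4 bits (shift by 28). Illustrative only; 'w' stands
in for the field value after be32_to_cpu():

	#include <stdio.h>
	#include <stdint.h>

	/* Same word, different SL width depending on the link layer. */
	static unsigned
	sl_from_word(uint32_t w, int is_ethernet)
	{
		return (is_ethernet ? w >> 29 : w >> 28);
	}

	int
	main(void)
	{
		uint32_t w = 0xa0000000;

		printf("eth: %u\n", sl_from_word(w, 1));	/* 5 */
		printf("ib:  %u\n", sl_from_word(w, 0));	/* 10 */
		return (0);
	}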
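Finally, the reworked aliasguid_query_handler() drops the per-record
"ownership" tracking in favor of a per-entry retry schedule: an entry
declined by the SM is retried after 1 second, the delay then doubles up to
a 60-second cap, and the record is rescheduled using the minimum delay
across its entries. A minimal sketch of that backoff, matching the
arithmetic at the entry_declined: label above:

	#include <stdio.h>

	/* 0 -> 1 -> 2 -> 4 -> ... -> 60 (capped), in seconds. */
	static unsigned int
	next_retry_delay(unsigned int cur)
	{
		if (cur == 0)
			return (1);
		return (cur * 2 < 60 ? cur * 2 : 60);
	}

	int
	main(void)
	{
		unsigned int d = 0;

		for (int attempt = 1; attempt <= 8; attempt++) {
			d = next_retry_delay(d);
			printf("attempt %d: retry in %u s\n", attempt, d);
		}
		/* 1, 2, 4, 8, 16, 32, 60, 60 */
		return (0);
	}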