diff --git a/0016-libhns-assign-doorbell-to-zero-when-allocate-it.patch b/0016-libhns-Assign-doorbell-to-zero-when-allocate-it.patch similarity index 76% rename from 0016-libhns-assign-doorbell-to-zero-when-allocate-it.patch rename to 0016-libhns-Assign-doorbell-to-zero-when-allocate-it.patch index 7777e81f3654b5cf836faf5066b6492c881a1ced..dcb3ea88920e3feb2293731b1e5fa1b747ed40d5 100644 --- a/0016-libhns-assign-doorbell-to-zero-when-allocate-it.patch +++ b/0016-libhns-Assign-doorbell-to-zero-when-allocate-it.patch @@ -1,18 +1,25 @@ -From 2c11318d6a06ba6afd6efd91b2881b9fa05f35f6 Mon Sep 17 00:00:00 2001 +From 62e56376912213cab92a4378a719d037fef61cd4 Mon Sep 17 00:00:00 2001 From: Chengchang Tang -Date: Tue, 26 Sep 2023 19:19:09 +0800 -Subject: [PATCH 16/18] libhns: assign doorbell to zero when allocate it +Date: Thu, 18 Apr 2024 13:49:32 +0800 +Subject: [PATCH] libhns: Assign doorbell to zero when allocate it -driver inclusion +mainline inclusion +from mainline-master +commit 2af6b0f3262c432f35cb6a92de50c4e93b63b6af category: feature -bugzilla: https://gitee.com/openeuler/kernel/issues/I98HQV +bugzilla: https://gitee.com/openeuler/kernel/issues/I9NZME +CVE: NA --------------------------------------------------------------------------- +Reference: https://github.com/linux-rdma/rdma-core/pull/1450/commits/2af6b0f3262c432f35cb6a92de50c4e93b63b6af + +---------------------------------------------------------------------- Clear the doorbell when getting it to avoid clearing it in each function that uses hns_roce_alloc_db() Signed-off-by: Chengchang Tang +Signed-off-by: Junxian Huang +Signed-off-by: Juan Zhou --- providers/hns/hns_roce_u_db.c | 2 ++ providers/hns/hns_roce_u_verbs.c | 8 -------- diff --git a/0018-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch b/0018-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch index 7957daea82ec2adf1133a404053b1e3327dc52a0..cefb81142f851cdbb7f5144a2ba3758e47f36ba7 100644 --- a/0018-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch +++ b/0018-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch @@ -1,21 +1,27 @@ -From 64e8d59358cfdb05d7b172bb1b60f18fb7f3d844 Mon Sep 17 00:00:00 2001 +From 26cd3b3f19a019cf0bc17915af179de6193fe56c Mon Sep 17 00:00:00 2001 From: Chengchang Tang -Date: Thu, 7 Dec 2023 09:48:02 +0800 -Subject: [PATCH 18/18] libhns: Fix owner bit when SQ wraps around in new IO +Date: Thu, 18 Apr 2024 13:49:33 +0800 +Subject: [PATCH] libhns: Fix owner bit when SQ wraps around in new IO -driver inclusion +mainline inclusion +from mainline-master +commit 0067aad0a3a9a46d6c150e089b30bc9246dfe663 category: bugfix -bugzilla: https://gitee.com/openeuler/kernel/issues/I98YNG +bugzilla: https://gitee.com/openeuler/kernel/issues/I9NZME +CVE: NA --------------------------------------------------------------------------- +Reference: https://github.com/linux-rdma/rdma-core/pull/1450/commits/0067aad0a3a9a46d6c150e089b30bc9246dfe663 -The owner bit has been write in init_rc_wqe() or init_ud_wqe() -with a write value. And it will be overwritten by some subsequent -operations. When the SQ wraps around, the overwritten value will be -an incorrect value. +---------------------------------------------------------------------- -For example, driver will assign the owner bit in the second step, -and overwrite it in the third step. +Commit c292b7809f38 ("libhns: Fix the owner bit error of sq in new io") +fixed a bug that the SQ head was updated before the owner bit was filled +in WQE, but only when using ibv_wr_set_sge(). Actually this bug still +exists in other ibv_wr_set_*(). + +For example, in the flow below, the driver will fill the owner bit in +ibv_wr_rdma_write(), but mistakenly overwrite it again in +ibv_wr_set_sge_list() or ibv_wr_set_inline_data_list(). ```c ibv_wr_start(); @@ -27,11 +33,14 @@ else ibv_wr_complete(); ``` -This patch removes the redundant owner bit assignment operations -in new IO. +When the SQ wraps around, the overwritten value will be incorrect. +Remove all the incorrect owner bit filling in ibv_wr_set_*(). -Fixes: ("libhns: Fix the owner bit error of sq in new io") +Fixes: 36446a56eea5 ("libhns: Extended QP supports the new post send mechanism") +Fixes: c292b7809f38 ("libhns: Fix the owner bit error of sq in new io") Signed-off-by: Chengchang Tang +Signed-off-by: Junxian Huang +Signed-off-by: Juan Zhou --- providers/hns/hns_roce_u_hw_v2.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/0030-libhns-Remove-unused-return-value.patch b/0030-libhns-Remove-unused-return-value.patch new file mode 100644 index 0000000000000000000000000000000000000000..0263254d16767c85ea204b5128ab308aeddb141e --- /dev/null +++ b/0030-libhns-Remove-unused-return-value.patch @@ -0,0 +1,51 @@ +From 953cb41219fbbb5ccb4cadd2524adc4d6927d996 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Thu, 18 Apr 2024 13:49:29 +0800 +Subject: [PATCH 30/33] libhns: Remove unused return value + +mainline inclusion +from mainline-master +commit 9e1847c96356c452b3ed2330ddf4c484508c6f10 +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/I9NZME +CVE: NA + +Reference: https://github.com/linux-rdma/rdma-core/pull/1450/commits/9e1847c96356c452b3ed2330ddf4c484508c6f10 + +---------------------------------------------------------------------- + +parse_cqe_for_resp() will never fail and always returns 0. Remove the +unused return value. + +Fixes: 061f7e1757ca ("libhns: Refactor the poll one interface") +Signed-off-by: Junxian Huang +Signed-off-by: Juan Zhou +--- + providers/hns/hns_roce_u_hw_v2.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index b2d452b..a0b8655 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -531,7 +531,7 @@ static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + handle_recv_cqe_inl_from_srq(cqe, srq); + } + +-static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, ++static void parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + struct hns_roce_qp *hr_qp) + { + struct hns_roce_wq *wq; +@@ -547,8 +547,6 @@ static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + handle_recv_cqe_inl_from_rq(cqe, hr_qp); + else if (hr_reg_read(cqe, CQE_RQ_INLINE)) + handle_recv_rq_inl(cqe, hr_qp); +- +- return 0; + } + + static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, +-- +2.33.0 + diff --git a/0031-libhns-Fix-several-context-locks-issue.patch b/0031-libhns-Fix-several-context-locks-issue.patch new file mode 100644 index 0000000000000000000000000000000000000000..539b04b0e8de5438d194c1f027289a9e41e407ca --- /dev/null +++ b/0031-libhns-Fix-several-context-locks-issue.patch @@ -0,0 +1,148 @@ +From 4030d141751c6fb73270fdb8e8c46854df307865 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Thu, 18 Apr 2024 13:49:30 +0800 +Subject: [PATCH 31/33] libhns: Fix several context locks issue + +mainline inclusion +from mainline-master +commit 6772962084dd1ee0ec277d79c63673f8736aa94f +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/I9NZME +CVE: NA + +Reference: https://github.com/linux-rdma/rdma-core/pull/1450/commits/6772962084dd1ee0ec277d79c63673f8736aa94f + +---------------------------------------------------------------------- + +Fix several context lock issue: + +1. db_list_mutex is used without init currently. Add its init to + hns_roce_alloc_context(). + +2. pthread_mutex_init()/pthread_spin_init() may return error value. + Check the return value in hns_roce_alloc_context(). + +3. Add destruction for these context locks. + +4. Encapsulate init and destruction functions for these context locks. + +Fixes: 13eae8889690 ("libhns: Support rq record doorbell") +Fixes: 887b78c80224 ("libhns: Add initial main frame") +Signed-off-by: Junxian Huang +Signed-off-by: Juan Zhou +--- + providers/hns/hns_roce_u.c | 61 ++++++++++++++++++++++++++++++++------ + 1 file changed, 52 insertions(+), 9 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index c4a3ba5..e219b9e 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -355,6 +355,47 @@ static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, + } + } + ++static int hns_roce_init_context_lock(struct hns_roce_context *context) ++{ ++ int ret; ++ ++ ret = pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); ++ if (ret) ++ return ret; ++ ++ ret = pthread_mutex_init(&context->qp_table_mutex, NULL); ++ if (ret) ++ goto destroy_uar_lock; ++ ++ ret = pthread_mutex_init(&context->srq_table_mutex, NULL); ++ if (ret) ++ goto destroy_qp_mutex; ++ ++ ret = pthread_mutex_init(&context->db_list_mutex, NULL); ++ if (ret) ++ goto destroy_srq_mutex; ++ ++ return 0; ++ ++destroy_srq_mutex: ++ pthread_mutex_destroy(&context->srq_table_mutex); ++ ++destroy_qp_mutex: ++ pthread_mutex_destroy(&context->qp_table_mutex); ++ ++destroy_uar_lock: ++ pthread_spin_destroy(&context->uar_lock); ++ return ret; ++} ++ ++static void hns_roce_destroy_context_lock(struct hns_roce_context *context) ++{ ++ pthread_spin_destroy(&context->uar_lock); ++ pthread_mutex_destroy(&context->qp_table_mutex); ++ pthread_mutex_destroy(&context->srq_table_mutex); ++ pthread_mutex_destroy(&context->db_list_mutex); ++} ++ + static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + int cmd_fd, + void *private_data) +@@ -373,19 +414,22 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + ucontext_set_cmd(&cmd, ctx_attr); + if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp))) +- goto err_free; ++ goto err_ibv_cmd; ++ ++ if (hns_roce_init_context_lock(context)) ++ goto err_ibv_cmd; + + if (set_context_attr(hr_dev, context, &resp)) +- goto err_free; ++ goto err_set_attr; + + context->uar = mmap(NULL, hr_dev->page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, cmd_fd, 0); + if (context->uar == MAP_FAILED) +- goto err_free; ++ goto err_set_attr; + + if (init_dca_context(context, cmd_fd, + &resp, ctx_attr, hr_dev->page_size)) +- goto err_free; ++ goto err_set_attr; + + if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size)) + goto reset_free; +@@ -393,10 +437,6 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + if (hns_roce_mmap(hr_dev, context, cmd_fd)) + goto uar_free; + +- pthread_mutex_init(&context->qp_table_mutex, NULL); +- pthread_mutex_init(&context->srq_table_mutex, NULL); +- pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); +- + verbs_set_ops(&context->ibv_ctx, &hns_common_ops); + verbs_set_ops(&context->ibv_ctx, &hr_dev->u_hw->hw_ops); + +@@ -407,7 +447,9 @@ uar_free: + munmap(context->reset_state, hr_dev->page_size); + reset_free: + uninit_dca_context(context); +-err_free: ++err_set_attr: ++ hns_roce_destroy_context_lock(context); ++err_ibv_cmd: + verbs_uninit_context(&context->ibv_ctx); + free(context); + return NULL; +@@ -422,6 +464,7 @@ static void hns_roce_free_context(struct ibv_context *ibctx) + if (context->reset_state) + munmap(context->reset_state, hr_dev->page_size); + uninit_dca_context(context); ++ hns_roce_destroy_context_lock(context); + verbs_uninit_context(&context->ibv_ctx); + free(context); + } +-- +2.33.0 + diff --git a/0032-libhns-Clean-up-signed-unsigned-mix-with-relational-.patch b/0032-libhns-Clean-up-signed-unsigned-mix-with-relational-.patch new file mode 100644 index 0000000000000000000000000000000000000000..b0a6e14fc2974e7c5f869728377b045d15c1fe58 --- /dev/null +++ b/0032-libhns-Clean-up-signed-unsigned-mix-with-relational-.patch @@ -0,0 +1,80 @@ +From 146a980b0669d6db58ac4b5e83efa951ea48ddae Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Thu, 18 Apr 2024 13:49:34 +0800 +Subject: [PATCH 32/33] libhns: Clean up signed-unsigned mix with relational + issue + +mainline inclusion +from mainline-master +commit 79475124d39a92819030a854b7ea94fb73d9bc39 +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/I9NZME +CVE: NA + +Reference: https://github.com/linux-rdma/rdma-core/pull/1450/commits/79475124d39a92819030a854b7ea94fb73d9bc39 + +---------------------------------------------------------------------- + +Clean up signed-unsigned mix with relational issue. + +Fixes: 36446a56eea5 ("libhns: Extended QP supports the new post send mechanism") +Signed-off-by: Junxian Huang +Signed-off-by: Juan Zhou +--- + providers/hns/hns_roce_u_hw_v2.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index a0b8655..9371150 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -2632,8 +2632,8 @@ static void set_inline_data_list_rc(struct hns_roce_qp *qp, + { + unsigned int msg_len = qp->sge_info.total_len; + void *dseg; ++ size_t i; + int ret; +- int i; + + hr_reg_enable(wqe, RCWQE_INLINE); + +@@ -2692,7 +2692,7 @@ static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf, + { + struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); + struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe; +- int i; ++ size_t i; + + if (!wqe) + return; +@@ -2822,7 +2822,7 @@ static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge, + } + + hr_reg_write(wqe, UDWQE_MSG_START_SGE_IDX, sge_idx & mask); +- for (int i = 0; i < num_sge; i++) { ++ for (size_t i = 0; i < num_sge; i++) { + if (!sg_list[i].length) + continue; + +@@ -2848,8 +2848,8 @@ static void set_inline_data_list_ud(struct hns_roce_qp *qp, + uint8_t data[HNS_ROCE_MAX_UD_INL_INN_SZ] = {}; + unsigned int msg_len = qp->sge_info.total_len; + void *tmp; ++ size_t i; + int ret; +- int i; + + if (!check_inl_data_len(qp, msg_len)) { + qp->err = EINVAL; +@@ -2910,7 +2910,7 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf, + { + struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); + struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe; +- int i; ++ size_t i; + + if (!wqe) + return; +-- +2.33.0 + diff --git a/0033-libhns-Fix-missing-flag-when-creating-qp-by-hnsdv_cr.patch b/0033-libhns-Fix-missing-flag-when-creating-qp-by-hnsdv_cr.patch new file mode 100644 index 0000000000000000000000000000000000000000..f52033fb7de51de513f4d77b23498f347bb92ff6 --- /dev/null +++ b/0033-libhns-Fix-missing-flag-when-creating-qp-by-hnsdv_cr.patch @@ -0,0 +1,38 @@ +From bd53382cfbc0f1b1b5636dd9b425d546d98079b1 Mon Sep 17 00:00:00 2001 +From: Juan Zhou +Date: Sat, 11 May 2024 14:23:19 +0800 +Subject: [PATCH 33/33] libhns: Fix missing flag when creating qp by + hnsdv_create_qp() + +driver inclusion +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9NZME + +-------------------------------------------------------------------------- + +This flag will be used when the DCA mode is enabled. Without this flag, +the QP fails to be created in DCA mode. + +Fixes: c7bf0dbf472d ("libhns: Introduce DCA for RC QP") +Signed-off-by: Juan Zhou +--- + providers/hns/hns_roce_u_verbs.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index a680339..e30880c 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1049,7 +1049,8 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) + } + + enum { +- HNSDV_QP_SUP_COMP_MASK = HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE, ++ HNSDV_QP_SUP_COMP_MASK = HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS | ++ HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE, + }; + + static int check_hnsdv_qp_attr(struct hns_roce_context *ctx, +-- +2.33.0 + diff --git a/rdma-core.spec b/rdma-core.spec index 5ba0596cbac8d71ca3b2603ee29fd74fc2576ac4..4db035d8fc2b395d13c6fe3749a4cb37c2410bc8 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,6 +1,6 @@ Name: rdma-core Version: 50.0 -Release: 8 +Release: 9 Summary: RDMA core userspace libraries and daemons License: GPLv2 or BSD Url: https://github.com/linux-rdma/rdma-core @@ -21,7 +21,7 @@ patch12: 0012-Update-kernel-headers.patch patch13: 0013-libhns-Add-reset-stop-flow-mechanism.patch patch14: 0014-libhns-Support-reporting-wc-as-software-mode.patch patch15: 0015-libhns-return-error-when-post-send-in-reset-state.patch -patch16: 0016-libhns-assign-doorbell-to-zero-when-allocate-it.patch +patch16: 0016-libhns-Assign-doorbell-to-zero-when-allocate-it.patch patch17: 0017-libhns-Fix-missing-reset-notification.patch patch18: 0018-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch patch19: 0019-Update-kernel-headers.patch @@ -35,6 +35,10 @@ patch26: 0026-libhns-Add-RoH-device-IDs.patch patch27: 0027-libhns-Add-the-parsing-of-mac-type-in-RoH-mode.patch patch28: 0028-libhns-Fix-missing-flexible-WQE-buffer-page-flag.patch patch29: 0029-libhns-Fix-ext_sge-page-size.patch +patch30: 0030-libhns-Remove-unused-return-value.patch +patch31: 0031-libhns-Fix-several-context-locks-issue.patch +patch32: 0032-libhns-Clean-up-signed-unsigned-mix-with-relational-.patch +patch33: 0033-libhns-Fix-missing-flag-when-creating-qp-by-hnsdv_cr.patch BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel @@ -614,6 +618,12 @@ fi %doc %{_docdir}/%{name}-%{version}/70-persistent-ipoib.rules %changelog +* Sat May 11 2024 Juan Zhou - 50.0-9 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Some bugfixes and cleanups + * Mon May 6 2024 Juan Zhou - 50.0-8 - Type: bugfix - ID: NA