Date: Thu, 18 Jan 2018 22:01:30 +0000 (UTC) From: Conrad Meyer <cem@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r328150 - in head: sys/crypto/ccp sys/modules sys/modules/ccp tests/sys/opencrypto Message-ID: <201801182201.w0IM1Ubo023725@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: cem Date: Thu Jan 18 22:01:30 2018 New Revision: 328150 URL: https://svnweb.freebsd.org/changeset/base/328150 Log: Add ccp(4): experimental driver for AMD Crypto Co-Processor * Registers TRNG source for random(4) * Finds available queues, LSBs; allocates static objects * Allocates a shared MSI-X for all queues. The hardware does not have separate interrupts per queue. Working interrupt mode driver. * Computes SHA hashes, HMAC. Passes cryptotest.py, cryptocheck tests. * Does AES-CBC, CTR mode, and XTS. cryptotest.py and cryptocheck pass. * Support for "authenc" (AES + HMAC). (SHA1 seems to result in "unaligned" cleartext inputs from cryptocheck -- which the engine cannot handle. SHA2 seems to work fine.) * GCM passes for block-multiple AAD, input lengths Largely based on ccr(4), part of cxgbe(4). Rough performance averages on AMD Ryzen 1950X (4kB buffer): aesni: SHA1: ~8300 Mb/s SHA256: ~8000 Mb/s ccp: SHA1: ~630 Mb/s SHA256: ~660 Mb/s SHA512: ~700 Mb/s cryptosoft: SHA1: ~1800 Mb/s SHA256: ~1800 Mb/s SHA512: ~2700 Mb/s As you can see, performance is poor in comparison to aesni(4) and even cryptosoft (due to high setup cost). At a larger buffer size (128kB), throughput is a little better (but still worse than aesni(4)): aesni: SHA1: ~10400 Mb/s SHA256: ~9950 Mb/s ccp: SHA1: ~2200 Mb/s SHA256: ~2600 Mb/s SHA512: ~3800 Mb/s cryptosoft: SHA1: ~1750 Mb/s SHA256: ~1800 Mb/s SHA512: ~2700 Mb/s AES performance has a similar story: aesni: 4kB: ~11250 Mb/s 128kB: ~11250 Mb/s ccp: 4kB: ~350 Mb/s 128kB: ~4600 Mb/s cryptosoft: 4kB: ~1750 Mb/s 128kB: ~1700 Mb/s This driver is EXPERIMENTAL. You should verify cryptographic results on typical and corner case inputs from your application against a known-good implementation. 
Sponsored by: Dell EMC Isilon Differential Revision: https://reviews.freebsd.org/D12723 Added: head/sys/crypto/ccp/ head/sys/crypto/ccp/ccp.c - copied, changed from r328137, head/sys/dev/cxgbe/crypto/t4_crypto.c head/sys/crypto/ccp/ccp.h (contents, props changed) head/sys/crypto/ccp/ccp_hardware.c - copied, changed from r328137, head/sys/dev/cxgbe/crypto/t4_crypto.c head/sys/crypto/ccp/ccp_hardware.h (contents, props changed) head/sys/crypto/ccp/ccp_lsb.c (contents, props changed) head/sys/crypto/ccp/ccp_lsb.h (contents, props changed) head/sys/modules/ccp/ head/sys/modules/ccp/Makefile (contents, props changed) Modified: head/sys/modules/Makefile head/tests/sys/opencrypto/cryptotest.py Copied and modified: head/sys/crypto/ccp/ccp.c (from r328137, head/sys/dev/cxgbe/crypto/t4_crypto.c) ============================================================================== --- head/sys/dev/cxgbe/crypto/t4_crypto.c Thu Jan 18 21:19:57 2018 (r328137, copy source) +++ head/sys/crypto/ccp/ccp.c Thu Jan 18 22:01:30 2018 (r328150) @@ -1,7 +1,10 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2017 Chelsio Communications, Inc. + * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org> * All rights reserved. 
- * Written by: John Baldwin <jhb@FreeBSD.org> + * Largely borrowed from ccr(4), Written by: John Baldwin <jhb@FreeBSD.org> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -28,1330 +31,88 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_ddb.h" + #include <sys/types.h> #include <sys/bus.h> #include <sys/lock.h> +#include <sys/kernel.h> #include <sys/malloc.h> #include <sys/mutex.h> #include <sys/module.h> +#include <sys/random.h> #include <sys/sglist.h> +#include <sys/sysctl.h> +#ifdef DDB +#include <ddb/ddb.h> +#endif + +#include <dev/pci/pcivar.h> + +#include <dev/random/randomdev.h> + #include <opencrypto/cryptodev.h> #include <opencrypto/xform.h> #include "cryptodev_if.h" -#include "common/common.h" -#include "crypto/t4_crypto.h" +#include "ccp.h" +#include "ccp_hardware.h" -/* - * Requests consist of: - * - * +-------------------------------+ - * | struct fw_crypto_lookaside_wr | - * +-------------------------------+ - * | struct ulp_txpkt | - * +-------------------------------+ - * | struct ulptx_idata | - * +-------------------------------+ - * | struct cpl_tx_sec_pdu | - * +-------------------------------+ - * | struct cpl_tls_tx_scmd_fmt | - * +-------------------------------+ - * | key context header | - * +-------------------------------+ - * | AES key | ----- For requests with AES - * +-------------------------------+ - - * | IPAD (16-byte aligned) | \ - * +-------------------------------+ +---- For requests with HMAC - * | OPAD (16-byte aligned) | / - * +-------------------------------+ - - * | GMAC H | ----- For AES-GCM - * +-------------------------------+ - - * | struct cpl_rx_phys_dsgl | \ - * +-------------------------------+ +---- Destination buffer for - * | PHYS_DSGL entries | / non-hash-only requests - * +-------------------------------+ - - * | 16 dummy bytes | ----- Only for hash-only requests - * +-------------------------------+ - * 
| IV | ----- If immediate IV - * +-------------------------------+ - * | Payload | ----- If immediate Payload - * +-------------------------------+ - - * | struct ulptx_sgl | \ - * +-------------------------------+ +---- If payload via SGL - * | SGL entries | / - * +-------------------------------+ - - * - * Note that the key context must be padded to ensure 16-byte alignment. - * For HMAC requests, the key consists of the partial hash of the IPAD - * followed by the partial hash of the OPAD. - * - * Replies consist of: - * - * +-------------------------------+ - * | struct cpl_fw6_pld | - * +-------------------------------+ - * | hash digest | ----- For HMAC request with - * +-------------------------------+ 'hash_size' set in work request - * - * A 32-bit big-endian error status word is supplied in the last 4 - * bytes of data[0] in the CPL_FW6_PLD message. bit 0 indicates a - * "MAC" error and bit 1 indicates a "PAD" error. - * - * The 64-bit 'cookie' field from the fw_crypto_lookaside_wr message - * in the request is returned in data[1] of the CPL_FW6_PLD message. - * - * For block cipher replies, the updated IV is supplied in data[2] and - * data[3] of the CPL_FW6_PLD message. - * - * For hash replies where the work request set 'hash_size' to request - * a copy of the hash in the reply, the hash digest is supplied - * immediately following the CPL_FW6_PLD message. - */ +MALLOC_DEFINE(M_CCP, "ccp", "AMD CCP crypto"); /* - * The documentation for CPL_RX_PHYS_DSGL claims a maximum of 32 - * SG entries. + * Need a global softc available for garbage random_source API, which lacks any + * context pointer. It's also handy for debugging. */ -#define MAX_RX_PHYS_DSGL_SGE 32 -#define DSGL_SGE_MAXLEN 65535 +struct ccp_softc *g_ccp_softc; -/* - * The adapter only supports requests with a total input or output - * length of 64k-1 or smaller. Longer requests either result in hung - * requests or incorrect results. 
- */ -#define MAX_REQUEST_SIZE 65535 +bool g_debug_print = false; +SYSCTL_BOOL(_hw_ccp, OID_AUTO, debug, CTLFLAG_RWTUN, &g_debug_print, 0, + "Set to enable debugging log messages"); -static MALLOC_DEFINE(M_CCR, "ccr", "Chelsio T6 crypto"); - -struct ccr_session_hmac { - struct auth_hash *auth_hash; - int hash_len; - unsigned int partial_digest_len; - unsigned int auth_mode; - unsigned int mk_size; - char ipad[CHCR_HASH_MAX_BLOCK_SIZE_128]; - char opad[CHCR_HASH_MAX_BLOCK_SIZE_128]; +static struct pciid { + uint32_t devid; + const char *desc; +} ccp_ids[] = { + { 0x14561022, "AMD CCP-5a" }, + { 0x14681022, "AMD CCP-5b" }, }; +MODULE_PNP_INFO("W32:vendor/device", pci, ccp, ccp_ids, sizeof(ccp_ids[0]), + nitems(ccp_ids)); -struct ccr_session_gmac { - int hash_len; - char ghash_h[GMAC_BLOCK_LEN]; +static struct random_source random_ccp = { + .rs_ident = "AMD CCP TRNG", + .rs_source = RANDOM_PURE_CCP, + .rs_read = random_ccp_read, }; -struct ccr_session_blkcipher { - unsigned int cipher_mode; - unsigned int key_len; - unsigned int iv_len; - __be32 key_ctx_hdr; - char enckey[CHCR_AES_MAX_KEY_LEN]; - char deckey[CHCR_AES_MAX_KEY_LEN]; -}; - -struct ccr_session { - bool active; - int pending; - enum { HMAC, BLKCIPHER, AUTHENC, GCM } mode; - union { - struct ccr_session_hmac hmac; - struct ccr_session_gmac gmac; - }; - struct ccr_session_blkcipher blkcipher; -}; - -struct ccr_softc { - struct adapter *adapter; - device_t dev; - uint32_t cid; - int tx_channel_id; - struct ccr_session *sessions; - int nsessions; - struct mtx lock; - bool detaching; - struct sge_wrq *txq; - struct sge_rxq *rxq; - - /* - * Pre-allocate S/G lists used when preparing a work request. - * 'sg_crp' contains an sglist describing the entire buffer - * for a 'struct cryptop'. 'sg_ulptx' is used to describe - * the data the engine should DMA as input via ULPTX_SGL. - * 'sg_dsgl' is used to describe the destination that cipher - * text and a tag should be written to. 
- */ - struct sglist *sg_crp; - struct sglist *sg_ulptx; - struct sglist *sg_dsgl; - - /* Statistics. */ - uint64_t stats_blkcipher_encrypt; - uint64_t stats_blkcipher_decrypt; - uint64_t stats_hmac; - uint64_t stats_authenc_encrypt; - uint64_t stats_authenc_decrypt; - uint64_t stats_gcm_encrypt; - uint64_t stats_gcm_decrypt; - uint64_t stats_wr_nomem; - uint64_t stats_inflight; - uint64_t stats_mac_error; - uint64_t stats_pad_error; - uint64_t stats_bad_session; - uint64_t stats_sglist_error; - uint64_t stats_process_error; -}; - /* - * Crypto requests involve two kind of scatter/gather lists. - * - * Non-hash-only requests require a PHYS_DSGL that describes the - * location to store the results of the encryption or decryption - * operation. This SGL uses a different format (PHYS_DSGL) and should - * exclude the crd_skip bytes at the start of the data as well as - * any AAD or IV. For authenticated encryption requests it should - * cover include the destination of the hash or tag. - * - * The input payload may either be supplied inline as immediate data, - * or via a standard ULP_TX SGL. This SGL should include AAD, - * ciphertext, and the hash or tag for authenticated decryption - * requests. - * - * These scatter/gather lists can describe different subsets of the - * buffer described by the crypto operation. ccr_populate_sglist() - * generates a scatter/gather list that covers the entire crypto - * operation buffer that is then used to construct the other - * scatter/gather lists. + * ccp_populate_sglist() generates a scatter/gather list that covers the entire + * crypto operation buffer. 
*/ static int -ccr_populate_sglist(struct sglist *sg, struct cryptop *crp) +ccp_populate_sglist(struct sglist *sg, struct cryptop *crp) { int error; sglist_reset(sg); if (crp->crp_flags & CRYPTO_F_IMBUF) - error = sglist_append_mbuf(sg, (struct mbuf *)crp->crp_buf); + error = sglist_append_mbuf(sg, crp->crp_mbuf); else if (crp->crp_flags & CRYPTO_F_IOV) - error = sglist_append_uio(sg, (struct uio *)crp->crp_buf); + error = sglist_append_uio(sg, crp->crp_uio); else error = sglist_append(sg, crp->crp_buf, crp->crp_ilen); return (error); } /* - * Segments in 'sg' larger than 'maxsegsize' are counted as multiple - * segments. - */ -static int -ccr_count_sgl(struct sglist *sg, int maxsegsize) -{ - int i, nsegs; - - nsegs = 0; - for (i = 0; i < sg->sg_nseg; i++) - nsegs += howmany(sg->sg_segs[i].ss_len, maxsegsize); - return (nsegs); -} - -/* These functions deal with PHYS_DSGL for the reply buffer. */ -static inline int -ccr_phys_dsgl_len(int nsegs) -{ - int len; - - len = (nsegs / 8) * sizeof(struct phys_sge_pairs); - if ((nsegs % 8) != 0) { - len += sizeof(uint16_t) * 8; - len += roundup2(nsegs % 8, 2) * sizeof(uint64_t); - } - return (len); -} - -static void -ccr_write_phys_dsgl(struct ccr_softc *sc, void *dst, int nsegs) -{ - struct sglist *sg; - struct cpl_rx_phys_dsgl *cpl; - struct phys_sge_pairs *sgl; - vm_paddr_t paddr; - size_t seglen; - u_int i, j; - - sg = sc->sg_dsgl; - cpl = dst; - cpl->op_to_tid = htobe32(V_CPL_RX_PHYS_DSGL_OPCODE(CPL_RX_PHYS_DSGL) | - V_CPL_RX_PHYS_DSGL_ISRDMA(0)); - cpl->pcirlxorder_to_noofsgentr = htobe32( - V_CPL_RX_PHYS_DSGL_PCIRLXORDER(0) | - V_CPL_RX_PHYS_DSGL_PCINOSNOOP(0) | - V_CPL_RX_PHYS_DSGL_PCITPHNTENB(0) | V_CPL_RX_PHYS_DSGL_DCAID(0) | - V_CPL_RX_PHYS_DSGL_NOOFSGENTR(nsegs)); - cpl->rss_hdr_int.opcode = CPL_RX_PHYS_ADDR; - cpl->rss_hdr_int.qid = htobe16(sc->rxq->iq.abs_id); - cpl->rss_hdr_int.hash_val = 0; - sgl = (struct phys_sge_pairs *)(cpl + 1); - j = 0; - for (i = 0; i < sg->sg_nseg; i++) { - seglen = 
sg->sg_segs[i].ss_len; - paddr = sg->sg_segs[i].ss_paddr; - do { - sgl->addr[j] = htobe64(paddr); - if (seglen > DSGL_SGE_MAXLEN) { - sgl->len[j] = htobe16(DSGL_SGE_MAXLEN); - paddr += DSGL_SGE_MAXLEN; - seglen -= DSGL_SGE_MAXLEN; - } else { - sgl->len[j] = htobe16(seglen); - seglen = 0; - } - j++; - if (j == 8) { - sgl++; - j = 0; - } - } while (seglen != 0); - } - MPASS(j + 8 * (sgl - (struct phys_sge_pairs *)(cpl + 1)) == nsegs); -} - -/* These functions deal with the ULPTX_SGL for input payload. */ -static inline int -ccr_ulptx_sgl_len(int nsegs) -{ - u_int n; - - nsegs--; /* first segment is part of ulptx_sgl */ - n = sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); - return (roundup2(n, 16)); -} - -static void -ccr_write_ulptx_sgl(struct ccr_softc *sc, void *dst, int nsegs) -{ - struct ulptx_sgl *usgl; - struct sglist *sg; - struct sglist_seg *ss; - int i; - - sg = sc->sg_ulptx; - MPASS(nsegs == sg->sg_nseg); - ss = &sg->sg_segs[0]; - usgl = dst; - usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | - V_ULPTX_NSGE(nsegs)); - usgl->len0 = htobe32(ss->ss_len); - usgl->addr0 = htobe64(ss->ss_paddr); - ss++; - for (i = 0; i < sg->sg_nseg - 1; i++) { - usgl->sge[i / 2].len[i & 1] = htobe32(ss->ss_len); - usgl->sge[i / 2].addr[i & 1] = htobe64(ss->ss_paddr); - ss++; - } - -} - -static bool -ccr_use_imm_data(u_int transhdr_len, u_int input_len) -{ - - if (input_len > CRYPTO_MAX_IMM_TX_PKT_LEN) - return (false); - if (roundup2(transhdr_len, 16) + roundup2(input_len, 16) > - SGE_MAX_WR_LEN) - return (false); - return (true); -} - -static void -ccr_populate_wreq(struct ccr_softc *sc, struct chcr_wr *crwr, u_int kctx_len, - u_int wr_len, uint32_t sid, u_int imm_len, u_int sgl_len, u_int hash_size, - u_int iv_loc, struct cryptop *crp) -{ - u_int cctx_size; - - cctx_size = sizeof(struct _key_ctx) + kctx_len; - crwr->wreq.op_to_cctx_size = htobe32( - V_FW_CRYPTO_LOOKASIDE_WR_OPCODE(FW_CRYPTO_LOOKASIDE_WR) | - V_FW_CRYPTO_LOOKASIDE_WR_COMPL(0) | - 
V_FW_CRYPTO_LOOKASIDE_WR_IMM_LEN(imm_len) | - V_FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC(1) | - V_FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE(cctx_size >> 4)); - crwr->wreq.len16_pkd = htobe32( - V_FW_CRYPTO_LOOKASIDE_WR_LEN16(wr_len / 16)); - crwr->wreq.session_id = htobe32(sid); - crwr->wreq.rx_chid_to_rx_q_id = htobe32( - V_FW_CRYPTO_LOOKASIDE_WR_RX_CHID(sc->tx_channel_id) | - V_FW_CRYPTO_LOOKASIDE_WR_LCB(0) | - V_FW_CRYPTO_LOOKASIDE_WR_PHASH(0) | - V_FW_CRYPTO_LOOKASIDE_WR_IV(iv_loc) | - V_FW_CRYPTO_LOOKASIDE_WR_FQIDX(0) | - V_FW_CRYPTO_LOOKASIDE_WR_TX_CH(0) | - V_FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID(sc->rxq->iq.abs_id)); - crwr->wreq.key_addr = 0; - crwr->wreq.pld_size_hash_size = htobe32( - V_FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE(sgl_len) | - V_FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE(hash_size)); - crwr->wreq.cookie = htobe64((uintptr_t)crp); - - crwr->ulptx.cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | - V_ULP_TXPKT_DATAMODIFY(0) | - V_ULP_TXPKT_CHANNELID(sc->tx_channel_id) | V_ULP_TXPKT_DEST(0) | - V_ULP_TXPKT_FID(0) | V_ULP_TXPKT_RO(1)); - crwr->ulptx.len = htobe32( - ((wr_len - sizeof(struct fw_crypto_lookaside_wr)) / 16)); - - crwr->sc_imm.cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | - V_ULP_TX_SC_MORE(imm_len != 0 ? 0 : 1)); - crwr->sc_imm.len = htobe32(wr_len - offsetof(struct chcr_wr, sec_cpl) - - sgl_len); -} - -static int -ccr_hmac(struct ccr_softc *sc, uint32_t sid, struct ccr_session *s, - struct cryptop *crp) -{ - struct chcr_wr *crwr; - struct wrqe *wr; - struct auth_hash *axf; - struct cryptodesc *crd; - char *dst; - u_int hash_size_in_response, kctx_flits, kctx_len, transhdr_len, wr_len; - u_int imm_len, iopad_size; - int error, sgl_nsegs, sgl_len; - - crd = crp->crp_desc; - - /* Reject requests with too large of an input buffer. */ - if (crd->crd_len > MAX_REQUEST_SIZE) - return (EFBIG); - - axf = s->hmac.auth_hash; - - /* PADs must be 128-bit aligned. 
*/ - iopad_size = roundup2(s->hmac.partial_digest_len, 16); - - /* - * The 'key' part of the context includes the aligned IPAD and - * OPAD. - */ - kctx_len = iopad_size * 2; - hash_size_in_response = axf->hashsize; - transhdr_len = HASH_TRANSHDR_SIZE(kctx_len); - - if (crd->crd_len == 0) { - imm_len = axf->blocksize; - sgl_nsegs = 0; - sgl_len = 0; - } else if (ccr_use_imm_data(transhdr_len, crd->crd_len)) { - imm_len = crd->crd_len; - sgl_nsegs = 0; - sgl_len = 0; - } else { - imm_len = 0; - sglist_reset(sc->sg_ulptx); - error = sglist_append_sglist(sc->sg_ulptx, sc->sg_crp, - crd->crd_skip, crd->crd_len); - if (error) - return (error); - sgl_nsegs = sc->sg_ulptx->sg_nseg; - sgl_len = ccr_ulptx_sgl_len(sgl_nsegs); - } - - wr_len = roundup2(transhdr_len, 16) + roundup2(imm_len, 16) + sgl_len; - wr = alloc_wrqe(wr_len, sc->txq); - if (wr == NULL) { - sc->stats_wr_nomem++; - return (ENOMEM); - } - crwr = wrtod(wr); - memset(crwr, 0, wr_len); - - ccr_populate_wreq(sc, crwr, kctx_len, wr_len, sid, imm_len, sgl_len, - hash_size_in_response, IV_NOP, crp); - - /* XXX: Hardcodes SGE loopback channel of 0. */ - crwr->sec_cpl.op_ivinsrtofst = htobe32( - V_CPL_TX_SEC_PDU_OPCODE(CPL_TX_SEC_PDU) | - V_CPL_TX_SEC_PDU_RXCHID(sc->tx_channel_id) | - V_CPL_TX_SEC_PDU_ACKFOLLOWS(0) | V_CPL_TX_SEC_PDU_ULPTXLPBK(1) | - V_CPL_TX_SEC_PDU_CPLLEN(2) | V_CPL_TX_SEC_PDU_PLACEHOLDER(0) | - V_CPL_TX_SEC_PDU_IVINSRTOFST(0)); - - crwr->sec_cpl.pldlen = htobe32(crd->crd_len == 0 ? axf->blocksize : - crd->crd_len); - - crwr->sec_cpl.cipherstop_lo_authinsert = htobe32( - V_CPL_TX_SEC_PDU_AUTHSTART(1) | V_CPL_TX_SEC_PDU_AUTHSTOP(0)); - - /* These two flits are actually a CPL_TLS_TX_SCMD_FMT. 
*/ - crwr->sec_cpl.seqno_numivs = htobe32( - V_SCMD_SEQ_NO_CTRL(0) | - V_SCMD_PROTO_VERSION(CHCR_SCMD_PROTO_VERSION_GENERIC) | - V_SCMD_CIPH_MODE(CHCR_SCMD_CIPHER_MODE_NOP) | - V_SCMD_AUTH_MODE(s->hmac.auth_mode) | - V_SCMD_HMAC_CTRL(CHCR_SCMD_HMAC_CTRL_NO_TRUNC)); - crwr->sec_cpl.ivgen_hdrlen = htobe32( - V_SCMD_LAST_FRAG(0) | - V_SCMD_MORE_FRAGS(crd->crd_len == 0 ? 1 : 0) | V_SCMD_MAC_ONLY(1)); - - memcpy(crwr->key_ctx.key, s->hmac.ipad, s->hmac.partial_digest_len); - memcpy(crwr->key_ctx.key + iopad_size, s->hmac.opad, - s->hmac.partial_digest_len); - - /* XXX: F_KEY_CONTEXT_SALT_PRESENT set, but 'salt' not set. */ - kctx_flits = (sizeof(struct _key_ctx) + kctx_len) / 16; - crwr->key_ctx.ctx_hdr = htobe32(V_KEY_CONTEXT_CTX_LEN(kctx_flits) | - V_KEY_CONTEXT_OPAD_PRESENT(1) | V_KEY_CONTEXT_SALT_PRESENT(1) | - V_KEY_CONTEXT_CK_SIZE(CHCR_KEYCTX_NO_KEY) | - V_KEY_CONTEXT_MK_SIZE(s->hmac.mk_size) | V_KEY_CONTEXT_VALID(1)); - - dst = (char *)(crwr + 1) + kctx_len + DUMMY_BYTES; - if (crd->crd_len == 0) { - dst[0] = 0x80; - *(uint64_t *)(dst + axf->blocksize - sizeof(uint64_t)) = - htobe64(axf->blocksize << 3); - } else if (imm_len != 0) - crypto_copydata(crp->crp_flags, crp->crp_buf, crd->crd_skip, - crd->crd_len, dst); - else - ccr_write_ulptx_sgl(sc, dst, sgl_nsegs); - - /* XXX: TODO backpressure */ - t4_wrq_tx(sc->adapter, wr); - - return (0); -} - -static int -ccr_hmac_done(struct ccr_softc *sc, struct ccr_session *s, struct cryptop *crp, - const struct cpl_fw6_pld *cpl, int error) -{ - struct cryptodesc *crd; - - crd = crp->crp_desc; - if (error == 0) { - crypto_copyback(crp->crp_flags, crp->crp_buf, crd->crd_inject, - s->hmac.hash_len, (c_caddr_t)(cpl + 1)); - } - - return (error); -} - -static int -ccr_blkcipher(struct ccr_softc *sc, uint32_t sid, struct ccr_session *s, - struct cryptop *crp) -{ - char iv[CHCR_MAX_CRYPTO_IV_LEN]; - struct chcr_wr *crwr; - struct wrqe *wr; - struct cryptodesc *crd; - char *dst; - u_int iv_loc, kctx_len, key_half, op_type, 
transhdr_len, wr_len; - u_int imm_len; - int dsgl_nsegs, dsgl_len; - int sgl_nsegs, sgl_len; - int error; - - crd = crp->crp_desc; - - if (s->blkcipher.key_len == 0 || crd->crd_len == 0) - return (EINVAL); - if (crd->crd_alg == CRYPTO_AES_CBC && - (crd->crd_len % AES_BLOCK_LEN) != 0) - return (EINVAL); - - /* Reject requests with too large of an input buffer. */ - if (crd->crd_len > MAX_REQUEST_SIZE) - return (EFBIG); - - iv_loc = IV_NOP; - if (crd->crd_flags & CRD_F_ENCRYPT) { - op_type = CHCR_ENCRYPT_OP; - if (crd->crd_flags & CRD_F_IV_EXPLICIT) - memcpy(iv, crd->crd_iv, s->blkcipher.iv_len); - else - arc4rand(iv, s->blkcipher.iv_len, 0); - iv_loc = IV_IMMEDIATE; - if ((crd->crd_flags & CRD_F_IV_PRESENT) == 0) - crypto_copyback(crp->crp_flags, crp->crp_buf, - crd->crd_inject, s->blkcipher.iv_len, iv); - } else { - op_type = CHCR_DECRYPT_OP; - if (crd->crd_flags & CRD_F_IV_EXPLICIT) { - memcpy(iv, crd->crd_iv, s->blkcipher.iv_len); - iv_loc = IV_IMMEDIATE; - } else - iv_loc = IV_DSGL; - } - - sglist_reset(sc->sg_dsgl); - error = sglist_append_sglist(sc->sg_dsgl, sc->sg_crp, crd->crd_skip, - crd->crd_len); - if (error) - return (error); - dsgl_nsegs = ccr_count_sgl(sc->sg_dsgl, DSGL_SGE_MAXLEN); - if (dsgl_nsegs > MAX_RX_PHYS_DSGL_SGE) - return (EFBIG); - dsgl_len = ccr_phys_dsgl_len(dsgl_nsegs); - - /* The 'key' must be 128-bit aligned. 
*/ - kctx_len = roundup2(s->blkcipher.key_len, 16); - transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dsgl_len); - - if (ccr_use_imm_data(transhdr_len, crd->crd_len + - s->blkcipher.iv_len)) { - imm_len = crd->crd_len; - if (iv_loc == IV_DSGL) { - crypto_copydata(crp->crp_flags, crp->crp_buf, - crd->crd_inject, s->blkcipher.iv_len, iv); - iv_loc = IV_IMMEDIATE; - } - sgl_nsegs = 0; - sgl_len = 0; - } else { - imm_len = 0; - sglist_reset(sc->sg_ulptx); - if (iv_loc == IV_DSGL) { - error = sglist_append_sglist(sc->sg_ulptx, sc->sg_crp, - crd->crd_inject, s->blkcipher.iv_len); - if (error) - return (error); - } - error = sglist_append_sglist(sc->sg_ulptx, sc->sg_crp, - crd->crd_skip, crd->crd_len); - if (error) - return (error); - sgl_nsegs = sc->sg_ulptx->sg_nseg; - sgl_len = ccr_ulptx_sgl_len(sgl_nsegs); - } - - wr_len = roundup2(transhdr_len, 16) + roundup2(imm_len, 16) + sgl_len; - if (iv_loc == IV_IMMEDIATE) - wr_len += s->blkcipher.iv_len; - wr = alloc_wrqe(wr_len, sc->txq); - if (wr == NULL) { - sc->stats_wr_nomem++; - return (ENOMEM); - } - crwr = wrtod(wr); - memset(crwr, 0, wr_len); - - ccr_populate_wreq(sc, crwr, kctx_len, wr_len, sid, imm_len, sgl_len, 0, - iv_loc, crp); - - /* XXX: Hardcodes SGE loopback channel of 0. */ - crwr->sec_cpl.op_ivinsrtofst = htobe32( - V_CPL_TX_SEC_PDU_OPCODE(CPL_TX_SEC_PDU) | - V_CPL_TX_SEC_PDU_RXCHID(sc->tx_channel_id) | - V_CPL_TX_SEC_PDU_ACKFOLLOWS(0) | V_CPL_TX_SEC_PDU_ULPTXLPBK(1) | - V_CPL_TX_SEC_PDU_CPLLEN(2) | V_CPL_TX_SEC_PDU_PLACEHOLDER(0) | - V_CPL_TX_SEC_PDU_IVINSRTOFST(1)); - - crwr->sec_cpl.pldlen = htobe32(s->blkcipher.iv_len + crd->crd_len); - - crwr->sec_cpl.aadstart_cipherstop_hi = htobe32( - V_CPL_TX_SEC_PDU_CIPHERSTART(s->blkcipher.iv_len + 1) | - V_CPL_TX_SEC_PDU_CIPHERSTOP_HI(0)); - crwr->sec_cpl.cipherstop_lo_authinsert = htobe32( - V_CPL_TX_SEC_PDU_CIPHERSTOP_LO(0)); - - /* These two flits are actually a CPL_TLS_TX_SCMD_FMT. 
*/ - crwr->sec_cpl.seqno_numivs = htobe32( - V_SCMD_SEQ_NO_CTRL(0) | - V_SCMD_PROTO_VERSION(CHCR_SCMD_PROTO_VERSION_GENERIC) | - V_SCMD_ENC_DEC_CTRL(op_type) | - V_SCMD_CIPH_MODE(s->blkcipher.cipher_mode) | - V_SCMD_AUTH_MODE(CHCR_SCMD_AUTH_MODE_NOP) | - V_SCMD_HMAC_CTRL(CHCR_SCMD_HMAC_CTRL_NOP) | - V_SCMD_IV_SIZE(s->blkcipher.iv_len / 2) | - V_SCMD_NUM_IVS(0)); - crwr->sec_cpl.ivgen_hdrlen = htobe32( - V_SCMD_IV_GEN_CTRL(0) | - V_SCMD_MORE_FRAGS(0) | V_SCMD_LAST_FRAG(0) | V_SCMD_MAC_ONLY(0) | - V_SCMD_AADIVDROP(1) | V_SCMD_HDR_LEN(dsgl_len)); - - crwr->key_ctx.ctx_hdr = s->blkcipher.key_ctx_hdr; - switch (crd->crd_alg) { - case CRYPTO_AES_CBC: - if (crd->crd_flags & CRD_F_ENCRYPT) - memcpy(crwr->key_ctx.key, s->blkcipher.enckey, - s->blkcipher.key_len); - else - memcpy(crwr->key_ctx.key, s->blkcipher.deckey, - s->blkcipher.key_len); - break; - case CRYPTO_AES_ICM: - memcpy(crwr->key_ctx.key, s->blkcipher.enckey, - s->blkcipher.key_len); - break; - case CRYPTO_AES_XTS: - key_half = s->blkcipher.key_len / 2; - memcpy(crwr->key_ctx.key, s->blkcipher.enckey + key_half, - key_half); - if (crd->crd_flags & CRD_F_ENCRYPT) - memcpy(crwr->key_ctx.key + key_half, - s->blkcipher.enckey, key_half); - else - memcpy(crwr->key_ctx.key + key_half, - s->blkcipher.deckey, key_half); - break; - } - - dst = (char *)(crwr + 1) + kctx_len; - ccr_write_phys_dsgl(sc, dst, dsgl_nsegs); - dst += sizeof(struct cpl_rx_phys_dsgl) + dsgl_len; - if (iv_loc == IV_IMMEDIATE) { - memcpy(dst, iv, s->blkcipher.iv_len); - dst += s->blkcipher.iv_len; - } - if (imm_len != 0) - crypto_copydata(crp->crp_flags, crp->crp_buf, crd->crd_skip, - crd->crd_len, dst); - else - ccr_write_ulptx_sgl(sc, dst, sgl_nsegs); - - /* XXX: TODO backpressure */ - t4_wrq_tx(sc->adapter, wr); - - return (0); -} - -static int -ccr_blkcipher_done(struct ccr_softc *sc, struct ccr_session *s, - struct cryptop *crp, const struct cpl_fw6_pld *cpl, int error) -{ - - /* - * The updated IV to permit chained requests is at - * 
cpl->data[2], but OCF doesn't permit chained requests. - */ - return (error); -} - -/* - * 'hashsize' is the length of a full digest. 'authsize' is the - * requested digest length for this operation which may be less - * than 'hashsize'. - */ -static int -ccr_hmac_ctrl(unsigned int hashsize, unsigned int authsize) -{ - - if (authsize == 10) - return (CHCR_SCMD_HMAC_CTRL_TRUNC_RFC4366); - if (authsize == 12) - return (CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT); - if (authsize == hashsize / 2) - return (CHCR_SCMD_HMAC_CTRL_DIV2); - return (CHCR_SCMD_HMAC_CTRL_NO_TRUNC); -} - -static int -ccr_authenc(struct ccr_softc *sc, uint32_t sid, struct ccr_session *s, - struct cryptop *crp, struct cryptodesc *crda, struct cryptodesc *crde) -{ - char iv[CHCR_MAX_CRYPTO_IV_LEN]; - struct chcr_wr *crwr; - struct wrqe *wr; - struct auth_hash *axf; - char *dst; - u_int iv_loc, kctx_len, key_half, op_type, transhdr_len, wr_len; - u_int hash_size_in_response, imm_len, iopad_size; - u_int aad_start, aad_len, aad_stop; - u_int auth_start, auth_stop, auth_insert; - u_int cipher_start, cipher_stop; - u_int hmac_ctrl, input_len; - int dsgl_nsegs, dsgl_len; - int sgl_nsegs, sgl_len; - int error; - - /* - * If there is a need in the future, requests with an empty - * payload could be supported as HMAC-only requests. - */ - if (s->blkcipher.key_len == 0 || crde->crd_len == 0) - return (EINVAL); - if (crde->crd_alg == CRYPTO_AES_CBC && - (crde->crd_len % AES_BLOCK_LEN) != 0) - return (EINVAL); - - /* - * AAD is only permitted before the cipher/plain text, not - * after. - */ - if (crda->crd_len + crda->crd_skip > crde->crd_len + crde->crd_skip) - return (EINVAL); - - axf = s->hmac.auth_hash; - hash_size_in_response = s->hmac.hash_len; - - /* - * The IV is always stored at the start of the buffer even - * though it may be duplicated in the payload. The crypto - * engine doesn't work properly if the IV offset points inside - * of the AAD region, so a second copy is always required. 
- */ - iv_loc = IV_IMMEDIATE; - if (crde->crd_flags & CRD_F_ENCRYPT) { - op_type = CHCR_ENCRYPT_OP; - if (crde->crd_flags & CRD_F_IV_EXPLICIT) - memcpy(iv, crde->crd_iv, s->blkcipher.iv_len); - else - arc4rand(iv, s->blkcipher.iv_len, 0); - if ((crde->crd_flags & CRD_F_IV_PRESENT) == 0) - crypto_copyback(crp->crp_flags, crp->crp_buf, - crde->crd_inject, s->blkcipher.iv_len, iv); - } else { - op_type = CHCR_DECRYPT_OP; - if (crde->crd_flags & CRD_F_IV_EXPLICIT) - memcpy(iv, crde->crd_iv, s->blkcipher.iv_len); - else - crypto_copydata(crp->crp_flags, crp->crp_buf, - crde->crd_inject, s->blkcipher.iv_len, iv); - } - - /* - * The output buffer consists of the cipher text followed by - * the hash when encrypting. For decryption it only contains - * the plain text. - */ - if (op_type == CHCR_ENCRYPT_OP) { - if (crde->crd_len + hash_size_in_response > MAX_REQUEST_SIZE) - return (EFBIG); - } else { - if (crde->crd_len > MAX_REQUEST_SIZE) - return (EFBIG); - } - sglist_reset(sc->sg_dsgl); - error = sglist_append_sglist(sc->sg_dsgl, sc->sg_crp, crde->crd_skip, - crde->crd_len); - if (error) - return (error); - if (op_type == CHCR_ENCRYPT_OP) { - error = sglist_append_sglist(sc->sg_dsgl, sc->sg_crp, - crda->crd_inject, hash_size_in_response); - if (error) - return (error); - } - dsgl_nsegs = ccr_count_sgl(sc->sg_dsgl, DSGL_SGE_MAXLEN); - if (dsgl_nsegs > MAX_RX_PHYS_DSGL_SGE) - return (EFBIG); - dsgl_len = ccr_phys_dsgl_len(dsgl_nsegs); - - /* PADs must be 128-bit aligned. */ - iopad_size = roundup2(s->hmac.partial_digest_len, 16); - - /* - * The 'key' part of the key context consists of the key followed - * by the IPAD and OPAD. - */ - kctx_len = roundup2(s->blkcipher.key_len, 16) + iopad_size * 2; - transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dsgl_len); - - /* - * The input buffer consists of the IV, any AAD, and then the - * cipher/plain text. For decryption requests the hash is - * appended after the cipher text. 
- */ - if (crda->crd_skip < crde->crd_skip) { - if (crda->crd_skip + crda->crd_len > crde->crd_skip) - aad_len = (crde->crd_skip - crda->crd_skip); - else - aad_len = crda->crd_len; - } else - aad_len = 0; - input_len = aad_len + crde->crd_len; - - /* - * The firmware hangs if sent a request which is a - * bit smaller than MAX_REQUEST_SIZE. In particular, the - * firmware appears to require 512 - 16 bytes of spare room - * along with the size of the hash even if the hash isn't - * included in the input buffer. - */ - if (input_len + roundup2(axf->hashsize, 16) + (512 - 16) > - MAX_REQUEST_SIZE) - return (EFBIG); - if (op_type == CHCR_DECRYPT_OP) - input_len += hash_size_in_response; - if (ccr_use_imm_data(transhdr_len, s->blkcipher.iv_len + input_len)) { - imm_len = input_len; - sgl_nsegs = 0; - sgl_len = 0; - } else { - imm_len = 0; - sglist_reset(sc->sg_ulptx); - if (aad_len != 0) { - error = sglist_append_sglist(sc->sg_ulptx, sc->sg_crp, - crda->crd_skip, aad_len); - if (error) - return (error); - } - error = sglist_append_sglist(sc->sg_ulptx, sc->sg_crp, - crde->crd_skip, crde->crd_len); - if (error) - return (error); - if (op_type == CHCR_DECRYPT_OP) { - error = sglist_append_sglist(sc->sg_ulptx, sc->sg_crp, - crda->crd_inject, hash_size_in_response); - if (error) - return (error); - } - sgl_nsegs = sc->sg_ulptx->sg_nseg; - sgl_len = ccr_ulptx_sgl_len(sgl_nsegs); - } - - /* - * Any auth-only data before the cipher region is marked as AAD. - * Auth-data that overlaps with the cipher region is placed in - * the auth section. 
- */ - if (aad_len != 0) { - aad_start = s->blkcipher.iv_len + 1; - aad_stop = aad_start + aad_len - 1; - } else { - aad_start = 0; - aad_stop = 0; - } - cipher_start = s->blkcipher.iv_len + aad_len + 1; - if (op_type == CHCR_DECRYPT_OP) - cipher_stop = hash_size_in_response; - else - cipher_stop = 0; - if (aad_len == crda->crd_len) { - auth_start = 0; - auth_stop = 0; - } else { - if (aad_len != 0) - auth_start = cipher_start; - else - auth_start = s->blkcipher.iv_len + crda->crd_skip - - crde->crd_skip + 1; - auth_stop = (crde->crd_skip + crde->crd_len) - - (crda->crd_skip + crda->crd_len) + cipher_stop; - } - if (op_type == CHCR_DECRYPT_OP) - auth_insert = hash_size_in_response; - else - auth_insert = 0; - - wr_len = roundup2(transhdr_len, 16) + roundup2(imm_len, 16) + sgl_len; - if (iv_loc == IV_IMMEDIATE) - wr_len += s->blkcipher.iv_len; - wr = alloc_wrqe(wr_len, sc->txq); - if (wr == NULL) { - sc->stats_wr_nomem++; - return (ENOMEM); - } - crwr = wrtod(wr); - memset(crwr, 0, wr_len); - - ccr_populate_wreq(sc, crwr, kctx_len, wr_len, sid, imm_len, sgl_len, - op_type == CHCR_DECRYPT_OP ? hash_size_in_response : 0, iv_loc, - crp); - - /* XXX: Hardcodes SGE loopback channel of 0. 
*/ - crwr->sec_cpl.op_ivinsrtofst = htobe32( - V_CPL_TX_SEC_PDU_OPCODE(CPL_TX_SEC_PDU) | - V_CPL_TX_SEC_PDU_RXCHID(sc->tx_channel_id) | - V_CPL_TX_SEC_PDU_ACKFOLLOWS(0) | V_CPL_TX_SEC_PDU_ULPTXLPBK(1) | - V_CPL_TX_SEC_PDU_CPLLEN(2) | V_CPL_TX_SEC_PDU_PLACEHOLDER(0) | - V_CPL_TX_SEC_PDU_IVINSRTOFST(1)); - - crwr->sec_cpl.pldlen = htobe32(s->blkcipher.iv_len + input_len); - - crwr->sec_cpl.aadstart_cipherstop_hi = htobe32( - V_CPL_TX_SEC_PDU_AADSTART(aad_start) | - V_CPL_TX_SEC_PDU_AADSTOP(aad_stop) | - V_CPL_TX_SEC_PDU_CIPHERSTART(cipher_start) | - V_CPL_TX_SEC_PDU_CIPHERSTOP_HI(cipher_stop >> 4)); - crwr->sec_cpl.cipherstop_lo_authinsert = htobe32( - V_CPL_TX_SEC_PDU_CIPHERSTOP_LO(cipher_stop & 0xf) | - V_CPL_TX_SEC_PDU_AUTHSTART(auth_start) | - V_CPL_TX_SEC_PDU_AUTHSTOP(auth_stop) | - V_CPL_TX_SEC_PDU_AUTHINSERT(auth_insert)); - *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201801182201.w0IM1Ubo023725>