From owner-p4-projects@FreeBSD.ORG Sun Feb 5 13:03:17 2006
X-Original-To: p4-projects@freebsd.org
Delivered-To: p4-projects@freebsd.org
Received: by hub.freebsd.org (Postfix, from userid 32767)
	id 8FE2916A423; Sun, 5 Feb 2006 13:03:16 +0000 (GMT)
X-Original-To: perforce@freebsd.org
Delivered-To: perforce@freebsd.org
Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125])
	by hub.freebsd.org (Postfix) with ESMTP id 4D28216A420
	for ; Sun, 5 Feb 2006 13:03:16 +0000 (GMT)
	(envelope-from bb+lists.freebsd.perforce@cyrus.watson.org)
Received: from repoman.freebsd.org (repoman.freebsd.org [216.136.204.115])
	by mx1.FreeBSD.org (Postfix) with ESMTP id F0B2243D46
	for ; Sun, 5 Feb 2006 13:03:15 +0000 (GMT)
	(envelope-from bb+lists.freebsd.perforce@cyrus.watson.org)
Received: from repoman.freebsd.org (localhost [127.0.0.1])
	by repoman.freebsd.org (8.13.1/8.13.1) with ESMTP id k15D3FNk033588
	for ; Sun, 5 Feb 2006 13:03:15 GMT
	(envelope-from bb+lists.freebsd.perforce@cyrus.watson.org)
Received: (from perforce@localhost)
	by repoman.freebsd.org (8.13.1/8.13.1/Submit) id k15D3FPU033585
	for perforce@freebsd.org; Sun, 5 Feb 2006 13:03:15 GMT
	(envelope-from bb+lists.freebsd.perforce@cyrus.watson.org)
Date: Sun, 5 Feb 2006 13:03:15 GMT
Message-Id: <200602051303.k15D3FPU033585@repoman.freebsd.org>
X-Authentication-Warning: repoman.freebsd.org: perforce set sender to
	bb+lists.freebsd.perforce@cyrus.watson.org using -f
From: Robert Watson
To: Perforce Change Reviews
Cc: 
Subject: PERFORCE change 91158 for review
X-BeenThere: p4-projects@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: p4 projects tree changes
X-List-Received-Date: Sun, 05 Feb 2006 13:03:17 -0000

http://perforce.freebsd.org/chv.cgi?CH=91158

Change 91158 by rwatson@rwatson_peppercorn on 2006/02/05 13:03:07

	Manage audit record memory with the slab allocator, turning the
	initialization routines into a ctor and the tear-down routine into
	a dtor, cleaning up, etc.  This will allow audit records to be
	allocated from per-cpu caches.  On recent FreeBSD, dropping the
	audit_mtx around freeing to UMA is no longer required (at one point
	it was possible to acquire Giant on that path), so the mutex-free
	thread-local drain is no longer needed.

Affected files ...

.. //depot/projects/trustedbsd/audit3/sys/security/audit/audit.c#6 edit

Differences ...

==== //depot/projects/trustedbsd/audit3/sys/security/audit/audit.c#6 (text+ko) ====

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 1999-2005 Apple Computer, Inc.
+ * Copyright (c) 2006 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -54,14 +55,17 @@
 #include
 #include
+#include
+#include
+
 #include
 #include
-#include
-#include
 #include
 #include
+#include
+

 /*
  * The AUDIT_EXCESSIVELY_VERBOSE define enables a number of
  * gratuitously noisy printf's to the console.  Due to the
@@ -75,8 +79,8 @@
 #define AUDIT_PRINTF(X)
 #endif

+static uma_zone_t audit_record_zone;
 static MALLOC_DEFINE(M_AUDITPROC, "audit_proc", "Audit process storage");
-static MALLOC_DEFINE(M_AUDITREC, "audit_rec", "Audit event records");
 MALLOC_DEFINE(M_AUDITDATA, "audit_data", "Audit data storage");
 MALLOC_DEFINE(M_AUDITPATH, "audit_path", "Audit path storage");
 MALLOC_DEFINE(M_AUDITTEXT, "audit_text", "Audit text storage");
@@ -191,28 +195,60 @@
 static int audit_file_rotate_wait;

 /*
- * Perform a deep free of an audit record (core record and referenced objects)
+ * Construct an audit record for the passed thread.
  */
+static int
+audit_record_ctor(void *mem, int size, void *arg, int flags)
+{
+        struct kaudit_record *ar;
+        struct thread *td;
+
+        KASSERT(sizeof(*ar) == size, ("audit_record_ctor: wrong size"));
+
+        td = arg;
+        ar = mem;
+        bzero(ar, sizeof(*ar));
+        ar->k_ar.ar_magic = AUDIT_RECORD_MAGIC;
+        nanotime(&ar->k_ar.ar_starttime);
+
+        /*
+         * Export the subject credential.
+         *
+         * XXXAUDIT: td_ucred access is OK without proc lock, but some other
+         * fields here may require the proc lock.
+         */
+        cru2x(td->td_ucred, &ar->k_ar.ar_subj_cred);
+        ar->k_ar.ar_subj_ruid = td->td_ucred->cr_ruid;
+        ar->k_ar.ar_subj_rgid = td->td_ucred->cr_rgid;
+        ar->k_ar.ar_subj_egid = td->td_ucred->cr_groups[0];
+        ar->k_ar.ar_subj_auid = td->td_proc->p_au->ai_auid;
+        ar->k_ar.ar_subj_asid = td->td_proc->p_au->ai_asid;
+        ar->k_ar.ar_subj_pid = td->td_proc->p_pid;
+        ar->k_ar.ar_subj_amask = td->td_proc->p_au->ai_mask;
+        ar->k_ar.ar_subj_term = td->td_proc->p_au->ai_termid;
+        bcopy(td->td_proc->p_comm, ar->k_ar.ar_subj_comm, MAXCOMLEN);
+
+        return (0);
+}
+
 static void
-audit_record_free(struct kaudit_record *ar)
+audit_record_dtor(void *mem, int size, void *arg)
 {
+        struct kaudit_record *ar;
+
+        KASSERT(sizeof(*ar) == size, ("audit_record_dtor: wrong size"));

-        if (ar->k_ar.ar_arg_upath1 != NULL) {
+        ar = mem;
+        if (ar->k_ar.ar_arg_upath1 != NULL)
                 free(ar->k_ar.ar_arg_upath1, M_AUDITPATH);
-        }
-        if (ar->k_ar.ar_arg_upath2 != NULL) {
+        if (ar->k_ar.ar_arg_upath2 != NULL)
                 free(ar->k_ar.ar_arg_upath2, M_AUDITPATH);
-        }
-        if (ar->k_ar.ar_arg_text != NULL) {
+        if (ar->k_ar.ar_arg_text != NULL)
                 free(ar->k_ar.ar_arg_text, M_AUDITTEXT);
-        }
-        if (ar->k_ar.ar_arg_iovecstr != NULL) {
+        if (ar->k_ar.ar_arg_iovecstr != NULL)
                 free(ar->k_ar.ar_arg_iovecstr, M_AUDITTEXT);
-        }
-        if (ar->k_udata != NULL) {
+        if (ar->k_udata != NULL)
                 free(ar->k_udata, M_AUDITDATA);
-        }
-        free(ar, M_AUDITREC);
 }

 /*
@@ -504,58 +540,39 @@
                 }

                 /*
-                 * If we have records, but there's no active vnode to
-                 * write to, drain the record queue.  Generally, we
-                 * prevent the unnecessary allocation of records
-                 * elsewhere, but we need to allow for races between
-                 * conditional allocation and queueing.  Go back to
-                 * waiting when we're done.
-                 *
-                 * XXX: We go out of our way to avoid calling
-                 * audit_record_free() with the audit_mtx held, to avoid a
-                 * lock order reversal as free() may grab Giant.  This
-                 * should be fixed at some point.
+                 * If we have records, but there's no active vnode to write
+                 * to, drain the record queue.  Generally, we prevent the
+                 * unnecessary allocation of records elsewhere, but we need
+                 * to allow for races between conditional allocation and
+                 * queueing.  Go back to waiting when we're done.
                  */
                 if (audit_vp == NULL) {
                         while ((ar = TAILQ_FIRST(&audit_q))) {
                                 TAILQ_REMOVE(&audit_q, ar, k_q);
+                                uma_zfree(audit_record_zone, ar);
                                 audit_q_len--;
+                                /*
+                                 * XXXRW: Why broadcast if we hold the
+                                 * mutex and know that audit_vp is NULL?
+                                 */
                                 if (audit_q_len <= audit_qctrl.aq_lowater)
                                         cv_broadcast(&audit_commit_cv);
-
-                                TAILQ_INSERT_TAIL(&ar_worklist, ar, k_q);
-                        }
-                        mtx_unlock(&audit_mtx);
-                        while ((ar = TAILQ_FIRST(&ar_worklist))) {
-                                TAILQ_REMOVE(&ar_worklist, ar, k_q);
-                                audit_record_free(ar);
                         }
-                        mtx_lock(&audit_mtx);
                         continue;
                 }

                 /*
-                 * We have both records to write and an active vnode
-                 * to write to.  Dequeue a record, and start the write.
-                 * Eventually, it might make sense to dequeue several
-                 * records and perform our own clustering, if the lower
-                 * layers aren't doing it automatically enough.
-                 *
-                 * XXX: We go out of our way to avoid calling
-                 * audit_record_free() with the audit_mtx held, to avoid a
-                 * lock order reversal as free() may grab Giant.  This should
-                 * be fixed at some point.
-                 *
-                 * XXXAUDIT: free() no longer grabs Giant.
+                 * We have both records to write and an active vnode to write
+                 * to.  Dequeue a record, and start the write.  Eventually,
+                 * it might make sense to dequeue several records and perform
+                 * our own clustering, if the lower layers aren't doing it
+                 * automatically enough.
                  */
                 while ((ar = TAILQ_FIRST(&audit_q))) {
                         TAILQ_REMOVE(&audit_q, ar, k_q);
                         audit_q_len--;
                         if (audit_q_len <= audit_qctrl.aq_lowater)
                                 cv_broadcast(&audit_commit_cv);
-
                         TAILQ_INSERT_TAIL(&ar_worklist, ar, k_q);
                 }

@@ -572,7 +589,7 @@
                                         printf("audit_worker: write error %d\n", error);
                         }
-                        audit_record_free(ar);
+                        uma_zfree(audit_record_zone, ar);
                 }
                 mtx_lock(&audit_mtx);
         }
@@ -618,6 +635,10 @@
         cv_init(&audit_commit_cv, "audit_commit_cv");
         cv_init(&audit_fail_cv, "audit_fail_cv");

+        audit_record_zone = uma_zcreate("audit_record_zone",
+            sizeof(struct kaudit_record), audit_record_ctor,
+            audit_record_dtor, NULL, NULL, UMA_ALIGN_PTR, 0);
+
         /* Initialize the BSM audit subsystem. */
         kau_init();

@@ -735,65 +756,24 @@
         struct kaudit_record *ar;
         int no_record;

-        /*
-         * Eventually, there may be certain classes of events that
-         * we will audit regardless of the audit state at the time
-         * the record is created.  These events will generally
-         * correspond to changes in the audit state.  The dummy
-         * code below is from our first prototype, but may also
-         * be used in the final version (with modified event numbers).
-         */
-#if 0
-        if (event != AUDIT_EVENT_FILESTOP && event != AUDIT_EVENT_FILESTART) {
-#endif
-        mtx_lock(&audit_mtx);
-        no_record = (audit_suspended || !audit_enabled);
-        mtx_unlock(&audit_mtx);
-        if (no_record)
-                return (NULL);
-#if 0
-        }
-#endif
+        mtx_lock(&audit_mtx);
+        no_record = (audit_suspended || !audit_enabled);
+        mtx_unlock(&audit_mtx);
+        if (no_record)
+                return (NULL);

         /*
-         * Initialize the audit record header.
-         * XXX: We may want to fail-stop if allocation fails.
          * XXX: The number of outstanding uncommitted audit records is
-         * limited by the number of concurrent threads servicing system
+         * limited to the number of concurrent threads servicing system
          * calls in the kernel.
          */
+        ar = uma_zalloc_arg(audit_record_zone, td, M_WAITOK);
+        ar->k_ar.ar_event = event;

-        ar = malloc(sizeof(*ar), M_AUDITREC, M_WAITOK);
-        if (ar == NULL)
-                return NULL;
-
         mtx_lock(&audit_mtx);
         audit_pre_q_len++;
         mtx_unlock(&audit_mtx);

-        bzero(ar, sizeof(*ar));
-        ar->k_ar.ar_magic = AUDIT_RECORD_MAGIC;
-        ar->k_ar.ar_event = event;
-        nanotime(&ar->k_ar.ar_starttime);
-
-        /*
-         * Export the subject credential.
-         *
-         * XXXAUDIT: td_ucred access is OK without proc lock, but some other
-         * fields here may require the proc lock.
-         */
-        cru2x(td->td_ucred, &ar->k_ar.ar_subj_cred);
-        ar->k_ar.ar_subj_ruid = td->td_ucred->cr_ruid;
-        ar->k_ar.ar_subj_rgid = td->td_ucred->cr_rgid;
-        ar->k_ar.ar_subj_egid = td->td_ucred->cr_groups[0];
-        ar->k_ar.ar_subj_auid = td->td_proc->p_au->ai_auid;
-        ar->k_ar.ar_subj_asid = td->td_proc->p_au->ai_asid;
-        ar->k_ar.ar_subj_pid = td->td_proc->p_pid;
-        ar->k_ar.ar_subj_amask = td->td_proc->p_au->ai_mask;
-        ar->k_ar.ar_subj_term = td->td_proc->p_au->ai_termid;
-
-        bcopy(td->td_proc->p_comm, ar->k_ar.ar_subj_comm, MAXCOMLEN);
-
         return (ar);
 }

@@ -851,11 +831,15 @@
         if (au_preselect(ar->k_ar.ar_event, aumask, sorf) != 0)
                 ar->k_ar_commit |= AR_COMMIT_KERNEL;

+        /*
+         * XXXRW: Why is this necessary?  Should we ever accept a record that
+         * we're not willing to commit?
+         */
         if ((ar->k_ar_commit & (AR_COMMIT_USER | AR_COMMIT_KERNEL)) == 0) {
                 mtx_lock(&audit_mtx);
                 audit_pre_q_len--;
                 mtx_unlock(&audit_mtx);
-                audit_record_free(ar);
+                uma_zfree(audit_record_zone, ar);
                 return;
         }

@@ -881,7 +865,7 @@
         if (audit_suspended || !audit_enabled) {
                 audit_pre_q_len--;
                 mtx_unlock(&audit_mtx);
-                audit_record_free(ar);
+                uma_zfree(audit_record_zone, ar);
                 return;
         }
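
For readers less familiar with UMA, the change follows the standard zone
ctor/dtor pattern sketched below.  The sketch is an invented outline, not
code from this patch: the example_* names and the placeholder structure are
assumptions for illustration only, and the real audit_record_ctor()
additionally snapshots the full subject credential from the thread pointer
that audit_new() passes through uma_zalloc_arg(), as in the hunks above.

        #include <sys/param.h>
        #include <sys/systm.h>
        #include <sys/malloc.h>
        #include <sys/proc.h>
        #include <vm/uma.h>

        struct example_record {
                pid_t   er_pid;         /* placeholder payload */
        };

        static uma_zone_t example_zone;

        /*
         * Item constructor: runs on every successful uma_zalloc_arg();
         * 'arg' is whatever pointer the allocating caller passed in.
         */
        static int
        example_ctor(void *mem, int size, void *arg, int flags)
        {
                struct example_record *er = mem;
                struct thread *td = arg;

                KASSERT(size == sizeof(*er), ("example_ctor: wrong size"));
                bzero(er, sizeof(*er));
                er->er_pid = td->td_proc->p_pid;
                return (0);
        }

        /*
         * Item destructor: runs on uma_zfree() before the memory goes back
         * to the per-CPU cache, so anything the item references must be
         * released here.
         */
        static void
        example_dtor(void *mem, int size, void *arg)
        {

                KASSERT(size == sizeof(struct example_record),
                    ("example_dtor: wrong size"));
        }

        static void
        example_init(void)
        {

                example_zone = uma_zcreate("example_zone",
                    sizeof(struct example_record), example_ctor,
                    example_dtor, NULL, NULL, UMA_ALIGN_PTR, 0);
        }

        static struct example_record *
        example_new(struct thread *td)
        {

                /* The ctor sees 'td' as its 'arg' and fills in the item. */
                return (uma_zalloc_arg(example_zone, td, M_WAITOK));
        }

        static void
        example_drop(struct example_record *er)
        {

                /* The dtor runs here, then the item returns to the cache. */
                uma_zfree(example_zone, er);
        }

The point of moving per-record setup into the ctor and cleanup into the dtor
is that uma_zalloc_arg() and uma_zfree() can then hand items out of and back
into per-CPU caches without taking a global allocator lock on the common
path, which is what the description above means by allocating audit records
from per-cpu caches.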