From owner-freebsd-arch@FreeBSD.ORG Mon May 2 19:37:21 2011 Return-Path: Delivered-To: arch@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 4EE87106564A for ; Mon, 2 May 2011 19:37:21 +0000 (UTC) (envelope-from jhb@freebsd.org) Received: from cyrus.watson.org (cyrus.watson.org [65.122.17.42]) by mx1.freebsd.org (Postfix) with ESMTP id 1382D8FC19 for ; Mon, 2 May 2011 19:37:21 +0000 (UTC) Received: from bigwig.baldwin.cx (66.111.2.69.static.nyinternet.net [66.111.2.69]) by cyrus.watson.org (Postfix) with ESMTPSA id 9F8AD46B0D for ; Mon, 2 May 2011 15:37:20 -0400 (EDT) Received: from jhbbsd.localnet (unknown [209.249.190.124]) by bigwig.baldwin.cx (Postfix) with ESMTPSA id 39E1B8A01B for ; Mon, 2 May 2011 15:37:20 -0400 (EDT) From: John Baldwin To: arch@freebsd.org Date: Mon, 2 May 2011 15:37:19 -0400 User-Agent: KMail/1.13.5 (FreeBSD/8.2-CBSD-20110325; KDE/4.5.5; amd64; ; ) MIME-Version: 1.0 Content-Type: Text/Plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Message-Id: <201105021537.19507.jhb@freebsd.org> X-Greylist: Sender succeeded SMTP AUTH, not delayed by milter-greylist-4.2.6 (bigwig.baldwin.cx); Mon, 02 May 2011 15:37:20 -0400 (EDT) Cc: Subject: [PATCH] Add ktrace records for user page faults X-BeenThere: freebsd-arch@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Discussion related to FreeBSD architecture List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 02 May 2011 19:37:21 -0000 One thing I have found useful is knowing when processes are in the kernel instead of in userland. ktrace already provides records for syscall entry/exit. The other major source of time spent in the kernel that I've seen is page fault handling. To that end, I have a patch that adds ktrace records to the beginning and end of VM faults. This gives a pair of records so a user can see how long a fault took (similar to how one can see how long a syscall takes now). Sample output from kdump is below: 47565 echo CALL mmap(0x800a87000,0x179000,PROT_READ| PROT_WRITE,MAP_PRIVATE|MAP_ANON,0xffffffff,0) 47565 echo RET mmap 34370777088/0x800a87000 47565 echo PFLT 0x800723000 VM_PROT_EXECUTE 47565 echo RET KERN_SUCCESS 47565 echo CALL munmap(0x800887000,0x179000) 47565 echo RET munmap 0 47565 echo PFLT 0x800a00000 VM_PROT_WRITE 47565 echo RET KERN_SUCCESS The patch is available at www.freebsd.org/~jhb/patches/ktrace_fault.patch and included below. Index: usr.bin/kdump/kdump.c =================================================================== --- usr.bin/kdump/kdump.c (.../mirror/FreeBSD/stable/8) (revision 221926) +++ usr.bin/kdump/kdump.c (.../stable/8) (revision 221926) @@ -103,6 +103,8 @@ void ktrsockaddr(struct sockaddr *); void ktrstat(struct stat *); void ktrstruct(char *, size_t); +void ktrfault(struct ktr_fault *); +void ktrfaultend(struct ktr_faultend *); void usage(void); void sockfamilyname(int); const char *ioctlname(u_long); @@ -306,6 +308,12 @@ case KTR_STRUCT: ktrstruct(m, ktrlen); break; + case KTR_FAULT: + ktrfault((struct ktr_fault *)m); + break; + case KTR_FAULTEND: + ktrfaultend((struct ktr_faultend *)m); + break; default: printf("\n"); break; @@ -445,6 +453,12 @@ /* FALLTHROUGH */ case KTR_PROCDTOR: return; + case KTR_FAULT: + type = "PFLT"; + break; + case KTR_FAULTEND: + type = "RET "; + break; default: (void)sprintf(unknown, "UNKNOWN(%d)", kth->ktr_type); type = unknown; @@ -1505,6 +1519,23 @@ printf("invalid record\n"); } +void +ktrfault(struct ktr_fault *ktr) +{ + + printf("0x%jx ", ktr->vaddr); + vmprotname(ktr->type); + printf("\n"); +} + +void +ktrfaultend(struct ktr_faultend *ktr) +{ + + vmresultname(ktr->result); + printf("\n"); +} + #if defined(__amd64__) || defined(__i386__) void linux_ktrsyscall(struct ktr_syscall *ktr) Index: usr.bin/kdump/kdump_subr.h =================================================================== --- usr.bin/kdump/kdump_subr.h (.../mirror/FreeBSD/stable/8) (revision 221926) +++ usr.bin/kdump/kdump_subr.h (.../stable/8) (revision 221926) @@ -45,3 +45,5 @@ void minheritname (int); void quotactlname (int); void ptraceopname (int); +void vmprotname (int); +void vmresultname (int); Index: usr.bin/kdump/mksubr =================================================================== --- usr.bin/kdump/mksubr (.../mirror/FreeBSD/stable/8) (revision 221926) +++ usr.bin/kdump/mksubr (.../stable/8) (revision 221926) @@ -160,6 +160,8 @@ #include #include #include +#include +#include #include "kdump_subr.h" @@ -304,6 +306,26 @@ } } +/* + * MANUAL + * + * Used for page fault type. Cannot use auto_or_type since the macro + * values contain a cast. Also, VM_PROT_NONE has to be handled specially. + */ +void +vmprotname (int type) +{ + int or = 0; + + if (type == VM_PROT_NONE) { + (void)printf("VM_PROT_NONE"); + return; + } + if_print_or(type, VM_PROT_READ, or); + if_print_or(type, VM_PROT_WRITE, or); + if_print_or(type, VM_PROT_EXECUTE, or); + if_print_or(type, VM_PROT_OVERRIDE_WRITE, or); +} _EOF_ auto_or_type "modename" "S_[A-Z]+[[:space:]]+[0-6]{7}" "sys/stat.h" @@ -344,6 +366,7 @@ auto_switch_type "sockoptname" "SO_[A-Z]+[[:space:]]+0x[0-9]+" "sys/socket.h" auto_switch_type "socktypename" "SOCK_[A-Z]+[[:space:]]+[1-9]+[0-9]*" "sys/socket.h" auto_switch_type "ptraceopname" "PT_[[:alnum:]]+[[:space:]]+[0-9]+" "sys/ptrace.h" +auto_switch_type "vmresultname" "KERN_[A-Z]+[[:space:]]+[0-9]+" "vm/vm_param.h" cat <<_EOF_ /* Index: usr.bin/ktrace/ktrace.h =================================================================== --- usr.bin/ktrace/ktrace.h (.../mirror/FreeBSD/stable/8) (revision 221926) +++ usr.bin/ktrace/ktrace.h (.../stable/8) (revision 221926) @@ -36,7 +36,8 @@ #define DEF_POINTS (KTRFAC_SYSCALL | KTRFAC_SYSRET | KTRFAC_NAMEI | \ KTRFAC_GENIO | KTRFAC_PSIG | KTRFAC_USER | \ - KTRFAC_STRUCT | KTRFAC_SYSCTL) + KTRFAC_STRUCT | KTRFAC_SYSCTL | KTRFAC_FAULT | \ + KTRFAC_FAULTEND) #define PROC_ABI_POINTS (KTRFAC_PROCCTOR | KTRFAC_PROCDTOR) Index: usr.bin/ktrace/ktrace.1 =================================================================== --- usr.bin/ktrace/ktrace.1 (.../mirror/FreeBSD/stable/8) (revision 221926) +++ usr.bin/ktrace/ktrace.1 (.../stable/8) (revision 221926) @@ -112,6 +112,8 @@ .Bl -tag -width flag -compact .It Cm c trace system calls +.It Cm f +trace page faults .It Cm i trace .Tn I/O @@ -131,7 +133,7 @@ requests .It Cm + trace the default set of trace points - -.Cm c , i , n , s , t , u , y +.Cm c , f , i , n , s , t , u , y .El .It Ar command Execute Index: usr.bin/ktrace/subr.c =================================================================== --- usr.bin/ktrace/subr.c (.../mirror/FreeBSD/stable/8) (revision 221926) +++ usr.bin/ktrace/subr.c (.../stable/8) (revision 221926) @@ -65,6 +65,9 @@ case 'c': facs |= KTRFAC_SYSCALL | KTRFAC_SYSRET; break; + case 'f': + facs |= KTRFAC_FAULT | KTRFAC_FAULTEND; + break; case 'n': facs |= KTRFAC_NAMEI; break; Index: sys/kern/kern_ktrace.c =================================================================== --- sys/kern/kern_ktrace.c (.../mirror/FreeBSD/stable/8) (revision 221926) +++ sys/kern/kern_ktrace.c (.../stable/8) (revision 221926) @@ -98,6 +98,8 @@ struct ktr_genio ktr_genio; struct ktr_psig ktr_psig; struct ktr_csw ktr_csw; + struct ktr_fault ktr_fault; + struct ktr_faultend ktr_faultend; } ktr_data; STAILQ_ENTRY(ktr_request) ktr_list; }; @@ -115,6 +117,8 @@ 0, /* KTR_SYSCTL */ sizeof(struct ktr_proc_ctor), /* KTR_PROCCTOR */ 0, /* KTR_PROCDTOR */ + sizeof(struct ktr_fault), /* KTR_FAULT */ + sizeof(struct ktr_faultend), /* KTR_FAULTEND */ }; static STAILQ_HEAD(, ktr_request) ktr_free; @@ -767,6 +769,38 @@ req->ktr_header.ktr_len = buflen; ktr_submitrequest(curthread, req); } + +void +ktrfault(vaddr, type) + vm_offset_t vaddr; + int type; +{ + struct ktr_request *req; + struct ktr_fault *kf; + + req = ktr_getrequest(KTR_FAULT); + if (req == NULL) + return; + kf = &req->ktr_data.ktr_fault; + kf->vaddr = vaddr; + kf->type = type; + ktr_enqueuerequest(curthread, req); +} + +void +ktrfaultend(result) + int result; +{ + struct ktr_request *req; + struct ktr_faultend *kf; + + req = ktr_getrequest(KTR_FAULTEND); + if (req == NULL) + return; + kf = &req->ktr_data.ktr_faultend; + kf->result = result; + ktr_enqueuerequest(curthread, req); +} #endif /* KTRACE */ /* Interface and common routines */ Index: sys/vm/vm_fault.c =================================================================== --- sys/vm/vm_fault.c (.../mirror/FreeBSD/stable/8) (revision 221926) +++ sys/vm/vm_fault.c (.../stable/8) (revision 221926) @@ -74,6 +74,7 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_ktrace.h" #include "opt_vm.h" #include @@ -86,6 +87,9 @@ #include #include #include +#ifdef KTRACE +#include +#endif #include #include @@ -114,6 +118,9 @@ static int vm_fault_additional_pages(vm_page_t, int, int, vm_page_t *, int *); static void vm_fault_prefault(pmap_t, vm_offset_t, vm_map_entry_t); +#ifdef KTRACE +static int vm_fault_traced(vm_map_t, vm_offset_t, vm_prot_t, int); +#endif #define VM_FAULT_READ_AHEAD 8 #define VM_FAULT_READ_BEHIND 7 @@ -209,7 +216,25 @@ int vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags) +#ifdef KTRACE { + struct thread *td; + int result; + + td = curthread; + if (map != kernel_map && KTRPOINT(td, KTR_FAULT)) + ktrfault(vaddr, fault_type); + result = vm_fault_traced(map, vaddr, fault_type, fault_flags); + if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND)) + ktrfaultend(result); + return (result); +} + +int +vm_fault_traced(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, + int fault_flags) +#endif +{ vm_prot_t prot; int is_first_object_locked, result; boolean_t are_queues_locked, growstack, wired; Index: sys/sys/ktrace.h =================================================================== --- sys/sys/ktrace.h (.../mirror/FreeBSD/stable/8) (revision 221926) +++ sys/sys/ktrace.h (.../stable/8) (revision 221926) @@ -178,6 +182,23 @@ #define KTR_PROCDTOR 11 /* + * KTR_FAULT - page fault record + */ +#define KTR_FAULT 12 +struct ktr_fault { + vm_offset_t vaddr; + int type; +}; + +/* + * KTR_FAULTEND - end of page fault record + */ +#define KTR_FAULTEND 13 +struct ktr_faultend { + int result; +}; + +/* * KTR_DROP - If this bit is set in ktr_type, then at least one event * between the previous record and this record was dropped. */ @@ -198,6 +219,8 @@ #define KTRFAC_SYSCTL (1<