Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 2 May 2011 15:37:19 -0400
From:      John Baldwin <jhb@freebsd.org>
To:        arch@freebsd.org
Subject:   [PATCH] Add ktrace records for user page faults
Message-ID:  <201105021537.19507.jhb@freebsd.org>

next in thread | raw e-mail | index | archive | help
One thing I have found useful is knowing when processes are in the kernel 
instead of in userland.  ktrace already provides records for syscall 
entry/exit.  The other major source of time spent in the kernel that I've seen 
is page fault handling.  To that end, I have a patch that adds ktrace records 
to the beginning and end of VM faults.  This gives a pair of records so a user 
can see how long a fault took (similar to how one can see how long a syscall 
takes now).  Sample output from kdump is below:

 47565 echo     CALL  mmap(0x800a87000,0x179000,PROT_READ|
PROT_WRITE,MAP_PRIVATE|MAP_ANON,0xffffffff,0)
 47565 echo     RET   mmap 34370777088/0x800a87000
 47565 echo     PFLT  0x800723000 VM_PROT_EXECUTE
 47565 echo     RET   KERN_SUCCESS
 47565 echo     CALL  munmap(0x800887000,0x179000)
 47565 echo     RET   munmap 0
 47565 echo     PFLT  0x800a00000 VM_PROT_WRITE
 47565 echo     RET   KERN_SUCCESS

The patch is available at www.freebsd.org/~jhb/patches/ktrace_fault.patch and 
included below.

Index: usr.bin/kdump/kdump.c
===================================================================
--- usr.bin/kdump/kdump.c	(.../mirror/FreeBSD/stable/8)	(revision 221926)
+++ usr.bin/kdump/kdump.c	(.../stable/8)	(revision 221926)
@@ -103,6 +103,8 @@
 void ktrsockaddr(struct sockaddr *);
 void ktrstat(struct stat *);
 void ktrstruct(char *, size_t);
+void ktrfault(struct ktr_fault *);
+void ktrfaultend(struct ktr_faultend *);
 void usage(void);
 void sockfamilyname(int);
 const char *ioctlname(u_long);
@@ -306,6 +308,12 @@
 		case KTR_STRUCT:
 			ktrstruct(m, ktrlen);
 			break;
+		case KTR_FAULT:
+			ktrfault((struct ktr_fault *)m);
+			break;
+		case KTR_FAULTEND:
+			ktrfaultend((struct ktr_faultend *)m);
+			break;
 		default:
 			printf("\n");
 			break;
@@ -445,6 +453,12 @@
 		/* FALLTHROUGH */
 	case KTR_PROCDTOR:
 		return;
+	case KTR_FAULT:
+		type = "PFLT";
+		break;
+	case KTR_FAULTEND:
+		type = "RET ";
+		break;
 	default:
 		(void)sprintf(unknown, "UNKNOWN(%d)", kth->ktr_type);
 		type = unknown;
@@ -1505,6 +1519,23 @@
 	printf("invalid record\n");
 }
 
+void
+ktrfault(struct ktr_fault *ktr)
+{
+
+	printf("0x%jx ", ktr->vaddr);
+	vmprotname(ktr->type);
+	printf("\n");
+}
+
+void
+ktrfaultend(struct ktr_faultend *ktr)
+{
+
+	vmresultname(ktr->result);
+	printf("\n");
+}
+
 #if defined(__amd64__) || defined(__i386__)
 void
 linux_ktrsyscall(struct ktr_syscall *ktr)
Index: usr.bin/kdump/kdump_subr.h
===================================================================
--- usr.bin/kdump/kdump_subr.h	(.../mirror/FreeBSD/stable/8)	(revision 
221926)
+++ usr.bin/kdump/kdump_subr.h	(.../stable/8)	(revision 221926)
@@ -45,3 +45,5 @@
 void minheritname (int);
 void quotactlname (int);
 void ptraceopname (int);
+void vmprotname (int);
+void vmresultname (int);
Index: usr.bin/kdump/mksubr
===================================================================
--- usr.bin/kdump/mksubr	(.../mirror/FreeBSD/stable/8)	(revision 221926)
+++ usr.bin/kdump/mksubr	(.../stable/8)	(revision 221926)
@@ -160,6 +160,8 @@
 #include <sys/shm.h>
 #include <nfsserver/nfs.h>
 #include <ufs/ufs/quota.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
 
 #include "kdump_subr.h"
 
@@ -304,6 +306,26 @@
 	}
 }
 
+/*
+ * MANUAL
+ *
+ * Used for page fault type.  Cannot use auto_or_type since the macro
+ * values contain a cast.  Also, VM_PROT_NONE has to be handled specially.
+ */
+void
+vmprotname (int type)
+{
+	int	or = 0;
+
+	if (type == VM_PROT_NONE) {
+		(void)printf("VM_PROT_NONE");
+		return;
+	}
+	if_print_or(type, VM_PROT_READ, or);
+	if_print_or(type, VM_PROT_WRITE, or);
+	if_print_or(type, VM_PROT_EXECUTE, or);
+	if_print_or(type, VM_PROT_OVERRIDE_WRITE, or);
+}
 _EOF_
 
 auto_or_type "modename" "S_[A-Z]+[[:space:]]+[0-6]{7}" "sys/stat.h"
@@ -344,6 +366,7 @@
 auto_switch_type "sockoptname" "SO_[A-Z]+[[:space:]]+0x[0-9]+" "sys/socket.h"
 auto_switch_type "socktypename" "SOCK_[A-Z]+[[:space:]]+[1-9]+[0-9]*" 
"sys/socket.h"
 auto_switch_type "ptraceopname" "PT_[[:alnum:]]+[[:space:]]+[0-9]+" 
"sys/ptrace.h"
+auto_switch_type "vmresultname" "KERN_[A-Z]+[[:space:]]+[0-9]+" 
"vm/vm_param.h"
 
 cat <<_EOF_
 /*
Index: usr.bin/ktrace/ktrace.h
===================================================================
--- usr.bin/ktrace/ktrace.h	(.../mirror/FreeBSD/stable/8)	(revision 221926)
+++ usr.bin/ktrace/ktrace.h	(.../stable/8)	(revision 221926)
@@ -36,7 +36,8 @@
 
 #define DEF_POINTS (KTRFAC_SYSCALL | KTRFAC_SYSRET | KTRFAC_NAMEI | \
 		    KTRFAC_GENIO | KTRFAC_PSIG | KTRFAC_USER | \
-		    KTRFAC_STRUCT | KTRFAC_SYSCTL)
+		    KTRFAC_STRUCT | KTRFAC_SYSCTL | KTRFAC_FAULT | \
+		    KTRFAC_FAULTEND)
 
 #define PROC_ABI_POINTS (KTRFAC_PROCCTOR | KTRFAC_PROCDTOR)
 
Index: usr.bin/ktrace/ktrace.1
===================================================================
--- usr.bin/ktrace/ktrace.1	(.../mirror/FreeBSD/stable/8)	(revision 221926)
+++ usr.bin/ktrace/ktrace.1	(.../stable/8)	(revision 221926)
@@ -112,6 +112,8 @@
 .Bl -tag -width flag -compact
 .It Cm c
 trace system calls
+.It Cm f
+trace page faults
 .It Cm i
 trace
 .Tn I/O
@@ -131,7 +133,7 @@
 requests
 .It Cm +
 trace the default set of trace points -
-.Cm c , i , n , s , t , u , y
+.Cm c , f , i , n , s , t , u , y
 .El
 .It Ar command
 Execute
Index: usr.bin/ktrace/subr.c
===================================================================
--- usr.bin/ktrace/subr.c	(.../mirror/FreeBSD/stable/8)	(revision 221926)
+++ usr.bin/ktrace/subr.c	(.../stable/8)	(revision 221926)
@@ -65,6 +65,9 @@
 		case 'c':
 			facs |= KTRFAC_SYSCALL | KTRFAC_SYSRET;
 			break;
+		case 'f':
+			facs |= KTRFAC_FAULT | KTRFAC_FAULTEND;
+			break;
 		case 'n':
 			facs |= KTRFAC_NAMEI;
 			break;
Index: sys/kern/kern_ktrace.c
===================================================================
--- sys/kern/kern_ktrace.c	(.../mirror/FreeBSD/stable/8)	(revision 221926)
+++ sys/kern/kern_ktrace.c	(.../stable/8)	(revision 221926)
@@ -98,6 +98,8 @@
 		struct	ktr_genio ktr_genio;
 		struct	ktr_psig ktr_psig;
 		struct	ktr_csw ktr_csw;
+		struct	ktr_fault ktr_fault;
+		struct	ktr_faultend ktr_faultend;
 	} ktr_data;
 	STAILQ_ENTRY(ktr_request) ktr_list;
 };
@@ -115,6 +117,8 @@
 	0,					/* KTR_SYSCTL */
 	sizeof(struct ktr_proc_ctor),		/* KTR_PROCCTOR */
 	0,					/* KTR_PROCDTOR */
+	sizeof(struct ktr_fault),		/* KTR_FAULT */
+	sizeof(struct ktr_faultend),		/* KTR_FAULTEND */
 };
 
 static STAILQ_HEAD(, ktr_request) ktr_free;
@@ -767,6 +769,38 @@
 	req->ktr_header.ktr_len = buflen;
 	ktr_submitrequest(curthread, req);
 }
+
+void
+ktrfault(vaddr, type)
+	vm_offset_t vaddr;
+	int type;
+{
+	struct ktr_request *req;
+	struct ktr_fault *kf;
+
+	req = ktr_getrequest(KTR_FAULT);
+	if (req == NULL)
+		return;
+	kf = &req->ktr_data.ktr_fault;
+	kf->vaddr = vaddr;
+	kf->type = type;
+	ktr_enqueuerequest(curthread, req);
+}
+
+void
+ktrfaultend(result)
+	int result;
+{
+	struct ktr_request *req;
+	struct ktr_faultend *kf;
+
+	req = ktr_getrequest(KTR_FAULTEND);
+	if (req == NULL)
+		return;
+	kf = &req->ktr_data.ktr_faultend;
+	kf->result = result;
+	ktr_enqueuerequest(curthread, req);
+}
 #endif /* KTRACE */
 
 /* Interface and common routines */
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c	(.../mirror/FreeBSD/stable/8)	(revision 221926)
+++ sys/vm/vm_fault.c	(.../stable/8)	(revision 221926)
@@ -74,6 +74,7 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_ktrace.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
@@ -86,6 +87,9 @@
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
@@ -114,6 +118,9 @@
 
 static int vm_fault_additional_pages(vm_page_t, int, int, vm_page_t *, int 
*);
 static void vm_fault_prefault(pmap_t, vm_offset_t, vm_map_entry_t);
+#ifdef KTRACE
+static int vm_fault_traced(vm_map_t, vm_offset_t, vm_prot_t, int);
+#endif
 
 #define VM_FAULT_READ_AHEAD 8
 #define VM_FAULT_READ_BEHIND 7
@@ -209,7 +216,25 @@
 int
 vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
 	 int fault_flags)
+#ifdef KTRACE
 {
+	struct thread *td;
+	int result;
+
+	td = curthread;
+	if (map != kernel_map && KTRPOINT(td, KTR_FAULT))
+		ktrfault(vaddr, fault_type);
+	result = vm_fault_traced(map, vaddr, fault_type, fault_flags);
+	if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND))
+		ktrfaultend(result);
+	return (result);
+}
+
+int
+vm_fault_traced(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+    int fault_flags)
+#endif
+{
 	vm_prot_t prot;
 	int is_first_object_locked, result;
 	boolean_t are_queues_locked, growstack, wired;
Index: sys/sys/ktrace.h
===================================================================
--- sys/sys/ktrace.h	(.../mirror/FreeBSD/stable/8)	(revision 221926)
+++ sys/sys/ktrace.h	(.../stable/8)	(revision 221926)
@@ -178,6 +182,23 @@
 #define KTR_PROCDTOR	11
 
 /*
+ * KTR_FAULT - page fault record
+ */
+#define KTR_FAULT	12
+struct ktr_fault {
+	vm_offset_t vaddr;
+	int type;
+};
+
+/*
+ * KTR_FAULTEND - end of page fault record
+ */
+#define	KTR_FAULTEND	13
+struct ktr_faultend {
+	int result;
+};
+
+/*
  * KTR_DROP - If this bit is set in ktr_type, then at least one event
  * between the previous record and this record was dropped.
  */
@@ -198,6 +219,8 @@
 #define KTRFAC_SYSCTL	(1<<KTR_SYSCTL)
 #define KTRFAC_PROCCTOR	(1<<KTR_PROCCTOR)
 #define KTRFAC_PROCDTOR	(1<<KTR_PROCDTOR)
+#define	KTRFAC_FAULT	(1<<KTR_FAULT)
+#define	KTRFAC_FAULTEND	(1<<KTR_FAULTEND)
 
 /*
  * trace flags (also in p_traceflags)
@@ -210,6 +233,8 @@
 void	ktrnamei(char *);
 void	ktrcsw(int, int);
 void	ktrpsig(int, sig_t, sigset_t *, int);
+void	ktrfault(vm_offset_t, int);
+void	ktrfaultend(int);
 void	ktrgenio(int, enum uio_rw, struct uio *, int);
 void	ktrsyscall(int, int narg, register_t args[]);
 void	ktrsysctl(int *name, u_int namelen);

-- 
John Baldwin



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201105021537.19507.jhb>