Date: Fri, 31 Oct 2008 17:53:14 GMT From: Peter Wemm <peter@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 152286 for review Message-ID: <200810311753.m9VHrEQg086630@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=152286 Change 152286 by peter@peter_daintree on 2008/10/31 17:52:42 Integrate @152285 Affected files ... .. //depot/projects/valgrind/VEX/priv/guest-generic/bb_to_IR.c#3 integrate .. //depot/projects/valgrind/cachegrind/docs/cg-manual.xml#3 integrate .. //depot/projects/valgrind/callgrind/docs/cl-manual.xml#3 integrate .. //depot/projects/valgrind/callgrind/dump.c#3 integrate .. //depot/projects/valgrind/coregrind/Makefile.am#7 integrate .. //depot/projects/valgrind/coregrind/m_aspacemgr/aspacemgr-linux.c#4 integrate .. //depot/projects/valgrind/coregrind/m_debuginfo/debuginfo.c#6 integrate .. //depot/projects/valgrind/coregrind/m_debuginfo/priv_storage.h#3 integrate .. //depot/projects/valgrind/coregrind/m_debuginfo/priv_tytypes.h#2 integrate .. //depot/projects/valgrind/coregrind/m_debuginfo/readdwarf3.c#2 integrate .. //depot/projects/valgrind/coregrind/m_debuginfo/readelf.c#4 integrate .. //depot/projects/valgrind/coregrind/m_debuginfo/storage.c#3 integrate .. //depot/projects/valgrind/coregrind/m_debuginfo/tytypes.c#2 integrate .. //depot/projects/valgrind/coregrind/m_demangle/ansidecl.h#2 integrate .. //depot/projects/valgrind/coregrind/m_demangle/cp-demangle.c#3 integrate .. //depot/projects/valgrind/coregrind/m_demangle/cp-demangle.h#1 branch .. //depot/projects/valgrind/coregrind/m_demangle/cplus-dem.c#3 integrate .. //depot/projects/valgrind/coregrind/m_demangle/demangle.c#3 integrate .. //depot/projects/valgrind/coregrind/m_demangle/demangle.h#3 integrate .. //depot/projects/valgrind/coregrind/m_demangle/dyn-string.c#3 integrate .. //depot/projects/valgrind/coregrind/m_demangle/dyn-string.h#2 integrate .. //depot/projects/valgrind/coregrind/m_demangle/safe-ctype.c#2 integrate .. //depot/projects/valgrind/coregrind/m_demangle/safe-ctype.h#2 integrate .. //depot/projects/valgrind/coregrind/m_demangle/vg_libciface.h#1 branch .. //depot/projects/valgrind/coregrind/m_errormgr.c#3 integrate .. 
//depot/projects/valgrind/coregrind/m_execontext.c#3 integrate .. //depot/projects/valgrind/coregrind/m_libcbase.c#5 integrate .. //depot/projects/valgrind/coregrind/m_main.c#9 integrate .. //depot/projects/valgrind/coregrind/m_stacktrace.c#4 integrate .. //depot/projects/valgrind/coregrind/m_syswrap/syswrap-amd64-linux.c#3 integrate .. //depot/projects/valgrind/coregrind/m_syswrap/syswrap-generic.c#7 integrate .. //depot/projects/valgrind/coregrind/m_syswrap/syswrap-main.c#10 integrate .. //depot/projects/valgrind/coregrind/m_trampoline.S#5 integrate .. //depot/projects/valgrind/coregrind/m_xarray.c#2 integrate .. //depot/projects/valgrind/coregrind/pub_core_debuginfo.h#4 integrate .. //depot/projects/valgrind/docs/internals/3_3_BUGSTATUS.txt#2 integrate .. //depot/projects/valgrind/docs/internals/BIG_APP_NOTES.txt#1 branch .. //depot/projects/valgrind/docs/internals/Makefile.am#3 integrate .. //depot/projects/valgrind/docs/internals/howto_BUILD_KDE42.txt#1 branch .. //depot/projects/valgrind/docs/internals/howto_oprofile.txt#1 branch .. //depot/projects/valgrind/docs/xml/manual-core.xml#3 integrate .. //depot/projects/valgrind/docs/xml/manual.xml#3 integrate .. //depot/projects/valgrind/docs/xml/valgrind-manpage.xml#3 integrate .. //depot/projects/valgrind/exp-ptrcheck/Makefile.am#2 integrate .. //depot/projects/valgrind/exp-ptrcheck/README.ABOUT.PTRCHECK.txt#2 delete .. //depot/projects/valgrind/exp-ptrcheck/docs/Makefile.am#2 integrate .. //depot/projects/valgrind/exp-ptrcheck/docs/pc-manual.xml#1 branch .. //depot/projects/valgrind/exp-ptrcheck/h_main.c#3 integrate .. //depot/projects/valgrind/glibc-2.34567-NPTL-helgrind.supp#2 integrate .. //depot/projects/valgrind/helgrind/Makefile.am#4 integrate .. //depot/projects/valgrind/helgrind/README_MSMProp2.txt#1 branch .. //depot/projects/valgrind/helgrind/README_YARD.txt#1 branch .. //depot/projects/valgrind/helgrind/helgrind.h#3 integrate .. //depot/projects/valgrind/helgrind/hg_basics.c#1 branch .. 
//depot/projects/valgrind/helgrind/hg_basics.h#1 branch .. //depot/projects/valgrind/helgrind/hg_errors.c#1 branch .. //depot/projects/valgrind/helgrind/hg_errors.h#1 branch .. //depot/projects/valgrind/helgrind/hg_intercepts.c#2 integrate .. //depot/projects/valgrind/helgrind/hg_lock_n_thread.c#1 branch .. //depot/projects/valgrind/helgrind/hg_lock_n_thread.h#1 branch .. //depot/projects/valgrind/helgrind/hg_main.c#3 integrate .. //depot/projects/valgrind/helgrind/hg_wordset.c#2 integrate .. //depot/projects/valgrind/helgrind/hg_wordset.h#2 integrate .. //depot/projects/valgrind/helgrind/libhb.h#1 branch .. //depot/projects/valgrind/helgrind/libhb_core.c#1 branch .. //depot/projects/valgrind/include/pub_tool_execontext.h#3 integrate .. //depot/projects/valgrind/include/pub_tool_libcbase.h#3 integrate .. //depot/projects/valgrind/massif/hp2ps/AreaBelow.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/AreaBelow.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/AuxFile.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/AuxFile.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Axes.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Axes.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/CHANGES#2 delete .. //depot/projects/valgrind/massif/hp2ps/Curves.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Curves.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Defines.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Deviation.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Deviation.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Dimensions.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Dimensions.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Error.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Error.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/HpFile.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/HpFile.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/INSTALL#2 delete .. 
//depot/projects/valgrind/massif/hp2ps/Key.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Key.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/LICENSE#2 delete .. //depot/projects/valgrind/massif/hp2ps/Main.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Main.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Makefile.am#2 delete .. //depot/projects/valgrind/massif/hp2ps/Makefile.old#2 delete .. //depot/projects/valgrind/massif/hp2ps/Marks.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Marks.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/PsFile.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/PsFile.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/README#2 delete .. //depot/projects/valgrind/massif/hp2ps/Reorder.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Reorder.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Scale.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Scale.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Shade.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Shade.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/TopTwenty.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/TopTwenty.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/TraceElement.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/TraceElement.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Utilities.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Utilities.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/hp2ps.1#2 delete .. //depot/projects/valgrind/memcheck/mc_translate.c#3 integrate .. //depot/projects/valgrind/none/tests/amd64/Makefile.am#3 integrate .. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.c#1 branch .. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.stderr.exp#1 branch .. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.stdout.exp#1 branch .. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.vgtest#1 branch .. 
//depot/projects/valgrind/xfree-4.supp#4 integrate Differences ... ==== //depot/projects/valgrind/VEX/priv/guest-generic/bb_to_IR.c#3 (text+ko) ==== @@ -376,9 +376,12 @@ irsb->stmts[selfcheck_idx+3] = IRStmt_Put( offB_TILEN, IRExpr_RdTmp(tilen_tmp) ); - p_adler_helper = abiinfo_both->host_ppc_calls_use_fndescrs - ? ((HWord*)(&genericg_compute_adler32))[0] - : (HWord)&genericg_compute_adler32; + if (abiinfo_both->host_ppc_calls_use_fndescrs) { + HWord* fndescr = (HWord*)&genericg_compute_adler32; + p_adler_helper = fndescr[0]; + } else { + p_adler_helper = (HWord)&genericg_compute_adler32; + } irsb->stmts[selfcheck_idx+4] = IRStmt_Exit( ==== //depot/projects/valgrind/cachegrind/docs/cg-manual.xml#3 (text+ko) ==== @@ -807,16 +807,12 @@ instructions.</para> <para>To do this, you just need to assemble your -<computeroutput>.s</computeroutput> files with assembler-level -debug information. gcc doesn't do this, but you can use the GNU -assembler with the <computeroutput>--gstabs</computeroutput> -option to generate object files with this information, eg:</para> - -<programlisting><![CDATA[ -as --gstabs foo.s]]></programlisting> - -<para>You can then profile and annotate source files in the same -way as for C/C++ programs.</para> +<computeroutput>.s</computeroutput> files with assembly-level debug +information. You can use <computeroutput>gcc +-S</computeroutput> to compile C/C++ programs to assembly code, and then +<computeroutput>gcc -g</computeroutput> on the assembly code files to +achieve this. You can then profile and annotate the assembly code source +files in the same way as C/C++ source files.</para> </sect2> ==== //depot/projects/valgrind/callgrind/docs/cl-manual.xml#3 (text+ko) ==== @@ -197,7 +197,7 @@ <computeroutput>callgrind_control -i on</computeroutput> just before the interesting code section is executed. 
To exactly specify the code position where profiling should start, use the client request - <computeroutput>CALLGRIND_START_INSTRUMENTATION</computeroutput>.</para> + <computeroutput><xref linkend="cr.start-instr"/></computeroutput>.</para> <para>If you want to be able to see assembly code level annotation, specify <option><xref linkend="opt.dump-instr"/>=yes</option>. This will produce @@ -292,18 +292,13 @@ <listitem> <para><command>Program controlled dumping.</command> - Put <screen><![CDATA[#include <valgrind/callgrind.h>]]></screen> - into your source and add - <computeroutput>CALLGRIND_DUMP_STATS;</computeroutput> when you - want a dump to happen. Use - <computeroutput>CALLGRIND_ZERO_STATS;</computeroutput> to only - zero cost centers.</para> - <para>In Valgrind terminology, this method is called "Client - requests". The given macros generate a special instruction - pattern with no effect at all (i.e. a NOP). When run under - Valgrind, the CPU simulation engine detects the special - instruction pattern and triggers special actions like the ones - described above.</para> + Insert + <computeroutput><xref linkend="cr.dump-stats"/>;</computeroutput> + at the position in your code where you want a profile dump to happen. Use + <computeroutput><xref linkend="cr.zero-stats"/>;</computeroutput> to only + zero profile counters. + See <xref linkend="cl-manual.clientrequests"/> for more information on + Callgrind specific client requests.</para> </listitem> </itemizedlist> @@ -338,8 +333,8 @@ with <screen>callgrind_control -i on</screen> and off by specifying "off" instead of "on". Furthermore, instrumentation state can be programatically changed with - the macros <computeroutput>CALLGRIND_START_INSTRUMENTATION;</computeroutput> - and <computeroutput>CALLGRIND_STOP_INSTRUMENTATION;</computeroutput>. + the macros <computeroutput><xref linkend="cr.start-instr"/>;</computeroutput> + and <computeroutput><xref linkend="cr.stop-instr"/>;</computeroutput>. 
</para> <para>In addition to enabling instrumentation, you must also enable @@ -471,6 +466,27 @@ </sect2> + <sect2 id="cl-manual.forkingprograms" xreflabel="Forking Programs"> + <title>Forking Programs</title> + + <para>If your program forks, the child will inherit all the profiling + data that has been gathered for the parent. To start with empty profile + counter values in the child, the client request + <computeroutput><xref linkend="cr.zero-stats"/>;</computeroutput> + can be inserted into code to be executed by the child, directly after + <computeroutput>fork()</computeroutput>.</para> + + <para>However, you will have to make sure that the output file format string + (controlled by <option>--callgrind-out-file</option>) does contain + <option>%p</option> (which is true by default). Otherwise, the + outputs from the parent and child will overwrite each other or will be + intermingled, which almost certainly is not what you want.</para> + + <para>You will be able to control the new child independently from + the parent via <computeroutput>callgrind_control</computeroutput>.</para> + + </sect2> + </sect1> @@ -701,7 +717,7 @@ </listitem> </varlistentry> - <varlistentry id="opt.collect-atstart"> + <varlistentry id="opt.collect-atstart" xreflabel="--collect-atstart"> <term> <option><![CDATA[--collect-atstart=<yes|no> [default: yes] ]]></option> </term> @@ -733,13 +749,9 @@ specification of <computeroutput>--toggle-collect</computeroutput> implicitly sets <computeroutput>--collect-state=no</computeroutput>.</para> - <para>Collection state can be toggled also by using a Valgrind - Client Request in your application. For this, include - <computeroutput>valgrind/callgrind.h</computeroutput> and specify - the macro - <computeroutput>CALLGRIND_TOGGLE_COLLECT</computeroutput> at the - needed positions. 
This only will have any effect if run under - supervision of the Callgrind tool.</para> + <para>Collection state can be toggled also by inserting the client request + <computeroutput><xref linkend="cr.toggle-collect"/>;</computeroutput> + at the needed code positions.</para> </listitem> </varlistentry> @@ -912,4 +924,94 @@ </sect1> +<sect1 id="cl-manual.clientrequests" xreflabel="Client request reference"> +<title>Callgrind specific client requests</title> + +<para>In Valgrind terminology, a client request is a C macro which +can be inserted into your code to request specific functionality when +run under Valgrind. For this, special instruction patterns resulting +in NOPs are used, but which can be detected by Valgrind.</para> + +<para>Callgrind provides the following specific client requests. +To use them, add the line +<screen><![CDATA[#include <valgrind/callgrind.h>]]></screen> +into your code for the macro definitions. +.</para> + +<variablelist id="cl.clientrequests.list"> + + <varlistentry id="cr.dump-stats" xreflabel="CALLGRIND_DUMP_STATS"> + <term> + <computeroutput>CALLGRIND_DUMP_STATS</computeroutput> + </term> + <listitem> + <para>Force generation of a profile dump at specified position + in code, for the current thread only. 
Written counters will be reset + to zero.</para> + </listitem> + </varlistentry> + + <varlistentry id="cr.dump-stats-at" xreflabel="CALLGRIND_DUMP_STATS_AT"> + <term> + <computeroutput>CALLGRIND_DUMP_STATS_AT(string)</computeroutput> + </term> + <listitem> + <para>Same as CALLGRIND_DUMP_STATS, but allows to specify a string + to be able to distinguish profile dumps.</para> + </listitem> + </varlistentry> + + <varlistentry id="cr.zero-stats" xreflabel="CALLGRIND_ZERO_STATS"> + <term> + <computeroutput>CALLGRIND_ZERO_STATS</computeroutput> + </term> + <listitem> + <para>Reset the profile counters for the current thread to zero.</para> + </listitem> + </varlistentry> + + <varlistentry id="cr.toggle-collect" xreflabel="CALLGRIND_TOGGLE_COLLECT"> + <term> + <computeroutput>CALLGRIND_TOGGLE_COLLECT</computeroutput> + </term> + <listitem> + <para>Toggle the collection state. This allows to ignore events + with regard to profile counters. See also options + <xref linkend="opt.collect-atstart"/> and + <xref linkend="opt.toggle-collect"/>.</para> + </listitem> + </varlistentry> + + <varlistentry id="cr.start-instr" xreflabel="CALLGRIND_START_INSTRUMENTATION"> + <term> + <computeroutput>CALLGRIND_START_INSTRUMENTATION</computeroutput> + </term> + <listitem> + <para>Start full Callgrind instrumentation if not already switched on. + When cache simulation is done, this will flush the simulated cache + and lead to an artificial cache warmup phase afterwards with + cache misses which would not have happened in reality. + See also option <xref linkend="opt.instr-atstart"/>.</para> + </listitem> + </varlistentry> + + <varlistentry id="cr.stop-instr" xreflabel="CALLGRIND_STOP_INSTRUMENTATION"> + <term> + <computeroutput>CALLGRIND_STOP_INSTRUMENTATION</computeroutput> + </term> + <listitem> + <para>Stop full Callgrind instrumentation if not already switched off.
+ This flushes Valgrind's translation cache, and does no additional + instrumentation afterwards: it effectively will run at the same + speed as the "none" tool, ie. at minimal slowdown. Use this to + speed up the Callgrind run for uninteresting code parts. Use + <xref linkend="cr.start-instr"/> to switch on instrumentation again. + See also option <xref linkend="opt.instr-atstart"/>.</para> + </listitem> + </varlistentry> + +</variablelist> + +</sect1> + </chapter> ==== //depot/projects/valgrind/callgrind/dump.c#3 (text+ko) ==== @@ -64,13 +64,13 @@ Char* CLG_(get_out_file)() { - CLG_ASSERT(dumps_initialized); + CLG_(init_dumps)(); return out_file; } Char* CLG_(get_out_directory)() { - CLG_ASSERT(dumps_initialized); + CLG_(init_dumps)(); return out_directory; } @@ -1616,6 +1616,8 @@ CLG_DEBUG(2, "+ dump_profile(Trigger '%s')\n", trigger ? trigger : (Char*)"Prg.Term."); + CLG_(init_dumps)(); + if (VG_(clo_verbosity) > 1) VG_(message)(Vg_DebugMsg, "Start dumping at BB %llu (%s)...", CLG_(stat).bb_executions, @@ -1673,15 +1675,35 @@ * <out_file> always starts with a full absolute path. * If the output format string represents a relative path, the current * working directory at program start is used. + * + * This function has to be called every time a profile dump is generated + * to be able to react on PID changes. */ void CLG_(init_dumps)() { Int lastSlash, i; SysRes res; + static int thisPID = 0; + int currentPID = VG_(getpid)(); + if (currentPID == thisPID) { + /* already initialized, and no PID change */ + CLG_ASSERT(out_file != 0); + return; + } + thisPID = currentPID; + if (!CLG_(clo).out_format) CLG_(clo).out_format = DEFAULT_OUTFORMAT; + /* If a file name was already set, clean up before */ + if (out_file) { + VG_(free)(out_file); + VG_(free)(out_directory); + VG_(free)(filename); + out_counter = 0; + } + // Setup output filename.
out_file = VG_(expand_file_name)("--callgrind-out-file", CLG_(clo).out_format); @@ -1721,7 +1743,8 @@ } if (!res.isError) VG_(close)( (Int)res.res ); - init_cmdbuf(); + if (!dumps_initialized) + init_cmdbuf(); dumps_initialized = True; } ==== //depot/projects/valgrind/coregrind/Makefile.am#7 (text+ko) ==== @@ -169,9 +169,11 @@ m_debuginfo/priv_readelf.h \ m_debuginfo/priv_readxcoff.h \ m_demangle/ansidecl.h \ + m_demangle/cp-demangle.h \ m_demangle/dyn-string.h \ m_demangle/demangle.h \ m_demangle/safe-ctype.h \ + m_demangle/vg_libciface.h \ m_scheduler/priv_sema.h \ m_syswrap/priv_types_n_macros.h \ m_syswrap/priv_syswrap-generic.h \ ==== //depot/projects/valgrind/coregrind/m_aspacemgr/aspacemgr-linux.c#4 (text+ko) ==== @@ -325,6 +325,7 @@ /* ------ end of STATE for the address-space manager ------ */ /* ------ Forwards decls ------ */ +inline static Int find_nsegment_idx ( Addr a ); static void parse_procselfmaps ( @@ -1101,8 +1102,19 @@ /*-----------------------------------------------------------------*/ /* Binary search the interval array for a given address. Since the - array covers the entire address space the search cannot fail. */ -static Int find_nsegment_idx ( Addr a ) + array covers the entire address space the search cannot fail. The + _WRK function does the real work. Its caller (just below) caches + the results thereof, to save time. With N_CACHE of 63 we get a hit + rate exceeding 90% when running OpenOffice. + + Re ">> 12", it doesn't matter that the page size of some targets + might be different from 12. Really "(a >> 12) % N_CACHE" is merely + a hash function, and the actual cache entry is always validated + correctly against the selected cache entry before use. +*/ +/* Don't call find_nsegment_idx_WRK; use find_nsegment_idx instead. 
*/ +__attribute__((noinline)) +static Int find_nsegment_idx_WRK ( Addr a ) { Addr a_mid_lo, a_mid_hi; Int mid, @@ -1126,6 +1138,52 @@ } } +inline static Int find_nsegment_idx ( Addr a ) +{ +# define N_CACHE 63 + static Addr cache_pageno[N_CACHE]; + static Int cache_segidx[N_CACHE]; + static Bool cache_inited = False; + + static UWord n_q = 0; + static UWord n_m = 0; + + UWord ix; + + if (LIKELY(cache_inited)) { + /* do nothing */ + } else { + for (ix = 0; ix < N_CACHE; ix++) { + cache_pageno[ix] = 0; + cache_segidx[ix] = -1; + } + cache_inited = True; + } + + ix = (a >> 12) % N_CACHE; + + n_q++; + if (0 && 0 == (n_q & 0xFFFF)) + VG_(debugLog)(0,"xxx","find_nsegment_idx: %lu %lu\n", n_q, n_m); + + if ((a >> 12) == cache_pageno[ix] + && cache_segidx[ix] >= 0 + && cache_segidx[ix] < nsegments_used + && nsegments[cache_segidx[ix]].start <= a + && a <= nsegments[cache_segidx[ix]].end) { + /* hit */ + /* aspacem_assert( cache_segidx[ix] == find_nsegment_idx_WRK(a) ); */ + return cache_segidx[ix]; + } + /* miss */ + n_m++; + cache_segidx[ix] = find_nsegment_idx_WRK(a); + cache_pageno[ix] = a >> 12; + return cache_segidx[ix]; +# undef N_CACHE +} + + /* Finds the segment containing 'a'. Only returns file/anon/resvn segments. This returns a 'NSegment const *' - a pointer to ==== //depot/projects/valgrind/coregrind/m_debuginfo/debuginfo.c#6 (text+ko) ==== @@ -99,6 +99,13 @@ /*------------------------------------------------------------*/ +/*--- fwdses ---*/ +/*------------------------------------------------------------*/ + +static void cfsi_cache__invalidate ( void ); + + +/*------------------------------------------------------------*/ /*--- Root structure ---*/ /*------------------------------------------------------------*/ @@ -320,10 +327,11 @@ /* Repeatedly scan debugInfo_list, looking for DebugInfos with text AVMAs intersecting [start,start+length), and call discard_DebugInfo to get rid of them. This modifies the list, hence the multiple - iterations. + iterations. 
Returns True iff any such DebugInfos were found. */ -static void discard_syms_in_range ( Addr start, SizeT length ) +static Bool discard_syms_in_range ( Addr start, SizeT length ) { + Bool anyFound = False; Bool found; DebugInfo* curr; @@ -347,8 +355,11 @@ } if (!found) break; + anyFound = True; discard_DebugInfo( curr ); } + + return anyFound; } @@ -479,8 +490,86 @@ } +/* Debuginfo reading for 'di' has just been successfully completed. + Check that the invariants stated in + "Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS" in + priv_storage.h are observed. */ +static void check_CFSI_related_invariants ( DebugInfo* di ) +{ + DebugInfo* di2 = NULL; + vg_assert(di); + /* This fn isn't called until after debuginfo for this object has + been successfully read. And that shouldn't happen until we have + both a r-x and rw- mapping for the object. Hence: */ + vg_assert(di->have_rx_map); + vg_assert(di->have_rw_map); + /* degenerate case: r-x section is empty */ + if (di->rx_map_size == 0) { + vg_assert(di->cfsi == NULL); + return; + } + /* normal case: r-x section is nonempty */ + /* invariant (0) */ + vg_assert(di->rx_map_size > 0); + /* invariant (1) */ + for (di2 = debugInfo_list; di2; di2 = di2->next) { + if (di2 == di) + continue; + if (di2->rx_map_size == 0) + continue; + vg_assert(di->rx_map_avma + di->rx_map_size <= di2->rx_map_avma + || di2->rx_map_avma + di2->rx_map_size <= di->rx_map_avma); + } + di2 = NULL; + /* invariant (2) */ + if (di->cfsi) { + vg_assert(di->cfsi_minavma <= di->cfsi_maxavma); /* duh! 
*/ + vg_assert(di->cfsi_minavma >= di->rx_map_avma); + vg_assert(di->cfsi_maxavma < di->rx_map_avma + di->rx_map_size); + } + /* invariants (3) and (4) */ + if (di->cfsi) { + Word i; + vg_assert(di->cfsi_used > 0); + vg_assert(di->cfsi_size > 0); + for (i = 0; i < di->cfsi_used; i++) { + DiCfSI* cfsi = &di->cfsi[i]; + vg_assert(cfsi->len > 0); + vg_assert(cfsi->base >= di->cfsi_minavma); + vg_assert(cfsi->base + cfsi->len - 1 <= di->cfsi_maxavma); + if (i > 0) { + DiCfSI* cfsip = &di->cfsi[i-1]; + vg_assert(cfsip->base + cfsip->len <= cfsi->base); + } + } + } else { + vg_assert(di->cfsi_used == 0); + vg_assert(di->cfsi_size == 0); + } +} + + /*--------------------------------------------------------------*/ /*--- ---*/ +/*--- TOP LEVEL: INITIALISE THE DEBUGINFO SYSTEM ---*/ +/*--- ---*/ +/*--------------------------------------------------------------*/ + +void VG_(di_initialise) ( void ) +{ + /* There's actually very little to do here, since everything + centers around the DebugInfos in debugInfo_list, they are + created and destroyed on demand, and each one is treated more or + less independently. */ + vg_assert(debugInfo_list == NULL); + + /* flush the CFI fast query cache. */ + cfsi_cache__invalidate(); +} + + +/*--------------------------------------------------------------*/ +/*--- ---*/ /*--- TOP LEVEL: NOTIFICATION (ACQUIRE/DISCARD INFO) (LINUX) ---*/ /*--- ---*/ /*--------------------------------------------------------------*/ @@ -719,6 +808,8 @@ TRACE_SYMTAB("\n------ Canonicalising the " "acquired info ------\n"); + /* invalidate the CFI unwind cache. */ + cfsi_cache__invalidate(); /* prepare read data for use */ ML_(canonicaliseTables)( di ); /* notify m_redir about it */ @@ -728,6 +819,10 @@ di->have_dinfo = True; tl_assert(di->handle > 0); di_handle = di->handle; + /* Check invariants listed in + Comment_on_IMPORTANT_REPRESENTATIONAL_INVARIANTS in + priv_storage.h. 
*/ + check_CFSI_related_invariants(di); } else { TRACE_SYMTAB("\n------ ELF reading failed ------\n"); @@ -735,6 +830,7 @@ this DebugInfo? No - it contains info on the rw/rx mappings, at least. */ di_handle = 0; + vg_assert(di->have_dinfo == False); } TRACE_SYMTAB("\n"); @@ -751,8 +847,11 @@ [a, a+len). */ void VG_(di_notify_munmap)( Addr a, SizeT len ) { + Bool anyFound; if (0) VG_(printf)("DISCARD %#lx %#lx\n", a, a+len); - discard_syms_in_range(a, len); + anyFound = discard_syms_in_range(a, len); + if (anyFound) + cfsi_cache__invalidate(); } @@ -766,8 +865,11 @@ # if defined(VGP_x86_linux) || defined(VGP_x86_freebsd) exe_ok = exe_ok || toBool(prot & VKI_PROT_READ); # endif - if (0 && !exe_ok) - discard_syms_in_range(a, len); + if (0 && !exe_ok) { + Bool anyFound = discard_syms_in_range(a, len); + if (anyFound) + cfsi_cache__invalidate(); + } } #endif /* defined(VGO_linux) */ @@ -798,6 +900,10 @@ { ULong hdl = 0; + /* play safe; always invalidate the CFI cache. Not + that it should be used on AIX, but still .. */ + cfsi_cache__invalidate(); + if (acquire) { Bool ok; @@ -841,6 +947,10 @@ di->have_dinfo = True; hdl = di->handle; vg_assert(hdl > 0); + /* Check invariants listed in + Comment_on_IMPORTANT_REPRESENTATIONAL_INVARIANTS in + priv_storage.h. */ + check_CFSI_related_invariants(di); } else { /* Something went wrong (eg. bad XCOFF file). */ discard_DebugInfo( di ); @@ -851,8 +961,11 @@ /* Dump all the debugInfos whose text segments intersect code_start/code_len. */ + /* CFI cache is always invalidated at start of this routine. + Hence it's safe to ignore the return value of + discard_syms_in_range. */ if (code_len > 0) - discard_syms_in_range( code_start, code_len ); + (void)discard_syms_in_range( code_start, code_len ); } @@ -894,11 +1007,11 @@ If findText==False, only data symbols are searched for. 
*/ static void search_all_symtabs ( Addr ptr, /*OUT*/DebugInfo** pdi, - /*OUT*/Int* symno, + /*OUT*/Word* symno, Bool match_anywhere_in_sym, Bool findText ) { - Int sno; + Word sno; DebugInfo* di; Bool inRange; @@ -945,9 +1058,9 @@ *pdi to the relevant DebugInfo, and *locno to the loctab entry *number within that. If not found, *pdi is set to NULL. */ static void search_all_loctabs ( Addr ptr, /*OUT*/DebugInfo** pdi, - /*OUT*/Int* locno ) + /*OUT*/Word* locno ) { - Int lno; + Word lno; DebugInfo* di; for (di = debugInfo_list; di != NULL; di = di->next) { if (di->text_present @@ -978,7 +1091,7 @@ Bool findText, /*OUT*/OffT* offsetP ) { DebugInfo* di; - Int sno; + Word sno; Int offset; search_all_symtabs ( a, &di, &sno, match_anywhere_in_sym, findText ); @@ -1020,7 +1133,7 @@ Addr VG_(get_tocptr) ( Addr guest_code_addr ) { DebugInfo* si; - Int sno; + Word sno; search_all_symtabs ( guest_code_addr, &si, &sno, True/*match_anywhere_in_fun*/, @@ -1187,7 +1300,7 @@ Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename ) { DebugInfo* si; - Int locno; + Word locno; search_all_loctabs ( a, &si, &locno ); if (si == NULL) return False; @@ -1199,7 +1312,7 @@ Bool VG_(get_linenum)( Addr a, UInt* lineno ) { DebugInfo* si; - Int locno; + Word locno; search_all_loctabs ( a, &si, &locno ); if (si == NULL) return False; @@ -1218,7 +1331,7 @@ /*OUT*/UInt* lineno ) { DebugInfo* si; - Int locno; + Word locno; vg_assert( (dirname == NULL && dirname_available == NULL) || @@ -1542,6 +1655,122 @@ } +/* Search all the DebugInfos in the entire system, to find the DiCfSI + that pertains to 'ip'. + + If found, set *diP to the DebugInfo in which it resides, and + *ixP to the index in that DebugInfo's cfsi array. + + If not found, set *diP to (DebugInfo*)1 and *ixP to zero. 
+*/ +__attribute__((noinline)) +static void find_DiCfSI ( /*OUT*/DebugInfo** diP, + /*OUT*/Word* ixP, + Addr ip ) +{ + DebugInfo* di; + Word i = -1; + + static UWord n_search = 0; + static UWord n_steps = 0; + n_search++; + + if (0) VG_(printf)("search for %#lx\n", ip); + + for (di = debugInfo_list; di != NULL; di = di->next) { + Word j; + n_steps++; + + /* Use the per-DebugInfo summary address ranges to skip + inapplicable DebugInfos quickly. */ + if (di->cfsi_used == 0) + continue; + if (ip < di->cfsi_minavma || ip > di->cfsi_maxavma) + continue; + + /* It might be in this DebugInfo. Search it. */ + j = ML_(search_one_cfitab)( di, ip ); + vg_assert(j >= -1 && j < (Word)di->cfsi_used); + + if (j != -1) { + i = j; + break; /* found it */ + } + } + + if (i == -1) { + + /* we didn't find it. */ + *diP = (DebugInfo*)1; + *ixP = 0; + + } else { + + /* found it. */ + /* ensure that di is 4-aligned (at least), so it can't possibly + be equal to (DebugInfo*)1. */ + vg_assert(di && VG_IS_4_ALIGNED(di)); + vg_assert(i >= 0 && i < di->cfsi_used); + *diP = di; + *ixP = i; + + /* Start of performance-enhancing hack: once every 64 (chosen + hackily after profiling) successful searches, move the found + DebugInfo one step closer to the start of the list. This + makes future searches cheaper. For starting konqueror on + amd64, this in fact reduces the total amount of searching + done by the above find-the-right-DebugInfo loop by more than + a factor of 20. */ + if ((n_search & 0xF) == 0) { + /* Move di one step closer to the start of the list. */ + move_DebugInfo_one_step_forward( di ); + } + /* End of performance-enhancing hack. */ + + if (0 && ((n_search & 0x7FFFF) == 0)) + VG_(printf)("find_DiCfSI: %lu searches, " + "%lu DebugInfos looked at\n", + n_search, n_steps); + + } + +} + + +/* Now follows a mechanism for caching queries to find_DiCfSI, since + they are extremely frequent on amd64-linux, during stack unwinding. + + Each cache entry binds an ip value to a (di, ix) pair. 
Possible + values: + + di is non-null, ix >= 0 ==> cache slot in use, "di->cfsi[ix]" + di is (DebugInfo*)1 ==> cache slot in use, no associated di + di is NULL ==> cache slot not in use + + Hence simply zeroing out the entire cache invalidates all + entries. + + Why not map ip values directly to DiCfSI*'s? Because this would + cause problems if/when the cfsi array is moved due to resizing. + Instead we cache .cfsi array index value, which should be invariant + across resizing. (That said, I don't think the current + implementation will resize during queries, since the DiCfSI + records are added all at once, when the debuginfo for an object is + read, and are never changed thereafter.) */ + +#define N_CFSI_CACHE 511 + +typedef + struct { Addr ip; DebugInfo* di; Word ix; } + CFSICacheEnt; + +static CFSICacheEnt cfsi_cache[N_CFSI_CACHE]; + +static void cfsi_cache__invalidate ( void ) { + VG_(memset)(&cfsi_cache, 0, sizeof(cfsi_cache)); +} + + /* The main function for DWARF2/3 CFI-based stack unwinding. Given an IP/SP/FP triple, produce the IP/SP/FP values for the previous frame, if possible. */ @@ -1554,61 +1783,47 @@ Addr min_accessible, Addr max_accessible ) { - Bool ok; - Int i; - DebugInfo* si; - DiCfSI* cfsi = NULL; - Addr cfa, ipHere, spHere, fpHere, ipPrev, spPrev, fpPrev; + Bool ok; + DebugInfo* di; + DiCfSI* cfsi = NULL; + Addr cfa, ipHere, spHere, fpHere, ipPrev, spPrev, fpPrev; CfiExprEvalContext eec; - static UInt n_search = 0; - static UInt n_steps = 0; - n_search++; + static UWord n_q = 0, n_m = 0; + n_q++; + if (0 && 0 == (n_q & 0x1FFFFF)) + VG_(printf)("QQQ %lu %lu\n", n_q, n_m); - if (0) VG_(printf)("search for %#lx\n", *ipP); + { UWord hash = (*ipP) % N_CFSI_CACHE; + CFSICacheEnt* ce = &cfsi_cache[hash]; - for (si = debugInfo_list; si != NULL; si = si->next) { - n_steps++; + if (LIKELY(ce->ip == *ipP) && LIKELY(ce->di != NULL)) { + /* found an entry in the cache .. */ + } else { + /* not found in cache. Search and update.
*/ + n_m++; + ce->ip = *ipP; + find_DiCfSI( &ce->di, &ce->ix, *ipP ); + } - /* Use the per-DebugInfo summary address ranges to skip - inapplicable DebugInfos quickly. */ - if (si->cfsi_used == 0) >>> TRUNCATED FOR MAIL (1000 lines) <<<
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200810311753.m9VHrEQg086630>