From owner-p4-projects@FreeBSD.ORG Fri Oct 31 17:53:16 2008 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 527F6106568B; Fri, 31 Oct 2008 17:53:16 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 142A4106567D for ; Fri, 31 Oct 2008 17:53:16 +0000 (UTC) (envelope-from peter-gmail@wemm.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id 007698FC20 for ; Fri, 31 Oct 2008 17:53:16 +0000 (UTC) (envelope-from peter-gmail@wemm.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.3/8.14.3) with ESMTP id m9VHrFkC086632 for ; Fri, 31 Oct 2008 17:53:16 GMT (envelope-from peter-gmail@wemm.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.3/8.14.3/Submit) id m9VHrEQg086630 for perforce@freebsd.org; Fri, 31 Oct 2008 17:53:14 GMT (envelope-from peter-gmail@wemm.org) Date: Fri, 31 Oct 2008 17:53:14 GMT Message-Id: <200810311753.m9VHrEQg086630@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to peter-gmail@wemm.org using -f From: Peter Wemm To: Perforce Change Reviews Cc: Subject: PERFORCE change 152286 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 31 Oct 2008 17:53:16 -0000 http://perforce.freebsd.org/chv.cgi?CH=152286 Change 152286 by peter@peter_daintree on 2008/10/31 17:52:42 Integrate @152285 Affected files ... .. //depot/projects/valgrind/VEX/priv/guest-generic/bb_to_IR.c#3 integrate .. //depot/projects/valgrind/cachegrind/docs/cg-manual.xml#3 integrate .. //depot/projects/valgrind/callgrind/docs/cl-manual.xml#3 integrate .. //depot/projects/valgrind/callgrind/dump.c#3 integrate .. //depot/projects/valgrind/coregrind/Makefile.am#7 integrate .. //depot/projects/valgrind/coregrind/m_aspacemgr/aspacemgr-linux.c#4 integrate .. //depot/projects/valgrind/coregrind/m_debuginfo/debuginfo.c#6 integrate .. //depot/projects/valgrind/coregrind/m_debuginfo/priv_storage.h#3 integrate .. //depot/projects/valgrind/coregrind/m_debuginfo/priv_tytypes.h#2 integrate .. //depot/projects/valgrind/coregrind/m_debuginfo/readdwarf3.c#2 integrate .. //depot/projects/valgrind/coregrind/m_debuginfo/readelf.c#4 integrate .. //depot/projects/valgrind/coregrind/m_debuginfo/storage.c#3 integrate .. //depot/projects/valgrind/coregrind/m_debuginfo/tytypes.c#2 integrate .. //depot/projects/valgrind/coregrind/m_demangle/ansidecl.h#2 integrate .. //depot/projects/valgrind/coregrind/m_demangle/cp-demangle.c#3 integrate .. //depot/projects/valgrind/coregrind/m_demangle/cp-demangle.h#1 branch .. //depot/projects/valgrind/coregrind/m_demangle/cplus-dem.c#3 integrate .. //depot/projects/valgrind/coregrind/m_demangle/demangle.c#3 integrate .. //depot/projects/valgrind/coregrind/m_demangle/demangle.h#3 integrate .. //depot/projects/valgrind/coregrind/m_demangle/dyn-string.c#3 integrate .. //depot/projects/valgrind/coregrind/m_demangle/dyn-string.h#2 integrate .. //depot/projects/valgrind/coregrind/m_demangle/safe-ctype.c#2 integrate .. //depot/projects/valgrind/coregrind/m_demangle/safe-ctype.h#2 integrate .. //depot/projects/valgrind/coregrind/m_demangle/vg_libciface.h#1 branch .. //depot/projects/valgrind/coregrind/m_errormgr.c#3 integrate .. //depot/projects/valgrind/coregrind/m_execontext.c#3 integrate .. //depot/projects/valgrind/coregrind/m_libcbase.c#5 integrate .. //depot/projects/valgrind/coregrind/m_main.c#9 integrate .. //depot/projects/valgrind/coregrind/m_stacktrace.c#4 integrate .. //depot/projects/valgrind/coregrind/m_syswrap/syswrap-amd64-linux.c#3 integrate .. //depot/projects/valgrind/coregrind/m_syswrap/syswrap-generic.c#7 integrate .. //depot/projects/valgrind/coregrind/m_syswrap/syswrap-main.c#10 integrate .. //depot/projects/valgrind/coregrind/m_trampoline.S#5 integrate .. //depot/projects/valgrind/coregrind/m_xarray.c#2 integrate .. //depot/projects/valgrind/coregrind/pub_core_debuginfo.h#4 integrate .. //depot/projects/valgrind/docs/internals/3_3_BUGSTATUS.txt#2 integrate .. //depot/projects/valgrind/docs/internals/BIG_APP_NOTES.txt#1 branch .. //depot/projects/valgrind/docs/internals/Makefile.am#3 integrate .. //depot/projects/valgrind/docs/internals/howto_BUILD_KDE42.txt#1 branch .. //depot/projects/valgrind/docs/internals/howto_oprofile.txt#1 branch .. //depot/projects/valgrind/docs/xml/manual-core.xml#3 integrate .. //depot/projects/valgrind/docs/xml/manual.xml#3 integrate .. //depot/projects/valgrind/docs/xml/valgrind-manpage.xml#3 integrate .. //depot/projects/valgrind/exp-ptrcheck/Makefile.am#2 integrate .. //depot/projects/valgrind/exp-ptrcheck/README.ABOUT.PTRCHECK.txt#2 delete .. //depot/projects/valgrind/exp-ptrcheck/docs/Makefile.am#2 integrate .. //depot/projects/valgrind/exp-ptrcheck/docs/pc-manual.xml#1 branch .. //depot/projects/valgrind/exp-ptrcheck/h_main.c#3 integrate .. //depot/projects/valgrind/glibc-2.34567-NPTL-helgrind.supp#2 integrate .. //depot/projects/valgrind/helgrind/Makefile.am#4 integrate .. //depot/projects/valgrind/helgrind/README_MSMProp2.txt#1 branch .. //depot/projects/valgrind/helgrind/README_YARD.txt#1 branch .. //depot/projects/valgrind/helgrind/helgrind.h#3 integrate .. //depot/projects/valgrind/helgrind/hg_basics.c#1 branch .. //depot/projects/valgrind/helgrind/hg_basics.h#1 branch .. //depot/projects/valgrind/helgrind/hg_errors.c#1 branch .. //depot/projects/valgrind/helgrind/hg_errors.h#1 branch .. //depot/projects/valgrind/helgrind/hg_intercepts.c#2 integrate .. //depot/projects/valgrind/helgrind/hg_lock_n_thread.c#1 branch .. //depot/projects/valgrind/helgrind/hg_lock_n_thread.h#1 branch .. //depot/projects/valgrind/helgrind/hg_main.c#3 integrate .. //depot/projects/valgrind/helgrind/hg_wordset.c#2 integrate .. //depot/projects/valgrind/helgrind/hg_wordset.h#2 integrate .. //depot/projects/valgrind/helgrind/libhb.h#1 branch .. //depot/projects/valgrind/helgrind/libhb_core.c#1 branch .. //depot/projects/valgrind/include/pub_tool_execontext.h#3 integrate .. //depot/projects/valgrind/include/pub_tool_libcbase.h#3 integrate .. //depot/projects/valgrind/massif/hp2ps/AreaBelow.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/AreaBelow.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/AuxFile.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/AuxFile.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Axes.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Axes.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/CHANGES#2 delete .. //depot/projects/valgrind/massif/hp2ps/Curves.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Curves.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Defines.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Deviation.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Deviation.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Dimensions.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Dimensions.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Error.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Error.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/HpFile.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/HpFile.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/INSTALL#2 delete .. //depot/projects/valgrind/massif/hp2ps/Key.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Key.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/LICENSE#2 delete .. //depot/projects/valgrind/massif/hp2ps/Main.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Main.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Makefile.am#2 delete .. //depot/projects/valgrind/massif/hp2ps/Makefile.old#2 delete .. //depot/projects/valgrind/massif/hp2ps/Marks.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Marks.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/PsFile.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/PsFile.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/README#2 delete .. //depot/projects/valgrind/massif/hp2ps/Reorder.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Reorder.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Scale.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Scale.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Shade.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Shade.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/TopTwenty.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/TopTwenty.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/TraceElement.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/TraceElement.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/Utilities.c#2 delete .. //depot/projects/valgrind/massif/hp2ps/Utilities.h#2 delete .. //depot/projects/valgrind/massif/hp2ps/hp2ps.1#2 delete .. //depot/projects/valgrind/memcheck/mc_translate.c#3 integrate .. //depot/projects/valgrind/none/tests/amd64/Makefile.am#3 integrate .. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.c#1 branch .. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.stderr.exp#1 branch .. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.stdout.exp#1 branch .. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.vgtest#1 branch .. //depot/projects/valgrind/xfree-4.supp#4 integrate Differences ... ==== //depot/projects/valgrind/VEX/priv/guest-generic/bb_to_IR.c#3 (text+ko) ==== @@ -376,9 +376,12 @@ irsb->stmts[selfcheck_idx+3] = IRStmt_Put( offB_TILEN, IRExpr_RdTmp(tilen_tmp) ); - p_adler_helper = abiinfo_both->host_ppc_calls_use_fndescrs - ? ((HWord*)(&genericg_compute_adler32))[0] - : (HWord)&genericg_compute_adler32; + if (abiinfo_both->host_ppc_calls_use_fndescrs) { + HWord* fndescr = (HWord*)&genericg_compute_adler32; + p_adler_helper = fndescr[0]; + } else { + p_adler_helper = (HWord)&genericg_compute_adler32; + } irsb->stmts[selfcheck_idx+4] = IRStmt_Exit( ==== //depot/projects/valgrind/cachegrind/docs/cg-manual.xml#3 (text+ko) ==== @@ -807,16 +807,12 @@ instructions. To do this, you just need to assemble your -.s files with assembler-level -debug information. gcc doesn't do this, but you can use the GNU -assembler with the --gstabs -option to generate object files with this information, eg: - - - -You can then profile and annotate source files in the same -way as for C/C++ programs. +.s files with assembly-level debug +information. You can use gcc +-S to compile C/C++ programs to assembly code, and then +gcc -g on the assembly code files to +achieve this. You can then profile and annotate the assembly code source +files in the same way as C/C++ source files. ==== //depot/projects/valgrind/callgrind/docs/cl-manual.xml#3 (text+ko) ==== @@ -197,7 +197,7 @@ callgrind_control -i on just before the interesting code section is executed. To exactly specify the code position where profiling should start, use the client request - CALLGRIND_START_INSTRUMENTATION. + . If you want to be able to see assembly code level annotation, specify . This will produce @@ -292,18 +292,13 @@ Program controlled dumping. - Put ]]> - into your source and add - CALLGRIND_DUMP_STATS; when you - want a dump to happen. Use - CALLGRIND_ZERO_STATS; to only - zero cost centers. - In Valgrind terminology, this method is called "Client - requests". The given macros generate a special instruction - pattern with no effect at all (i.e. a NOP). When run under - Valgrind, the CPU simulation engine detects the special - instruction pattern and triggers special actions like the ones - described above. + Insert + ; + at the position in your code where you want a profile dump to happen. Use + ; to only + zero profile counters. + See for more information on + Callgrind specific client requests. @@ -338,8 +333,8 @@ with callgrind_control -i on and off by specifying "off" instead of "on". Furthermore, instrumentation state can be programatically changed with - the macros CALLGRIND_START_INSTRUMENTATION; - and CALLGRIND_STOP_INSTRUMENTATION;. + the macros ; + and ;. In addition to enabling instrumentation, you must also enable @@ -471,6 +466,27 @@ + + Forking Programs + + If your program forks, the child will inherit all the profiling + data that has been gathered for the parent. To start with empty profile + counter values in the child, the client request + ; + can be inserted into code to be executed by the child, directly after + fork(). + + However, you will have to make sure that the output file format string + (controlled by ) does contain + (which is true by default). Otherwise, the + outputs from the parent and child will overwrite each other or will be + intermingled, which almost certainly is not what you want. + + You will be able to control the new child independently from + the parent via callgrind_control. + + + @@ -701,7 +717,7 @@ - + @@ -733,13 +749,9 @@ specification of --toggle-collect implicitly sets --collect-state=no. - Collection state can be toggled also by using a Valgrind - Client Request in your application. For this, include - valgrind/callgrind.h and specify - the macro - CALLGRIND_TOGGLE_COLLECT at the - needed positions. This only will have any effect if run under - supervision of the Callgrind tool. + Collection state can be toggled also by inserting the client request + ; + at the needed code positions. @@ -912,4 +924,94 @@ + +Callgrind specific client requests + +In Valgrind terminology, a client request is a C macro which +can be inserted into your code to request specific functionality when +run under Valgrind. For this, special instruction patterns resulting +in NOPs are used, but which can be detected by Valgrind. + +Callgrind provides the following specific client requests. +To use them, add the line +]]> +into your code for the macro definitions. +. + + + + + + CALLGRIND_DUMP_STATS + + + Force generation of a profile dump at specified position + in code, for the current thread only. Written counters will be reset + to zero. + + + + + + CALLGRIND_DUMP_STATS_AT(string) + + + Same as CALLGRIND_DUMP_STATS, but allows to specify a string + to be able to distinguish profile dumps. + + + + + + CALLGRIND_ZERO_STATS + + + Reset the profile counters for the current thread to zero. + + + + + + CALLGRIND_TOGGLE_COLLECT + + + Toggle the collection state. This allows to ignore events + with regard to profile counters. See also options + and + . + + + + + + CALLGRIND_START_INSTRUMENTATION + + + Start full Callgrind instrumentation if not already switched on. + When cache simulation is done, this will flush the simulated cache + and lead to an artifical cache warmup phase afterwards with + cache misses which would not have happened in reality. + See also option . + + + + + + CALLGRIND_STOP_INSTRUMENTATION + + + Stop full Callgrind instrumentation if not already switched off. + This flushes Valgrinds translation cache, and does no additional + instrumentation afterwards: it effectivly will run at the same + speed as the "none" tool, ie. at minimal slowdown. Use this to + speed up the Callgrind run for uninteresting code parts. Use + to switch on instrumentation again. + See also option . + + + + + + + ==== //depot/projects/valgrind/callgrind/dump.c#3 (text+ko) ==== @@ -64,13 +64,13 @@ Char* CLG_(get_out_file)() { - CLG_ASSERT(dumps_initialized); + CLG_(init_dumps)(); return out_file; } Char* CLG_(get_out_directory)() { - CLG_ASSERT(dumps_initialized); + CLG_(init_dumps)(); return out_directory; } @@ -1616,6 +1616,8 @@ CLG_DEBUG(2, "+ dump_profile(Trigger '%s')\n", trigger ? trigger : (Char*)"Prg.Term."); + CLG_(init_dumps)(); + if (VG_(clo_verbosity) > 1) VG_(message)(Vg_DebugMsg, "Start dumping at BB %llu (%s)...", CLG_(stat).bb_executions, @@ -1673,15 +1675,35 @@ * always starts with a full absolute path. * If the output format string represents a relative path, the current * working directory at program start is used. + * + * This function has to be called every time a profile dump is generated + * to be able to react on PID changes. */ void CLG_(init_dumps)() { Int lastSlash, i; SysRes res; + static int thisPID = 0; + int currentPID = VG_(getpid)(); + if (currentPID == thisPID) { + /* already initialized, and no PID change */ + CLG_ASSERT(out_file != 0); + return; + } + thisPID = currentPID; + if (!CLG_(clo).out_format) CLG_(clo).out_format = DEFAULT_OUTFORMAT; + /* If a file name was already set, clean up before */ + if (out_file) { + VG_(free)(out_file); + VG_(free)(out_directory); + VG_(free)(filename); + out_counter = 0; + } + // Setup output filename. out_file = VG_(expand_file_name)("--callgrind-out-file", CLG_(clo).out_format); @@ -1721,7 +1743,8 @@ } if (!res.isError) VG_(close)( (Int)res.res ); - init_cmdbuf(); + if (!dumps_initialized) + init_cmdbuf(); dumps_initialized = True; } ==== //depot/projects/valgrind/coregrind/Makefile.am#7 (text+ko) ==== @@ -169,9 +169,11 @@ m_debuginfo/priv_readelf.h \ m_debuginfo/priv_readxcoff.h \ m_demangle/ansidecl.h \ + m_demangle/cp-demangle.h \ m_demangle/dyn-string.h \ m_demangle/demangle.h \ m_demangle/safe-ctype.h \ + m_demangle/vg_libciface.h \ m_scheduler/priv_sema.h \ m_syswrap/priv_types_n_macros.h \ m_syswrap/priv_syswrap-generic.h \ ==== //depot/projects/valgrind/coregrind/m_aspacemgr/aspacemgr-linux.c#4 (text+ko) ==== @@ -325,6 +325,7 @@ /* ------ end of STATE for the address-space manager ------ */ /* ------ Forwards decls ------ */ +inline static Int find_nsegment_idx ( Addr a ); static void parse_procselfmaps ( @@ -1101,8 +1102,19 @@ /*-----------------------------------------------------------------*/ /* Binary search the interval array for a given address. Since the - array covers the entire address space the search cannot fail. */ -static Int find_nsegment_idx ( Addr a ) + array covers the entire address space the search cannot fail. The + _WRK function does the real work. Its caller (just below) caches + the results thereof, to save time. With N_CACHE of 63 we get a hit + rate exceeding 90% when running OpenOffice. + + Re ">> 12", it doesn't matter that the page size of some targets + might be different from 12. Really "(a >> 12) % N_CACHE" is merely + a hash function, and the actual cache entry is always validated + correctly against the selected cache entry before use. +*/ +/* Don't call find_nsegment_idx_WRK; use find_nsegment_idx instead. */ +__attribute__((noinline)) +static Int find_nsegment_idx_WRK ( Addr a ) { Addr a_mid_lo, a_mid_hi; Int mid, @@ -1126,6 +1138,52 @@ } } +inline static Int find_nsegment_idx ( Addr a ) +{ +# define N_CACHE 63 + static Addr cache_pageno[N_CACHE]; + static Int cache_segidx[N_CACHE]; + static Bool cache_inited = False; + + static UWord n_q = 0; + static UWord n_m = 0; + + UWord ix; + + if (LIKELY(cache_inited)) { + /* do nothing */ + } else { + for (ix = 0; ix < N_CACHE; ix++) { + cache_pageno[ix] = 0; + cache_segidx[ix] = -1; + } + cache_inited = True; + } + + ix = (a >> 12) % N_CACHE; + + n_q++; + if (0 && 0 == (n_q & 0xFFFF)) + VG_(debugLog)(0,"xxx","find_nsegment_idx: %lu %lu\n", n_q, n_m); + + if ((a >> 12) == cache_pageno[ix] + && cache_segidx[ix] >= 0 + && cache_segidx[ix] < nsegments_used + && nsegments[cache_segidx[ix]].start <= a + && a <= nsegments[cache_segidx[ix]].end) { + /* hit */ + /* aspacem_assert( cache_segidx[ix] == find_nsegment_idx_WRK(a) ); */ + return cache_segidx[ix]; + } + /* miss */ + n_m++; + cache_segidx[ix] = find_nsegment_idx_WRK(a); + cache_pageno[ix] = a >> 12; + return cache_segidx[ix]; +# undef N_CACHE +} + + /* Finds the segment containing 'a'. Only returns file/anon/resvn segments. This returns a 'NSegment const *' - a pointer to ==== //depot/projects/valgrind/coregrind/m_debuginfo/debuginfo.c#6 (text+ko) ==== @@ -99,6 +99,13 @@ /*------------------------------------------------------------*/ +/*--- fwdses ---*/ +/*------------------------------------------------------------*/ + +static void cfsi_cache__invalidate ( void ); + + +/*------------------------------------------------------------*/ /*--- Root structure ---*/ /*------------------------------------------------------------*/ @@ -320,10 +327,11 @@ /* Repeatedly scan debugInfo_list, looking for DebugInfos with text AVMAs intersecting [start,start+length), and call discard_DebugInfo to get rid of them. This modifies the list, hence the multiple - iterations. + iterations. Returns True iff any such DebugInfos were found. */ -static void discard_syms_in_range ( Addr start, SizeT length ) +static Bool discard_syms_in_range ( Addr start, SizeT length ) { + Bool anyFound = False; Bool found; DebugInfo* curr; @@ -347,8 +355,11 @@ } if (!found) break; + anyFound = True; discard_DebugInfo( curr ); } + + return anyFound; } @@ -479,8 +490,86 @@ } +/* Debuginfo reading for 'di' has just been successfully completed. + Check that the invariants stated in + "Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS" in + priv_storage.h are observed. */ +static void check_CFSI_related_invariants ( DebugInfo* di ) +{ + DebugInfo* di2 = NULL; + vg_assert(di); + /* This fn isn't called until after debuginfo for this object has + been successfully read. And that shouldn't happen until we have + both a r-x and rw- mapping for the object. Hence: */ + vg_assert(di->have_rx_map); + vg_assert(di->have_rw_map); + /* degenerate case: r-x section is empty */ + if (di->rx_map_size == 0) { + vg_assert(di->cfsi == NULL); + return; + } + /* normal case: r-x section is nonempty */ + /* invariant (0) */ + vg_assert(di->rx_map_size > 0); + /* invariant (1) */ + for (di2 = debugInfo_list; di2; di2 = di2->next) { + if (di2 == di) + continue; + if (di2->rx_map_size == 0) + continue; + vg_assert(di->rx_map_avma + di->rx_map_size <= di2->rx_map_avma + || di2->rx_map_avma + di2->rx_map_size <= di->rx_map_avma); + } + di2 = NULL; + /* invariant (2) */ + if (di->cfsi) { + vg_assert(di->cfsi_minavma <= di->cfsi_maxavma); /* duh! */ + vg_assert(di->cfsi_minavma >= di->rx_map_avma); + vg_assert(di->cfsi_maxavma < di->rx_map_avma + di->rx_map_size); + } + /* invariants (3) and (4) */ + if (di->cfsi) { + Word i; + vg_assert(di->cfsi_used > 0); + vg_assert(di->cfsi_size > 0); + for (i = 0; i < di->cfsi_used; i++) { + DiCfSI* cfsi = &di->cfsi[i]; + vg_assert(cfsi->len > 0); + vg_assert(cfsi->base >= di->cfsi_minavma); + vg_assert(cfsi->base + cfsi->len - 1 <= di->cfsi_maxavma); + if (i > 0) { + DiCfSI* cfsip = &di->cfsi[i-1]; + vg_assert(cfsip->base + cfsip->len <= cfsi->base); + } + } + } else { + vg_assert(di->cfsi_used == 0); + vg_assert(di->cfsi_size == 0); + } +} + + /*--------------------------------------------------------------*/ /*--- ---*/ +/*--- TOP LEVEL: INITIALISE THE DEBUGINFO SYSTEM ---*/ +/*--- ---*/ +/*--------------------------------------------------------------*/ + +void VG_(di_initialise) ( void ) +{ + /* There's actually very little to do here, since everything + centers around the DebugInfos in debugInfo_list, they are + created and destroyed on demand, and each one is treated more or + less independently. */ + vg_assert(debugInfo_list == NULL); + + /* flush the CFI fast query cache. */ + cfsi_cache__invalidate(); +} + + +/*--------------------------------------------------------------*/ +/*--- ---*/ /*--- TOP LEVEL: NOTIFICATION (ACQUIRE/DISCARD INFO) (LINUX) ---*/ /*--- ---*/ /*--------------------------------------------------------------*/ @@ -719,6 +808,8 @@ TRACE_SYMTAB("\n------ Canonicalising the " "acquired info ------\n"); + /* invalidate the CFI unwind cache. */ + cfsi_cache__invalidate(); /* prepare read data for use */ ML_(canonicaliseTables)( di ); /* notify m_redir about it */ @@ -728,6 +819,10 @@ di->have_dinfo = True; tl_assert(di->handle > 0); di_handle = di->handle; + /* Check invariants listed in + Comment_on_IMPORTANT_REPRESENTATIONAL_INVARIANTS in + priv_storage.h. */ + check_CFSI_related_invariants(di); } else { TRACE_SYMTAB("\n------ ELF reading failed ------\n"); @@ -735,6 +830,7 @@ this DebugInfo? No - it contains info on the rw/rx mappings, at least. */ di_handle = 0; + vg_assert(di->have_dinfo == False); } TRACE_SYMTAB("\n"); @@ -751,8 +847,11 @@ [a, a+len). */ void VG_(di_notify_munmap)( Addr a, SizeT len ) { + Bool anyFound; if (0) VG_(printf)("DISCARD %#lx %#lx\n", a, a+len); - discard_syms_in_range(a, len); + anyFound = discard_syms_in_range(a, len); + if (anyFound) + cfsi_cache__invalidate(); } @@ -766,8 +865,11 @@ # if defined(VGP_x86_linux) || defined(VGP_x86_freebsd) exe_ok = exe_ok || toBool(prot & VKI_PROT_READ); # endif - if (0 && !exe_ok) - discard_syms_in_range(a, len); + if (0 && !exe_ok) { + Bool anyFound = discard_syms_in_range(a, len); + if (anyFound) + cfsi_cache__invalidate(); + } } #endif /* defined(VGO_linux) */ @@ -798,6 +900,10 @@ { ULong hdl = 0; + /* play safe; always invalidate the CFI cache. Not + that it should be used on AIX, but still .. */ + cfsi_cache__invalidate(); + if (acquire) { Bool ok; @@ -841,6 +947,10 @@ di->have_dinfo = True; hdl = di->handle; vg_assert(hdl > 0); + /* Check invariants listed in + Comment_on_IMPORTANT_REPRESENTATIONAL_INVARIANTS in + priv_storage.h. */ + check_CFSI_related_invariants(di); } else { /* Something went wrong (eg. bad XCOFF file). */ discard_DebugInfo( di ); @@ -851,8 +961,11 @@ /* Dump all the debugInfos whose text segments intersect code_start/code_len. */ + /* CFI cache is always invalidated at start of this routine. + Hence it's safe to ignore the return value of + discard_syms_in_range. */ if (code_len > 0) - discard_syms_in_range( code_start, code_len ); + (void)discard_syms_in_range( code_start, code_len ); } @@ -894,11 +1007,11 @@ If findText==False, only data symbols are searched for. */ static void search_all_symtabs ( Addr ptr, /*OUT*/DebugInfo** pdi, - /*OUT*/Int* symno, + /*OUT*/Word* symno, Bool match_anywhere_in_sym, Bool findText ) { - Int sno; + Word sno; DebugInfo* di; Bool inRange; @@ -945,9 +1058,9 @@ *pdi to the relevant DebugInfo, and *locno to the loctab entry *number within that. If not found, *pdi is set to NULL. */ static void search_all_loctabs ( Addr ptr, /*OUT*/DebugInfo** pdi, - /*OUT*/Int* locno ) + /*OUT*/Word* locno ) { - Int lno; + Word lno; DebugInfo* di; for (di = debugInfo_list; di != NULL; di = di->next) { if (di->text_present @@ -978,7 +1091,7 @@ Bool findText, /*OUT*/OffT* offsetP ) { DebugInfo* di; - Int sno; + Word sno; Int offset; search_all_symtabs ( a, &di, &sno, match_anywhere_in_sym, findText ); @@ -1020,7 +1133,7 @@ Addr VG_(get_tocptr) ( Addr guest_code_addr ) { DebugInfo* si; - Int sno; + Word sno; search_all_symtabs ( guest_code_addr, &si, &sno, True/*match_anywhere_in_fun*/, @@ -1187,7 +1300,7 @@ Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename ) { DebugInfo* si; - Int locno; + Word locno; search_all_loctabs ( a, &si, &locno ); if (si == NULL) return False; @@ -1199,7 +1312,7 @@ Bool VG_(get_linenum)( Addr a, UInt* lineno ) { DebugInfo* si; - Int locno; + Word locno; search_all_loctabs ( a, &si, &locno ); if (si == NULL) return False; @@ -1218,7 +1331,7 @@ /*OUT*/UInt* lineno ) { DebugInfo* si; - Int locno; + Word locno; vg_assert( (dirname == NULL && dirname_available == NULL) || @@ -1542,6 +1655,122 @@ } +/* Search all the DebugInfos in the entire system, to find the DiCfSI + that pertains to 'ip'. + + If found, set *diP to the DebugInfo in which it resides, and + *ixP to the index in that DebugInfo's cfsi array. + + If not found, set *diP to (DebugInfo*)1 and *ixP to zero. +*/ +__attribute__((noinline)) +static void find_DiCfSI ( /*OUT*/DebugInfo** diP, + /*OUT*/Word* ixP, + Addr ip ) +{ + DebugInfo* di; + Word i = -1; + + static UWord n_search = 0; + static UWord n_steps = 0; + n_search++; + + if (0) VG_(printf)("search for %#lx\n", ip); + + for (di = debugInfo_list; di != NULL; di = di->next) { + Word j; + n_steps++; + + /* Use the per-DebugInfo summary address ranges to skip + inapplicable DebugInfos quickly. */ + if (di->cfsi_used == 0) + continue; + if (ip < di->cfsi_minavma || ip > di->cfsi_maxavma) + continue; + + /* It might be in this DebugInfo. Search it. */ + j = ML_(search_one_cfitab)( di, ip ); + vg_assert(j >= -1 && j < (Word)di->cfsi_used); + + if (j != -1) { + i = j; + break; /* found it */ + } + } + + if (i == -1) { + + /* we didn't find it. */ + *diP = (DebugInfo*)1; + *ixP = 0; + + } else { + + /* found it. */ + /* ensure that di is 4-aligned (at least), so it can't possibly + be equal to (DebugInfo*)1. */ + vg_assert(di && VG_IS_4_ALIGNED(di)); + vg_assert(i >= 0 && i < di->cfsi_used); + *diP = di; + *ixP = i; + + /* Start of performance-enhancing hack: once every 64 (chosen + hackily after profiling) successful searches, move the found + DebugInfo one step closer to the start of the list. This + makes future searches cheaper. For starting konqueror on + amd64, this in fact reduces the total amount of searching + done by the above find-the-right-DebugInfo loop by more than + a factor of 20. */ + if ((n_search & 0xF) == 0) { + /* Move di one step closer to the start of the list. */ + move_DebugInfo_one_step_forward( di ); + } + /* End of performance-enhancing hack. */ + + if (0 && ((n_search & 0x7FFFF) == 0)) + VG_(printf)("find_DiCfSI: %lu searches, " + "%lu DebugInfos looked at\n", + n_search, n_steps); + + } + +} + + +/* Now follows a mechanism for caching queries to find_DiCfSI, since + they are extremely frequent on amd64-linux, during stack unwinding. + + Each cache entry binds an ip value to a (di, ix) pair. Possible + values: + + di is non-null, ix >= 0 ==> cache slot in use, "di->cfsi[ix]" + di is (DebugInfo*)1 ==> cache slot in use, no associated di + di is NULL ==> cache slot not in use + + Hence simply zeroing out the entire cache invalidates all + entries. + + Why not map ip values directly to DiCfSI*'s? Because this would + cause problems if/when the cfsi array is moved due to resizing. + Instead we cache .cfsi array index value, which should be invariant + across resizing. (That said, I don't think the current + implementation will resize whilst during queries, since the DiCfSI + records are added all at once, when the debuginfo for an object is + read, and is not changed ever thereafter. */ + +#define N_CFSI_CACHE 511 + +typedef + struct { Addr ip; DebugInfo* di; Word ix; } + CFSICacheEnt; + +static CFSICacheEnt cfsi_cache[N_CFSI_CACHE]; + +static void cfsi_cache__invalidate ( void ) { + VG_(memset)(&cfsi_cache, 0, sizeof(cfsi_cache)); +} + + /* The main function for DWARF2/3 CFI-based stack unwinding. Given an IP/SP/FP triple, produce the IP/SP/FP values for the previous frame, if possible. */ @@ -1554,61 +1783,47 @@ Addr min_accessible, Addr max_accessible ) { - Bool ok; - Int i; - DebugInfo* si; - DiCfSI* cfsi = NULL; - Addr cfa, ipHere, spHere, fpHere, ipPrev, spPrev, fpPrev; + Bool ok; + DebugInfo* di; + DiCfSI* cfsi = NULL; + Addr cfa, ipHere, spHere, fpHere, ipPrev, spPrev, fpPrev; CfiExprEvalContext eec; - static UInt n_search = 0; - static UInt n_steps = 0; - n_search++; + static UWord n_q = 0, n_m = 0; + n_q++; + if (0 && 0 == (n_q & 0x1FFFFF)) + VG_(printf)("QQQ %lu %lu\n", n_q, n_m); - if (0) VG_(printf)("search for %#lx\n", *ipP); + { UWord hash = (*ipP) % N_CFSI_CACHE; + CFSICacheEnt* ce = &cfsi_cache[hash]; - for (si = debugInfo_list; si != NULL; si = si->next) { - n_steps++; + if (LIKELY(ce->ip == *ipP) && LIKELY(ce->di != NULL)) { + /* found an entry in the cache .. */ + } else { + /* not found in cache. Search and update. */ + n_m++; + ce->ip = *ipP; + find_DiCfSI( &ce->di, &ce->ix, *ipP ); + } - /* Use the per-DebugInfo summary address ranges to skip - inapplicable DebugInfos quickly. */ - if (si->cfsi_used == 0) >>> TRUNCATED FOR MAIL (1000 lines) <<<