Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 31 Oct 2008 17:53:14 GMT
From:      Peter Wemm <peter@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 152286 for review
Message-ID:  <200810311753.m9VHrEQg086630@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=152286

Change 152286 by peter@peter_daintree on 2008/10/31 17:52:42

	Integrate @152285

Affected files ...

.. //depot/projects/valgrind/VEX/priv/guest-generic/bb_to_IR.c#3 integrate
.. //depot/projects/valgrind/cachegrind/docs/cg-manual.xml#3 integrate
.. //depot/projects/valgrind/callgrind/docs/cl-manual.xml#3 integrate
.. //depot/projects/valgrind/callgrind/dump.c#3 integrate
.. //depot/projects/valgrind/coregrind/Makefile.am#7 integrate
.. //depot/projects/valgrind/coregrind/m_aspacemgr/aspacemgr-linux.c#4 integrate
.. //depot/projects/valgrind/coregrind/m_debuginfo/debuginfo.c#6 integrate
.. //depot/projects/valgrind/coregrind/m_debuginfo/priv_storage.h#3 integrate
.. //depot/projects/valgrind/coregrind/m_debuginfo/priv_tytypes.h#2 integrate
.. //depot/projects/valgrind/coregrind/m_debuginfo/readdwarf3.c#2 integrate
.. //depot/projects/valgrind/coregrind/m_debuginfo/readelf.c#4 integrate
.. //depot/projects/valgrind/coregrind/m_debuginfo/storage.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_debuginfo/tytypes.c#2 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/ansidecl.h#2 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/cp-demangle.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/cp-demangle.h#1 branch
.. //depot/projects/valgrind/coregrind/m_demangle/cplus-dem.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/demangle.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/demangle.h#3 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/dyn-string.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/dyn-string.h#2 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/safe-ctype.c#2 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/safe-ctype.h#2 integrate
.. //depot/projects/valgrind/coregrind/m_demangle/vg_libciface.h#1 branch
.. //depot/projects/valgrind/coregrind/m_errormgr.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_execontext.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_libcbase.c#5 integrate
.. //depot/projects/valgrind/coregrind/m_main.c#9 integrate
.. //depot/projects/valgrind/coregrind/m_stacktrace.c#4 integrate
.. //depot/projects/valgrind/coregrind/m_syswrap/syswrap-amd64-linux.c#3 integrate
.. //depot/projects/valgrind/coregrind/m_syswrap/syswrap-generic.c#7 integrate
.. //depot/projects/valgrind/coregrind/m_syswrap/syswrap-main.c#10 integrate
.. //depot/projects/valgrind/coregrind/m_trampoline.S#5 integrate
.. //depot/projects/valgrind/coregrind/m_xarray.c#2 integrate
.. //depot/projects/valgrind/coregrind/pub_core_debuginfo.h#4 integrate
.. //depot/projects/valgrind/docs/internals/3_3_BUGSTATUS.txt#2 integrate
.. //depot/projects/valgrind/docs/internals/BIG_APP_NOTES.txt#1 branch
.. //depot/projects/valgrind/docs/internals/Makefile.am#3 integrate
.. //depot/projects/valgrind/docs/internals/howto_BUILD_KDE42.txt#1 branch
.. //depot/projects/valgrind/docs/internals/howto_oprofile.txt#1 branch
.. //depot/projects/valgrind/docs/xml/manual-core.xml#3 integrate
.. //depot/projects/valgrind/docs/xml/manual.xml#3 integrate
.. //depot/projects/valgrind/docs/xml/valgrind-manpage.xml#3 integrate
.. //depot/projects/valgrind/exp-ptrcheck/Makefile.am#2 integrate
.. //depot/projects/valgrind/exp-ptrcheck/README.ABOUT.PTRCHECK.txt#2 delete
.. //depot/projects/valgrind/exp-ptrcheck/docs/Makefile.am#2 integrate
.. //depot/projects/valgrind/exp-ptrcheck/docs/pc-manual.xml#1 branch
.. //depot/projects/valgrind/exp-ptrcheck/h_main.c#3 integrate
.. //depot/projects/valgrind/glibc-2.34567-NPTL-helgrind.supp#2 integrate
.. //depot/projects/valgrind/helgrind/Makefile.am#4 integrate
.. //depot/projects/valgrind/helgrind/README_MSMProp2.txt#1 branch
.. //depot/projects/valgrind/helgrind/README_YARD.txt#1 branch
.. //depot/projects/valgrind/helgrind/helgrind.h#3 integrate
.. //depot/projects/valgrind/helgrind/hg_basics.c#1 branch
.. //depot/projects/valgrind/helgrind/hg_basics.h#1 branch
.. //depot/projects/valgrind/helgrind/hg_errors.c#1 branch
.. //depot/projects/valgrind/helgrind/hg_errors.h#1 branch
.. //depot/projects/valgrind/helgrind/hg_intercepts.c#2 integrate
.. //depot/projects/valgrind/helgrind/hg_lock_n_thread.c#1 branch
.. //depot/projects/valgrind/helgrind/hg_lock_n_thread.h#1 branch
.. //depot/projects/valgrind/helgrind/hg_main.c#3 integrate
.. //depot/projects/valgrind/helgrind/hg_wordset.c#2 integrate
.. //depot/projects/valgrind/helgrind/hg_wordset.h#2 integrate
.. //depot/projects/valgrind/helgrind/libhb.h#1 branch
.. //depot/projects/valgrind/helgrind/libhb_core.c#1 branch
.. //depot/projects/valgrind/include/pub_tool_execontext.h#3 integrate
.. //depot/projects/valgrind/include/pub_tool_libcbase.h#3 integrate
.. //depot/projects/valgrind/massif/hp2ps/AreaBelow.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/AreaBelow.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/AuxFile.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/AuxFile.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Axes.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Axes.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/CHANGES#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Curves.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Curves.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Defines.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Deviation.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Deviation.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Dimensions.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Dimensions.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Error.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Error.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/HpFile.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/HpFile.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/INSTALL#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Key.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Key.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/LICENSE#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Main.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Main.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Makefile.am#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Makefile.old#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Marks.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Marks.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/PsFile.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/PsFile.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/README#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Reorder.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Reorder.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Scale.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Scale.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Shade.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Shade.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/TopTwenty.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/TopTwenty.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/TraceElement.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/TraceElement.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Utilities.c#2 delete
.. //depot/projects/valgrind/massif/hp2ps/Utilities.h#2 delete
.. //depot/projects/valgrind/massif/hp2ps/hp2ps.1#2 delete
.. //depot/projects/valgrind/memcheck/mc_translate.c#3 integrate
.. //depot/projects/valgrind/none/tests/amd64/Makefile.am#3 integrate
.. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.c#1 branch
.. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.stderr.exp#1 branch
.. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.stdout.exp#1 branch
.. //depot/projects/valgrind/none/tests/amd64/bug156404-amd64.vgtest#1 branch
.. //depot/projects/valgrind/xfree-4.supp#4 integrate

Differences ...

==== //depot/projects/valgrind/VEX/priv/guest-generic/bb_to_IR.c#3 (text+ko) ====

@@ -376,9 +376,12 @@
      irsb->stmts[selfcheck_idx+3]
         = IRStmt_Put( offB_TILEN, IRExpr_RdTmp(tilen_tmp) );
 
-     p_adler_helper = abiinfo_both->host_ppc_calls_use_fndescrs
-                      ? ((HWord*)(&genericg_compute_adler32))[0]
-                      : (HWord)&genericg_compute_adler32;
+     if (abiinfo_both->host_ppc_calls_use_fndescrs) {
+        HWord* fndescr = (HWord*)&genericg_compute_adler32;
+        p_adler_helper = fndescr[0];
+     } else {
+        p_adler_helper = (HWord)&genericg_compute_adler32;
+     }
 
      irsb->stmts[selfcheck_idx+4]
         = IRStmt_Exit( 

==== //depot/projects/valgrind/cachegrind/docs/cg-manual.xml#3 (text+ko) ====

@@ -807,16 +807,12 @@
 instructions.</para>
 
 <para>To do this, you just need to assemble your
-<computeroutput>.s</computeroutput> files with assembler-level
-debug information.  gcc doesn't do this, but you can use the GNU
-assembler with the <computeroutput>--gstabs</computeroutput>
-option to generate object files with this information, eg:</para>
-
-<programlisting><![CDATA[
-as --gstabs foo.s]]></programlisting>
-
-<para>You can then profile and annotate source files in the same
-way as for C/C++ programs.</para>
+<computeroutput>.s</computeroutput> files with assembly-level debug
+information.  You can use <computeroutput>gcc
+-S</computeroutput> to compile C/C++ programs to assembly code, and then
+<computeroutput>gcc -g</computeroutput> on the assembly code files to
+achieve this.  You can then profile and annotate the assembly code source
+files in the same way as C/C++ source files.</para>
 
 </sect2>
 

==== //depot/projects/valgrind/callgrind/docs/cl-manual.xml#3 (text+ko) ====

@@ -197,7 +197,7 @@
   <computeroutput>callgrind_control -i on</computeroutput> just before the 
   interesting code section is executed. To exactly specify
   the code position where profiling should start, use the client request
-  <computeroutput>CALLGRIND_START_INSTRUMENTATION</computeroutput>.</para>
+  <computeroutput><xref linkend="cr.start-instr"/></computeroutput>.</para>
 
   <para>If you want to be able to see assembly code level annotation, specify
   <option><xref linkend="opt.dump-instr"/>=yes</option>. This will produce
@@ -292,18 +292,13 @@
 
     <listitem>
       <para><command>Program controlled dumping.</command>
-      Put <screen><![CDATA[#include <valgrind/callgrind.h>]]></screen>
-      into your source and add 
-      <computeroutput>CALLGRIND_DUMP_STATS;</computeroutput> when you
-      want a dump to happen. Use 
-      <computeroutput>CALLGRIND_ZERO_STATS;</computeroutput> to only 
-      zero cost centers.</para>
-      <para>In Valgrind terminology, this method is called "Client
-      requests".  The given macros generate a special instruction
-      pattern with no effect at all (i.e. a NOP). When run under
-      Valgrind, the CPU simulation engine detects the special
-      instruction pattern and triggers special actions like the ones
-      described above.</para>
+      Insert
+      <computeroutput><xref linkend="cr.dump-stats"/>;</computeroutput>
+      at the position in your code where you want a profile dump to happen. Use 
+      <computeroutput><xref linkend="cr.zero-stats"/>;</computeroutput> to only 
+      zero profile counters.
+      See <xref linkend="cl-manual.clientrequests"/> for more information on
+      Callgrind specific client requests.</para>
     </listitem>
   </itemizedlist>
 
@@ -338,8 +333,8 @@
   with <screen>callgrind_control -i on</screen>
   and off by specifying "off" instead of "on".
   Furthermore, instrumentation state can be programatically changed with
-  the macros <computeroutput>CALLGRIND_START_INSTRUMENTATION;</computeroutput>
-  and <computeroutput>CALLGRIND_STOP_INSTRUMENTATION;</computeroutput>.
+  the macros <computeroutput><xref linkend="cr.start-instr"/>;</computeroutput>
+  and <computeroutput><xref linkend="cr.stop-instr"/>;</computeroutput>.
   </para>
   
   <para>In addition to enabling instrumentation, you must also enable
@@ -471,6 +466,27 @@
 
   </sect2>
 
+  <sect2 id="cl-manual.forkingprograms" xreflabel="Forking Programs">
+  <title>Forking Programs</title>
+
+  <para>If your program forks, the child will inherit all the profiling
+  data that has been gathered for the parent. To start with empty profile
+  counter values in the child, the client request
+  <computeroutput><xref linkend="cr.zero-stats"/>;</computeroutput>
+  can be inserted into code to be executed by the child, directly after
+  <computeroutput>fork()</computeroutput>.</para>
+
+  <para>However, you will have to make sure that the output file format string
+  (controlled by <option>--callgrind-out-file</option>) does contain
+  <option>%p</option> (which is true by default). Otherwise, the
+  outputs from the parent and child will overwrite each other or will be
+  intermingled, which almost certainly is not what you want.</para>
+
+  <para>You will be able to control the new child independently from
+  the parent via <computeroutput>callgrind_control</computeroutput>.</para>
+
+  </sect2>
+
 </sect1>
 
 
@@ -701,7 +717,7 @@
     </listitem>
   </varlistentry>
   
-  <varlistentry id="opt.collect-atstart">
+  <varlistentry id="opt.collect-atstart" xreflabel="--collect-atstart">
     <term>
       <option><![CDATA[--collect-atstart=<yes|no> [default: yes] ]]></option>
     </term>
@@ -733,13 +749,9 @@
       specification of <computeroutput>--toggle-collect</computeroutput>
       implicitly sets
       <computeroutput>--collect-state=no</computeroutput>.</para>
-      <para>Collection state can be toggled also by using a Valgrind
-      Client Request in your application.  For this, include
-      <computeroutput>valgrind/callgrind.h</computeroutput> and specify
-      the macro
-      <computeroutput>CALLGRIND_TOGGLE_COLLECT</computeroutput> at the
-      needed positions. This only will have any effect if run under
-      supervision of the Callgrind tool.</para>
+      <para>Collection state can be toggled also by inserting the client request
+      <computeroutput><xref linkend="cr.toggle-collect"/>;</computeroutput>
+      at the needed code positions.</para>
     </listitem>
   </varlistentry>
 
@@ -912,4 +924,94 @@
 
 </sect1>
 
+<sect1 id="cl-manual.clientrequests" xreflabel="Client request reference">
+<title>Callgrind specific client requests</title>
+
+<para>In Valgrind terminology, a client request is a C macro which
+can be inserted into your code to request specific functionality when
+run under Valgrind. For this, special instruction patterns resulting
+in NOPs are used, but which can be detected by Valgrind.</para>
+
+<para>Callgrind provides the following specific client requests.
+To use them, add the line
+<screen><![CDATA[#include <valgrind/callgrind.h>]]></screen>
+into your code for the macro definitions.
+</para>
+
+<variablelist id="cl.clientrequests.list">
+  
+  <varlistentry id="cr.dump-stats" xreflabel="CALLGRIND_DUMP_STATS">
+    <term>
+      <computeroutput>CALLGRIND_DUMP_STATS</computeroutput>
+    </term>
+    <listitem>
+      <para>Force generation of a profile dump at specified position
+      in code, for the current thread only. Written counters will be reset
+      to zero.</para>
+    </listitem>
+  </varlistentry>
+
+  <varlistentry id="cr.dump-stats-at" xreflabel="CALLGRIND_DUMP_STATS_AT">
+    <term>
+      <computeroutput>CALLGRIND_DUMP_STATS_AT(string)</computeroutput>
+    </term>
+    <listitem>
+      <para>Same as CALLGRIND_DUMP_STATS, but allows you to specify a string
+      to be able to distinguish profile dumps.</para>
+    </listitem>
+  </varlistentry>
+
+  <varlistentry id="cr.zero-stats" xreflabel="CALLGRIND_ZERO_STATS">
+    <term>
+      <computeroutput>CALLGRIND_ZERO_STATS</computeroutput>
+    </term>
+    <listitem>
+      <para>Reset the profile counters for the current thread to zero.</para>
+    </listitem>
+  </varlistentry>
+
+  <varlistentry id="cr.toggle-collect" xreflabel="CALLGRIND_TOGGLE_COLLECT">
+    <term>
+      <computeroutput>CALLGRIND_TOGGLE_COLLECT</computeroutput>
+    </term>
+    <listitem>
+      <para>Toggle the collection state. This allows events to be ignored
+      with regard to profile counters. See also options
+      <xref linkend="opt.collect-atstart"/> and
+      <xref linkend="opt.toggle-collect"/>.</para>
+    </listitem>
+  </varlistentry>
+
+  <varlistentry id="cr.start-instr" xreflabel="CALLGRIND_START_INSTRUMENTATION">
+    <term>
+      <computeroutput>CALLGRIND_START_INSTRUMENTATION</computeroutput>
+    </term>
+    <listitem>
+      <para>Start full Callgrind instrumentation if not already switched on.
+      When cache simulation is done, this will flush the simulated cache
+      and lead to an artificial cache warmup phase afterwards with
+      cache misses which would not have happened in reality.
+      See also option <xref linkend="opt.instr-atstart"/>.</para>
+    </listitem>
+  </varlistentry>
+
+  <varlistentry id="cr.stop-instr" xreflabel="CALLGRIND_STOP_INSTRUMENTATION">
+    <term>
+      <computeroutput>CALLGRIND_STOP_INSTRUMENTATION</computeroutput>
+    </term>
+    <listitem>
+      <para>Stop full Callgrind instrumentation if not already switched off.
+      This flushes Valgrind's translation cache, and does no additional
+      instrumentation afterwards: it effectively will run at the same
+      speed as the "none" tool, i.e. at minimal slowdown. Use this to
+      speed up the Callgrind run for uninteresting code parts. Use
+      <xref linkend="cr.start-instr"/> to switch on instrumentation again.
+      See also option <xref linkend="opt.instr-atstart"/>.</para>
+    </listitem>
+  </varlistentry>
+
+</variablelist>
+
+</sect1>
+
 </chapter>

==== //depot/projects/valgrind/callgrind/dump.c#3 (text+ko) ====

@@ -64,13 +64,13 @@
 
 Char* CLG_(get_out_file)()
 {
-    CLG_ASSERT(dumps_initialized);
+    CLG_(init_dumps)();
     return out_file;
 }
 
 Char* CLG_(get_out_directory)()
 {
-    CLG_ASSERT(dumps_initialized);
+    CLG_(init_dumps)();
     return out_directory;
 }
 
@@ -1616,6 +1616,8 @@
    CLG_DEBUG(2, "+ dump_profile(Trigger '%s')\n",
 	    trigger ? trigger : (Char*)"Prg.Term.");
 
+   CLG_(init_dumps)();
+
    if (VG_(clo_verbosity) > 1)
        VG_(message)(Vg_DebugMsg, "Start dumping at BB %llu (%s)...",
 		    CLG_(stat).bb_executions,
@@ -1673,15 +1675,35 @@
  * <out_file> always starts with a full absolute path.
  * If the output format string represents a relative path, the current
  * working directory at program start is used.
+ *
+ * This function has to be called every time a profile dump is generated
+ * to be able to react to PID changes.
  */
 void CLG_(init_dumps)()
 {
    Int lastSlash, i;
    SysRes res;
 
+   static int thisPID = 0;
+   int currentPID = VG_(getpid)();
+   if (currentPID == thisPID) {
+       /* already initialized, and no PID change */
+       CLG_ASSERT(out_file != 0);
+       return;
+   }
+   thisPID = currentPID;
+   
    if (!CLG_(clo).out_format)
      CLG_(clo).out_format = DEFAULT_OUTFORMAT;
 
+   /* If a file name was already set, clean up before */
+   if (out_file) {
+       VG_(free)(out_file);
+       VG_(free)(out_directory);
+       VG_(free)(filename);
+       out_counter = 0;
+   }
+
    // Setup output filename.
    out_file =
        VG_(expand_file_name)("--callgrind-out-file", CLG_(clo).out_format);
@@ -1721,7 +1743,8 @@
     }
     if (!res.isError) VG_(close)( (Int)res.res );
 
-    init_cmdbuf();
+    if (!dumps_initialized)
+	init_cmdbuf();
 
     dumps_initialized = True;
 }

==== //depot/projects/valgrind/coregrind/Makefile.am#7 (text+ko) ====

@@ -169,9 +169,11 @@
 	m_debuginfo/priv_readelf.h	\
 	m_debuginfo/priv_readxcoff.h	\
 	m_demangle/ansidecl.h	\
+	m_demangle/cp-demangle.h \
 	m_demangle/dyn-string.h	\
 	m_demangle/demangle.h	\
 	m_demangle/safe-ctype.h \
+	m_demangle/vg_libciface.h \
 	m_scheduler/priv_sema.h \
 	m_syswrap/priv_types_n_macros.h \
 	m_syswrap/priv_syswrap-generic.h \

==== //depot/projects/valgrind/coregrind/m_aspacemgr/aspacemgr-linux.c#4 (text+ko) ====

@@ -325,6 +325,7 @@
 /* ------ end of STATE for the address-space manager ------ */
 
 /* ------ Forwards decls ------ */
+inline
 static Int  find_nsegment_idx ( Addr a );
 
 static void parse_procselfmaps (
@@ -1101,8 +1102,19 @@
 /*-----------------------------------------------------------------*/
 
 /* Binary search the interval array for a given address.  Since the
-   array covers the entire address space the search cannot fail. */
-static Int find_nsegment_idx ( Addr a )
+   array covers the entire address space the search cannot fail.  The
+   _WRK function does the real work.  Its caller (just below) caches
+   the results thereof, to save time.  With N_CACHE of 63 we get a hit
+   rate exceeding 90% when running OpenOffice.
+
+   Re ">> 12", it doesn't matter that the page size of some targets
+   might be different from 1 << 12.  Really "(a >> 12) % N_CACHE" is
+   merely a hash function, and the selected cache entry is always
+   validated against the segment array before use.
+*/
+/* Don't call find_nsegment_idx_WRK; use find_nsegment_idx instead. */
+__attribute__((noinline))
+static Int find_nsegment_idx_WRK ( Addr a )
 {
    Addr a_mid_lo, a_mid_hi;
    Int  mid,
@@ -1126,6 +1138,52 @@
    }
 }
 
+inline static Int find_nsegment_idx ( Addr a )
+{
+#  define N_CACHE 63
+   static Addr cache_pageno[N_CACHE];
+   static Int  cache_segidx[N_CACHE];
+   static Bool cache_inited = False;
+
+   static UWord n_q = 0;
+   static UWord n_m = 0;
+
+   UWord ix;
+
+   if (LIKELY(cache_inited)) {
+      /* do nothing */
+   } else {
+      for (ix = 0; ix < N_CACHE; ix++) {
+         cache_pageno[ix] = 0;
+         cache_segidx[ix] = -1;
+      }
+      cache_inited = True;
+   }
+
+   ix = (a >> 12) % N_CACHE;
+
+   n_q++;
+   if (0 && 0 == (n_q & 0xFFFF))
+      VG_(debugLog)(0,"xxx","find_nsegment_idx: %lu %lu\n", n_q, n_m);
+
+   if ((a >> 12) == cache_pageno[ix]
+       && cache_segidx[ix] >= 0
+       && cache_segidx[ix] < nsegments_used
+       && nsegments[cache_segidx[ix]].start <= a
+       && a <= nsegments[cache_segidx[ix]].end) {
+      /* hit */
+      /* aspacem_assert( cache_segidx[ix] == find_nsegment_idx_WRK(a) ); */
+      return cache_segidx[ix];
+   }
+   /* miss */
+   n_m++;
+   cache_segidx[ix] = find_nsegment_idx_WRK(a);
+   cache_pageno[ix] = a >> 12;
+   return cache_segidx[ix];
+#  undef N_CACHE
+}
+
+
 
 /* Finds the segment containing 'a'.  Only returns file/anon/resvn
    segments.  This returns a 'NSegment const *' - a pointer to 

==== //depot/projects/valgrind/coregrind/m_debuginfo/debuginfo.c#6 (text+ko) ====

@@ -99,6 +99,13 @@
 
 
 /*------------------------------------------------------------*/
+/*--- fwdses                                               ---*/
+/*------------------------------------------------------------*/
+
+static void cfsi_cache__invalidate ( void );
+
+
+/*------------------------------------------------------------*/
 /*--- Root structure                                       ---*/
 /*------------------------------------------------------------*/
 
@@ -320,10 +327,11 @@
 /* Repeatedly scan debugInfo_list, looking for DebugInfos with text
    AVMAs intersecting [start,start+length), and call discard_DebugInfo
    to get rid of them.  This modifies the list, hence the multiple
-   iterations.
+   iterations.  Returns True iff any such DebugInfos were found.
 */
-static void discard_syms_in_range ( Addr start, SizeT length )
+static Bool discard_syms_in_range ( Addr start, SizeT length )
 {
+   Bool       anyFound = False;
    Bool       found;
    DebugInfo* curr;
 
@@ -347,8 +355,11 @@
       }
 
       if (!found) break;
+      anyFound = True;
       discard_DebugInfo( curr );
    }
+
+   return anyFound;
 }
 
 
@@ -479,8 +490,86 @@
 }
 
 
+/* Debuginfo reading for 'di' has just been successfully completed.
+   Check that the invariants stated in
+   "Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS" in
+   priv_storage.h are observed. */
+static void check_CFSI_related_invariants ( DebugInfo* di )
+{
+   DebugInfo* di2 = NULL;
+   vg_assert(di);
+   /* This fn isn't called until after debuginfo for this object has
+      been successfully read.  And that shouldn't happen until we have
+      both a r-x and rw- mapping for the object.  Hence: */
+   vg_assert(di->have_rx_map);
+   vg_assert(di->have_rw_map);
+   /* degenerate case: r-x section is empty */
+   if (di->rx_map_size == 0) {
+      vg_assert(di->cfsi == NULL);
+      return;
+   }
+   /* normal case: r-x section is nonempty */
+   /* invariant (0) */
+   vg_assert(di->rx_map_size > 0);
+   /* invariant (1) */
+   for (di2 = debugInfo_list; di2; di2 = di2->next) {
+      if (di2 == di)
+         continue;
+      if (di2->rx_map_size == 0)
+         continue;
+      vg_assert(di->rx_map_avma + di->rx_map_size <= di2->rx_map_avma
+                || di2->rx_map_avma + di2->rx_map_size <= di->rx_map_avma);
+   }
+   di2 = NULL;
+   /* invariant (2) */
+   if (di->cfsi) {
+      vg_assert(di->cfsi_minavma <= di->cfsi_maxavma); /* duh! */
+      vg_assert(di->cfsi_minavma >= di->rx_map_avma);
+      vg_assert(di->cfsi_maxavma < di->rx_map_avma + di->rx_map_size);
+   }
+   /* invariants (3) and (4) */
+   if (di->cfsi) {
+      Word i;
+      vg_assert(di->cfsi_used > 0);
+      vg_assert(di->cfsi_size > 0);
+      for (i = 0; i < di->cfsi_used; i++) {
+         DiCfSI* cfsi = &di->cfsi[i];
+         vg_assert(cfsi->len > 0);
+         vg_assert(cfsi->base >= di->cfsi_minavma);
+         vg_assert(cfsi->base + cfsi->len - 1 <= di->cfsi_maxavma);
+         if (i > 0) {
+            DiCfSI* cfsip = &di->cfsi[i-1];
+            vg_assert(cfsip->base + cfsip->len <= cfsi->base);
+         }
+      }
+   } else {
+      vg_assert(di->cfsi_used == 0);
+      vg_assert(di->cfsi_size == 0);
+   }
+}
+
+
 /*--------------------------------------------------------------*/
 /*---                                                        ---*/
+/*--- TOP LEVEL: INITIALISE THE DEBUGINFO SYSTEM             ---*/
+/*---                                                        ---*/
+/*--------------------------------------------------------------*/
+
+void VG_(di_initialise) ( void )
+{
+   /* There's actually very little to do here, since everything
+      centers around the DebugInfos in debugInfo_list, they are
+      created and destroyed on demand, and each one is treated more or
+      less independently. */
+   vg_assert(debugInfo_list == NULL);
+
+   /* flush the CFI fast query cache. */
+   cfsi_cache__invalidate();
+}
+
+
+/*--------------------------------------------------------------*/
+/*---                                                        ---*/
 /*--- TOP LEVEL: NOTIFICATION (ACQUIRE/DISCARD INFO) (LINUX) ---*/
 /*---                                                        ---*/
 /*--------------------------------------------------------------*/
@@ -719,6 +808,8 @@
 
       TRACE_SYMTAB("\n------ Canonicalising the "
                    "acquired info ------\n");
+      /* invalidate the CFI unwind cache. */
+      cfsi_cache__invalidate();
       /* prepare read data for use */
       ML_(canonicaliseTables)( di );
       /* notify m_redir about it */
@@ -728,6 +819,10 @@
       di->have_dinfo = True;
       tl_assert(di->handle > 0);
       di_handle = di->handle;
+      /* Check invariants listed in
+         Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS in
+         priv_storage.h. */
+      check_CFSI_related_invariants(di);
 
    } else {
       TRACE_SYMTAB("\n------ ELF reading failed ------\n");
@@ -735,6 +830,7 @@
          this DebugInfo?  No - it contains info on the rw/rx
          mappings, at least. */
       di_handle = 0;
+      vg_assert(di->have_dinfo == False);
    }
 
    TRACE_SYMTAB("\n");
@@ -751,8 +847,11 @@
    [a, a+len).  */
 void VG_(di_notify_munmap)( Addr a, SizeT len )
 {
+   Bool anyFound;
    if (0) VG_(printf)("DISCARD %#lx %#lx\n", a, a+len);
-   discard_syms_in_range(a, len);
+   anyFound = discard_syms_in_range(a, len);
+   if (anyFound)
+      cfsi_cache__invalidate();
 }
 
 
@@ -766,8 +865,11 @@
 #  if defined(VGP_x86_linux) || defined(VGP_x86_freebsd)
    exe_ok = exe_ok || toBool(prot & VKI_PROT_READ);
 #  endif
-   if (0 && !exe_ok)
-      discard_syms_in_range(a, len);
+   if (0 && !exe_ok) {
+      Bool anyFound = discard_syms_in_range(a, len);
+      if (anyFound)
+         cfsi_cache__invalidate();
+   }
 }
 
 #endif /* defined(VGO_linux) */
@@ -798,6 +900,10 @@
 {
    ULong hdl = 0;
 
+   /* play safe; always invalidate the CFI cache.  Not
+      that it should be used on AIX, but still .. */
+   cfsi_cache__invalidate();
+
    if (acquire) {
 
       Bool       ok;
@@ -841,6 +947,10 @@
          di->have_dinfo = True;
          hdl = di->handle;
          vg_assert(hdl > 0);
+         /* Check invariants listed in
+            Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS in
+            priv_storage.h. */
+         check_CFSI_related_invariants(di);
       } else {
          /*  Something went wrong (eg. bad XCOFF file). */
          discard_DebugInfo( di );
@@ -851,8 +961,11 @@
 
       /* Dump all the debugInfos whose text segments intersect
          code_start/code_len. */
+      /* CFI cache is always invalidated at start of this routine.
+         Hence it's safe to ignore the return value of
+         discard_syms_in_range. */
       if (code_len > 0)
-         discard_syms_in_range( code_start, code_len );
+         (void)discard_syms_in_range( code_start, code_len );
 
    }
 
@@ -894,11 +1007,11 @@
    If findText==False, only data symbols are searched for.
 */
 static void search_all_symtabs ( Addr ptr, /*OUT*/DebugInfo** pdi,
-                                           /*OUT*/Int* symno,
+                                           /*OUT*/Word* symno,
                                  Bool match_anywhere_in_sym,
                                  Bool findText )
 {
-   Int        sno;
+   Word       sno;
    DebugInfo* di;
    Bool       inRange;
 
@@ -945,9 +1058,9 @@
    *pdi to the relevant DebugInfo, and *locno to the loctab entry
    *number within that.  If not found, *pdi is set to NULL. */
 static void search_all_loctabs ( Addr ptr, /*OUT*/DebugInfo** pdi,
-                                           /*OUT*/Int* locno )
+                                           /*OUT*/Word* locno )
 {
-   Int        lno;
+   Word       lno;
    DebugInfo* di;
    for (di = debugInfo_list; di != NULL; di = di->next) {
       if (di->text_present
@@ -978,7 +1091,7 @@
                     Bool findText, /*OUT*/OffT* offsetP )
 {
    DebugInfo* di;
-   Int        sno;
+   Word       sno;
    Int        offset;
 
    search_all_symtabs ( a, &di, &sno, match_anywhere_in_sym, findText );
@@ -1020,7 +1133,7 @@
 Addr VG_(get_tocptr) ( Addr guest_code_addr )
 {
    DebugInfo* si;
-   Int        sno;
+   Word       sno;
    search_all_symtabs ( guest_code_addr, 
                         &si, &sno,
                         True/*match_anywhere_in_fun*/,
@@ -1187,7 +1300,7 @@
 Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename )
 {
    DebugInfo* si;
-   Int      locno;
+   Word       locno;
    search_all_loctabs ( a, &si, &locno );
    if (si == NULL) 
       return False;
@@ -1199,7 +1312,7 @@
 Bool VG_(get_linenum)( Addr a, UInt* lineno )
 {
    DebugInfo* si;
-   Int      locno;
+   Word       locno;
    search_all_loctabs ( a, &si, &locno );
    if (si == NULL) 
       return False;
@@ -1218,7 +1331,7 @@
                                  /*OUT*/UInt* lineno )
 {
    DebugInfo* si;
-   Int      locno;
+   Word       locno;
 
    vg_assert( (dirname == NULL && dirname_available == NULL)
               ||
@@ -1542,6 +1655,122 @@
 }
 
 
+/* Search all the DebugInfos in the entire system, to find the DiCfSI
+   that pertains to 'ip'.  If found, set *diP to the DebugInfo in
+   which it resides, and *ixP to the index in that DebugInfo's cfsi
+   array.  If not found, set *diP to (DebugInfo*)1 and *ixP to zero.
+
+   Side effect: a successful search may move the found DebugInfo one
+   step towards the start of debugInfo_list (see the hack below).
+*/
+__attribute__((noinline))
+static void find_DiCfSI ( /*OUT*/DebugInfo** diP,
+                          /*OUT*/Word* ixP,
+                          Addr ip )
+{
+   DebugInfo* di;
+   Word       i = -1;
+
+   static UWord n_search = 0;
+   static UWord n_steps = 0;
+   n_search++;
+
+   if (0) VG_(printf)("search for %#lx\n", ip);
+
+   for (di = debugInfo_list; di != NULL; di = di->next) {
+      Word j;
+      n_steps++;
+
+      /* Use the per-DebugInfo summary address ranges to skip
+         inapplicable DebugInfos quickly. */
+      if (di->cfsi_used == 0)
+         continue;
+      if (ip < di->cfsi_minavma || ip > di->cfsi_maxavma)
+         continue;
+
+      /* It might be in this DebugInfo.  Search it. */
+      j = ML_(search_one_cfitab)( di, ip );
+      vg_assert(j >= -1 && j < (Word)di->cfsi_used);
+
+      if (j != -1) {
+         i = j;
+         break; /* found it */
+      }
+   }
+
+   if (i == -1) {
+
+      /* we didn't find it. */
+      *diP = (DebugInfo*)1;
+      *ixP = 0;
+
+   } else {
+
+      /* found it. */
+      /* ensure that di is 4-aligned (at least), so it can't possibly
+         be equal to (DebugInfo*)1. */
+      vg_assert(di && VG_IS_4_ALIGNED(di));
+      vg_assert(i >= 0 && i < (Word)di->cfsi_used);
+      *diP = di;
+      *ixP = i;
+
+      /* Start of performance-enhancing hack: on every 16th search
+         (n_search & 0xF == 0), provided that search succeeded, move
+         the found DebugInfo one step closer to the start of the
+         list.  This makes future searches cheaper.  For starting
+         konqueror on amd64, this in fact reduces the total amount of
+         searching done by the above find-the-right-DebugInfo loop by
+         more than a factor of 20. */
+      if ((n_search & 0xF) == 0) {
+         /* Move di one step closer to the start of the list. */
+         move_DebugInfo_one_step_forward( di );
+      }
+      /* End of performance-enhancing hack. */
+
+      if (0 && ((n_search & 0x7FFFF) == 0))
+         VG_(printf)("find_DiCfSI: %lu searches, "
+                     "%lu DebugInfos looked at\n", 
+                     n_search, n_steps);
+
+   }
+
+}
+
+
+/* Now follows a mechanism for caching queries to find_DiCfSI, since
+   they are extremely frequent on amd64-linux, during stack unwinding.
+
+   Each cache entry binds an ip value to a (di, ix) pair.  Possible
+   values:
+
+   di is non-null, ix >= 0  ==>  cache slot in use, "di->cfsi[ix]"
+   di is (DebugInfo*)1      ==>  cache slot in use, no associated di
+   di is NULL               ==>  cache slot not in use
+
+   Hence simply zeroing out the entire cache invalidates all
+   entries.
+
+   Why not map ip values directly to DiCfSI*'s?  Because this would
+   cause problems if/when the cfsi array is moved due to resizing.
+   Instead we cache the .cfsi array index value, which should be
+   invariant across resizing.  (That said, I don't think the current
+   implementation will resize during queries, since the DiCfSI
+   records are added all at once, when the debuginfo for an object is
+   read, and are not changed ever thereafter.) */
+
+#define N_CFSI_CACHE 511   /* number of slots in cfsi_cache */
+/* One cache slot: binds an ip value to a (di, ix) pair. */
+typedef
+   struct { Addr ip; DebugInfo* di; Word ix; }
+   CFSICacheEnt;
+/* The cache itself; lookups index it with ip % N_CFSI_CACHE. */
+static CFSICacheEnt cfsi_cache[N_CFSI_CACHE];
+/* Zero every slot; a NULL di marks a slot as not in use. */
+static void cfsi_cache__invalidate ( void ) {
+   VG_(memset)(&cfsi_cache, 0, sizeof(cfsi_cache));
+}
+
+
 /* The main function for DWARF2/3 CFI-based stack unwinding.
    Given an IP/SP/FP triple, produce the IP/SP/FP values for the
    previous frame, if possible. */
@@ -1554,61 +1783,47 @@
                         Addr min_accessible,
                         Addr max_accessible )
 {
-   Bool     ok;
-   Int      i;
-   DebugInfo* si;
-   DiCfSI*  cfsi = NULL;
-   Addr     cfa, ipHere, spHere, fpHere, ipPrev, spPrev, fpPrev;
+   Bool       ok;
+   DebugInfo* di;
+   DiCfSI*    cfsi = NULL;
+   Addr       cfa, ipHere, spHere, fpHere, ipPrev, spPrev, fpPrev;
 
    CfiExprEvalContext eec;
 
-   static UInt n_search = 0;
-   static UInt n_steps = 0;
-   n_search++;
+   static UWord n_q = 0, n_m = 0;
+   n_q++;
+   if (0 && 0 == (n_q & 0x1FFFFF))
+      VG_(printf)("QQQ %lu %lu\n", n_q, n_m);
 
-   if (0) VG_(printf)("search for %#lx\n", *ipP);
+   { UWord hash = (*ipP) % N_CFSI_CACHE;
+     CFSICacheEnt* ce = &cfsi_cache[hash];
 
-   for (si = debugInfo_list; si != NULL; si = si->next) {
-      n_steps++;
+     if (LIKELY(ce->ip == *ipP) && LIKELY(ce->di != NULL)) {
+        /* found an entry in the cache .. */
+     } else {
+        /* not found in cache.  Search and update. */
+        n_m++;
+        ce->ip = *ipP;
+        find_DiCfSI( &ce->di, &ce->ix, *ipP );
+     }
 
-      /* Use the per-DebugInfo summary address ranges to skip
-         inapplicable DebugInfos quickly. */
-      if (si->cfsi_used == 0)

>>> TRUNCATED FOR MAIL (1000 lines) <<<



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200810311753.m9VHrEQg086630>