
/*--------------------------------------------------------------------*/
/*--- Callgrind                                                    ---*/
/*---                                                    ct_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Callgrind, a Valgrind skin for call-graph
   profiling of programs.

   Copyright (C) 2002-2005, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)

   This skin is derived from and contains a lot of code from Cachegrind
   Copyright (C) 2002 Nicholas Nethercote (njn25@cam.ac.uk)

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "callgrind.h"
#include "global.h"
#include "events.h"
#include "costs.h"

#if VG_CORE_INTERFACE_MAJOR_VERSION > 4
void SK_(pre_clo_init)(void);
VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 0)
#else
VG_DETERMINE_INTERFACE_VERSION
#endif


/*------------------------------------------------------------*/
/*--- Global variables                                     ---*/
/*------------------------------------------------------------*/

/* for all threads */
CommandLineOptions SK_(clo);
Statistics SK_(stat);
Bool SK_(instrument_state) = True; /* Instrumentation on ? */

/* thread and signal handler specific */
exec_state SK_(current_state);


/*------------------------------------------------------------*/
/*--- Statistics                                           ---*/
/*------------------------------------------------------------*/

static void SK_(init_statistics)(Statistics* s)
{
  s->call_counter        = 0;
  s->jcnd_counter        = 0;
  s->jump_counter        = 0;
  s->rec_call_counter    = 0;
  s->ret_counter         = 0;
  s->bb_executions       = 0;

  s->context_counter     = 0;
  s->bb_retranslations   = 0;

  s->distinct_objs       = 0;
  s->distinct_files      = 0;
  s->distinct_fns        = 0;
  s->distinct_contexts   = 0;
  s->distinct_bbs        = 0;
  s->distinct_bbccs      = 0;
  s->distinct_jccs       = 0;
  s->distinct_instrs     = 0;
  s->distinct_skips      = 0;

  s->bb_hash_resizes     = 0;
  s->bbcc_hash_resizes   = 0;
  s->jcc_hash_resizes    = 0;
  s->cxt_hash_resizes    = 0;
  s->fn_array_resizes    = 0;
  s->call_stack_resizes  = 0;
  s->fn_stack_resizes    = 0;

  s->full_debug_BBs      = 0;
  s->file_line_debug_BBs = 0;
  s->fn_name_debug_BBs   = 0;
  s->no_debug_BBs        = 0;
  s->bbcc_lru_misses     = 0;
  s->jcc_lru_misses      = 0;
  s->cxt_lru_misses      = 0;
  s->bbcc_clones         = 0;
}


    

/*------------------------------------------------------------*/
/*--- Cache simulation instrumentation phase               ---*/
/*------------------------------------------------------------*/
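
/* Valid sizes are the natural x86 access widths (1/2/4/8/10 bytes);
 * larger accesses (e.g. the 28/108/512 byte FPU state mentioned in
 * SK_(instrument) below) get clamped to MIN_LINE_SIZE there, which is
 * why MIN_LINE_SIZE counts as a valid size here, too.
 */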

static
Bool is_valid_data_size(Int data_size)
{
   return (4 == data_size || 2  == data_size || 1 == data_size ||
           8 == data_size || 10 == data_size ||
           MIN_LINE_SIZE == data_size);
}


static
EventSet* insert_simcall(UCodeBlock* cb, InstrInfo* ii,
                   Int data_size,
                   Int t_read,  Int t_read_addr,
                   Int t_write, Int t_write_addr)
{
  Addr    helper;
  Int     argc;
  Int     t_CC_addr,
          t_data_addr1 = INVALID_TEMPREG,
          t_data_addr2 = INVALID_TEMPREG;
  EventSet* es;

#define IS_(X)      (INVALID_TEMPREG != t_##X##_addr)
#define INV(qqt)    (INVALID_TEMPREG == (qqt))

  /* Work out what kind of x86 instruction it is */
  if (!IS_(read) && !IS_(write)) {
    CT_ASSERT( 0 == data_size );
    CT_ASSERT(INV(t_read) && INV(t_write));
    helper = (Addr) SK_(cachesim).log_0D;
    argc = 1;
    es = SK_(sets).D0;

  } else if (IS_(read) && !IS_(write)) {
    CT_ASSERT( is_valid_data_size(data_size) );
    CT_ASSERT(!INV(t_read) && INV(t_write));
    helper = (Addr) SK_(cachesim).log_1Dr;
    argc = 2;
    t_data_addr1 = t_read_addr;
    es = SK_(sets).D1r;

  } else if (!IS_(read) && IS_(write)) {
    CT_ASSERT( is_valid_data_size(data_size) );
    CT_ASSERT(INV(t_read) && !INV(t_write));
    helper = (Addr) SK_(cachesim).log_1Dw;
    argc = 2;
    t_data_addr1 = t_write_addr;
    es = SK_(sets).D1w;
   
  } else {
    CT_ASSERT(IS_(read) && IS_(write));
    CT_ASSERT( is_valid_data_size(data_size) );
    CT_ASSERT(!INV(t_read) && !INV(t_write));
    if (t_read == t_write) {
      /* Modify instruction: treat it as a write access, as that is
       * the more expensive case */
      helper = (Addr) SK_(cachesim).log_1Dw;
      argc = 2;
      t_data_addr1 = t_read_addr;
      es = SK_(sets).D1w;

    } else {
      helper = (Addr) SK_(cachesim).log_2D;
      argc = 3;
      t_data_addr1 = t_read_addr;
      t_data_addr2 = t_write_addr;
      es = SK_(sets).D2;
    }
  }
#undef IS_
#undef INV

  /* helper could be unset depending on the simulator used */
  if (helper == 0) return 0;

  /* Setup 1st arg: InstrInfo */
  t_CC_addr = newTemp(cb);
  uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_CC_addr);
  uLiteral(cb, (Addr)ii);

  /* Call the helper */
  if      (1 == argc)
    uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
  else if (2 == argc)
    uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
          TempReg, t_data_addr1);
  else if (3 == argc)
    uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
          TempReg, t_data_addr1,
          TempReg, t_data_addr2);
  else
    VG_(skin_panic)("argc... not 1 or 2 or 3?");

  uCCall(cb, helper, argc, argc, False);

  return es;
}
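
/* For illustration, the instrumentation emitted above for a 4-byte
 * read access boils down to the following UCode (a sketch):
 *
 *    MOV   $ii, t_CC_addr                  -- address of the InstrInfo
 *    CCALL log_1Dr(t_CC_addr, t_read_addr) -- t_read_addr: data address
 *
 * i.e. one helper call per instrumented instruction, taking the
 * InstrInfo pointer plus up to two data addresses as arguments.
 */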



UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
{
/* Use this rather than eg. -1 because it's a UInt. */
#define INVALID_DATA_SIZE   999999

   UCodeBlock* cb;
   EventSet*   es;
   InstrInfo*  ii;
   Int         i;
   UInstr*     u_in;
   BB*         bb;
   Int         t_read_addr, t_write_addr, t_read, t_write;
   Addr        instr_addr = orig_addr;
   UInt        instr_count = 0, cost_offset = 0;
   UInt        instr_size, data_size = 0;
   Bool        bb_seen_before     = False;
   Bool        instrumented_Jcond = False;

   if (! SK_(instrument_state)) {
     CT_DEBUG(5, "instrument(BB 0x%x) [Instrumentation OFF]\n", orig_addr);
     return cb_in;
   }

   CT_DEBUG(3, "+ instrument(BB 0x%x)\n", orig_addr);

   /* Get BB (creating if necessary) */
   bb = SK_(get_bb)(orig_addr, cb_in, &bb_seen_before);

   cb = VG_(setup_UCodeBlock)(cb_in);

   /* 
    * Precondition:
    * - bbcc_jmpkind has the kind of last jump executed (CALL/RET/COND...)
    * - current_bbcc has a pointer to the BBCC of the last executed BB
    *   Thus, if bbcc_jmpkind is != -1 (JmpNone),
    *     current_bbcc->bb->jmp_addr
    *   gives the address of the jump source.
    *   
    * The BBCC setup does 2 things:
    * - trace call:
    *   * Unwind own call stack, i.e. sync our ESP with the real ESP.
    *     This is for ESP manipulation (longjmps, C++ exception handling) and RET
    *   * For CALLs or JMPs crossing objects, record the call arg and
    *     push an entry onto our own call stack
    *
    * - prepare for cache log functions:
    *   Set current_bbcc to BBCC that gets the costs for this BB execution
    *   attached
    */
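
   /* Example: if the previous BB ended in a CALL into this BB,
    * bbcc_jmpkind is JmpCall and current_bbcc->bb->jmp_addr gives the
    * address of that CALL; setup_bbcc then pushes an entry onto our
    * own call stack and attaches this BB's costs to a new BBCC.
    */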

  /* 1st arg: BB addr */
  t_read_addr = newTemp(cb);
  uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_read_addr);
  uLiteral(cb, (Addr)bb);
  uInstr1(cb, CCALL, 0, TempReg, t_read_addr);
  uCCall(cb, (Addr) & SK_(setup_bbcc), 1, 1, False);

  t_read_addr = t_write_addr = t_read = t_write = INVALID_TEMPREG; 

  for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
      u_in = VG_(get_instr)(cb_in, i);

      /* What this is all about: we want to instrument each x86 instruction
       * translation.  The ends of these are marked in three ways.  The three
       * ways, and the way we instrument them, are as follows:
       *
       * 1. UCode, INCEIP         --> UCode, Instrumentation, INCEIP
       * 2. UCode, Juncond        --> UCode, Instrumentation, Juncond
       * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
       *
       * The last UInstr in a basic block is always a Juncond.  Jconds,
       * when they appear, are always second last.  We check this with 
       * various assertions.
       *
       * We must put the instrumentation before any jumps so that it is always
       * executed.  We don't have to put the instrumentation before the INCEIP
       * (it could go after) but we do so for consistency.
       *
       * x86 instruction sizes are obtained from INCEIPs (for case 1) or
       * from the .extra4b field of the final JMP (for cases 2 & 3).
       *
       * Note that JIFZ is treated differently.
       *
       * The instrumentation is just a call to the appropriate helper function,
       * passing it the address of the instruction's CC.
       */
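      /* Example (case 3): a conditional branch like 'jz target'
       * translates to its UCode, followed by a Jcond to 'target' and a
       * final unconditional JMP to the fall-through address; the
       * instrumentation is inserted before the Jcond so that it is
       * executed on both paths. */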
      if (instrumented_Jcond) CT_ASSERT(u_in->opcode == JMP);

      switch (u_in->opcode) {
      case NOP:  case LOCK:  case CALLM_E:  case CALLM_S:
      break;
      
      /* For memory-ref instrs, copy the data_addr into a temporary to be
       * passed to the log_* helper at the end of the instruction.
       */
      case LOAD:
      case SSE3ag_MemRd_RegWr:
      t_read      = u_in->val1;
      t_read_addr = newTemp(cb);
      uInstr2(cb, MOV, 4, TempReg, u_in->val1,  TempReg, t_read_addr);
      data_size = u_in->size;
      VG_(copy_UInstr)(cb, u_in);
      break;

      case FPU_R:
      case MMX2_MemRd:
      t_read      = u_in->val2;
      t_read_addr = newTemp(cb);
      uInstr2(cb, MOV, 4, TempReg, u_in->val2,  TempReg, t_read_addr);
      data_size = u_in->size;
      VG_(copy_UInstr)(cb, u_in);
      break;

      case SSE2a_MemRd:
      case SSE2a1_MemRd:
      case SSE3a_MemRd:
#if VG_CORE_INTERFACE_MAJOR_VERSION > 3
      case SSE3a1_MemRd: /* Supported since VG-20031104 */
#endif
      t_read = u_in->val3;
      t_read_addr = newTemp(cb);
      uInstr2(cb, MOV, 4, TempReg, u_in->val3,  TempReg, t_read_addr);
      data_size = u_in->size;
      VG_(copy_UInstr)(cb, u_in);
      break;

      /* Note that we must set t_write_addr even for mod instructions;
       * That's how the code above determines whether it does a write.
       * Without it, it would think a mod instruction is a read.
       * As for the MOV, if it's a mod instruction it's redundant, but it's
       * not expensive and mod instructions are rare anyway. */
      case STORE:
      case FPU_W:
      case MMX2_MemWr:
      t_write      = u_in->val2;
      t_write_addr = newTemp(cb);
      uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
      data_size = u_in->size;
      VG_(copy_UInstr)(cb, u_in);
      break;

      case SSE2a_MemWr:
      case SSE3a_MemWr:
      t_write = u_in->val3;
      t_write_addr = newTemp(cb);
      uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_write_addr);
      data_size = u_in->size;
      VG_(copy_UInstr)(cb, u_in);
      break;

      /* INCEIP: insert instrumentation */
      case INCEIP:
      instr_size = u_in->val1;
      goto instrument_x86_instr;

      /* JMP: insert instrumentation if the first JMP */
      case JMP:

      if (instrumented_Jcond) {
        if (SK_(clo).collect_jumps) {
          /* Overwrite the conditional jmpkind, as the conditional
           * jump was not followed */
                
          Int tmp = newTemp(cb);
          Int tmp_jk_addr = newTemp(cb);

          uInstr2(cb, MOV,   4, Literal, 0, TempReg, tmp_jk_addr);
          uLiteral(cb, (Addr) & SK_(current_state).jmpkind);
          uInstr2(cb, MOV,   4, Literal, 0, TempReg, tmp);
          uLiteral(cb, JmpNone);
          uInstr2(cb, STORE, 4, TempReg, tmp, TempReg, tmp_jk_addr);
        }

        CT_ASSERT(CondAlways == u_in->cond);
        CT_ASSERT(i+1 == VG_(get_num_instrs)(cb_in));
        VG_(copy_UInstr)(cb, u_in);
        instrumented_Jcond = False;    /* reset */
        break;
      }

      /* The first JMP... instrument. */

      /* As preparation for the setup_bbcc call at the
       * beginning of the next basic block, we store the
       * jmpkind into a global variable if it's a Call or Ret.
       *
       * If we instrument conditional jumps, use -2 (JmpCond) as jmpkind.
       * Before the final (2nd) jump, jmpkind is reset to -1 (JmpNone).
       * This way, jmpkind will only be -2 in setup_bbcc if the
       * conditional jump was followed.
       */
      if ( ((CondAlways != u_in->cond) && SK_(clo).collect_jumps) ||
           ((CondAlways == u_in->cond) &&
            ((u_in->jmpkind == JmpCall) ||
             (u_in->jmpkind == JmpRet) ||
             (SK_(clo).collect_jumps &&
            (u_in->jmpkind == JmpBoring)) )) ) {

        Int tmp = newTemp(cb);
        Int tmp_jk_addr = newTemp(cb);

        /* Address of bbcc_jmpkind into temp reg for STORE */
        uInstr2(cb, MOV,   4, Literal, 0, TempReg, tmp_jk_addr);
        uLiteral(cb, (Addr) & SK_(current_state).jmpkind);
        uInstr2(cb, MOV,   4, Literal, 0, TempReg, tmp);
        uLiteral(cb, 
               (CondAlways != u_in->cond) ? JmpCond : u_in->jmpkind);
        uInstr2(cb, STORE, 4, TempReg, tmp, TempReg, tmp_jk_addr);
      }
          
      if (bb_seen_before) {
        CT_ASSERT(bb_jmpaddr(bb) == instr_addr);
      }
      else {
        bb->jmp_offset = instr_addr - orig_addr;
      }

      if (CondAlways != u_in->cond) {
        CT_ASSERT(i+2 == VG_(get_num_instrs)(cb_in));
        instrumented_Jcond = True;
      } else {
        CT_ASSERT(i+1 == VG_(get_num_instrs)(cb_in));
      }

      /* Get x86 instr size from final JMP. */
      instr_size = VG_(get_last_instr)(cb_in)->extra4b;

      goto instrument_x86_instr;


      /* Code executed at the end of each x86 instruction. */
      instrument_x86_instr:

      /* Large (e.g. 28B, 108B, 512B) data-sized instructions will be
       * done inaccurately, but they're very rare and this avoids
       * errors from hitting more than two cache lines in the
       * simulation.
       */
      if (data_size > MIN_LINE_SIZE) data_size = MIN_LINE_SIZE;

      CT_ASSERT(instr_size >= 1 && instr_size <= MAX_x86_INSTR_SIZE);

      ii = &(bb->instr[instr_count]);

      /* returns 0 if simulator needs no instrumentation */
      es = insert_simcall(cb, ii, data_size,
                      t_read, t_read_addr,
                      t_write, t_write_addr);

      if (bb_seen_before) {
        CT_ASSERT(ii->instr_offset == instr_addr - orig_addr);
        CT_ASSERT(ii->instr_size == instr_size);
        CT_ASSERT(ii->data_size == data_size);
        CT_ASSERT(ii->cost_offset == cost_offset);
        CT_ASSERT(ii->eventset == es);
      }
      else {
        ii->instr_offset = instr_addr - orig_addr;
        ii->instr_size = instr_size;
        ii->data_size = data_size;
        ii->cost_offset = cost_offset;
        ii->eventset = es;

        SK_(stat).distinct_instrs++;
      }

      cost_offset += es ? es->size : 0;

      CT_DEBUG(5, "  Instr +%2d (Size %d, DSize %d): ESet %s (Size %d)\n",
             instr_addr - orig_addr, instr_size, data_size, 
             es ? es->name : (Char*)"(no Instr)",
             es ? es->size : 0);

      /* Copy original UInstr (INCEIP or JMP) */
      VG_(copy_UInstr)(cb, u_in);

      instr_count++;

      /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
      instr_addr += instr_size;
      t_read_addr = t_write_addr = t_read = t_write = INVALID_TEMPREG;
      data_size = 0;
      break;

      default:
      VG_(copy_UInstr)(cb, u_in);
      break;
      }
  }

  VG_(free_UCodeBlock)(cb_in);

  CT_ASSERT(bb->instr_count == instr_count);
  if (bb_seen_before) {    
    CT_ASSERT(bb->instr_len == instr_addr - orig_addr);
    CT_ASSERT(bb->cost_count == cost_offset);
  }
  else {
    bb->instr_len = instr_addr - orig_addr;
    bb->cost_count = cost_offset;
  }

  CT_DEBUG(3, "- instrument(BB 0x%x): byteLen %d, CostLen %d\n",
         orig_addr, bb->instr_len, bb->cost_count);

  return cb;

#undef INVALID_DATA_SIZE
}
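
/* Summary sketch (assuming the simulator provides all log helpers):
 * for a basic block of two x86 instructions, the first one a load,
 * the transformation above yields roughly
 *
 *    original:            instrumented:
 *      insn1 (load)         CCALL setup_bbcc(bb)
 *      insn2                insn1 UCode
 *      JMP next             CCALL log_1Dr(ii1, addr)
 *                           insn2 UCode
 *                           CCALL log_0D(ii2)
 *                           JMP next
 */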



/*------------------------------------------------------------*/
/*--- SK_(fini)() and related function                     ---*/
/*------------------------------------------------------------*/



static void zero_thread_cost(thread_info* t)
{
  Int i;

  for(i = 0; i < SK_(current_call_stack).sp; i++) {
    if (!SK_(current_call_stack).entry[i].jcc) continue;

    /* reset call counters to current for active calls */
    SK_(copy_cost)( SK_(sets).full, 
                SK_(current_call_stack).entry[i].enter_cost,
                SK_(current_state).cost );
  }

  SK_(forall_bbccs)(SK_(zero_bbcc));

  /* set counter for last dump */
  SK_(copy_cost)( SK_(sets).full, 
              t->lastdump_cost, SK_(current_state).cost );
}

void SK_(zero_all_cost)(Bool only_current_thread)
{
  if (VG_(clo_verbosity) > 1)
    VG_(message)(Vg_DebugMsg, "  Zeroing costs...");

  if (only_current_thread)
    zero_thread_cost(SK_(get_current_thread)());
  else
    SK_(forall_threads)(zero_thread_cost);

  if (VG_(clo_verbosity) > 1)
    VG_(message)(Vg_DebugMsg, "  ...done");
}

void unwind_thread(thread_info* t)
{
  /* unwind signal handlers */
  while(SK_(current_state).sig != 0)
    SK_(post_signal)(SK_(current_tid), SK_(current_state).sig);

  /* unwind regular call stack */
  while(SK_(current_call_stack).sp > 0)
    SK_(pop_call_stack)();
}

/* Oops, this can go wrong... */
extern void VG_(invalidate_translations) ( Addr, UInt, Bool);

void SK_(set_instrument_state)(Char* reason, Bool state)
{
  if (SK_(instrument_state) == state) {
    CT_DEBUG(2, "%s: instrumentation already %s\n",
           reason, state ? "ON" : "OFF");
    return;
  }
  SK_(instrument_state) = state;
  CT_DEBUG(2, "%s: Switching instrumentation %s ...\n",
         reason, state ? "ON" : "OFF");

  /* invalidate all instrumentations */
  VG_(invalidate_translations)( 0x1000, 0xbfff0000, False);

  /* reset internal state: call stacks, simulator */
  SK_(forall_threads)(unwind_thread);
  (*SK_(cachesim).clear)();
  if (0)
    SK_(forall_threads)(zero_thread_cost);

  if (!state)
    SK_(init_exec_state)( &SK_(current_state) );

  if (VG_(clo_verbosity) > 1)
    VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
             reason, state ? "ON" : "OFF");
}
  


#if VG_CORE_INTERFACE_MAJOR_VERSION < 3
Bool SK_(handle_client_request)(ThreadState* tst, UInt *args, UInt *ret)
#else
#if VG_CORE_INTERFACE_MAJOR_VERSION < 7
Bool SK_(handle_client_request)(ThreadId tid, UInt *args, UInt *ret)
#else
Bool SK_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
#endif
#endif
{
   if (!VG_IS_SKIN_USERREQ('C','T',args[0]))
      return False;

   switch(args[0]) {
   case VG_USERREQ__DUMP_STATS:     
      SK_(dump_profile)("Client Request", True);
      *ret = 0;                 /* meaningless */
      break;

   case VG_USERREQ__DUMP_STATS_AT:
     {
       Char buf[512];
       VG_(sprintf)(buf,"Client Request: %d", args[1]);
       SK_(dump_profile)(buf, True);
       *ret = 0;                 /* meaningless */
     }
     break;

   case VG_USERREQ__ZERO_STATS:
     SK_(zero_all_cost)(True);
      *ret = 0;                 /* meaningless */
      break;

   case VG_USERREQ__TOGGLE_COLLECT:
     SK_(current_state).collect = !SK_(current_state).collect;
     CT_DEBUG(2, "Client Request: toggled collection state to %s\n",
            SK_(current_state).collect ? "ON" : "OFF");
     *ret = 0;                 /* meaningless */
     break;

   case VG_USERREQ__START_INSTRUMENTATION:
     SK_(set_instrument_state)("Client Request", True);
     *ret = 0;                 /* meaningless */
     break;

   case VG_USERREQ__STOP_INSTRUMENTATION:
     SK_(set_instrument_state)("Client Request", False);
     *ret = 0;                 /* meaningless */
     break;

   default:
      return False;
   }

   return True;
}
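
/* For illustration (a sketch, relying on the client-side macros
 * declared in callgrind.h): the profiled program can trigger the
 * requests handled above itself, e.g.
 *
 *    #include "callgrind.h"
 *    ...
 *    CALLGRIND_ZERO_STATS;             -- VG_USERREQ__ZERO_STATS
 *    hot_function();
 *    CALLGRIND_DUMP_STATS;             -- VG_USERREQ__DUMP_STATS
 *    CALLGRIND_STOP_INSTRUMENTATION;   -- stops further instrumentation
 */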


/* Syscall Timing */

/* struct timeval syscalltime[VG_N_THREADS]; */
#if CT_MICROSYSTIME
#include <sys/time.h>
#include <sys/syscall.h>
extern Int VG_(do_syscall) ( UInt, ... );

ULong syscalltime[VG_N_THREADS];
#else
UInt syscalltime[VG_N_THREADS];
#endif

void* SK_(pre_syscalltime)(ThreadId tid, UInt syscallno, Bool is_blocking)
{
  if (SK_(clo).collect_systime) {
#if CT_MICROSYSTIME
    struct vki_timeval tv_now;
    VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
    syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
#else
    syscalltime[tid] = VG_(read_millisecond_timer)();
#endif
  }
  return 0;
}

void SK_(post_syscalltime)(ThreadId tid, UInt syscallno,
                   void* pre_result, Int res, Bool is_blocking)
{
  if (SK_(clo).collect_systime) {
    Int o = SK_(sets).off_full_systime;
#if CT_MICROSYSTIME
    struct vki_timeval tv_now;
    ULong diff;
    
    VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
    diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
#else
    UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
#endif  
    
    CT_DEBUG(0,"   Time (Off %d) for Syscall %d: %llu\n", o, syscallno,
             (ULong)diff);
    
    if (o<0) return;

    /* o is the offset of the system time event pair within a full
     * cost array: [o] counts syscall invocations, [o+1] accumulates
     * the time spent in them. */
    SK_(current_state).cost[o] ++;
    SK_(current_state).cost[o+1] += diff;

    /* the "skipped" cost array of the current BBCC is allocated lazily */
    if (!SK_(current_state).bbcc->skipped)
      SK_(init_cost_lz)(SK_(sets).full,
                  &(SK_(current_state).bbcc->skipped));
    SK_(current_state).bbcc->skipped[o] ++;
    SK_(current_state).bbcc->skipped[o+1] += diff;
  }
}


void finish(void)
{
  char buf[RESULTS_BUF_LEN];

  CT_DEBUG(0, "finish()\n");

  (*SK_(cachesim).finish)();

  /* pop all remaining items from CallStack for correct sum */
  SK_(forall_threads)(unwind_thread);
  
  SK_(dump_profile)(0, False);
  
  SK_(finish_command)();
  
  if (VG_(clo_verbosity) == 0) return;
  
  /* Hash table stats */
  if (VG_(clo_verbosity) > 1) {
    int BB_lookups =
      SK_(stat).full_debug_BBs +
      SK_(stat).fn_name_debug_BBs +
      SK_(stat).file_line_debug_BBs +
      SK_(stat).no_debug_BBs;
    
    VG_(message)(Vg_DebugMsg, "");
    VG_(message)(Vg_DebugMsg, "Distinct objects: %d",
             SK_(stat).distinct_objs);
    VG_(message)(Vg_DebugMsg, "Distinct files:   %d",
             SK_(stat).distinct_files);
    VG_(message)(Vg_DebugMsg, "Distinct fns:     %d",
             SK_(stat).distinct_fns);
    VG_(message)(Vg_DebugMsg, "Distinct contexts:%d",
             SK_(stat).distinct_contexts);
    VG_(message)(Vg_DebugMsg, "Distinct BBs:     %d",
             SK_(stat).distinct_bbs);
    VG_(message)(Vg_DebugMsg, "Cost entries:     %d (Chunks %d)",
             SK_(costarray_entries), SK_(costarray_chunks));
    VG_(message)(Vg_DebugMsg, "Distinct BBCCs:   %d",
             SK_(stat).distinct_bbccs);
    VG_(message)(Vg_DebugMsg, "Distinct JCCs:    %d",
             SK_(stat).distinct_jccs);
    VG_(message)(Vg_DebugMsg, "Distinct skips:   %d",
             SK_(stat).distinct_skips);
    VG_(message)(Vg_DebugMsg, "BB lookups:       %d",
             BB_lookups);
    if (BB_lookups>0) {
      VG_(message)(Vg_DebugMsg, "With full      debug info:%3d%% (%d)", 
               SK_(stat).full_debug_BBs    * 100 / BB_lookups,
               SK_(stat).full_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)", 
               SK_(stat).file_line_debug_BBs * 100 / BB_lookups,
               SK_(stat).file_line_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With fn name   debug info:%3d%% (%d)", 
               SK_(stat).fn_name_debug_BBs * 100 / BB_lookups,
               SK_(stat).fn_name_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With no        debug info:%3d%% (%d)", 
               SK_(stat).no_debug_BBs      * 100 / BB_lookups,
               SK_(stat).no_debug_BBs);
    }
    VG_(message)(Vg_DebugMsg, "BBCC Clones:       %d",
             SK_(stat).bbcc_clones);
    VG_(message)(Vg_DebugMsg, "BBs Retranslated:  %d",
             SK_(stat).bb_retranslations);
    VG_(message)(Vg_DebugMsg, "Distinct instrs:   %d",
             SK_(stat).distinct_instrs);
    VG_(message)(Vg_DebugMsg, "");
    
    VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d",
             SK_(stat).cxt_lru_misses);
    VG_(message)(Vg_DebugMsg, "LRU BBCC Misses:   %d",
             SK_(stat).bbcc_lru_misses);
    VG_(message)(Vg_DebugMsg, "LRU JCC Misses:    %d",
             SK_(stat).jcc_lru_misses);
    VG_(message)(Vg_DebugMsg, "BBs Executed:      %llu",
             SK_(stat).bb_executions);
    VG_(message)(Vg_DebugMsg, "Calls:             %llu",
             SK_(stat).call_counter);
    VG_(message)(Vg_DebugMsg, "CondJMP followed:  %llu",
             SK_(stat).jcnd_counter);
    VG_(message)(Vg_DebugMsg, "Boring JMPs:       %llu",
             SK_(stat).jump_counter);
    VG_(message)(Vg_DebugMsg, "Recursive calls:   %llu",
             SK_(stat).rec_call_counter);
    VG_(message)(Vg_DebugMsg, "Returns:           %llu",
             SK_(stat).ret_counter);

    VG_(message)(Vg_DebugMsg, "");
  }

  SK_(sprint_eventmapping)(buf, SK_(dumpmap));
  VG_(message)(Vg_UserMsg, "Events    : %s", buf);
  SK_(sprint_mappingcost)(buf, SK_(dumpmap), SK_(total_cost));
  VG_(message)(Vg_UserMsg, "Collected : %s", buf);
  VG_(message)(Vg_UserMsg, "");

  //  if (SK_(clo).simulate_cache)
  (*SK_(cachesim).printstat)();
}


void SK_(fini)(Int exitcode)
{
  finish();
  if (SK_(clo).collect_data) SK_(finish_data)();
}


/*--------------------------------------------------------------------*/
/*--- Setup                                                        ---*/
/*--------------------------------------------------------------------*/

void SK_(pre_clo_init)(void)
{
    VG_(details_name)            ("Callgrind");
    VG_(details_version)         (VERSION);
    VG_(details_description)     ("a call-graph generating cache profiler");
    VG_(details_copyright_author)("Copyright (C) 2002-2005, and GNU GPL'd, "
                          "by J.Weidendorfer, N.Nethercote et al.");
    VG_(details_bug_reports_to)  ("Josef.Weidendorfer@gmx.de");
    VG_(details_avg_translation_sizeB) ( 155 );

    VG_(needs_command_line_options)();
    VG_(needs_client_requests)();
    VG_(needs_syscall_wrapper)();

#if VG_CORE_INTERFACE_MAJOR_VERSION > 4
    VG_(init_thread_run) ( & SK_(run_thread) );
    VG_(init_pre_deliver_signal)  ( & SK_(pre_signal) );
    VG_(init_post_deliver_signal)  ( & SK_(post_signal) );
    VG_(init_pre_syscall)( & SK_(pre_syscalltime) );
    VG_(init_post_syscall)( & SK_(post_syscalltime) );
#else
    VG_(track_thread_run) ( & SK_(run_thread) );
    VG_(track_pre_deliver_signal)  ( & SK_(pre_signal) );
    VG_(track_post_deliver_signal)  ( & SK_(post_signal) );
    VG_(track_pre_syscall)( & SK_(pre_syscalltime) );
    VG_(track_post_syscall)( & SK_(post_syscalltime) );
#endif

    /* for data collection */
    VG_(init_new_mem_startup)(SK_(my_new_mem_startup));
    VG_(init_new_mem_stack_signal)(SK_(my_new_mem_stack_signal));
    VG_(init_new_mem_brk)(SK_(my_new_mem_brk));
    VG_(init_new_mem_mmap)(SK_(my_new_mem_mmap));

    SK_(set_clo_defaults)();
}

/* Valgrind 2.1.1 (FV - "Full Virtualization") introduced pointer
 * checks, i.e. it no longer allows writes to Valgrind's address space
 * from client code.
 * But Callgrind's instrumentation does exactly this, so we always
 * switch pointer checks off.
 */
#if (VG_CORE_INTERFACE_MAJOR_VERSION > 5)
#define NEED_POINTERCHECKOFF 1
#endif

#ifdef NEED_POINTERCHECKOFF
extern Bool VG_(clo_pointercheck);
#endif

void SK_(post_clo_init)(void)
{
  Char *dir = 0, *fname = 0;

#ifdef NEED_POINTERCHECKOFF
   VG_(clo_pointercheck) = False;
#endif

   CT_DEBUG(1, "  dump threads: %s\n", SK_(clo).separate_threads ? "Yes":"No");
   CT_DEBUG(1, "  call sep. : %d\n", SK_(clo).separate_callers);
   CT_DEBUG(1, "  rec. sep. : %d\n", SK_(clo).separate_recursions);

   if (!SK_(clo).dump_line && !SK_(clo).dump_instr && !SK_(clo).dump_bb) {
       VG_(message)(Vg_UserMsg, "Using source line as position.");
       SK_(clo).dump_line = True;
   }

   SK_(init_files)(&dir,&fname);
   SK_(init_command)(dir,fname);

   (*SK_(cachesim).post_clo_init)();

#if VG_CORE_INTERFACE_MAJOR_VERSION < 7
   // from VG 2.3 on (core interface >= 7), helpers no longer
   // need to be registered
   VG_(register_compact_helper)((Addr) SK_(cachesim).log_0D);
   VG_(register_compact_helper)((Addr) SK_(cachesim).log_1Dr);
   VG_(register_compact_helper)((Addr) SK_(cachesim).log_1Dw);
   VG_(register_compact_helper)((Addr) SK_(cachesim).log_2D);
   VG_(register_compact_helper)((Addr) & SK_(setup_bbcc));
#endif

   SK_(init_eventsets)(0);
   SK_(init_statistics)(& SK_(stat));
   SK_(init_cost_lz)( SK_(sets).full, &SK_(total_cost) );

   if (SK_(clo).collect_data) {
     VG_(needs_data_syms)();
     SK_(init_data)();
   }

   /* initialize hash tables */
   SK_(init_obj_table)();
   SK_(init_cxt_table)();
   SK_(init_bb_hash)();

   SK_(init_threads)();
   SK_(run_thread)(1);

   SK_(instrument_state) = SK_(clo).instrument_atstart;
}


/*--------------------------------------------------------------------*/
/*--- end                                                ct_main.c ---*/
/*--------------------------------------------------------------------*/
