/*
 * Copyright (c) 2003 Hewlett-Packard Development Company, L.P.
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE. 
 */

#include "config.h"

# ifndef _GNU_SOURCE
#   define _GNU_SOURCE
# endif
#include <errno.h>
#include <sys/time.h>
#include <signal.h>
#include <unistd.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <execinfo.h>
#include <link.h>
#include "prof_utils.h"
#include "q-lib.h"
#define AO_REQUIRE_CAS		/* Really we need fetch_and_add.	*/
				/* Compare-and-swap gives us that.	*/
#include "atomic_ops.h"
#ifdef HAVE_LIBUNWIND_H
#include <libunwind.h>
#else
#define NO_UNWIND
#endif

/* Sampling period.  Used as microseconds when the interval timer drives  */
/* sampling and as an event count when a hardware counter does.           */
static long interval = 10000;	/* Profiling interval.  Usecs or count.      */
/* Set to 1 by QPROF_setup_signals and cleared when QPROF_VIRTUAL is set. */
static int real_time = 1;	/* using real or virtual time? */

/*
 * Default fatal-error handler.
 *
 * Writes msg followed by a newline to file descriptor 2 and then calls
 * abort().  If the environment variable QPROF_LOOP_ON_ERROR is set, it
 * spins forever instead of aborting, so the process stays alive (e.g. for
 * a debugger to attach).
 */
static void QPROF_default_error(const char *msg)
{
  write (2, msg, strlen (msg));
  write (2, "\n", 1);
  /* Loop only when explicitly requested; otherwise fall through to abort. */
  if (getenv("QPROF_LOOP_ON_ERROR") != NULL) {
    for (;;);
  }
  abort ();
}

/* Default warning handler: emit msg and a trailing newline on fd 2. */
static void QPROF_default_warn(const char *msg)
{
  static const char newline[] = "\n";
  const size_t msg_len = strlen (msg);

  write (2, msg, msg_len);
  write (2, newline, 1);
}
  
  
/* Fatal-error hook used throughout this file; initially points at	*/
/* QPROF_default_error.							*/
void (*QPROF_error)(const char *msg) = QPROF_default_error;

/* Warning hook used throughout this file; initially points at		*/
/* QPROF_default_warn.							*/
void (*QPROF_warn)(const char *msg) = QPROF_default_warn;

/* Mechanisms that can drive the sampling signal. */
typedef enum
{ TIMER, HW_EVENT, OTHER }
signal_sources;
	/* add others as necessary ... */


/* Set to 1 by the first caller of QPROF_setup_signals that reaches the	*/
/* sigaction step; callers that later observe a nonzero value skip	*/
/* installing the handler.						*/
static volatile AO_T sigaction_called = 0;

#if defined(__ia64__) && defined(HW_EVENT_SUPPORT)

/*
 * Support for profiling based on Itanium hardware performance counters.
 * This uses Stephane Eranian's perfmon kernel support and libpfm
 * library.  We try to revert to timer-based sampling if any pieces
 * are missing.
 *
 * The intent here is that we could provide alternate implementations
 * of these routines for other hardware and performance counter libraries.
 * For higher level libraries this is likely to be either easier or
 * impossible.
 *
 * This borrows heavily from Stephane's sample code.
 */

# include <errno.h>
# include <perfmon/pfmlib.h>
# include <dlfcn.h>

/* The following may be written repeatedly by different threads.	*/
/* But all writes should write the same value.				*/
/* Holds the address of perfmonctl as looked up with dlsym in		*/
/* QPROF_setup_hw_event.						*/
static typeof(perfmonctl) * my_perfmonctl;
#if 0
  /* These are actually inline assembly code; they don't need the library. */
  static void (* my_pfm_start)(void);
  static void (* my_pfm_stop)(void);
#endif 

/*
 * Returns 1 if we set things up to receive signals in response to
 * PMU events.  We do so only if QPROF_HW_EVENT was set to a proper PMU
 * event.
 *
 * interval is the number of events between overflow notifications.
 *
 * Returns 0 (after a warning, except when QPROF_HW_EVENT is simply unset)
 * whenever libpfm cannot be loaded, a symbol is missing, the event is
 * unknown or cannot be configured, or the perfmon context cannot be
 * created; the caller then falls back to timer-based sampling.  Failures
 * after the context exists are reported through QPROF_error.
 */
static int
QPROF_setup_hw_event (long interval)
{
  void *pfm_handle;
  const char *event_string = getenv("QPROF_HW_EVENT");
  pfarg_context_t ctx[1];
  pfmlib_param_t evt;
  pfarg_reg_t pd[PMU_MAX_PMDS];
  int ret;
  int pid;
  typeof(pfm_initialize) * my_pfm_initialize;
  typeof(pfm_find_event) * my_pfm_find_event;
  typeof(pfm_dispatch_events) * my_pfm_dispatch_events;

  if (0 == event_string) return 0;
  if ((pfm_handle = dlopen("libpfm.so.2", RTLD_LAZY)) == NULL)
    {
      QPROF_warn("Can't load libpfm: using timer instead.\n");
      return 0;
    }
  my_pfm_initialize = dlsym(pfm_handle, "pfm_initialize");
  my_pfm_find_event = dlsym(pfm_handle, "pfm_find_event");
  my_pfm_dispatch_events = dlsym(pfm_handle, "pfm_dispatch_events");
  my_perfmonctl = dlsym(pfm_handle, "perfmonctl");
# if 0
    my_pfm_start = dlsym(pfm_handle, "pfm_start");
    my_pfm_stop = dlsym(pfm_handle, "pfm_stop");
# endif

  if (!my_pfm_initialize || !my_pfm_find_event || !my_pfm_dispatch_events
      || !my_perfmonctl /* || !my_pfm_start || !my_pfm_stop */)
    {
      QPROF_warn("Missing libpfm symbol: using timer instead.\n");
      return 0;
    }
  if (my_pfm_initialize() != PFMLIB_SUCCESS)
    {
      QPROF_warn("Can't initialize perfmon library: using timer instead.\n");
      return 0;
    }

  memset(ctx, 0, sizeof(ctx));
  /*
   * Prepare parameter structure for pfm_dispatch_events.
   * We don't use any Itanium specific features here.
   * So evt.pfp_model is NULL.
   */
  memset(&evt,0, sizeof(evt));
  ret = my_pfm_find_event(/* FIXME */ (char *)event_string, &evt.pfp_events[0].event);
  if (ret != PFMLIB_SUCCESS)
    {
      QPROF_warn("Cannot find event: using timer instead\n");
      return 0;
    }
  /*
   * set the default privilege mode for all counters:
   * 	PFM_PLM3 : user level only
   */
  evt.pfp_dfl_plm = PFM_PLM3; 

  /*
   * how many counters we use
   */
  evt.pfp_event_count = 1;

  /*
   * Let the library figure out the values for the PMC registers.
   * This reads evt.pfp_events (and friends) and writes evt.pfp_pc
   * (and friends).
   */
  if ((ret = my_pfm_dispatch_events(&evt)) != PFMLIB_SUCCESS)
    {
      QPROF_warn("Cannot configure events: using timer instead\n");
      /* The PMC values are unusable; really do fall back to the timer. */
      return 0;
    }

  /*
   * Arrange to be notified on counter overflow.
   */
  ctx[0].ctx_flags      = PFM_FL_INHERIT_NONE;
  ctx[0].ctx_notify_pid = pid = getpid();
  evt.pfp_pc[0].reg_flags 	|= PFM_REGFL_OVFL_NOTIFY;
  /* NOTE(review): only one event is configured, yet this indexes	*/
  /* pfp_pc[1]; kept as is -- confirm against the libpfm examples.	*/
  evt.pfp_pc[0].reg_reset_pmds[0] |= 1UL << evt.pfp_pc[1].reg_num;

  /*
   * now create the context for self monitoring/per-task
   */
  if (my_perfmonctl(pid, PFM_CREATE_CONTEXT, ctx, 1) == -1 )
    {
      if (errno == ENOSYS)
  	QPROF_warn("Kernel lacks performance monitoring support: "
	           "Using timer.\n");
      else
  	QPROF_warn("Failed to create perfmon context: "
	           "Using timer.\n");
      return 0;
  }
  /* 
   * Must be done before any PMC/PMD calls (unfreeze PMU). Initialize
   * PMC/PMD to safe values. psr.up is cleared.
   */
  if (my_perfmonctl(pid, PFM_ENABLE, NULL, 0) == -1) {
  	QPROF_error("perfmonctl(...PFM_ENABLE...) failed\n");
  }

  /*
   * Initialize PMDs.
   */
    {
      unsigned i;

      memset(pd, 0, sizeof(pd));
      for (i=0; i < evt.pfp_event_count; i++)
        pd[i].reg_num = evt.pfp_pc[i].reg_num;
    }
    /*
     * we arm the first counter, such that it will overflow
     * after interval events have been observed
     */
    pd[0].reg_value       = (~0UL) - interval + 1;
    pd[0].reg_long_reset  = (~0UL) - interval + 1;

  /*
   * Now program the registers.
   */
  if (my_perfmonctl(pid, PFM_WRITE_PMCS, evt.pfp_pc, evt.pfp_pc_count) == -1)
    QPROF_error("perfmonctl(PFM_WRITE_PMCS...) failed.\n");

  if (my_perfmonctl(pid, PFM_WRITE_PMDS, pd, evt.pfp_event_count) == -1)
    QPROF_error("perfmonctl(PFM_WRITE_PMDS...) failed.\n");

  return 1;
}

/*
 * Start monitoring.
 * Calls pfm_start directly rather than through a dlsym'ed pointer (the
 * my_ prefix is commented out).
 */
void
QPROF_start_hw_event()
{
  /* my_*/pfm_start();
}

/*
 * Stop monitoring: halt the counters, then tear down this process's
 * perfmon context.  A failure to destroy the context is only a warning.
 */
void
QPROF_stop_hw_event()
{
  int rc;

  pfm_stop();

  rc = my_perfmonctl(getpid(), PFM_DESTROY_CONTEXT, NULL, 0);
  if (-1 == rc)
    QPROF_warn("perfmonctl(PFM_DESTROY...) failed\n");
}

/*
 * Re-arm monitoring after an overflow notification.
 * The sampling signal handlers call this on every sample.
 */
void
QPROF_restart_hw_event()
{
  const int rc = my_perfmonctl(getpid(), PFM_RESTART, NULL, 0);

  if (-1 == rc)
    {
      QPROF_error("Perfmonctl(...PFM_RESTART...) failed\n");
    }
}

/* Signal number QPROF_setup_signals uses when the source is HW_EVENT. */
#define HW_SIGNO SIGPROF
/* Marks that the real hardware-event routines above were compiled in. */
#define HAVE_HW_EVENTS

#endif

#ifndef HAVE_HW_EVENTS

/* No hardware-event support compiled in: setup always reports failure	*/
/* (0), so the source stays TIMER, and the other hooks expand to	*/
/* nothing.								*/
# define QPROF_setup_hw_event(interval) 0
# define QPROF_start_hw_event()
# define QPROF_stop_hw_event()
# define QPROF_restart_hw_event()
# define HW_SIGNO -1	/* Shouldn't really be used. Must be defined.	*/

#endif /* HAVE_HW_EVENTS */

/* Mechanism currently driving sampling; TIMER unless hardware-event	*/
/* setup succeeds in QPROF_setup_signals.				*/
static signal_sources source = TIMER;

/*
 * Configure sampling from the environment and install handler.
 *
 *   QPROF_INTERVAL  sampling interval (must be >= 1)
 *   QPROF_REAL      use ITIMER_REAL / SIGALRM
 *   QPROF_VIRTUAL   use ITIMER_VIRTUAL / SIGVTALRM
 *
 * The default is ITIMER_PROF / SIGPROF.  If hardware-event setup succeeds,
 * HW_SIGNO is used and no interval timer is armed.
 *
 * Returns the signal number on which samples will be delivered.
 */
int
QPROF_setup_signals (void (*handler) (int, siginfo_t *, void *))
{
  int which_timer = ITIMER_PROF;
  int signo = SIGPROF;
  char *interval_env = getenv ("QPROF_INTERVAL");

  if (interval_env != NULL)
    {
      long requested = atoi (interval_env);

      if (requested < 1)
        QPROF_error("QPROF_INTERVAL must be >= 1");
      else
        interval = requested;
    }

  if (QPROF_setup_hw_event(interval))
    source = HW_EVENT;

  real_time = 1;
  if (getenv ("QPROF_REAL") != NULL)
    {
      which_timer = ITIMER_REAL;
      signo = SIGALRM;
    }
  if (getenv ("QPROF_VIRTUAL") != NULL)
    {
      which_timer = ITIMER_VIRTUAL;
      signo = SIGVTALRM;
      real_time = 0;
    }
  if (HW_EVENT == source)
    signo = HW_SIGNO;

  if (AO_load(&sigaction_called) == 0)  /* Probably first thread */
    {
      struct sigaction act;

      AO_store_release(&sigaction_called, 1);  /* Visible after AO_load */
      /* Install the handler.  Harmless if several threads get here. */
#     if defined(__i386__) || defined(__alpha__)
        act.sa_flags = SA_RESTART;
#     else
        act.sa_flags = SA_RESTART | SA_SIGINFO;
#     endif
      act.sa_sigaction = handler;
      sigemptyset(&act.sa_mask);
      sigaction (signo, &act, 0);
    }

  switch (source)
    {
    case TIMER:
      {
        struct itimerval spec;

        /* tv_usec must stay below one second. */
        if (interval >= 1000000)
          interval = 999000;
        spec.it_interval.tv_sec = 0;
        spec.it_interval.tv_usec = interval;
        spec.it_value = spec.it_interval;
        if (setitimer (which_timer, &spec, 0) != 0)
          QPROF_error("Setitimer for QPROF_setup_signals failed");
      }
      break;
    case HW_EVENT:
      QPROF_start_hw_event();
      break;
    default:
      break;
    }
  return signo;
}

#include <stdio.h>

/*
 * Return the absolute path of the running executable, obtained from the
 * /proc/self/exe symlink, or 0 if it cannot be read, does not fit in
 * EXE_SZ - 1 characters, or does not start with '/'.
 * The lookup is attempted only once; later calls return the cached answer.
 * The returned string lives in static storage.
 */
char *
QPROF_get_exe_name (void)
{
# define EXE_SZ 100
  static char exe_name[EXE_SZ];
  static int found_exe_name = 0;
  static char *result;
  int len;

  /* FIXME - This is probably not 100% thread-safe. */
  if (!found_exe_name)
    {
      len = readlink ("/proc/self/exe", exe_name, EXE_SZ);
      if (len >= 0 && len < EXE_SZ && exe_name[0] == '/')
        {
          exe_name[len] = '\0';
          found_exe_name = 1;
          result = exe_name;
        }
      else
        {
          result = 0;
          found_exe_name = 1;	/* Dont try again. */
        }
    }
  return result;
}

/*
 * Write a pc value in human-readable form into result_buf, of length ncols+1,
 * padding the total output to ncols.
 *
 * Strategy:
 *   1. If another call is already in progress (reentry_count > 0), just
 *      print the raw address.
 *   2. Otherwise run "<addr2line> -C -f -e <exe> <pc>" through popen,
 *      with LD_PRELOAD temporarily removed from the environment, and
 *      condense the output to "<function>:<file>:<line>", appending
 *      " [<pc>]" when there is room.  The addr2line path comes from
 *      QPROF_ADDR2LINE, defaulting to /usr/bin/addr2line.
 *   3. If that fails, fall back to backtrace_symbols.
 *
 * result_buf is always null-terminated at index ncols on return.
 */
void
QPROF_format_pc (char *result_buf, unsigned long pc, size_t ncols)
{
  static unsigned long reentry_count = 0;
  FILE *pipe;
  unsigned long old_count = AO_fetch_and_add1_acquire(&reentry_count);

  if (old_count > 0)
    {
      /* Avoid excessive recursion; punt      */
      snprintf (result_buf, ncols + 1, "[%p]", (void *) pc);
      goto done;
    }
  /* Try for a line number with the addr2line program. */
  {
#   define CMD_SZ 200
    char cmd_buf[CMD_SZ];
    int cmd_len;
    size_t result_len = 0;
    size_t addr_room;
    char *exe_name;
    char *old_preload;
#   define PRELOAD_SZ 200
    char preload_buf[PRELOAD_SZ];
    static int will_always_fail = 0;
    char * addr2line_path;
#   define TMP_SZ 1024
    char tmp_buf[TMP_SZ];

    /* First we get the name of the executable:             */
    if (will_always_fail)
      goto fail;
    exe_name = QPROF_get_exe_name ();
    if (exe_name == 0 || strcmp(exe_name,"/bin/bash") == 0)
      {
	/* We don't try this inside bash, since it seems to handle	*/
	/* environments explicitly, and hence our updates don't work.	*/
	will_always_fail = 1;	/* Dont try again. */
	goto fail;
      }
    /* Then we use popen to start addr2line -e <exe> <addr> */
    /* There are faster ways to do this, but hopefully this */
    /* isn't time critical.                                 */
    /* In the best case this gets us a string of the form   */
    /* <fn name>\n<full path name>:<line no.>.		    */
    addr2line_path = getenv("QPROF_ADDR2LINE");
    if (0 == addr2line_path)
      addr2line_path = "/usr/bin/addr2line";
    cmd_len = snprintf (cmd_buf, CMD_SZ,
			"%s -C -f -e %s 0x%lx", addr2line_path, exe_name, pc);
    if (cmd_len < 0 || cmd_len >= CMD_SZ)
      {
	/* Never run a truncated command line. */
	will_always_fail = 1;
	goto fail;
      }
    old_preload = getenv ("LD_PRELOAD");
    if (0 != old_preload)
      {
	if (strlen (old_preload) >= PRELOAD_SZ)
	  {
	    will_always_fail = 1;
	    goto fail;
	  }
	/* Copy first: unsetenv may invalidate the getenv result. */
	strcpy (preload_buf, old_preload);
	unsetenv ("LD_PRELOAD");
      }
    pipe = popen (cmd_buf, "r");
    if (0 != old_preload && 0 != setenv ("LD_PRELOAD", preload_buf, 0))
      {
	abort ();
      }
    if (pipe != NULL)
      {
	/* Read at most TMP_SZ - 1 bytes so the terminator below fits. */
	result_len = fread (tmp_buf, 1, TMP_SZ - 1, pipe);
	pclose(pipe);
      }
    if (pipe == NULL || result_len == 0)
      {
	will_always_fail = 1;
	goto fail;
      }
    if (tmp_buf[result_len - 1] == '\n')
      --result_len;
    tmp_buf[result_len] = '\0';
    if (result_len == 0)
      goto fail;		/* Output was just a newline. */
    /* A trailing ":0" means addr2line found no line number. */
    if (result_len >= 2
	&& tmp_buf[result_len - 2] == ':' && tmp_buf[result_len - 1] == '0')
      {
	if (tmp_buf[0] == '?')
	  goto fail;
	else
	  {
	    /* The function name tends to be better than what we get	*/
	    /* from backtrace_symbols.					*/
	    char *nl = strchr (tmp_buf, '\n');
	    if (nl == NULL || nl == tmp_buf)
	      goto fail;
	    *nl = '\0';
	    result_len = nl - tmp_buf;
	  }
      }
    /* Get rid of embedded newline, if any.  			    */
    /* Remove everything but last pathname component in the file.   */
    {
      char *nl = strchr (tmp_buf, '\n');
      char *slash;
      if (nl != NULL)
	{
	  *nl = ':';
	  slash = strrchr (nl, '/');
	  if (0 != slash)
	    {
	      memmove (nl + 1, slash + 1, strlen (slash) /* incl. null */ );
	      result_len -= (slash - nl);
	    }
	}
    }
    /* We are finished shrinking the string.  Now copy it to its real	*/
    /* destination.							*/
    if (result_len > ncols)
      {
	result_len = ncols;
	tmp_buf[result_len] = '\0';
      }
    strcpy(result_buf, tmp_buf);
    /* Room reserved for " [0x...]"; the ncols test avoids unsigned	*/
    /* wrap-around when ncols is small.					*/
    addr_room = (sizeof(char *) == 4)? 14: 22;
    if (ncols > addr_room && result_len < ncols - addr_room)
      {
	/* Add in hex address */
	sprintf (result_buf + result_len, " [%p]", (void *) pc);
      }
    goto done;
  fail:
    {
      unsigned long my_pc = pc;
      char **sym_name = backtrace_symbols ((void **) (&my_pc), 1);
      char *name;

      if (sym_name == NULL)
	{
	  /* backtrace_symbols could not allocate; show the raw address. */
	  snprintf (result_buf, ncols + 1, "[%p]", (void *) pc);
	  goto done;
	}
      name = strrchr (sym_name[0], '/');
      if (0 == name)
	{
	  name = sym_name[0];
	}
      else
	{
	  /* Use only the last component of the file name plus      */
	  /* what follows.                                          */
	  ++name;
	}
      strncpy (result_buf, name, ncols);
      result_buf[ncols] = '\0';
      free (sym_name);
    }
  }
done:
  {
    /* Pad with blanks to exactly ncols characters. */
    size_t result_len = strlen (result_buf);
    size_t i;

    for (i = result_len; i < ncols; ++i)
      {
	result_buf[i] = ' ';
      }
    result_buf[ncols] = '\0';
  }
  AO_fetch_and_sub1_release(&reentry_count);
}

/*
 * Routines to update and print the contents of a list of PC samples.
 * FIXME: Implement a variant for hsd_map.
 */

/* One recorded sample: the sampled program counter.  QPROF_buffer	*/
/* points at the array of samples; it is set up by			*/
/* QPROF_pc_sample_list_init and is 0 until then.			*/
volatile struct prof_entry
{
  AO_T pc;
} *QPROF_buffer = 0;

/* qsort comparator: orders prof_entry records by ascending pc. */
static int
prof_compare (const void *arg1, const void *arg2)
{
  const unsigned long lhs = ((struct prof_entry *) arg1)->pc;
  const unsigned long rhs = ((struct prof_entry *) arg2)->pc;

  /* Yields -1, 0 or 1 without risking overflow from a subtraction. */
  return (lhs > rhs) - (lhs < rhs);
}

/* Capacity of QPROF_buffer in entries; may be overridden from		*/
/* QPROF_BUFFER_SIZE in QPROF_pc_sample_list_init.			*/
unsigned long QPROF_buf_size = 400000;

volatile AO_T QPROF_buf_pos = 0;  /* Next position in buffer to be used */

volatile AO_T QPROF_samples = 0;  /* Number of times we sampled.  Only	*/
				  /* incremented once even if entire	*/
				  /* call stack is sampled.		*/

/* Set to 1 by add_sample once a sample had to be dropped because the	*/
/* buffer was full.							*/
volatile AO_T QPROF_buffer_overflowed = 0;

/*
 * Append pc to the sample buffer.  A slot is claimed atomically; if the
 * buffer is already full the sample is dropped and the overflow flag set.
 */
static void
add_sample(unsigned long pc)
{
  /* The store below may not be visible when QPROF_buf_pos is	*/
  /* incremented.  Earlier writes are.				*/
  const unsigned long slot = AO_fetch_and_add1_release(&QPROF_buf_pos);

  if (slot >= QPROF_buf_size)
    {
      /* Pin the position at the end so wrap-around is VERY unlikely.	*/
      /* This effectively truncates the profile.			*/
      AO_store(&QPROF_buf_pos, QPROF_buf_size);
      AO_store(&QPROF_buffer_overflowed, 1);
      return;
    }
  AO_store(&(QPROF_buffer[slot].pc ), pc);
}


/* SET_PC declares `sc' (a struct sigcontext pointer) and `pc' (the	*/
/* sampled program counter) inside a signal handler whose parameters	*/
/* are named si and scv.						*/
#if defined(__i386__)
  /* FIXME: This assumes SA_SIGINFO is not specified, which makes	*/
  /* this uglier than necessary.  Currently our patched libunwind 	*/
  /* makes the same assumption, so it's premature to fix this.		*/
# define SET_PC \
    struct sigcontext *sc = (struct sigcontext *) &si; \
    unsigned long pc = (AO_T)(sc->eip)
#elif defined(__ia64__)
# define SET_PC \
    struct sigcontext *sc = (struct sigcontext *) scv; \
    unsigned long pc = (AO_T)(sc->sc_ip)
#elif defined(__hppa__)
# include <ucontext.h>
# define SET_PC \
    struct sigcontext *sc = (struct sigcontext *) scv; \
    unsigned long pc = (AO_T)(sc->sc_iaoq[0]) & ~3 /* Remove PL bits */;
    /* FIXME: The comment here used to mention an "offset of 24", an	*/
    /* empirically derived hack which appeared to work on 32 bit	*/
    /* kernels; no such offset is visible in the macro above.  As I	*/
    /* recall, we should be using ucontext_t * as the type of the last	*/
    /* argument.  But it appears hard to extract iaoq[0] from that.	*/
#else
# define SET_PC \
    struct sigcontext *sc = (struct sigcontext *) scv; \
    unsigned long pc = (AO_T)(sc->sc_pc)
#endif

/*
 * Signal handler that records only the interrupted pc.
 * Adds one buffer entry and bumps the sample count; when sampling is
 * driven by hardware events, it also re-arms the counter.
 */
void
QPROF_pc_sample_list_handler (int signo, siginfo_t * si, void *scv)
{
  SET_PC;

  add_sample(pc);
  AO_fetch_and_add1(&QPROF_samples);
  if (source != HW_EVENT)
    return;
  QPROF_restart_hw_event();
}

#ifndef NO_UNWIND
/*
 * Signal handler that records the whole interrupted call stack.
 * Unwinds from the handler itself with libunwind, ignoring frames up to
 * and including the signal frame, and adds one buffer entry for each
 * frame after that.  The sample count is still incremented only once.
 */
void
QPROF_pc_sample_list_stack_handler (int signo, siginfo_t * si, void *scv)
{
  unw_context_t context;
  unw_cursor_t cursor;
  int past_signal_frame = 0;

  if (0 != unw_getcontext(&context))
    QPROF_error("unw_getcontext failed");
  if (0 != unw_init_local(&cursor, &context))
    QPROF_error("unw_init_local failed");
  do
    {
      unw_word_t ip;

      if (!past_signal_frame)
        {
          /* Still inside the handler's own frames; record nothing. */
          past_signal_frame = (unw_is_signal_frame(&cursor) > 0);
          continue;
        }
      if (0 != unw_get_reg(&cursor, UNW_REG_IP, &ip))
        QPROF_error("unw_get_reg failed");
      add_sample(ip);
    }
  while (unw_step(&cursor) > 0);
  AO_fetch_and_add1(&QPROF_samples);
  if (source != HW_EVENT)
    return;
  QPROF_restart_hw_event();
}
#endif /* !NO_UNWIND */

/*
 * Allocate the sample buffer with sbrk.
 * QPROF_BUFFER_SIZE, if set, gives the number of entries; it must be at
 * least 10 and small enough that the byte count fits in a long.
 */
void QPROF_pc_sample_list_init()
{
  char *size_env = getenv ("QPROF_BUFFER_SIZE");

  if (size_env != NULL)
    {
      long requested = atoi (size_env);

      if (!(requested >= 10
            && requested < LONG_MAX / sizeof (struct prof_entry)))
        QPROF_error("QPROF_BUFFER_SIZE must be >= 10, < address space");
      else
        QPROF_buf_size = requested;
    }
  QPROF_buffer = sbrk (QPROF_buf_size * sizeof (struct prof_entry));
  if (QPROF_buffer == (void *) (-1))
    QPROF_error("Sbrk for QPROF_buffer failed");
}

static int QPROF_color = -1;
			/* Character attribute value in escape sequence. */
  			/* -1 means no color change.			 */
  			/* Conceivably this could really be a font	 */
  			/* change etc.					 */
static int QPROF_undo_color = -1;
			/* Character attribute to change color back;	 */
  			/* currently a guess.  -1 means no need to	 */
  			/* change back.				 	 */

/* Nonzero once QPROF_start_color has examined QPROF_COLOR. */
static int QPROF_color_initialized = 0;

/*
 * Emit the escape sequence that switches f to the profile colour.
 * On the first call QPROF_COLOR is parsed: "blue", "red" and "green" are
 * accepted by name, anything else is read as a numeric attribute, with
 * values outside 1..59 replaced by 34.  The matching "undo" attribute is
 * chosen from the attribute's range.  Nothing is written if QPROF_COLOR
 * is unset.
 */
void QPROF_start_color(FILE *f)
{
  if (!QPROF_color_initialized)
    {
      char *requested = getenv ("QPROF_COLOR");

      if (requested != 0)
        {
          int attr;

          if (0 == strcmp(requested, "blue"))
            attr = 34;
          else if (0 == strcmp(requested, "red"))
            attr = 31;
          else if (0 == strcmp(requested, "green"))
            attr = 32;
          else
            {
              attr = atoi(requested);
              if (!(attr >= 1 && attr < 60))
                attr = 34;	/* blue fg */
            }
          QPROF_color = attr;

          /* The four ranges are disjoint, so at most one applies. */
          if (attr >= 30 && attr <= 38)
            QPROF_undo_color = 39;  /* default fg QPROF_color */
          else if (attr >= 40 && attr <= 48)
            QPROF_undo_color = 49;  /* default bg QPROF_color */
          else if (attr >= 1 && attr <= 9)
            QPROF_undo_color = 0;   /* default mode     */
          else if (attr >= 11 && attr <= 19)
            QPROF_undo_color = 10;  /* primary font     */
        }
      QPROF_color_initialized = 1;
    }

  if (-1 == QPROF_color)
    return;
  fprintf(f, "\033[%dm", QPROF_color);
}

/* Emit the escape sequence that undoes QPROF_start_color, if one was	*/
/* selected.								*/
void QPROF_end_color(FILE *f)
{
  if (-1 == QPROF_undo_color)
    return;
  fprintf(f, "\033[%dm", QPROF_undo_color);
}

/* One line of /proc/self/maps as parsed by QPROF_get_maps: a mapped	*/
/* address range [start, end), its file offset, and the mapped path.	*/
/* path is a strdup'ed copy that the list's owner frees.		*/
struct map
  {
    struct map *next;
    unsigned long long start;
    unsigned long long end;
    unsigned long long offset;
    const char *path;
  };

/* State handed to the dl_iterate_phdr callback: the parsed mappings	*/
/* and the stream the object descriptions are written to.		*/
struct callback_info
  {
    struct map *maps;
    FILE *fp;
  };

/*
 * Parse /proc/self/maps into a singly linked list of struct map, in file
 * order.  Only lines that name a path are kept, and mappings whose
 * permissions start with "---" are skipped.
 *
 * Returns the head of the list, or NULL if the file cannot be opened or
 * holds no usable entries.  If memory runs out part-way, the entries
 * gathered so far are returned.  The caller owns the list and must free
 * each node and its path.
 */
static struct map*
QPROF_get_maps (void)
{
  FILE *fp = fopen ("/proc/self/maps", "r");
  struct map *m, *list = NULL, *list_end = NULL;
  char *line = NULL;
  size_t line_size = 0;

  if (fp == NULL)
    return NULL;

  while (getline (&line, &line_size, fp) > 0)
    {
      unsigned long long start, end, offset;
      /* One extra byte each: %Ns stores up to N characters plus '\0'. */
      char path[4096 + 1], perm[4 + 1];

      if (sscanf (line, "%llx-%llx %4s %llx %*x:%*x %*u %4096s\n",
		  &start, &end, perm, &offset, path) != 5)
	continue;

      if (strncmp (perm, "---", 3) == 0)
	/* ignore inaccessible mappings */
	continue;

      m = malloc (sizeof (*m));
      if (!m)
	{
	  fprintf (stderr, "qprof: failed to allocate %zu bytes (%s)\n",
		   sizeof (*m), strerror (errno));
	  break;		/* return what we have... */
	}

      memset (m, 0, sizeof (*m));
      m->start = start;
      m->end = end;
      m->offset = offset;
      m->path = strdup (path);
      if (!m->path)
	{
	  /* Users of the list dereference path, so drop this entry. */
	  fprintf (stderr, "qprof: failed to copy mapping path (%s)\n",
		   strerror (errno));
	  free (m);
	  break;		/* return what we have... */
	}
      m->next = NULL;
      if (list_end)
	{
	  list_end->next = m;
	  list_end = m;
	}
      else
	list = list_end = m;
    }
  /* Common exit: release the line buffer and the stream on every path. */
  free (line);
  fclose (fp);
  return list;
}

/*
 * dl_iterate_phdr callback: write a (q:object ...) description of one
 * loaded object to the stream in the callback_info passed as data.
 *
 * The object's name is the last path component of dlpi_name, or of the
 * executable's name when dlpi_name is empty.  The file path is taken from
 * the mapping that contains the object's program headers, and every
 * mapping with that same path is listed under q:maps.  If no mapping
 * contains the program headers, only the name is written.
 *
 * Returns 0 to continue the iteration, or -1 if the dl_phdr_info passed
 * in is smaller than the structure this code was compiled against.
 */
static int
QPROF_write_object_info (struct dl_phdr_info *info, size_t size, void *data)
{
  struct callback_info *ci = data;
  const char *path, *name;
  FILE *ofp = ci->fp;
  char buf[PATH_MAX];
  struct map *m;
  int first = 1;

  /* Validate the structure size before reading any of its fields. */
  if (size < sizeof (struct dl_phdr_info))
    return -1;

  if (info->dlpi_name && info->dlpi_name[0])
    path = info->dlpi_name;
  else
    path = QPROF_get_exe_name ();
  if (path == NULL)
    path = "";		/* Executable name unavailable. */

  name = strrchr (path, '/');
  if (name)
    ++name;
  else
    name = path;

  path = "";

  for (m = ci->maps; m; m = m->next)
    if (m->start <= (unsigned long long) (size_t) info->dlpi_phdr
	&& m->end > (unsigned long long) (size_t) info->dlpi_phdr)
      {
	path = m->path;
	break;
      }

  fprintf (ofp, "(q:object '((q:name . \"%s\")\n", name);

  if (!path[0])
    {
      /* No backing file found: close the form so output stays balanced. */
      fprintf (ofp, "))\n");
      return 0;
    }

  for (m = ci->maps; m; m = m->next)
    {
      if (m->path[0] == path[0] && strcmp (m->path, path) == 0)
	{
	  if (first)
	    {
	      first = 0;
	      q_checksummed_link (Q_LINK_ANY, buf, sizeof (buf), name, path);
	      fprintf (ofp,
		       "            (q:file . \"%s\")\n"
		       "            (q:maps . (",
		       buf);
	    }
	  else
	    fprintf (ofp, "\n                        ");
	  fprintf (ofp, "((q:addr . #x%llx) (q:size . %llu) "
		   "(q:offset . #x%llx))",
		   m->start, m->end - m->start, m->offset);
	}
    }
  if (!first)
    fprintf (ofp, "))");
  fprintf (ofp, "))\n");
  return 0;
}

void
QPROF_pc_sample_list_write_q_profile (void)
{
    unsigned long my_buf_size = AO_load_acquire_read(&QPROF_buf_pos);
    char buf[PATH_MAX], *exe_path, *prog_name;
    struct map *maps, *m, *m_next;
    const char *hist_file_name;
    unsigned long count = 0;
    FILE *info, *hist, *fp;
    pid_t pid = getpid ();
    unsigned long i;

    exe_path = QPROF_get_exe_name ();
    prog_name = strrchr (exe_path, '/');
    if (prog_name)
      ++prog_name;
    else
      prog_name = exe_path;

    snprintf (buf, sizeof (buf), "%s-pid%d.info", prog_name, pid);
    info = q_create_file (buf, sizeof (buf));
    if (!info)
      {
	fprintf (stderr, "qprof: couldn't create info file `%s'\n", buf);
	return;
      }

    /* dump the command-line to the info file: */
    fprintf (info, "(q:info '((q:cmdline . (");
    fp = fopen ("/proc/self/cmdline", "r");
    if (fp)
      {
	char *line = NULL;
	size_t line_size = 0;
	int not_first = 0;

	while (!feof (fp))
	  {
	    if (getdelim (&line, &line_size, '\0', fp) >= 0)
	      {
		if (not_first)
		  fputc (' ', info);
		fprintf (info, "\"%s\"", line);
		not_first = 1;
	      }
	  }
	if (line)
	  free (line);
	fclose (fp);
      }
    fprintf (info, "))))\n");

    /* dump object info: */
    maps = QPROF_get_maps ();
    if (maps)
      {
	struct callback_info ci;
	ci.maps = maps;
	ci.fp = info;
	dl_iterate_phdr (QPROF_write_object_info, &ci);
	for (m = maps; m; m = m_next)
	  {
	    m_next = m->next;
	    if (m->path)
	      free ((void *) m->path);
	    free (m);
	  }
      }

    snprintf (buf, sizeof (buf), "%s-pid%d.hist", prog_name, pid);
    hist = q_create_file (buf, sizeof (buf));
    if (!hist)
      {
	fprintf (stderr, "qprof: couldn't create histogram file\n");
	return;
      }
    hist_file_name = strdup (buf);

    fprintf (info,
	     "(q:histogram '((q:file . \"%s\")\n"
	     "                (q:x-unit-label . \"address\")",
	     hist_file_name);
    if (source == TIMER)
      {
	fprintf (info, "\n"
		 "                (q:event-name . \"%s\")\n"
		 "                (q:y-unit-label . \"seconds\")\n"
		 "                (q:y-unit-conversion-factor . %g)\n"
		 "                (q:y-granularity . %g)",
		 (real_time ? "time" : "virtual-time"),
		 interval * 1e-6, interval * 1e-6);
      }
    else
      {
	const char *event_name = getenv ("QPROF_HW_EVENT");

	/* XXX make this more general and independent of Itanium PMU... */
	if (strcmp (event_name, "CPU_CYCLES") == 0)
	  fprintf (info, "\n"
		   "                (q:y-unit-label . \"seconds\")");
	fprintf (info, "\n"
		 "                (q:event-name . \"%s\")\n"
		 "                (q:y-granularity . %ld)\n",
		 event_name, interval);
      }
    fprintf (info, "))\n");

    qsort ((struct prof_entry *) QPROF_buffer, my_buf_size,
	   sizeof (struct prof_entry), prof_compare);
    for (i = 0; i < my_buf_size; ++i) {
	if (AO_load(&QPROF_buffer[i].pc) == 0)
	    /* This can happen if a signal handler is still running */
	    continue;
	++count;
	if (i + 1 < my_buf_size
	    && prof_compare((const void *) (QPROF_buffer + i),
			    (const void *) (QPROF_buffer + i + 1)) == 0)
	    continue;
	fprintf (hist, "0x%lx %lu\n", QPROF_buffer[i].pc, count);
	count = 0;
    }

    q_close (hist);
    q_close (info);
}

void
QPROF_pc_sample_list_print_profile (FILE * f)
{
  unsigned long my_buf_size = AO_load_acquire_read(&QPROF_buf_pos);
  size_t i;
  unsigned long repeated = 0;
  char *summary_level = getenv ("QPROF_GRANULARITY");
  int per_function = 0, per_instruction = 0, per_line = 1;
  char *ncols_text = getenv ("QPROF_NCOLS");
# define MAX_NCOLS 1024
# define MIN_NCOLS 30
# define DEFAULT_NCOLS 64
  int ncols = DEFAULT_NCOLS;
  char prev_text[MAX_NCOLS + 1];
  char curr_text[MAX_NCOLS + 1];
  unsigned long nsamples;
  
  if (HW_EVENT == source)
    QPROF_stop_hw_event();

  QPROF_start_color(f);
  if (ncols_text != 0)
    {
      ncols = atoi(ncols_text);
      if (ncols < MIN_NCOLS)
	{
	  fprintf(f, "Setting QPROF_NCOLS to minimum of %d\n", MIN_NCOLS);
	  ncols = MIN_NCOLS;
	}
      if (ncols > MAX_NCOLS)
	{
	  fprintf(f, "Setting QPROF_NCOLS to maximum of %d\n", MAX_NCOLS);
	  ncols = MAX_NCOLS;
	}
    }
  {
    char * exe_name = QPROF_get_exe_name ();
    char buf[100];

    if (0 == QPROF_get_exe_name ()) {
      /* Use pid instead.	*/
      sprintf(buf, "pid%d", getpid());
      exe_name = buf;
    }
    nsamples = (unsigned long)AO_load(&QPROF_samples);
    fprintf (f, "qprof: %s: %lu samples, %lu counts\n",
	     exe_name, nsamples, my_buf_size);
  }
  /* New entries may still be added; we ignore them.      */
  if (AO_load(&QPROF_buffer_overflowed))
    fprintf (f, "qprof: WARNING: buffer overflowed, "
	     "samples were dropped.  " "Try setting QPROF_BUFFER_SIZE.\n");
  qsort ((struct prof_entry *) QPROF_buffer, my_buf_size,
	 sizeof (struct prof_entry), prof_compare);

  if (0 != summary_level)
    {
      per_instruction = (strcmp (summary_level, "instruction") == 0);
      per_function = (strcmp (summary_level, "function") == 0);
      per_line = (!per_instruction && !per_function);
      if (per_line && strcmp (summary_level, "line") != 0)
        {
          fprintf (f, "qprof: WARNING: QPROF_GRANULARITY should be instruction,"
	           " line, or function.  Assuming line.");
        }
    }
  prev_text[0] = '\0';
  for (i = 0; i < my_buf_size; ++i)
    {
      if (AO_load(&(QPROF_buffer[i].pc)) == 0)
	{
	  /* This can happen if a signal handler is still running	*/
	  continue;
	}
      if (i != 0
	  && prof_compare ((const void *) (QPROF_buffer + i - 1),
			   (const void *) (QPROF_buffer + i)) == 0)
	{
	  ++repeated;
	  continue;
	}
      QPROF_format_pc (curr_text, AO_load(&(QPROF_buffer[i].pc)), ncols);
      /* If we want line or function granularity, we take advantage	*/
      /* of the fact that that a prefix of the textual pc 		*/
      /* PC representation contains exactly enough text to distinguish	*/
      /* samples that should be considered different.  We find that	*/
      /* prefix, remove the stuff we don't want to see, and merge with	*/
      /* the previous entry if the remaining textual description is 	*/
      /* identical.  This is all a quick-and-dirty hack, but it 	*/
      /* works ...							*/
      if (per_line || per_function)
	{
	  char *delim = strrchr (curr_text, '+');

	  if (0 == delim)
	    {
	      if (per_line)
		{
		  delim = strchr (curr_text, ' ');
		}
	      else
		{
		  delim = strrchr (curr_text, ':');
		}
	    }
	  else
	    {
	      *delim = ')';
	      ++delim;
	    }
	  if (0 != delim)
	    {
	      char *p;
	      for (p = delim; p < curr_text + ncols; ++p)
		*p = ' ';
	    }
	  if (delim != 0
	      && strncmp (prev_text, curr_text, delim - curr_text) == 0)
	    {
	      ++repeated;
	      continue;
	    }
	}
      if (i != 0)
	{
	  fprintf (f, "%s %lu\t(%3ld%%)\n", prev_text, repeated,
			  		    (100*repeated+nsamples/2)/nsamples);
	}
      strcpy (prev_text, curr_text);
      repeated = 1;
    }
  if (repeated != 0)
    fprintf (f, "%s %lu\t(%3ld%%)\n", prev_text, repeated,
			  	      (100*repeated+nsamples/2)/nsamples);

  QPROF_end_color(f);
}
