/* stringfuncs.c: -*- C -*-  String manipulation functions for Meta-HTML. */

/*  Copyright (c) 1997 Brian J. Fox
    Author: Brian J. Fox (bfox@ai.mit.edu) Sat Jul 19 14:44:32 1997.

   This file is part of <Meta-HTML>(tm), a system for the rapid deployment
   of Internet and Intranet applications via the use of the Meta-HTML
   language.

   Copyright (c) 1995, 1996, 1997 Brian J. Fox (bfox@ai.mit.edu).
   Copyright (c) 1996, 1997 Universal Access Inc. (http://www.ua.com).

   Meta-HTML is free software; you can redistribute it and/or modify
   it under the terms of the UAI Free Software License as published
   by Universal Access Inc.; either version 1, or (at your option) any
   later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   UAI Free Software License for more details.

   You should have received a copy of the UAI Free Software License
   along with this program; if you have not, you may obtain one by
   writing to:

   Universal Access Inc.
   129 El Paseo Court
   Santa Barbara, CA
   93101  */

#include "language.h"

/************************************************************/
/*							    */
/*		   String Manipulation Functions	    */
/*							    */
/************************************************************/

#if defined (__cplusplus)
extern "C"
{
#endif

/* Forward declarations of the tag handlers defined later in this file,
   so that func_table (below) can reference them before their bodies
   appear.  Every handler shares the PFunArgs parameter list, which is
   declared outside this file. */
static void pf_string_length (PFunArgs);
static void pf_match (PFunArgs);
static void pf_string_compare (PFunArgs);
static void pf_substring (PFunArgs);
static void pf_subst_in_var (PFunArgs);
static void pf_subst_in_string (PFunArgs);
static void pf_upcase (PFunArgs);
static void pf_downcase (PFunArgs);
static void pf_capitalize (PFunArgs);
static void pf_string_eq (PFunArgs);
static void pf_string_neq (PFunArgs);
static void pf_plain_text (PFunArgs);
static void pf_char_offsets (PFunArgs);

  /* Table of the string operators implemented in this file: each entry
     pairs a Meta-HTML tag name with the handler that implements it, and
     the list is terminated by an all-NULL entry.

     NOTE(review): the meaning of the two integer fields is defined by
     PFunDesc, which is not visible here.  The first integer is 1 only
     for PLAIN-TEXT, the one handler defined with DEFMACRO rather than
     DEFUN -- presumably it marks a tag that takes a body; confirm
     against the PFunDesc declaration. */
static PFunDesc func_table[] =
{
  { "STRING-LENGTH",	0, 0, pf_string_length },
  { "MATCH",		0, 0, pf_match },
  { "STRING-COMPARE",	0, 0, pf_string_compare },
  { "SUBSTRING",	0, 0, pf_substring },
  { "SUBST-IN-STRING",	0, 0, pf_subst_in_string },
  { "SUBST-IN-VAR",	0, 0, pf_subst_in_var },
  { "UPCASE",		0, 0, pf_upcase },
  { "DOWNCASE",		0, 0, pf_downcase },
  { "CAPITALIZE",	0, 0, pf_capitalize },
  { "STRING-EQ",	0, 0, pf_string_eq },
  { "STRING-NEQ",	0, 0, pf_string_neq },
  { "PLAIN-TEXT",	1, 0, pf_plain_text },
  { "CHAR-OFFSETS",	0, 0, pf_char_offsets },

  { (char *)NULL,	0, 0, (PFunHandler *)NULL }
};

/* Package hook and documentation section header for the string
   operators.  PACKAGE_INITIALIZER and DEFINE_SECTION are project macros
   defined outside this file; presumably the former emits the
   initializer that registers func_table and the latter records the
   section text for the generated manual -- confirm against their
   definitions.

   NOTE(review): the section text refers to a "pad" function, but no
   PAD entry appears in func_table in this file. */
PACKAGE_INITIALIZER (initialize_string_functions)
DEFINE_SECTION (STRING-OPERATORS, strings; characters; changing case,
"There is a single function in <meta-html> which performs pattern
matching, substring extraction, and substring deletion.  For
convenience, a blind substring extraction function is supplied as
well.  Three functions perform the three most common case changes.
Finally, the <funref \"string operators\" pad> function allows
alignment of fixed-width text.  ", "")

/* <string-length STRING>: evaluate the first positional argument and
   insert its length, formatted as a decimal number, at START in PAGE.
   When evaluation yields no string at all, the inserted length is 0. */
DEFUN (pf_string_length, string,
"Returns the number of characters present in <var string>.

<complete-example>
<string-length \"This is an interesting string\">
</complete-example>")
{
  char *text = mhtml_evaluate_string (get_positional_arg (vars, 0));
  int count = (text == (char *)NULL) ? 0 : strlen (text);

  /* The evaluated copy is ours; release it before emitting the count. */
  xfree (text);

  bprintf_insert (page, start, "%d", count);
}

/* Number of regmatch_t slots handed to regexec(): slot 0 is the whole
   match, the remaining slots are parenthesized subexpressions. */
#define MAX_SUBEXPS 10

/* <match STRING REGEX [action=...] [caseless] [package=NAME]>

   Evaluates STRING and REGEX, compiles REGEX as a POSIX extended
   regular expression (case-insensitively when the "caseless" variable
   is present), and matches it against STRING.  The text inserted at
   START depends on ACTION, which defaults to "report":

     report    "true" on a match, nothing otherwise.
     extract   the matched substring, nothing without a match.
     delete    STRING with the matched span removed (STRING unchanged
               when nothing matched).
     startpos, endpos, length
               the corresponding number for the whole match, or an
               empty string without a match.

   When a match occurs and PACKAGE is given, the pattern and the
   per-subexpression start/end/length values (plus the matched text
   when ACTION is "extract") are also stored in that package.

   A pattern that fails to compile is treated as a failed match; the
   regex_t is then neither searched with nor freed. */
DEFUN (pf_match, string regex
       &key action=[delete|extract|report|startpos|endpos|length],
"Matches <var regexp> against <var string>, and then performs the
indicated <var action>.  The default for <var action> is \"report\".

When action is \"report\" (the default), returns \"true\" if <var
regex> matched.<br>
When action is \"extract\", returns the substring of <var string>
matching <var regex>.<br>
When action is \"delete\", returns <var string> with the matched
substring removed.<br>
When action is \"startpos\", returns the numeric offset of the start of
the matched substring.<br>
When action is \"endpos\", returns the numeric offset of the end of the
matched substring.

<var regexp> is an extended Unix regular expression, the complete syntax of
which is beyond the scope of this document.  However, the essential
basics are:
<ul>
<li> A period (<code>.</code>) matches any one character.
<li> An asterisk (<code>*</code>) matches any number of occurrences of
the preceding expression, including none.
<li> A plus-sign matches one or more occurrences of the preceding expression.
<li> Square brackets are used to enclose lists of characters which may
match.  For example, \"[a-zA-Z]+\" matches one or more occurrences of
alphabetic characters.
<li> The vertical bar is used to separate alternate expressions to
match against.  For example, \"foo|bar\" says to match either \"foo\"
<i>or</i> \"bar\".
<li> A dollar-sign (<code>$</code>) matches the end of <var STRING>.
<li> Parenthesis are used to group subexpressions.
</ul>

Here are a few examples:

<example>
  <match \"foobar\" \".*\">                 --> \"true\"
  <match \"foobar\" \"foo\">                --> \"true\"
  <match \"foobar\" \"foo\" action=extract> --> \"foo\"
  <match \"foobar\" \"oob\" action=delete>  --> \"far\"
  <match \"foobar\" \"oob\" action=startpos>--> \"1\"
  <match \"foobar\" \"oob\" action=endpos>  --> \"4\"
  <match \"foobar\" \"oob\" action=length>  --> \"3\"
  <match \"foobar\" \"[0-9]+\">             --> \"\"
</example>")
{
  char *raw_string = get_positional_arg (vars, 0);
  char *raw_regex = get_positional_arg (vars, 1);
  char *result = (char *)NULL;

  if (raw_string && raw_regex)
    {
      char *string = mhtml_evaluate_string (raw_string);
      char *regex = mhtml_evaluate_string (raw_regex);
      int caseless = var_present_p (vars, "caseless");
      char *action = (char *)NULL;

      if ((string != (char *)NULL) && (regex != (char *)NULL))
        {
          /* Only up to MAX_SUBEXPS subexpressions kept. */
          regex_t re;
          regmatch_t offsets[MAX_SUBEXPS];
          int slen = strlen (string);
          int compiled;
          int matched = 0;
          int so = 0, eo = 0, len = 0;
          char *temp = mhtml_evaluate_string (get_value (vars, "action"));
          char *packname = mhtml_evaluate_string (get_value (vars, "package"));

          if (!empty_string_p (temp))
            action = temp;
          else
            {
              xfree (temp);
              action = strdup ("report");
            }

          /* regcomp() leaves RE unusable on failure, so remember whether
             it succeeded before calling regexec() or regfree(). */
          compiled = (regcomp (&re, regex,
                               REG_EXTENDED | (caseless ? REG_ICASE : 0))
                      == 0);

          if (compiled)
            matched = (regexec (&re, string, MAX_SUBEXPS, offsets, 0) == 0);

          if (matched)
            {
              /* regoff_t is not necessarily int; convert once here. */
              so = (int)offsets[0].rm_so;
              eo = (int)offsets[0].rm_eo;
              len = eo - so;
            }

          /* If the caller has specified a package to receive the detailed
             results of the match, put the information there now. */
          if (matched && packname)
            {
              register int i, limit;
              Package *p = symbol_get_package (packname);
              Symbol *starts, *ends, *lengths;
              Symbol *matches = (Symbol *)NULL;
              char digitbuff[40];

              forms_set_tag_value_in_package (p, "expr", regex);
              starts = symbol_intern_in_package (p, "start");
              ends = symbol_intern_in_package (p, "end");
              lengths = symbol_intern_in_package (p, "length");
              if (strcasecmp (action, "extract") == 0)
                matches = symbol_intern_in_package (p, "matches");

              /* Find the highest-numbered slot that took part in the
                 match; slot 0 always did, so LIMIT ends up >= 1. */
              for (limit = MAX_SUBEXPS; limit; limit--)
                if (offsets[limit - 1].rm_so != -1)
                  break;

              /* "matched" holds the number of subexpression slots in
                 use, not counting the whole match in slot 0. */
              sprintf (digitbuff, "%d", limit - 1);
              forms_set_tag_value_in_package (p, "matched", digitbuff);

              for (i = 0; i < limit; i++)
                {
                  int sub_so = (int)offsets[i].rm_so;
                  int sub_eo = (int)offsets[i].rm_eo;
                  int sublen = sub_eo - sub_so;

                  sprintf (digitbuff, "%d", sub_so);
                  symbol_add_value (starts, digitbuff);
                  sprintf (digitbuff, "%d", sub_eo);
                  symbol_add_value (ends, digitbuff);
                  sprintf (digitbuff, "%d", sublen);
                  symbol_add_value (lengths, digitbuff);

                  if (matches != (Symbol *)NULL)
                    {
                      char *substring;

                      /* A subexpression that did not participate has
                         offsets of -1; record it as an empty string
                         without forming a pointer before STRING. */
                      if ((sub_so < 0) || (sublen < 0))
                        sublen = 0;

                      substring = (char *)xmalloc (1 + sublen);
                      if (sublen > 0)
                        strncpy (substring, string + sub_so, sublen);
                      substring[sublen] = '\0';
                      symbol_add_value (matches, substring);
                      free (substring);
                    }
                }
            }

          if (packname != (char *)NULL) free (packname);

          if (matched && strcasecmp (action, "report") == 0)
            {
              result = strdup ("true");
            }
          else if (matched && (strcasecmp (action, "extract") == 0))
            {
              result = (char *)xmalloc (1 + len);
              strncpy (result, string + so, len);
              result[len] = '\0';
            }
          else if (strcasecmp (action, "delete") == 0)
            {
              result = strdup (string);
              /* Close the gap over the match; the +1 moves the NUL too. */
              if (matched)
                memmove (result + so, result + eo, (slen + 1) - eo);
            }
          else if ((strcasecmp (action, "startpos") == 0) ||
                   (strcasecmp (action, "endpos") == 0) ||
                   (strcasecmp (action, "length") == 0))
            {
              result = (char *)xmalloc (20);
              result[0]= '\0';

              if (matched)
                {
                  if (strcasecmp (action, "startpos") == 0)
                    sprintf (result, "%d", so);
                  else if (strcasecmp (action, "endpos") == 0)
                    sprintf (result, "%d", eo);
                  else
                    sprintf (result, "%d", len);
                }
            }

          if (compiled)
            regfree (&re);
        }

      xfree (string);
      xfree (regex);
      xfree (action);
    }

  if (result)
    {
      bprintf_insert (page, start, "%s", result);
      *newstart += strlen (result);
      free (result);
    }
}

/* Resolve one index argument of <substring>.  An empty ARG yields
   FALLBACK; a numeric ARG (per number_p) yields its atoi() value;
   anything else is taken, after skipping leading whitespace, as the
   name of a variable whose value is converted with atoi().  If that
   variable has no value, FALLBACK is returned. */
static int
substring_parse_index (char *arg, int fallback)
{
  register int i;
  char *value;

  if (empty_string_p (arg))
    return (fallback);

  if (number_p (arg))
    return (atoi (arg));

  for (i = 0; whitespace (arg[i]); i++);
  value = pagefunc_get_variable (arg + i);

  return ((value != (char *)NULL) ? atoi (value) : fallback);
}

/* <substring STRING [START [END]]>

   Inserts, at START in PAGE, the characters of the evaluated STRING
   from offset START (default 0) up to but not including offset END
   (default: the length of STRING).  The two offsets are swapped if
   given in reverse order, and are then clamped to the range
   0..strlen (STRING), so that no read ever falls outside the string.
   Nothing is inserted when the resulting range is empty. */
DEFUN (pf_substring, string &key start end,
"Extracts the substring of <var string> whose first character starts
at offset <var start>, and whose last character ends at offset
<var end>. The indexing is zero-based, so that:

<example>
  <substring \"Hello\" 1 2> --> \"e\"
</example>

This function is useful when you know in advance which part of the
string you would like to extract, and do not need the pattern matching
facilities of <funref string-operators match>.

If you wish to index through each character of a string, the most direct
way is to convert it to an array first using
<funref string-operators string-to-array>, and then use the
<funref array-operators foreach> function to iterate over the members.
<complete-example>
<set-var s=\"This is a string.\">
<string-to-array <get-var-once s> chars>
<foreach character chars><get-var character>-</foreach>
</complete-example>")
{
  char *str_arg = mhtml_evaluate_string (get_positional_arg (vars, 0));
  char *beg_arg = mhtml_evaluate_string (get_positional_arg (vars, 1));
  char *end_arg = mhtml_evaluate_string (get_positional_arg (vars, 2));

  if (str_arg != (char *)NULL)
    {
      int len = strlen (str_arg);
      int beg_index = substring_parse_index (beg_arg, 0);
      int end_index = substring_parse_index (end_arg, len);

      if (beg_index > end_index)
        {
          int swap = beg_index;
          beg_index = end_index;
          end_index = swap;
        }

      /* Negative offsets (e.g. from a variable holding "-3") would
         otherwise index before the start of the string. */
      if (beg_index < 0) beg_index = 0;
      if (end_index < 0) end_index = 0;
      if (end_index > len) end_index = len;

      if ((beg_index != end_index) && (beg_index < len))
        {
          int count = end_index - beg_index;
          char small[100];
          /* Short results use the stack buffer; longer ones the heap. */
          char *piece = (count < 100) ? small : (char *)xmalloc (1 + count);

          memcpy (piece, str_arg + beg_index, count);
          piece[count] = '\0';
          bprintf_insert (page, start, "%s", piece);
          *newstart += count;

          if (piece != small)
            free (piece);
        }
    }

  free (str_arg);
  free (beg_arg);
  free (end_arg);
}

/* Shared worker for <subst-in-string> and <subst-in-var>.

   Copies CONTENTS into a scratch page, then walks the positional
   arguments of VARS in pairs starting at index 1 (pattern,
   replacement).  Each member of a pair is evaluated and the pair is
   applied with page_subst_in_page(); the walk stops at the first
   missing pattern argument.  When DEBUG_LEVEL is above 5, each
   substitution and its outcome are reported through page_debug().

   Returns the scratch page's buffer, which the callers in this file
   release with free()/xfree(), or NULL when CONTENTS is NULL. */
static char *
subst_in_string_internal (char *contents, Package *vars, int debug_level)
{
  char *result = (char *)NULL;

  if (contents != (char *)NULL)
    {
      int arg;
      PAGE *temp = page_create_page ();
      page_set_contents (temp, contents);

      for (arg = 1; ; arg += 2)
        {
          char *this_string = get_positional_arg (vars, arg);
          char *with_that = get_positional_arg (vars, arg + 1);

          if (this_string == (char *)NULL)
            break;

          this_string = mhtml_evaluate_string (this_string);
          with_that = mhtml_evaluate_string (with_that);

          /* Either evaluated string may be NULL, so both are guarded
             before being handed to a "%s" conversion. */
          if (debug_level > 5)
            page_debug
              ("<subst-in-xxx \"%s\" \"%s\" \"%s\">",
               contents,
               this_string ? this_string : "",
               with_that ? with_that : "");

          if (this_string != (char *)NULL)
            page_subst_in_page (temp, this_string, with_that);

          if (debug_level > 5)
            page_debug ("--> `%s'", temp->buffer ? temp->buffer : "");

          xfree (this_string);
          xfree (with_that);
        }

      /* Keep the text and drop only the page structure around it. */
      result = temp->buffer;
      free (temp);
    }

  return (result);
}

/* <subst-in-var VARNAME [PATTERN REPLACEMENT]...>: run the substitution
   pairs over the current value of the variable named by the evaluated
   first argument and store the result back into that variable.
   Nothing happens when the name is empty or the variable has no
   value. */
DEFUN (pf_subst_in_var, varname &optional this-string with-that,
"Replaces all occurrences of <var this-string> with <var with-that> in the
contents of the variable named <var varname>.  Both <var this-string> and
<var with-that> are evaluated before the replacement is done. <var
this-string> can be any regular expression allowed by the POSIX extended
regular expression matching.  This command can be useful when parsing
the output of <funref osfuncs cgi-exec>.")
{
  char *name = mhtml_evaluate_string (get_positional_arg (vars, 0));
  char *current = (char *)NULL;

  if (!empty_string_p (name))
    current = pagefunc_get_variable (name);

  if (current != (char *)NULL)
    {
      char *replaced = subst_in_string_internal (current, vars, debug_level);

      pagefunc_set_variable (name, replaced);
      xfree (replaced);
    }

  xfree (name);
}

/* <subst-in-string STRING [REGEXP REPLACEMENT]...>: evaluate STRING,
   apply every substitution pair to it, and insert the outcome at START
   in PAGE, advancing *NEWSTART past the inserted text. */
DEFUN (pf_subst_in_string, string &rest regexp replacement,
"Replaces all occurrences of <var regexp> with <var replacement> in
<var string>.

<var regexp> can be any regular expression allowed by POSIX extended
regular expression matching.

In the replacement string, a backslash followed by a number <var n> is
replaced with the contents of the <var n>th subexpression from <var
regexp>.

<example>
<set-var foo=\"This is a list\">
<subst-in-string <get-var foo> \"is\" \"HELLO\">
     --> \"ThHELLO HELLO a lHELLOt\"
.blank
<subst-in-string \"abc\" \"([a-z])\" \"\\\\1 \"> --> \"a b c \"
</example>")
{
  char *source = mhtml_evaluate_string (get_positional_arg (vars, 0));
  char *replaced;

  /* No string to work on: emit nothing. */
  if (source == (char *)NULL)
    return;

  replaced = subst_in_string_internal (source, vars, debug_level);
  free (source);

  if (replaced != (char *)NULL)
    {
      bprintf_insert (page, start, "%s", replaced);
      *newstart += strlen (replaced);
      free (replaced);
    }
}

/* <downcase STRING>: evaluate STRING, lowercase every uppercase
   character in place, insert the result at START in PAGE, and advance
   *NEWSTART by the length of the text. */
DEFUN (pf_downcase, string,
"Converts all of the uppercase characters in <var string> to
lowercase.

<complete-example>
<downcase \"This is Written in Meta-HTML\">
</complete-example>")
{
  unsigned char *text = (unsigned char *)
    mhtml_evaluate_string (get_positional_arg (vars, 0));
  unsigned char *p;

  if (text == (unsigned char *)NULL)
    return;

  for (p = text; *p != '\0'; p++)
    {
      if (isupper (*p))
        *p = tolower (*p);
    }

  bprintf_insert (page, start, "%s", text);
  /* P stopped on the terminating NUL, so P - TEXT is the length. */
  *newstart += (int)(p - text);
  free (text);
}

/* <upcase STRING>: evaluate STRING, uppercase every lowercase
   character in place, insert the result at START in PAGE, and advance
   *NEWSTART by the length of the text. */
DEFUN (pf_upcase, string,
"Converts all of the lowercase characters in <var string> to
uppercase.

<complete-example>
<upcase \"This is Written in Meta-HTML\">
</complete-example>")
{
  unsigned char *text = (unsigned char *)
    mhtml_evaluate_string (get_positional_arg (vars, 0));
  unsigned char *p;

  if (text == (unsigned char *)NULL)
    return;

  for (p = text; *p != '\0'; p++)
    {
      if (islower (*p))
        *p = toupper (*p);
    }

  bprintf_insert (page, start, "%s", text);
  /* P stopped on the terminating NUL, so P - TEXT is the length. */
  *newstart += (int)(p - text);
  free (text);
}

/* <capitalize STRING>: evaluate STRING and rewrite it in place so that
   the first letter of each alphabetic run is uppercase and the
   remaining letters of the run are lowercase.  Bytes above 127 are
   skipped without affecting the run state.  The result is inserted at
   START in PAGE and *NEWSTART is advanced by its length. */
DEFUN (pf_capitalize, string,
"Changes the case of each character in <var string> to uppercase or
lowercase depending on the surrounding characters.

<complete-example>
<capitalize \"This is a list\">
</complete-example>

Also see <funref string-operators downcase>, and
<funref string-operators upcase>.")
{
  unsigned char *text = (unsigned char *)
    mhtml_evaluate_string (get_positional_arg (vars, 0));
  unsigned char *p;
  int at_word_start = 1;

  if (text == (unsigned char *)NULL)
    return;

  for (p = text; *p != '\0'; p++)
    {
      int c = *p;

      /* Leave non-ASCII bytes alone; they neither start nor end a word. */
      if (c > 127)
        continue;

      if (!isalpha (c))
        {
          at_word_start = 1;
          continue;
        }

      if (at_word_start)
        {
          if (islower (c))
            *p = toupper (c);

          at_word_start = 0;
        }
      else if (isupper (c))
        *p = tolower (c);
    }

  bprintf_insert (page, start, "%s", text);
  *newstart += (int)(p - text);
  free (text);
}

/* <string-compare STRING1 STRING2 [caseless=...]>

   Evaluates both strings and inserts "equal", "greater" or "less" at
   START in PAGE, describing how STRING1 relates to STRING2.  The
   comparison uses strcasecmp() when the "caseless" keyword has a value
   and strcmp() otherwise.  An argument that evaluates to NULL sorts
   before any non-NULL string; two NULLs are "equal".

   strcmp() and strcasecmp() only promise the SIGN of their result, so
   the result is classified by sign rather than compared against the
   exact values 1 and -1. */
DEFUN (pf_string_compare, string1 string2 &key caseless,
"Compare the two strings <var string1> and <var string2>, and return
a string which specifies the relationship between them.  The
comparison is normall case-sensitive, unless the keyword argument <var
caseless=true> is given.

The possible return values are:
<ol>
<li> equal<br>
The two strings are exactly alike.
<li> greater<br>
<var string1> is lexically greater than <var string2>.
<li> less<br>
<var string1> is lexically less than <var string2>.
</ol>

Examples:

<example>
<string-compare \"aaa\" \"aab\">               --> less
<string-compare \"zzz\" \"aab\">               --> greater
<string-compare \"zzz\" \"ZZZ\">               --> greater
<string-compare \"zzz\" \"ZZZ\" caseless=true> --> equal
</example>")
{
  char *string_1 = mhtml_evaluate_string (get_positional_arg (vars, 0));
  char *string_2 = mhtml_evaluate_string (get_positional_arg (vars, 1));
  int caseless_p = get_value (vars, "caseless") != (char *)NULL;
  char *result = (char *)NULL;

  /* Two freshly evaluated strings can only be the same pointer when
     both are NULL, i.e. both arguments were empty. */
  if (string_1 == string_2)
    result = "equal";
  else if (string_1 == (char *)NULL)
    result = "less";
  else if (string_2 == (char *)NULL)
    result = "greater";
  else
    {
      int order;

      if (caseless_p)
        order = strcasecmp (string_1, string_2);
      else
        order = strcmp (string_1, string_2);

      if (order == 0)
        result = "equal";
      else if (order > 0)
        result = "greater";
      else
        result = "less";
    }

  if (string_1 != (char *)NULL) free (string_1);
  if (string_2 != (char *)NULL) free (string_2);

  if (result)
    {
      bprintf_insert (page, start, "%s", result);
      *newstart = start + strlen (result);
    }
}

/* <string-eq STRING-1 STRING-2 [caseless=...]>: insert "true" at START
   in PAGE when the two evaluated strings are equal.  Two empty
   arguments count as equal; otherwise both must be present and compare
   equal under strcmp(), or under strcasecmp() when the "caseless"
   keyword evaluates to a non-empty string. */
DEFUN (pf_string_eq, string-1 string-2 &key caseless=true,
 "Compare <var string1> to <var string2> and return the string
<code>\"true\"</code> if they are character-wise identical.

The optional keyword argument <var caseless=true> indicates that no
consideration should be given to the case of the characters during
comparison.

<example>
<string-eq \"foo\" \"FOO\">               -->
<string-eq \"foo\" \"foo\">               -->true
<string-eq <upcase \"foo\"> \"FOO\">      -->true
<string-eq \"foo\" \"FOO\" caseless=true> -->true
</example>")
{
  char *arg1 = mhtml_evaluate_string (get_positional_arg (vars, 0));
  char *arg2 = mhtml_evaluate_string (get_positional_arg (vars, 1));
  char *caseless = mhtml_evaluate_string (get_value (vars, "caseless"));
  int ignore_case;
  int same;

  if (debug_level > 10)
    {
      page_debug ("[ arg1 = '%s', \n  arg2 = '%s' ]",
                  arg1 ? arg1 : "",
                  arg2 ? arg2 : "");
    }

  ignore_case = empty_string_p (caseless) ? 0 : 1;
  xfree (caseless);

  if (empty_string_p (arg1) && empty_string_p (arg2))
    same = 1;
  else if (!arg1 || !arg2)
    same = 0;
  else if (ignore_case)
    same = (strcasecmp (arg1, arg2) == 0);
  else
    same = (strcmp (arg1, arg2) == 0);

  if (same)
    {
      bprintf_insert (page, start, "true");
      /* Resume just past the four characters of "true". */
      *newstart = start + 4;
    }

  xfree (arg1);
  xfree (arg2);
}

/* <string-neq STRING-1 STRING-2 [caseless=...]>: insert "true" at START
   in PAGE when the two evaluated strings differ.  They differ when
   exactly one of them is empty, or when both are present and compare
   unequal under strcmp(), or under strcasecmp() when the "caseless"
   keyword evaluates to a non-empty string. */
DEFUN (pf_string_neq, string-1 string-2 &key caseless=true,
 "Compare <var string1> to <var string2> and return the string
<code>\"true\"</code> if they are NOT character-wise identical.

The optional keyword argument <var caseless=true> indicates that no
consideration should be given to the case of the characters during
comparison.

<example>
<string-neq \"foo\" \"FOO\">               -->true
<string-neq \"foo\" \"foo\">               -->
<string-neq <upcase \"foo\"> \"FOO\">      -->
<string-neq \"foo\" \"FOO\" caseless=true> -->
</example>")
{
  char *arg1 = mhtml_evaluate_string (get_positional_arg (vars, 0));
  char *arg2 = mhtml_evaluate_string (get_positional_arg (vars, 1));
  char *caseless = mhtml_evaluate_string (get_value (vars, "caseless"));
  int ignore_case = empty_string_p (caseless) ? 0 : 1;
  int blank1, blank2;
  int differ;

  xfree (caseless);

  blank1 = empty_string_p (arg1) ? 1 : 0;
  blank2 = empty_string_p (arg2) ? 1 : 0;

  if (blank1 != blank2)
    differ = 1;                 /* Exactly one side is empty. */
  else if (!arg1 || !arg2)
    differ = 0;
  else if (ignore_case)
    differ = (strcasecmp (arg1, arg2) != 0);
  else
    differ = (strcmp (arg1, arg2) != 0);

  if (differ)
    {
      bprintf_insert (page, start, "true");
      /* Resume just past the four characters of "true". */
      *newstart = start + 4;
    }

  xfree (arg1);
  xfree (arg2);
}

/* <plain-text [first-char=EXPR] [nobr=...] [nolower=...]> BODY </plain-text>

   Does modifications to the plain text in BODY:

     1. BODY is evaluated, and a paragraph marker (the evaluated form
        of "\n<p>\n") is inserted in front of it.
     2. Every blank line (a newline, optional spaces or tabs, and
        another newline) is replaced with the paragraph marker.
     3. If FIRST-CHAR is given, the first character following each
        paragraph marker is, when alphanumeric, replaced by the
        evaluation of FIRST-CHAR followed by that character.  When
        FIRST-CHAR begins with '<', a matching close tag built from its
        tag name is appended after the character.  If NOLOWER is
        non-empty, only uppercase first characters are treated.
     4. The result is inserted at START in PAGE; the leading paragraph
        marker is skipped when NOBR is set.

   Characters read from the body are converted to unsigned char before
   being classified, since the <ctype.h> functions are undefined for
   negative arguments (8-bit text on signed-char platforms). */
DEFMACRO (pf_plain_text, &key first-char=expr nobr=true,
"Performs the following steps:

<ol>
  <li> Replace occurrences of pairs of newline characters with a
  single <example code><P></example> tag.

  <li> Applies the function <var expr> to the first character of every
  paragraph, and inserts the closing tag after that character.
</ol>

The output will start with a <example code><P></example> tag, unless the
optional argument <var nobr=true> is given.

<complete-example>
<plain-text first-char=<font size=\"+1\"> nobr=true>
This is line 1.
.blank
This is line 2.
</plain-text>
</complete-example>")
{
  register int i;
  char *first_char = mhtml_evaluate_string (get_value (vars, "FIRST-CHAR"));
  char *nobr = mhtml_evaluate_string (get_value (vars, "NOBR"));
  char *nolower = mhtml_evaluate_string (get_value (vars, "NOLOWER"));
  BPRINTF_BUFFER *evalbody = bprintf_create_buffer ();
  char *pval = mhtml_evaluate_string ("\n<p>\n");
  int pval_len = strlen (pval);

  /* Adopt the evaluated body as the working buffer.
     NOTE(review): this assumes bprintf_create_buffer() returns a buffer
     with no storage attached yet -- confirm, otherwise it is leaked. */
  evalbody->buffer = mhtml_evaluate_string (body->buffer);
  evalbody->bsize = evalbody->buffer ? strlen (evalbody->buffer) : 0;
  evalbody->bindex = evalbody->bsize;

  /* Insert one blank line in the front of BODY. */
  bprintf_insert (evalbody, 0, "%s", pval);

  /* Modify blank lines in BODY such that they contain <p> instead. */
  page_subst_in_page (evalbody, "\n[ \t]*\n", pval);

  /* Modify the first character of every paragraph by inserting the
     open tag before it, and inserting a matching close tag after it. */
  if (first_char)
    {
      register int begin;
      char *closer = (char *)NULL;
      int o_len = strlen (first_char);
      int c_len = 0;
      char *buffer = (char *)NULL;

      if (*first_char == '<')
        {
          register int c;

          /* Locate the tag name: skip whitespace after '<', then run up
             to the first '>' or whitespace character. */
          for (i = 1; whitespace (first_char[i]); i++);

          begin = i;

          for (i = begin; (c = first_char[i]) != '\0'; i++)
            if ((c == '>') || (whitespace (c)))
              break;

          /* Room for "</", the name, ">" and the terminating NUL. */
          closer = (char *)xmalloc (4 + (i - begin));
          closer[0] = '<';
          closer[1] = '/';
          strncpy (closer + 2, first_char + begin, i - begin);
          closer[(i - begin) + 2] = '>';
          closer[(i - begin) + 3] = '\0';
          c_len = strlen (closer);
        }

      /* BUFFER is a template: FIRST-CHAR, one slot for the paragraph's
         first character (at index O_LEN), then the optional closer. */
      buffer = (char *)xmalloc (3 + o_len + c_len);
      strcpy (buffer, first_char);
      if (c_len)
        {
          strcpy (buffer + o_len + 1, closer);
          free (closer);
        }
      else
        buffer[o_len + 1] = '\0';

      /* Now quickly find occurrences of "<p>" in EVALBODY. */
      begin = 0;

      while ((begin = page_search (evalbody, pval, begin)) != -1)
        {
          begin += pval_len;

          while ((begin < evalbody->bindex) &&
                 (whitespace (evalbody->buffer[begin])))
            begin++;

          if (begin < evalbody->bindex)
            {
              /* Classify through unsigned char; see the header note. */
              unsigned char lead = (unsigned char)evalbody->buffer[begin];

              if ((isalnum (lead)) &&
                  ((empty_string_p (nolower)) || (isupper (lead))))
                {
                  char *temp;

                  buffer[o_len] = evalbody->buffer[begin];
                  temp = mhtml_evaluate_string (buffer);
                  bprintf_delete_range (evalbody, begin, begin + 1);
                  bprintf_insert (evalbody, begin, "%s", temp);
                  begin += strlen (temp);
                  free (temp);
                }
            }
        }
      free (buffer);
    }

  /* Insert the modified evalbody. */
  {
    int length = evalbody->bindex;
    int offset = 0;

    /* With NOBR, drop the paragraph marker that was put in front. */
    if (nobr)
      {
        offset = pval_len;
        length -= pval_len;
      }
    bprintf_insert (page, start, "%s", evalbody->buffer + offset);
    *newstart += length;
  }

  xfree (nobr);
  xfree (nolower);
  xfree (first_char);
  xfree (pval);
  bprintf_free_buffer (evalbody);
}

/* <char-offsets STRING CH [caseless=...]>

   Evaluates STRING and CH and inserts into PAGE, one per line, the
   zero-based offset of every position in STRING that holds the first
   character of CH.  When the CASELESS keyword evaluates to a non-empty
   string, characters are also considered equal if their lowercase
   forms match.  *NEWSTART is left just past the inserted text.

   Characters are converted to unsigned char before being given to
   tolower(), which is undefined for negative arguments. */
DEFUN (pf_char_offsets, string ch &key caseless,
"Return an array of numbers,  each one representing the offset from the
start of <var string> of <var ch>.  This function is useful for finding
candidate locations for word-wrapping, for example.
Here is a complete example:
<complete-example>
<char-offsets \"This is a list\" \" \">
</complete-example>")
{
  char *string = mhtml_evaluate_string (get_positional_arg (vars, 0));
  char *ch = mhtml_evaluate_string (get_positional_arg (vars, 1));
  char *caseless_arg = mhtml_evaluate_string (get_value (vars, "CASELESS"));
  int caseless_p = !empty_string_p (caseless_arg);

  if ((string != (char *)NULL) && (ch != (char *)NULL))
    {
      register int i;
      int wanted_lower = tolower ((unsigned char)*ch);

      for (i = 0; string[i] != '\0'; i++)
        {
          if ((string[i] == *ch) ||
              (caseless_p &&
               (tolower ((unsigned char)string[i]) == wanted_lower)))
            {
              char digits[40];

              sprintf (digits, "%d", i);
              bprintf_insert (page, start, "%s\n", digits);
              /* Step past the digits and their newline so the next
                 offset is appended after this one. */
              start += 1 + strlen (digits);
            }
        }
    }

  *newstart = start;
  xfree (string);
  xfree (ch);
  xfree (caseless_arg);
}

#if defined (__cplusplus)
}
#endif
