/*
 *
 * (c) Vladi Belperchinov-Shabanski "Cade" <cade@biscom.net> 1996-1999
 *
 * SEE `README',`LICENSE' OR `COPYING' FILE FOR LICENSE AND OTHER DETAILS!
 *
 */

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>

#include <vstring.h>
#include <regexp2.h>

#ifndef ASSERT
#define ASSERT assert
#endif

#include "grep.h"

////////////////////////////////////////////////////////////////////////////
//
//  Common globals
//

#define MAXCHAR  256 // max char value
#define MAXPATT 1024 // max pattern length

////////////////////////////////////////////////////////////////////////////
//
//  HEX pattern conversion
//

  // Tells whether `ch' is an ASCII hexadecimal digit.
  // Returns 1 for '0'-'9', 'A'-'F' and 'a'-'f', 0 for anything else.
  static inline int __InHex( int ch )
  {
    if( ch >= '0' && ch <= '9' ) return 1;
    if( ch >= 'A' && ch <= 'F' ) return 1;
    if( ch >= 'a' && ch <= 'f' ) return 1;
    return 0;
  }
  
  // Converts one hexadecimal digit character to its numeric value.
  //
  // Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f'.
  // Passing anything else is a caller error: it trips the assertion in
  // debug builds and yields -1 when assertions are compiled out, so the
  // function never runs off its end without a return value.
  static inline int __HexCode( int ch )
  {
    if( ch >= '0' && ch <= '9' )
      return ch - '0';
    if( ch >= 'A' && ch <= 'F' )
      return ch - 'A' + 10;
    if( ch >= 'a' && ch <= 'f' )
      return ch - 'a' + 10;
    assert( 0 ); // not a hex digit
    return -1;
  }
  
  // Decodes a textual hex string ("4D 5A 90", "4d5a90", ...) into raw bytes.
  //
  //   str -- NUL-terminated input: pairs of hex digits, with optional
  //          spaces or tabs between the pairs
  //   pat -- output buffer; one byte is stored per digit pair. It must have
  //          room for one byte more than the number of pairs (see NOTE).
  //
  // Returns the number of bytes stored, or 0 when the input is malformed:
  // a character that is neither a hex digit nor a blank, or a digit pair
  // broken up by anything other than the end of the string.
  //
  // NOTE: a single unpaired digit at the very end of the input is written
  // to the output as a high nibble, but it is NOT included in the returned
  // length.
  int Hex2Pat( const char *str, char* pat )
  {
     const char *pcStart = pat;
     int nHex;
     while( *str )
     {
       // blanks are allowed between digit pairs
       if( *str == ' ' || *str == '\t' )
       {
         str ++;
         continue;
       }
       if( ! __InHex( *str ) )
         return 0;
       nHex = __HexCode( *str ) << 4;
       str ++;
       if( ! *str )
       {
         // dangling high nibble at end of input: stored but not counted
         *pat = nHex;
         break;
       }
       // the low nibble must be validated as well: blanks and any other
       // non-hex character make the whole pattern invalid
       if( ! __InHex( *str ) )
         return 0;
       nHex += __HexCode( *str );
       str ++;
       *pat = nHex;
       pat ++;
     }
     return (int)( pat - pcStart );
  }

////////////////////////////////////////////////////////////////////////////
//
// Search interface functions
//

  // Searches the file named `pfIn' for the NUL-terminated pattern `pat'.
  // The pattern length is measured with strlen(); all other arguments are
  // forwarded unchanged to FSearch(), whose result is returned as is.
  long FSearchStr( const char *pat, const char* pfIn, int nocase, int spos )
  {
    size_t patlen = strlen( pat );
    return FSearch( pat, patlen, pfIn, nocase, spos );
  }
  
  // Searches the open stream `pfIn' for the NUL-terminated pattern `pat'.
  // The pattern length is measured with strlen(); all other arguments are
  // forwarded unchanged to FSearch(), whose result is returned as is.
  long FSearchStr( const char *pat, FILE* pfIn, int nocase, int spos )
  {
    size_t patlen = strlen( pat );
    return FSearch( pat, patlen, pfIn, nocase, spos );
  }
  
  // Opens the file named `pfIn' in binary mode, runs the stream-based
  // FSearch() on it and closes the file again.
  //
  // Returns the result of the stream search, or -1 if the file cannot be
  // opened.
  long FSearch( const char *pat, int patsize, const char* pfIn, int nocase, int spos )
  {
    FILE *f = fopen( pfIn, "rb" );
    if (!f) return -1;
    // the search result is a long; keeping it in an int would truncate
    // offsets that do not fit
    long pos = FSearch( pat, patsize, f, nocase, spos );
    fclose(f);
    return pos;
  }
  
  // Pattern dispatcher for an already opened stream.
  //
  // The first character of `pat' selects the search engine:
  //   '~'   -- the rest is a regular expression, handled by FGrep()
  //   '$'   -- the rest is a hex string, decoded with Hex2Pat() and then
  //            searched with FKMPSearch(); -2 is returned when it does not
  //            decode to at least one byte
  //   '\\'  -- escape: the rest is searched literally with FKMPSearch()
  //   other -- the whole pattern is searched literally with FKMPSearch()
  //
  // `patsize' is the length of `pat' and must be in the range
  // 1..MAXPATT-1, otherwise -1 is returned. The '~' and '$' forms read
  // `pat' as a NUL-terminated string. `nocase' and `spos' are handed
  // through to the selected engine, whose result is returned.
  long FSearch( const char *pat, int patsize, FILE* pfIn, int nocase, int spos )
  {
    char newpat[MAXPATT+1];
    ASSERT( patsize < MAXPATT );
    // the assertion disappears with NDEBUG, so enforce the limit at run
    // time too: the engines below copy into fixed MAXPATT-sized buffers.
    // An empty pattern has no first character to dispatch on.
    if (patsize < 1 || patsize >= MAXPATT)
      return -1;
    if (pat[0] == '~')
      { // regular expression
        return FGrep( pat+1, pfIn, nocase, spos );
      }
    if (pat[0] == '$')
      { // hex pattern
        int z = Hex2Pat( pat+1, newpat );
        if (z > 0)
          return FKMPSearch( newpat, z, pfIn, nocase, spos );
        else
          return -2;
      }
    // normal pattern string
    if (pat[0] == '\\')
      return FKMPSearch( pat+1, patsize-1, pfIn, nocase, spos );
    return FKMPSearch( pat, patsize, pfIn, nocase, spos );
  }

////////////////////////////////////////////////////////////////////////////
//
// Knuth-Morris-Pratt search
//

// Builds the Knuth-Morris-Pratt failure table for `pat'.
//
//   pat     -- pattern bytes; only pat[0..patsize-1] are read, so the
//              pattern does not have to be NUL-terminated
//   patsize -- number of bytes in the pattern
//   next    -- output table; the caller must provide patsize+1 entries
//
// next[0] is always -1. For 0 < i < patsize, next[i] is the pattern index
// to fall back to when pat[i] fails to match (-1 meaning "move on to the
// next text character"). next[patsize] is the fall-back index to use after
// a complete match.
void __KMPPreprocPat( const char* pat, int patsize, int* next )
{
  int i = 0;
  int j = next[0] = -1;
  while (i < patsize)
    {
    while ((j > -1) && (pat[i] != pat[j])) j = next[j];
    i++;
    j++;
    // pat[patsize] is not part of the pattern: never read it, and never
    // let whatever byte happens to follow the pattern shape the table
    if ((i < patsize) && (pat[i] == pat[j]))
      next[i] = next[j];
    else
      next[i] = j;
    }
}

// Knuth-Morris-Pratt search for a byte pattern in an open stream.
//
//   pat     -- pattern bytes (may contain any byte values)
//   patsize -- number of pattern bytes, 1..MAXPATT
//   pfIn    -- stream to search
//   nocase  -- non-zero: pattern letters a-z and every byte read from the
//              stream are upper-cased before they are compared
//   spos    -- offset to start searching at, or -1 to start at the
//              stream's current position
//
// Returns the offset of the first match, or -1 when there is no match or
// `patsize' is out of range. The stream position found on entry is put
// back before returning.
long FKMPSearch( const char *pat, int patsize, FILE* pfIn, int nocase, int spos )
{
  int  next[MAXPATT+1]; // __KMPPreprocPat() fills patsize+1 entries
  char newpat[MAXPATT+1];
  int  j;

  // reject bad sizes before the stream is touched, so that a refused
  // pattern cannot leave the file position moved to `spos'
  if( patsize < 1 || patsize > MAXPATT ) return -1;

  long opos = ftell( pfIn );

  if (spos != -1) fseek( pfIn, spos, SEEK_SET );

  memcpy( newpat, pat, patsize );
  newpat[patsize] = 0; // keep the byte just past the pattern well defined
  if (nocase)
    {
    for ( j = 0; j < patsize; j++)
      if ( newpat[j] >= 'a' && newpat[j] <= 'z' )
        newpat[j] = toupper(newpat[j]);
    }
  memset( next, 0, sizeof(next) );
  __KMPPreprocPat( newpat, patsize, next );

  int nCh;
  j = 0;
  while( (nCh = getc(pfIn)) != EOF )
    {
    if (nocase) nCh = toupper( nCh );
    while( j > -1 && newpat[j] != (char)nCh ) j = next[j];
    j++;
    if ( j >= patsize )
      {
      // the stream now stands just behind the match; ftell() reports that
      // position directly (no fflush() -- it is undefined on input streams)
      long rpos = ftell( pfIn );
      fseek( pfIn, opos, SEEK_SET );
      ASSERT( rpos-patsize >= 0 );
      return rpos-patsize;
      // to continue:
      // j = next[j];
      }
    }
  fseek( pfIn, opos, SEEK_SET );
  return( -1 );
}

////////////////////////////////////////////////////////////////////////////
//
// FGrep -- regular expression search (I know `G' here stands for <nothing>:))
//

// Line reader used by FGrep().
//
// Reads characters from `f' into `buff' until a newline has been stored,
// `maxlen' characters have been stored, or the stream ends; the result is
// then NUL-terminated. A newline, when read, is kept in the buffer.
// `buff' therefore needs room for maxlen+1 characters.
//
// Returns `buff', or NULL when not a single character could be read
// (`buff' then holds an empty string).
char *__grep_fgets( char *buff, int maxlen, FILE *f )
{
  int len = 0;
  buff[0] = 0;
  for (;;)
    {
    int c = fgetc( f );
    if (c == EOF) break;
    buff[len++] = c;
    if (c == '\n' || len == maxlen) break;
    }
  buff[len] = 0;
  if (len > 0)
    return buff;
  return (char*)NULL;
}

// Longest chunk of a line FGrep() reads at once. It may be changed before
// a call; FGrep() puts it back to 4096 once the pattern has been compiled
// and the search has run (or failed to get its line buffer).
int FGrepMaxLine = 4096;
// Incremented by FGrep() for every line it reads that does not match.
// FGrep() itself never resets it.
int FGrepLines = 0;

// Regular expression search in an open stream.
//
//   pat    -- NUL-terminated regular expression; it must be shorter than
//             FGrepMaxLine and no longer than MAXPATT characters
//   pfIn   -- stream to search
//   nocase -- non-zero: both the pattern and every line read are
//             upper-cased with StrUpCase() before matching
//   spos   -- offset to start searching at, or -1 to start at the
//             stream's current position
//
// The stream is read line by line (lines longer than FGrepMaxLine are
// processed in pieces) and each piece is matched with regexec().
//
// Returns the offset of the start of the first match, or -1 when nothing
// matches, the pattern is too long or does not compile, or the line buffer
// cannot be allocated. The stream position found on entry is restored once
// the search has run.
long FGrep     ( const char *pat, FILE* pfIn, int nocase, int spos )
{
  size_t patlen = strlen( pat );
  if ( patlen >= (size_t)FGrepMaxLine ) return -1; // just in case, and for now...
  if ( patlen > MAXPATT ) return -1; // would not fit into newpat below

  char newpat[MAXPATT+1];
  strcpy( newpat, pat );
  if ( nocase ) StrUpCase( newpat );
  regexp *re = regcomp( newpat );
  if (re == NULL) return -1;
  char *line = (char*)malloc( FGrepMaxLine+1 );
  if (line == NULL)
    {
    // do not leak the compiled expression when memory runs out
    free(re);
    FGrepMaxLine = 4096;
    return -1;
    }

  long opos = ftell( pfIn );
  if (spos != -1) fseek( pfIn, spos, SEEK_SET );

  // cpos always holds the offset of the line about to be read
  long cpos = ftell( pfIn );
  fseek( pfIn, 0, SEEK_END );
  long filesize = ftell( pfIn );
  fseek( pfIn, cpos, SEEK_SET );

  int found = 0;
  while( __grep_fgets( line, FGrepMaxLine, pfIn ) )
    {
    if ( nocase ) StrUpCase( line );
    if ( regexec( re, line ) )
      {
      found = 1;
      break;
      }
    cpos = ftell( pfIn );
    FGrepLines++;
    if (cpos >= filesize) break;
    }

  fseek( pfIn, opos, SEEK_SET );
  if (found)
    cpos += ( re->startp[0] - line ); // line start + offset of match in line

  free(line);
  free(re);
  FGrepMaxLine = 4096;
  return found ? cpos : -1;
}

