/*  seqport.c
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE                            
*               National Center for Biotechnology Information
*                                                                            
*  This software/database is a "United States Government Work" under the     
*  terms of the United States Copyright Act.  It was written as part of      
*  the author's official duties as a United States Government employee and 
*  thus cannot be copyrighted.  This software/database is freely available 
*  to the public for use. The National Library of Medicine and the U.S.      
*  Government have not placed any restriction on its use or reproduction.  
*                                                                            
*  Although all reasonable efforts have been taken to ensure the accuracy  
*  and reliability of the software and data, the NLM and the U.S.            
*  Government do not and cannot warrant the performance or results that      
*  may be obtained by using this software or data. The NLM and the U.S.      
*  Government disclaim all warranties, express or implied, including         
*  warranties of performance, merchantability or fitness for any particular
*  purpose.                                                                  
*                                                                            
*  Please cite the author in any work or product based on this material.     
*
* ===========================================================================
*
* File Name:  seqport.c
*
* Author:  James Ostell
*   
* Version Creation Date: 7/13/91
*
* $Revision: 6.3 $
*
* File Description:  Ports onto Bioseqs
*
* Modifications:  
* --------------------------------------------------------------------------
* Date	   Name        Description of modification
* -------  ----------  -----------------------------------------------------
*
* $Log: seqport.c,v $
* Revision 6.3  1998/02/18 15:23:58  kans
* spp->eos = FALSE when moving off segment (JO)
*
* Revision 6.2  1997/12/16 18:53:38  kans
* ProteinFromCdRegionEx always removes trailing X generated by incomplete last codon
*
* Revision 6.1  1997/09/16 15:31:26  kans
* added aaFeatLoc_to_dnaFeatLoc (JO)
*
* Revision 6.0  1997/08/25 18:07:12  madden
* Revision changed to 6.0
*
* Revision 5.18  1997/08/18 20:48:46  kans
* fix aaInterval_to_dnaInterval for frame
*
* Revision 5.17  1997/08/15 17:02:41  madden
* Added new function ProteinFromCdRegionEx with remove_trailingX Boolean
*
* Revision 5.16  1997/07/25 21:00:47  madden
* Do not remove trailing Xs if include_stop is FALSE
*
* Revision 5.15  1997/06/19 18:38:49  vakatov
* [WIN32,MSVC++]  Adopted for the "NCBIOBJ.LIB" DLL'ization
*
* Revision 5.14  1997/03/31 17:08:31  shavirin
* Removed warnings detected by C++ compiler and changed function
* SPRebuildDNA to use function BSrebuildDNA_4na()
*
 * Revision 5.13  1997/03/18  19:17:13  shavirin
 * Changed handling of virtual sequence in SPCompressRead() function
 *
 * Revision 5.12  1997/03/06  22:56:31  kans
 * removed an unused local variable
 *
 * Revision 5.11  1997/03/06  22:47:17  shavirin
 * Moved SPCompress functions from sequtil.c
 *
 * Revision 5.10  1997/01/02  22:48:28  tatiana
 * aaLoc_to_dnaLoc handles SeqBondPtr
 *
 * Revision 5.9  1996/11/04  15:22:31  kans
 * backed out check for consen, left in extra null check
 *
 * Revision 5.8  1996/11/03  21:14:19  kans
 * fixes to seqport to handle consen, and check for first curspp == NULL
 *
 * Revision 5.7  1996/10/23  20:31:02  tatiana
 * check for one residue overlap in dnaLoc_to_aaLoc()
 *
 * Revision 5.6  1996/09/13  21:58:08  kans
 * added fuzziness to dnaLoc_to_aaLoc (JZ)
 *
 * Revision 5.4  1996/08/09  15:27:47  ostell
 * added BioseqRev(), BioseqComp(), BioseqRevComp()
 *
 * Revision 5.3  1996/07/15  19:03:28  epstein
 * add new param to dnaLoc_to_aaLoc() to optionally report frame
 *
 * Revision 5.2  1996/06/15  17:29:44  ostell
 * fixed minor delta seq bug
 *
 * Revision 5.1  1996/06/14  20:36:39  epstein
 * correct arithmetic logic in dnaLoc_to_aaLoc()
 *
 * Revision 5.0  1996/05/28  13:23:23  ostell
 * Set to revision 5.0
 *
 * Revision 4.9  1996/02/05  18:16:17  kans
 * aa to dna location converter now properly does split codons
 *
 * Revision 4.8  1996/02/01  21:40:06  kans
 * fixed minor bugs in aa_to_dna and dna_to_aa location converters
 *
 * Revision 4.7  1996/01/30  16:24:04  ostell
 * added merge argument to dnaLoc_to_aaLoc()
 * change calls to SeqLocPackage
 *
 * Revision 4.6  1996/01/29  22:03:52  ostell
 * added aaLoc_to_dnaLoc() and dnaLoc_to_aaLoc()
 *
 * Revision 4.5  1996/01/28  07:00:05  ostell
 * made fisxes to support deeply nexted segmented seqports
 *
 * Revision 4.4  1996/01/27  22:19:00  ostell
 * added SeqPortSet_.. functions
 * refined support for virtual sequences
 *
 * Revision 4.3  1996/01/10  22:25:25  ostell
 * added aaInterval_to_seqloc()
 *
 * Revision 4.2  1995/12/29  21:31:44  ostell
 * made SeqPort helper functions public for use by edutil for delta seqs
 *
 * Revision 4.1  1995/12/26  22:29:34  ostell
 * added support for delta seq to SeqPort
 *
 * Revision 4.0  1995/07/26  13:49:01  ostell
 * force revision to 4.0
 *
 * Revision 2.39  1995/07/20  19:33:10  tatiana
 * change SeqIdPrint to SeqIdWrite
 *
 * Revision 2.38  1995/05/12  22:09:01  ostell
 * added MemFree(spp) to early error returns in SeqPortNew()
 *
*
*
*
* ==========================================================================
*/

/** for ErrPostEx() ****/
/* THIS_MODULE and THIS_FILE are defined here, ahead of the #includes, for  */
/* the benefit of ErrPostEx(); they expand to the two file-local strings    */
/* below (a fixed module name and this translation unit's __FILE__).        */

static char *this_module = "ncbiapi";
#define THIS_MODULE this_module
static char *this_file = __FILE__;
#define THIS_FILE this_file

/**********************/


#include <seqport.h>
#include <edutil.h>    /* for SeqLoc creation functions */
#include <gather.h>    /* for SeqLocOffset function */

/* Not referenced by the SeqPort routines directly below; presumably the    */
/* buffer chunk size for the SPCompress functions later in the file --      */
/* TODO confirm against its users.                                          */
#define SPC_BUFF_CHUNK 1024

/*****************************************************************************
*
*   SeqPort Routines
*
*****************************************************************************/

/*****************************************************************************
*
*   SeqPortFree(spp)
*       Releases spp, every child port chained off spp->segs, and the
*       read cache. If the port is flagged as locked, BioseqUnlock() is
*       called on its Bioseq first.
*       Accepts NULL. Always returns NULL.
*
*****************************************************************************/
NLM_EXTERN SeqPortPtr SeqPortFree (SeqPortPtr spp)

{
    SeqPortPtr child, following;

    if (spp != NULL)
    {
        if (spp->locked)              /* locked during access */
            BioseqUnlock(spp->bsp);   /* make available for freeing */

        for (child = spp->segs; child != NULL; child = following)
        {
            following = child->next;  /* read the link before child is freed */
            SeqPortFree(child);
        }

        MemFree(spp->cache);
        MemFree(spp);
    }

    return NULL;
}

/*****************************************************************************
*
*   SeqPortSetValues(spp)
*      Copies the values is_circle, is_seg, and do_virtual from spp to
*        every dependent SeqPort chained off spp->segs, descending into
*        their own children as well. This is necessary for segmented,
*        reference, or delta types of Bioseqs and on SeqPortNewByLoc()
*
*      SeqPortSet_... functions call this function
*
*      Returns FALSE if spp is NULL, TRUE otherwise.
*
*****************************************************************************/
NLM_EXTERN Boolean LIBCALL SeqPortSetValues (SeqPortPtr spp)
{
    SeqPortPtr child;

    if (spp == NULL)
        return FALSE;

    child = spp->segs;
    while (child != NULL)
    {
        child->is_circle = spp->is_circle;
        child->is_seg = spp->is_seg;
        child->do_virtual = spp->do_virtual;

        if (child->segs != NULL)      /* nested ports get the same flags */
            SeqPortSetValues(child);

        child = child->next;
    }

    return TRUE;
}


/* Stores value in spp->is_circle and pushes the port's flags down to its   */
/* dependent ports via SeqPortSetValues(). Returns FALSE if spp is NULL.    */
NLM_EXTERN Boolean LIBCALL SeqPortSet_is_circle (SeqPortPtr spp, Boolean value)
{
    if (spp != NULL)
    {
        spp->is_circle = value;
        return SeqPortSetValues(spp);
    }
    return FALSE;
}

/* Stores value in spp->is_seg and pushes the port's flags down to its      */
/* dependent ports via SeqPortSetValues(). Returns FALSE if spp is NULL.    */
NLM_EXTERN Boolean LIBCALL SeqPortSet_is_seg (SeqPortPtr spp, Boolean value)
{
    if (spp != NULL)
    {
        spp->is_seg = value;
        return SeqPortSetValues(spp);
    }
    return FALSE;
}

/* Stores value in spp->do_virtual and pushes the port's flags down to its  */
/* dependent ports via SeqPortSetValues(). Returns FALSE if spp is NULL.    */
NLM_EXTERN Boolean LIBCALL SeqPortSet_do_virtual (SeqPortPtr spp, Boolean value)
{
    if (spp != NULL)
    {
        spp->do_virtual = value;
        return SeqPortSetValues(spp);
    }
    return FALSE;
}


/* Fills in the range-related fields of spp: start, stop, strand, the total */
/* length (stop - start + 1), the requested output code and its code table. */
/* The current position is marked "not set" (-1).                           */
/* Returns FALSE if spp is NULL, TRUE otherwise.                            */
NLM_EXTERN Boolean LIBCALL SeqPortSetUpFields (SeqPortPtr spp, Int4 start, Int4 stop, Uint1 strand, Uint1 newcode)
{
    if (spp != NULL)
    {
        spp->curpos = -1;    /* not set */

        spp->start = start;
        spp->stop = stop;
        spp->totlen = stop - start + 1;
        spp->strand = strand;

        spp->newcode = newcode;
        spp->sctp = SeqCodeTableFind(newcode);

        return TRUE;
    }

    return FALSE;
}
/*****************************************************************************
*
*   SeqPortSetUpAlphabet(spp, curr_code, newcode)
*       Records curr_code as the stored alphabet of spp and sets up the
*       unpacking parameters used when reading residues:
*           bc      residues per byte
*           mask    bits holding the first residue of a byte
*           rshift  shift that brings the masked residue to the low bits
*           lshift  shift that moves the next residue under the mask
*       If newcode is non-zero and differs from curr_code, a map table
*       from curr_code to newcode is looked up; when one exists the code
*       table of spp is switched to newcode.
*
*       All of these fields are written on every call, so a SeqPort that
*       is set up a second time does not keep shifts or a map table from
*       its previous alphabet.
*
*       Returns FALSE if spp is NULL, TRUE otherwise.
*
*****************************************************************************/
NLM_EXTERN Boolean LIBCALL SeqPortSetUpAlphabet(SeqPortPtr spp, Uint1 curr_code, Uint1 newcode)
{
    if (spp == NULL) return FALSE;

    spp->oldcode = curr_code;
    spp->sctp = SeqCodeTableFind(curr_code);
    spp->smtp = NULL;               /* no conversion unless found below */

    switch (curr_code)
    {
        case Seq_code_ncbi2na:
            spp->bc = 4;            /* bit shifts needed */
            spp->rshift = 6;
            spp->lshift = 2;
            spp->mask = 192;
            break;
        case Seq_code_ncbi4na:
            spp->bc = 2;
            spp->rshift = 4;
            spp->lshift = 4;
            spp->mask = 240;
            break;
        default:                    /* one residue per byte, no shifting */
            spp->bc = 1;
            spp->rshift = 0;
            spp->lshift = 0;
            spp->mask = 255;
            break;
    }

    if ((newcode) && (newcode != curr_code))    /* conversion alphabet */
    {
        if ((spp->smtp = SeqMapTableFind(newcode, curr_code)) != NULL)
            spp->sctp = SeqCodeTableFind(newcode);
    }

    return TRUE;
}

/*****************************************************************************
*
*   SeqPortNew(bsp, start, stop, strand, newcode)
*       if bsp == NULL, creates an empty port
*       see objloc.h for strand defines
*
*****************************************************************************/
NLM_EXTERN SeqPortPtr SeqPortNew (BioseqPtr bsp, Int4 start, Int4 stop, Uint1 strand, Uint1 
newcode)

{
    SeqPortPtr spp, spps, sppcurr = NULL, spprev, prev, curr;
    Uint1 curr_code, repr, tstrand;
    SeqLocPtr the_segs, currseg;
    Int4 len, ctr, tlen, tfrom, tto, xfrom, xto, tstart, tstop;
	Char errbuf[41];
	ValNode fake;
	Boolean done, started;
	BioseqPtr tbsp;
	ValNodePtr currchunk;  /* can be a SeqLoc or an element of a Delta Seq 
*/
	Boolean do_multi_loc, cycle2;
	SeqLitPtr slitp;

    spp = (SeqPortPtr) MemNew(sizeof(SeqPort));
	errbuf[0] = '\0';

    if (bsp == NULL)     /* a NULL section */
        return spp;

    spp->bsp = bsp;					/* get ready for error 
msgs */
	SeqIdWrite(SeqIdFindBest(bsp->id, 0), errbuf, PRINTID_FASTA_SHORT, 40);
    len = BioseqGetLen(bsp);
    if (start < 0)
    {
        ErrPostEx(SEV_ERROR, 0,0  ,
				 "SeqPortNew: %s start (%ld)< 0", errbuf, 
(long)start);
        MemFree(spp);
        return NULL;
    }
    if (start >= len)
    {
        ErrPostEx(SEV_ERROR,0,0,
				 "SeqPortNew: %s start(%ld) >= len(%ld)",
					errbuf, (long)start, (long)len);
        MemFree(spp);
        return NULL;
    }
    if (stop == LAST_RESIDUE)
        stop = len - 1;
    else if (stop < start)
    {
        ErrPostEx(SEV_ERROR,0,0,
				 "SeqPortNew: %s stop(%ld) < start(%ld)",
					errbuf, (long)stop, (long)start);
        MemFree(spp);
        return NULL;
    }
    else if (stop >= len)
    {
        ErrPostEx(SEV_ERROR,0,0,
				 "SeqPortNew: %s stop(%ld) >= len(%ld)",
					errbuf, (long)stop, (long)len);
        MemFree(spp);
        return NULL;
    }

    SeqPortSetUpFields (spp, start,stop, strand, newcode);

    spp->currnum = BioseqGetSeqDescr(bsp, Seq_descr_num, NULL);
    if (spp->currnum == NULL)   /* no numbering set */
        spp->currnum = NumberingDefaultGet();   /* use default */

    repr = Bioseq_repr(bsp);
    if ((repr == Seq_repr_virtual) ||    /* virtual sequence */
		(repr == Seq_repr_map ))         /* map sequence */
    {
        spp->isa_virtual = TRUE;
        spp->curpos = 0;
    }
    else if ((repr == Seq_repr_seg) ||   /* segmented */
        (repr == Seq_repr_ref) ||     /* reference */
		(repr == Seq_repr_delta))     /* delta */
    {
        spp->oldcode = 0;        /* no code, not raw */

        if (repr == Seq_repr_seg)  /* segmented */
		{
			fake.choice = SEQLOC_MIX;   /* make SEQUENCE OF Seq-loc, 
into one */
			fake.data.ptrvalue = bsp->seq_ext;
			fake.next = NULL;
			the_segs = (SeqLocPtr)&fake;
		}
		else if (repr == Seq_repr_ref)        /* reference: is a Seq-loc 
*/
	        the_segs = (SeqLocPtr)bsp->seq_ext;

		if (repr == Seq_repr_delta)   /* chain of deltas to follow */
			currchunk = (ValNodePtr)(bsp->seq_ext);
		else                          /* seqlocs */
			currchunk = (ValNodePtr)SeqLocFindNext(the_segs, NULL);

        currseg = NULL;
		ctr = 0;
		done = FALSE;
		started = FALSE;
        while ((! done) && (currchunk != NULL))
        {
			do_multi_loc = FALSE;
			cycle2 = TRUE;     /* only really needed for complicated 
delta seq locs */
			currseg = NULL;
			if (repr == Seq_repr_delta)
			{
				if (currchunk->choice == 1)  /* it's a SeqLocPtr 
*/
				{
					currseg = 
(SeqLocPtr)(currchunk->data.ptrvalue);
					if (! IS_one_loc(currseg, FALSE)) /* 
don't do complicated cases here */
					{
						do_multi_loc = TRUE;
						currseg = 
SeqLocFindNext((SeqLocPtr)(currchunk->data.ptrvalue), NULL);
					}
				}
				else                         /* it's a SeqLitPtr 
*/
				{
					currseg = NULL;
					slitp = 
(SeqLitPtr)(currchunk->data.ptrvalue);
					tlen = slitp->length;
					tstrand = Seq_strand_plus;
					tfrom = 0;
					tto = tlen - 1;
				}
			}
			else
				currseg = (SeqLocPtr)currchunk;

			while (cycle2)   /* normally once, except for 
complicated delta locs */
			{
				if (currseg != NULL)   /* for segs and deltas of 
type loc */
				{
					tlen = SeqLocLen(currseg);
					tstrand = SeqLocStrand(currseg);
					tfrom = SeqLocStart(currseg);
					tto = SeqLocStop(currseg);
				}
		
				if (! started)
				{
					if ((ctr + tlen - 1) >= start)
					{
						tstart = start - ctr;
						started = TRUE;
					}
					else
						tstart = -1;
				}
				else
					tstart = 0;

				if (tstart >= 0)   /* have a start */
				{
					if ((ctr + tlen - 1) >= stop)
					{
						done = TRUE;   /* hit the end */
						tstop = ((ctr + tlen - 1) - 
stop);
					}
					else
						tstop = 0;

					if (tstrand == Seq_strand_minus)
					{
						xfrom = tfrom + tstop;
						xto = tto - tstart;
					}
					else
					{
						xfrom = tfrom + tstart;
						xto = tto - tstop;
					}

					if (currseg != NULL)    /* working off 
locs */
					{
						tbsp = 
BioseqLockById(SeqLocId(currseg));
		
	    				spps = SeqPortNew(tbsp, xfrom, xto, 
tstrand, newcode);
						if (currseg->choice == 
SEQLOC_NULL)
							spps->isa_null = TRUE;
					}
					else
					{
						spps = (SeqPortPtr) MemNew(sizeof(SeqPort));
						SeqPortSetUpFields (spps, xfrom, 
xto, tstrand, newcode);
						SeqPortSetUpAlphabet(spps, 
slitp->seq_data_type, newcode);
						if (slitp->seq_data != NULL)
							spps->bp = 
slitp->seq_data;
						else
							spps->isa_virtual = 
TRUE;
						
					}
		
			    	if (spps == NULL)
				    {
					    ErrPostEx(SEV_ERROR,0,0,
						 "SeqPortNew: %s unexpected null during recursion", 
						 		errbuf);
	            	    return NULL;
		            }

					if (currseg != NULL)
						spps->locked = TRUE;

    			    if (sppcurr == NULL)
        			    spp->segs = spps;
	            	else
		            	sppcurr->next = spps;
			        sppcurr = spps;
				}

				ctr += tlen;

				if (! do_multi_loc)
					cycle2 = FALSE;
				else
				{
					currseg = 
SeqLocFindNext((SeqLocPtr)(currchunk->data.ptrvalue), currseg);
					if (currseg == NULL)
						cycle2 = FALSE;
				}
			}

			if (repr == Seq_repr_delta)
				currchunk = currchunk->next;
			else
				currchunk = SeqLocFindNext(the_segs, currchunk);
        }
        if (strand == Seq_strand_minus)  /* reverse seqport order */
        {
            prev = spp->segs;
            spprev = spp->segs;
            spp->segs = NULL;
            sppcurr = NULL;
            while (prev != NULL)
            {
                curr = spprev;
                prev = NULL;
                while (curr->next != NULL)  /* end of chain */
                {
                    prev = curr;
                    curr = curr->next;
                }
                if (prev != NULL)
                    prev->next = NULL;
                if (sppcurr == NULL)
                    spp->segs = curr;
                else
                    sppcurr->next = curr;
                sppcurr = curr;
            }
            curr->next = NULL;   /* last one in chain */
        }
        spp->curr = spp->segs;

		  if (! started)   /* nothing found */
		  {
		  	 ErrPostEx(SEV_ERROR,0,0,"SeqPortNew: no data found for %s", 
		  	 			errbuf);
			 return SeqPortFree(spp);
		  }
    }
    else if ((repr == Seq_repr_raw) ||   /* sequence not by reference */
        (repr == Seq_repr_const))
    {
        curr_code = BioseqGetCode(bsp);

		SeqPortSetUpAlphabet(spp, curr_code, newcode);
		spp->bp = bsp->seq_data;
    }

    SeqPortSeek(spp, 0, SEEK_SET);
    return spp;
}

/*****************************************************************************
*
*   SeqPortNewByLoc(loc, code)
*       builds a new seqport based on a SeqLoc
*
*       code is the alphabet residues should be delivered in.
*
*       By type of loc:
*         whole                  port over the entire Bioseq
*         int, pnt, packed-pnt   port over SeqLocStart..SeqLocStop on the
*                                strand given by SeqLocStrand
*         null                   empty port flagged isa_null
*         packed-int, mix        parent port with one child port per
*                                location returned by SeqLocFindNext
*         empty, equiv, bond     not handled, returns NULL
*         feat                   not supported, posts an error, returns NULL
*
*       The Bioseq is locked with BioseqLockById(); on success the port is
*       flagged locked so SeqPortFree() unlocks it, on failure it is
*       unlocked here.
*
*       Returns NULL if loc is NULL, the Bioseq cannot be locked, or any
*       port cannot be built. The returned port is positioned at offset 0.
*
*****************************************************************************/
NLM_EXTERN SeqPortPtr SeqPortNewByLoc (SeqLocPtr loc, Uint1 code)

{
    BioseqPtr bsp = NULL;
    SeqPortPtr spp = NULL, sppcurr, spps;
    Int4 start = FIRST_RESIDUE, stop = LAST_RESIDUE;   /* whole Bioseq */
    Uint1 strand = 0;
    SeqLocPtr currloc = NULL;
    CharPtr locptr, currlocptr;

    if (loc == NULL)
        return spp;

                   /* get the needed components */

    switch (loc->choice)
    {
        case SEQLOC_INT:      /* int */
        case SEQLOC_PNT:      /* pnt */
        case SEQLOC_PACKED_PNT:      /* packed-pnt   */
            start = SeqLocStart(loc);
            stop = SeqLocStop(loc);
            strand = SeqLocStrand(loc);
            /* fall through: these need the Bioseq as well */
        case SEQLOC_WHOLE:      /* whole */
            bsp = BioseqLockById(SeqLocId(loc));  /* need the bioseq now */
            if (bsp == NULL)
                return NULL;    /* can't do it */
            break;
        default:
            break;
    }

    switch (loc->choice)
    {
        case SEQLOC_EMPTY:      /* empty */
        case SEQLOC_EQUIV:     /* equiv */
        case SEQLOC_BOND:      /* bond */
            break;

        case SEQLOC_NULL:      /* null */
            spp = SeqPortNew(NULL, FIRST_RESIDUE, LAST_RESIDUE, 0, code);
            if (spp != NULL)
                spp->isa_null = TRUE;
            break;

        case SEQLOC_WHOLE:      /* whole: start/stop/strand keep defaults */
        case SEQLOC_INT:      /* int */
        case SEQLOC_PNT:      /* pnt */
        case SEQLOC_PACKED_PNT:      /* packed-pnt   */
            spp = SeqPortNew(bsp, start, stop, strand, code);
            if (spp != NULL)
                spp->locked = TRUE;    /* SeqPortFree() will unlock */
            else
                BioseqUnlock(bsp);
            break;

        case SEQLOC_PACKED_INT:      /* packed seqint */
        case SEQLOC_MIX:      /* mix */
            spp = (SeqPortPtr) MemNew(sizeof(SeqPort));
            if (spp == NULL)         /* out of memory */
                return NULL;
            spp->totlen = SeqLocLen(loc);
            spp->start = 0;
            spp->stop = spp->totlen - 1;
            spp->curpos = -1;    /* not set */
            spp->currnum = NULL;   /* use numbering from parts */
            currloc = NULL;
            sppcurr = NULL;
            while ((currloc = SeqLocFindNext(loc, currloc)) != NULL)
            {
                spps = SeqPortNewByLoc(currloc, code);
                if (spps == NULL)
                {
                    locptr = SeqLocPrint(loc);
                    currlocptr = SeqLocPrint(currloc);
                    ErrPostEx(SEV_ERROR, 0,0  ,
        "SeqPortNewByLoc unexpected null during recursion [loc=%s][curr=%s]",
                    locptr, currlocptr);
                    MemFree(locptr);
                    MemFree(currlocptr);
                    SeqPortFree(spp);   /* also frees the children so far */
                    return NULL;
                }
                if (sppcurr == NULL)
                    spp->segs = spps;
                else
                    sppcurr->next = spps;
                sppcurr = spps;
            }
            spp->curr = spp->segs;
            break;

        case SEQLOC_FEAT:
            ErrPostEx(SEV_ERROR, 0,0  ,
                 "SeqPortNewByLoc: Seq-loc.feat not supported");
            break;

        default:
            break;
    }

    SeqPortSeek(spp, 0, SEEK_SET);   /* harmless when spp is NULL */

    return spp;
}

/*****************************************************************************
*
*   SeqPortSeek(spp, offset, origin)
*       works like fseek()
*           returns 0 on success   (weird but true)
*           non-zero on fail
*       uses coordinates 0-(len - 1)  no matter what region seqport covers
*
*       origin is SEEK_SET, SEEK_CUR or SEEK_END. A target equal to the
*       port length is legal and parks the port at end of file.
*
*       For ports with sequence data a cache of up to 100 bytes is
*       (re)filled around the target and the first byte is prepared so
*       SeqPortGetResidue() can unpack residues from it; on the minus
*       strand the residues inside the byte are put in reverse order.
*       For segmented ports the child port holding the target is located
*       and positioned recursively.
*
*       Fails when spp is NULL, origin is unknown, the target is out of
*       range, the cache cannot be allocated, or no child port covers the
*       target.
*
*****************************************************************************/
NLM_EXTERN Int2 SeqPortSeek (SeqPortPtr spp, Int4 offset, Int2 origin)

{
    Int4 sp, curpos, left, pos, lim, diff;
    Boolean plus_strand;
    Uint1 the_byte, the_residue;
    Int2 bitctr;
    SeqPortPtr curspp;
    Uint1Ptr buf;
    SPCachePtr spcp;

    if (spp == NULL)
        return 1;

    spp->eos = FALSE;   /* unset flag set when moving off segment */

                                /* get position as positive offset from 0 */
    if (spp->strand == Seq_strand_minus)
        plus_strand = FALSE;
    else
        plus_strand = TRUE;

    sp = spp->curpos;    /* current offset, 0 - (totlen - 1)  */
    switch (origin)
    {
        case SEEK_SET:
            if ((offset > spp->totlen) || (offset < 0))
                return 1;
            sp = offset;
            break;
        case SEEK_CUR:
            /* NOTE(review): on a circular port an out-of-range request   */
            /* does not apply offset at all; only the current position is */
            /* normalised by the loops below -- confirm this is intended. */
            if (((sp + offset) > spp->totlen) ||
                ((sp + offset) < 0 ))
            {
                if (! spp->is_circle)
                    return 1;
            }
            else
                sp += offset;
            if (spp->is_circle)
            {
                while (sp >= spp->totlen)   /* circle adjustments */
                    sp -= spp->totlen;
                while (sp < 0)
                    sp += spp->totlen;
            }
            break;
        case SEEK_END:
            if ((ABS(offset) > spp->totlen) || (offset > 0))
                return 1;
            sp = spp->totlen + offset;
            break;
        default:
            return 1;
    }

    if (sp == spp->curpos)     /* already in right position */
        return 0;

    if (sp == spp->totlen)    /* seek to EOF */
    {
        spp->curpos = sp;
        spp->byte = SEQPORT_EOF;    /* set to nothing */
        return 0;
    }

    if (spp->oldcode)       /* has data, is raw or const type */
    {
        if (spp->cache == NULL)     /* allocate a cache */
        {
            spp->cache = (SPCachePtr)MemNew(sizeof(SPCache));
            if (spp->cache == NULL)     /* out of memory */
                return 1;
        }
        spcp = spp->cache;
        buf = spcp->buf;

        if (plus_strand)
        {
            /* cache runs forward from the target byte; bytepos records */
            /* the last byte held                                        */
            curpos = sp + spp->start;
            pos = curpos / (Int4) (spp->bc);
            lim = spp->stop / (Int4) (spp->bc);
            diff = lim - pos + 1;
            if (diff > 100)
            {
                diff = 100;
                lim = pos + diff - 1;
            }
            BSSeek(spp->bp, pos, SEEK_SET);
            spcp->total = (Int2) BSRead(spp->bp, (VoidPtr)buf, diff);
            spcp->ctr = 0;
            spp->bytepos = lim;
        }
        else
        {
            /* cache ends at the target byte and is consumed backwards; */
            /* bytepos records the first byte held                       */
            curpos = spp->stop - sp;
            pos = curpos / (Int4) (spp->bc);
            lim = spp->start / (Int4) (spp->bc);
            diff = pos - lim + 1;
            if (diff > 100)
            {
                diff = 100;
                lim = pos - diff + 1;
            }
            BSSeek(spp->bp, lim, SEEK_SET);
            spcp->total = (Int2) BSRead(spp->bp, (VoidPtr)buf, diff);
            spcp->ctr = (Int2)(diff - 1);
            spp->bytepos = lim;
        }
        left = curpos % (Int4) (spp->bc);   /* residues to skip inside byte */
        the_byte = spcp->buf[spcp->ctr];
        if ((plus_strand) || (spp->bc == 1))
            the_residue = the_byte;
        else        /* reverse compressed bit orders */
        {
            left = spp->bc - 1 - left;
            the_residue = 0;
            bitctr = spp->bc;
            while (bitctr)
            {
                the_residue |= the_byte & spp->mask;
                bitctr--;
                if (bitctr)
                {
                    the_residue >>= spp->lshift;
                    the_byte <<= spp->lshift;
                }
            }
        }
        bitctr = spp->bc;
        while (left)        /* shift the wanted residue under the mask */
        {
            the_residue <<= spp->lshift;
            left--; bitctr--;
        }
        spp->byte = the_residue;
        spp->bitctr = (Uint1) bitctr;
        spp->curpos = sp;
        return 0;
    }
    else if ((spp->isa_virtual) || (spp->isa_null))   /* virtual or NULL */
    {
        spp->curpos = sp;
        return 0;
    }
    else                    /* segmented, reference sequences */
    {

        if (spp->backing)  /* check for backing off segment */
        {
            if (spp->curr->curpos == 1)  /* yup */
            {
                spp->curr->curpos = -1;  /* just set the flag */
                spp->curpos -= 2;
                return 0;                /* no eos needed, -1 will do */
            }
        }

        /* walk the child ports until the one containing sp is found */
        curpos = 0;
        curspp = spp->segs;
        if (curspp == NULL) return 1;
        while ((curpos + curspp->totlen) <= sp)
        {
            curpos += curspp->totlen;
            curspp = curspp->next;
            if (curspp == NULL)
                return 1;
        }
        if (plus_strand)
            curpos = sp - curpos;
        else
            curpos = (curspp->totlen - 1) - (sp - curpos);
        curspp->backing = spp->backing;
        if (! SeqPortSeek(curspp, curpos, SEEK_SET))
        {
            curspp->backing = FALSE;
            spp->curr = curspp;
            spp->curpos = sp;
            return 0;
        }
        else
        {
            curspp->backing = FALSE;
            return 1;
        }
    }
}

/*****************************************************************************
*
*   Int4 SeqPortTell(spp)
*       Returns the current position recorded in the port (spp->curpos),
*       or -1 if spp is NULL.
*
*****************************************************************************/
NLM_EXTERN Int4 SeqPortTell (SeqPortPtr spp)

{
    Int4 where = -1L;

    if (spp != NULL)
        where = spp->curpos;

    return where;
}

/*****************************************************************************
*
*   SeqPortGetResidue(spp)
*       returns residue at current location in requested coding
*       and advances the port by one position
*
*       Values that are not residues:
*         SEQPORT_EOF      end of file: spp is NULL, has a code but no
*                          data, has no segments, or has run off either
*                          end of a non-circular port
*         SEQPORT_EOS      end of segment: returned on a port with is_seg
*                          set when reading moves to another segment or
*                          wraps around a circular port
*         SEQPORT_VIRT     NULL interval, or virtual sequence when
*                          do_virtual is not set
*         INVALID_RESIDUE  stored value is not part of the alphabet (or
*                          no code table is available to check it)
*
*       With do_virtual set, virtual sequence is returned as the gap
*       code from GetGapCode().
*       On the minus strand the residue is complemented.
*
*****************************************************************************/
NLM_EXTERN Uint1 SeqPortGetResidue (SeqPortPtr spp)

{
    Uint1 residue, the_byte, the_residue, the_code;
    Boolean plus_strand = TRUE, moveup;
    Int2 bitctr, index;
    Int4 pos, lim, diff;
    SPCachePtr spcp;
    SeqPortPtr tmp, prev;

    if ((spp == NULL) || ((spp->bp == NULL) && (spp->oldcode)))
        return SEQPORT_EOF;

    if (spp->isa_null)  /* NULL interval */
        return SEQPORT_VIRT;

    if (spp->eos)       /* end of reverse complement spp */
        return SEQPORT_EOF;

    if (spp->curpos == spp->totlen)
    {
        if (spp->is_circle)
        {
            SeqPortSeek(spp, 0, SEEK_SET);  /* go to start */
            if (spp->is_seg)   /* give EOS? */
                return SEQPORT_EOS;
        }
        else
            return SEQPORT_EOF;         /* EOF really */
    }

    if (spp->curpos == -1)      /* backed off end */
    {
        if (spp->is_circle)
        {
            SeqPortSeek(spp, -1, SEEK_END);  /* go to end */
            if (spp->is_seg)   /* give EOS? */
                return SEQPORT_EOS;
        }
        else
            return SEQPORT_EOF;         /* EOF really */
    }

    if (spp->strand == Seq_strand_minus)
        plus_strand = FALSE;

    if (spp->oldcode)    /* its a raw or const sequence */
    {
        /* take the residue under the mask, then move the next one there */
        residue = spp->byte & spp->mask;
        residue >>= spp->rshift;
        spp->byte <<= spp->lshift;
        spp->bitctr--;
        if (spp->curpos < (spp->totlen - 1))  /* curpos not incremented yet */
        {
            if (spp->bitctr == 0)   /* byte used up: fetch the next one */
            {
                spcp = spp->cache;
                if (! plus_strand) /* need previous byte */
                {
                    spcp->ctr--;
                    if (spcp->ctr < 0)   /* cache used up: refill backwards */
                    {
                        pos = spp->bytepos - 1;
                        lim = spp->start / (Int4)(spp->bc);
                        diff = pos - lim + 1;
                        if (diff > 100)
                        {
                            diff = 100;
                            lim = pos - 100 + 1;
                        }
                        BSSeek(spp->bp, lim, SEEK_SET);
                        spcp->total = (Int2)BSRead(spp->bp, (VoidPtr)(spcp->buf), diff);
                        spcp->ctr = (Int2)(diff - 1);
                        spp->bytepos = lim;
                    }
                }
                else                /* need next byte */
                {
                    spcp->ctr++;
                    if (spcp->ctr >= spcp->total)   /* refill forwards */
                    {
                        pos = spp->bytepos + 1;
                        lim = spp->stop / (Int4)(spp->bc);
                        diff = lim - pos + 1;
                        if (diff > 100)
                        {
                            diff = 100;
                            lim = pos + diff - 1;
                        }
                        BSSeek(spp->bp, pos, SEEK_SET);
                        spcp->total = (Int2)BSRead(spp->bp, (VoidPtr)(spcp->buf), diff);
                        spcp->ctr = 0;
                        spp->bytepos = lim;
                    }
                }
                the_byte = spcp->buf[spcp->ctr];

                if ((plus_strand) || (spp->bc == 1))
                    the_residue = the_byte;
                else        /* reverse compressed bit orders */
                {
                    the_residue = 0;
                    bitctr = spp->bc;
                    while (bitctr)
                    {
                        the_residue |= the_byte & spp->mask;
                        bitctr--;
                        if (bitctr)
                        {
                            the_residue >>= spp->lshift;
                            the_byte <<= spp->lshift;
                        }
                    }
                }
                spp->byte = the_residue;
                spp->bitctr = spp->bc;
            }
        }

        if (spp->smtp == NULL)   /* no conversion, check now */
        {
            if (spp->sctp == NULL)   /* no table to validate against */
                residue = INVALID_RESIDUE;
            else
            {
                index = (Int2)residue - (Int2)(spp->sctp->start_at);
                if ((index < 0) || (index >= (Int2)(spp->sctp->num)))
                    residue = INVALID_RESIDUE;
                else if (*(spp->sctp->names[index]) == '\0')
                    residue = INVALID_RESIDUE;
            }
        }
    }
    else if (spp->isa_virtual)  /* virtual */
    {
        if (spp->do_virtual)
        {
            if (spp->newcode)
                the_code = spp->newcode;
            else
                the_code = spp->oldcode;
            residue = GetGapCode (the_code);
            spp->curpos++;
            return residue;
        }
        else
        {
            return SEQPORT_VIRT;
        }
    }
    else              /* segmented or reference sequence */
    {
        if (spp->curr == NULL)   /* no segments to read from */
            return SEQPORT_EOF;

        while (! IS_residue((residue = SeqPortGetResidue(spp->curr))))
        {
            spp->curr->eos = FALSE;   /* just in case was set */
            moveup = FALSE;

            switch (residue)
            {
                case SEQPORT_VIRT:
                case SEQPORT_EOS:
                    if (spp->curr->segs == NULL)  /* this did not come up a layer */
                        moveup = TRUE;
                    break;
                case SEQPORT_EOF:
                    moveup = TRUE;
                    break;
                default:
                    break;
            }

            if (moveup)
            {
                if ((spp->curr->curpos == -1) && (! spp->curr->eos))   /* moving backwards, many layers deep */
                {
                    prev = NULL;
                    for (tmp = spp->segs; tmp != spp->curr; tmp = tmp->next)
                        prev = tmp;
                    if (prev != NULL)
                        spp->curr = prev;
                    else if (spp->is_circle)  /* go to end */
                    {
                        for (tmp = spp->segs; tmp->next != NULL; tmp = tmp->next)
                            continue;
                        spp->curr = tmp;
                    }
                    else
                        return SEQPORT_EOF;

                    if (! plus_strand)
                        SeqPortSeek(spp->curr, 0, SEEK_SET);
                    else if (! (spp->curr->isa_null))
                        SeqPortSeek(spp->curr, -1, SEEK_END);
                    else
                        spp->curr->curpos = -1;   /* flag the null for next time around */
                }
                else                           /* moving forwards */
                {
                    if (spp->curr->next != NULL)
                        spp->curr = spp->curr->next;
                    else if (spp->is_circle)
                        spp->curr = spp->segs;
                    else
                        return SEQPORT_EOF;

                    if (plus_strand)
                        SeqPortSeek(spp->curr, 0, SEEK_SET);
                    else
                        SeqPortSeek(spp->curr, -1, SEEK_END);
                }

                if (spp->is_seg)
                    return SEQPORT_EOS;
            }

            if ((residue == SEQPORT_VIRT) || (residue == INVALID_RESIDUE))
                return residue;
        }

        if (! plus_strand)
        {
            /* the child advanced by one while reading; step it back two */
            /* so it sits on the residue that comes next on this strand  */
            spp->curr->backing = TRUE;     /* signal we are backing up */
            if (SeqPortSeek(spp->curr, -2, SEEK_CUR))  /* back up to "next" */
                spp->curr->eos = TRUE;
            spp->curr->backing = FALSE;
        }
    }

    if (spp->smtp != NULL)
        residue = SeqMapTableConvert(spp->smtp, residue);

    if (! plus_strand)
        residue = SeqCodeTableComp(spp->sctp, residue);

    spp->curpos++;
    return residue;
}

/*****************************************************************************
*
*   GetGapCode(seqcode)
*   	returns code to use for virtual sequence residues for sequence
*         code seqcode
*           iupacna                   'N'
*           iupacaa, ncbieaa          'X'
*           ncbi4na, ncbi8na          15
*           ncbistdaa                 21
*       returns INVALID_RESIDUE if seqcode invalid, and also for the
*         codes that have no gap value here: ncbi2na (there isn't
*         ambiguity), iupacaa3 (no 1 letter character), ncbipna, ncbipaa
*
*****************************************************************************/
NLM_EXTERN Uint1 GetGapCode (Uint1 seqcode)
{
    switch (seqcode)
    {
        case Seq_code_iupacna:
            return 'N';

        case Seq_code_iupacaa:
        case Seq_code_ncbieaa:
            return 'X';

        case Seq_code_ncbi4na:
        case Seq_code_ncbi8na:
            return 15;

        case Seq_code_ncbistdaa:
            return 21;

        default:       /* ncbi2na, iupacaa3, ncbipna, ncbipaa, unknown */
            break;
    }

    return INVALID_RESIDUE;
}


/*****************************************************************************
*
*   SeqPortRead(spp, buf, len)
*       copies up to len residues from spp into buf and returns how many
*         were stored
*       returns 0 if spp or buf is NULL or len <= 0
*       if returns a negative number, then ABS(return value) gives the
*         same codes as SeqPortGetResidue for EOF or EOS
*       a non-residue value met after at least one residue was stored is
*         remembered in spp->lastmsg and reported (negated) by the next call
*
*****************************************************************************/
NLM_EXTERN Int2 SeqPortRead (SeqPortPtr spp, Uint1Ptr buf, Int2 len)

{
	Int2 nread;
	Int2 status;
	Uint1 value;

	if ((spp == NULL) || (buf == NULL) || (len <= 0))
		return 0;

	if (spp->lastmsg)    /* deliver the message saved by the previous call */
	{
		status = spp->lastmsg;
		spp->lastmsg = 0;
		status *= -1;
		return status;
	}

	for (nread = 0; nread < len; nread++)
	{
		value = SeqPortGetResidue(spp);
		if (IS_residue(value))
		{
			buf[nread] = value;
			continue;
		}

		if (nread == 0)      /* nothing stored yet: report right away */
		{
			status = value;
			status *= -1;
			return status;
		}

		spp->lastmsg = value;   /* report it on the next call instead */
		break;
	}

	return nread;
}

/* Forward declaration: AAForCodon() is defined further down in this file but
   is called by ProteinFromCdRegionEx() before that definition. */
NLM_EXTERN Uint1 AAForCodon (Uint1Ptr codon, CharPtr codes);

/*****************************************************************************
*
*   ProteinFromCdRegion(sfp, include_stop)
*   	produces a ByteStorePtr containing the protein sequence in
*   ncbieaa code for the CdRegion sfp.  If include_stop, will translate
*   through stop codons.  If NOT include_stop, will stop at first stop
*   codon and return the protein sequence NOT including the terminating
*   stop.  Supports reading frame, alternate genetic codes, and code breaks
*   in the CdRegion. Removes trailing "X" from partial translation.
*
*   Simply forwards to ProteinFromCdRegionEx() with remove_trailingX set
*   to TRUE.
*
*****************************************************************************/
NLM_EXTERN ByteStorePtr ProteinFromCdRegion(SeqFeatPtr sfp, Boolean include_stop)
{
	Boolean strip_trailing_x = TRUE;   /* this entry point always trims */
	ByteStorePtr protein;

	protein = ProteinFromCdRegionEx(sfp, include_stop, strip_trailing_x);
	return protein;
}

/*******************************************************************************
*	
*	ProteinFromCdRegionEx( SeqFeatPtr sfp, Boolean include_stop, Boolean remove_trailingX)
*		same behavior as ProteinFromCdRegion, but another Boolean remove_trailingX
*	specifies whether trailing X's should be removed. 
*
*	Returns NULL if sfp is NULL, is not a CdRegion feature (data.choice != 3),
*	has no CdRegion data, if no usable genetic code or SeqPort can be obtained,
*	if an allocation fails, or if the resulting protein is empty.
*	The ByteStore is created here with BSNew() and handed to the caller.
*
********************************************************************************/

NLM_EXTERN ByteStorePtr ProteinFromCdRegionEx(SeqFeatPtr sfp, Boolean include_stop, Boolean remove_trailingX)
{
	SeqPortPtr spp = NULL;
	ByteStorePtr bs = NULL;
	Uint1 residue;
	Int4 pos1, pos2, pos, len;
	Int4Ptr the_breaks = NULL;
	Uint1Ptr the_residues = NULL;
	Int2 num_code_break = 0, use_break;
	SeqLocPtr tmp;
	Int2 i;
	Uint1 codon[3], aa;
	CdRegionPtr crp;
	ValNodePtr vnp;
	GeneticCodePtr gcp;
	CharPtr vals, codes;
	CodeBreakPtr cbp;
	Boolean bad_base, no_start, check_start, got_stop;
	Uint2 part_prod = 0, part_loc = 0;
	Boolean incompleteLastCodon;

	if ((sfp == NULL) || (sfp->data.choice != 3))
		return NULL;

	crp = (CdRegionPtr) sfp->data.value.ptrvalue;
	if (crp == NULL)          /* feature claims to be a CdRegion but has no data */
		return NULL;

	/* Collect the code breaks as (offset in coding region, residue) pairs. */
	num_code_break = 0;
	if (crp->code_break != NULL)
	{
		cbp = crp->code_break;
		while (cbp != NULL)
		{
			num_code_break++;
			cbp = cbp->next;
		}
		the_breaks = (Int4Ptr) MemNew((size_t)(num_code_break * sizeof(Int4)));
		the_residues = (Uint1Ptr) MemNew((size_t)(num_code_break * sizeof(Uint1)));
		if ((the_breaks == NULL) || (the_residues == NULL))
			goto erret;       /* both arrays are indexed below */

		num_code_break = 0;
		cbp = crp->code_break;
		while (cbp != NULL)
		{
			pos1 = INT4_MAX;
			pos2 = -10;
			tmp = NULL;
			while ((tmp = SeqLocFindNext(cbp->loc, tmp)) != NULL)
			{
				pos = GetOffsetInLoc(tmp, sfp->location, SEQLOC_START);
				if (pos < pos1)
					pos1 = pos;
				pos = GetOffsetInLoc(tmp, sfp->location, SEQLOC_STOP);
				if (pos > pos2)
					pos2 = pos;
			}
			if ((pos2 - pos1) == 2)   /*  a codon */
			{
				the_breaks[num_code_break] = pos1;
				the_residues[num_code_break] = (Uint1) cbp->aa.value.intvalue;
				num_code_break++;
			}
			else
			{
				ErrPost(CTX_NCBIOBJ, 1, "Invalid Code-break.loc");
			}

			cbp = cbp->next;
		}
	}

	/* Find the genetic code named by the CdRegion, by name or by id. */
	gcp = NULL;
	if (crp->genetic_code != NULL)
	{
		vnp = (ValNodePtr)(crp->genetic_code->data.ptrvalue);
		while ((vnp != NULL) && (gcp == NULL))
		{
			switch (vnp->choice)
			{
			case 1:   /* name */
				gcp = GeneticCodeFind(0, (CharPtr)vnp->data.ptrvalue);
				break;
			case 2:   /* id */
				gcp = GeneticCodeFind(vnp->data.intvalue, NULL);
				break;
			case 3:   /* ncbieaa */
			case 6:   /* sncbieaa */
			case 4:   /* ncbi8aa */
			case 5:	  /* ncbistdaa */
			case 7:   /* sncbi8aa */
			case 8:   /* sncbistdaa */
			default:
				break;
			}
			vnp = vnp->next;
		}
	}
	if (gcp == NULL)
		gcp = GeneticCodeFind(1, NULL);   /* use universal */
	if (gcp == NULL)
		goto erret;

	/* vals = table used for the first codon, codes = table for the rest */
	vals = NULL;
	codes = NULL;
	for (vnp = (ValNodePtr)gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next)
	{
		if (vnp->choice == 6)   /* sncbieaa */
			vals = (CharPtr)vnp->data.ptrvalue;
		else if (vnp->choice == 3)  /* ncbieaa */
			codes = (CharPtr)vnp->data.ptrvalue;
	}
	if (codes == NULL)
		goto erret;

	no_start = FALSE;
	part_loc = SeqLocPartialCheck(sfp->location);
	part_prod = SeqLocPartialCheck(sfp->product);
	if ((part_loc & SLP_START) || (part_prod & SLP_START))
		no_start = TRUE;

	if ((vals == NULL) || (no_start) || (crp->frame > 1))  /* no special starts */
	{
		vals = codes;
		check_start = FALSE;
	}
	else
		check_start = TRUE;

	spp = SeqPortNewByLoc(sfp->location, Seq_code_ncbi4na);
	if (spp == NULL)
		goto erret;

	len = SeqLocLen(sfp->location);    /* size of coding region */
	len /= 3;                          /* size of protein */
	len += 1;                          /* allow partial codon at end */
	bs = BSNew(len);
	if (bs == NULL)
		goto erret;

	if (crp->frame == 2)     /* skip partial first codon */
		pos = 1;
	else if (crp->frame == 3)
		pos = 2;
	else
		pos = 0;
	SeqPortSeek(spp, pos, SEEK_SET);
	got_stop = FALSE;

	incompleteLastCodon = FALSE;

	do
	{
		/* does a code break start at this codon? */
		use_break = -1;
		for (i = 0; i < num_code_break; i++)
		{
			if (pos == the_breaks[i])
			{
				use_break = i;
				i = num_code_break;
			}
		}

		bad_base = FALSE;
		for (i = 0; i < 3; i++)
		{
			residue = SeqPortGetResidue(spp);
			if (residue == SEQPORT_EOF)
				break;
			if (residue == INVALID_RESIDUE)
				bad_base = TRUE;
			codon[i] = residue;
		}
		if (! i)   /* no bases */
			break;
		while (i < 3)      /* incomplete last codon */
		{
			codon[i] = 15;   /* N */
			i++;
			incompleteLastCodon = TRUE;
		}

		pos += 3;
		if (use_break >= 0)
			aa = the_residues[use_break];
		else if (bad_base)
			aa = 'X';
		else
		{
			aa = AAForCodon(codon, vals);
			if (check_start)   /* first codon on possibly complete CDS */
			{
				if (aa == '-')   /* invalid start */
				{
				    /* if no explict partial at either end, but feature is */
				    /* annotated as partial, then guess should use internal */
				    /* amino acid code */

					if ((! ((part_loc & SLP_STOP) || (part_prod & SLP_STOP))) &&
						(sfp->partial))
						aa = AAForCodon(codon, codes);  /* get internal aa */
				}
				check_start = FALSE;
			}
		}

		if ((! include_stop) && (aa == '*'))
		{
			got_stop = TRUE;
			break;
		}

		BSPutByte(bs, (Int2)aa);

		vals = codes;     /* not a start codon anymore */

	} while (residue != SEQPORT_EOF);

	if ((! got_stop) && incompleteLastCodon) {
		BSSeek(bs, -1, SEEK_END);  /* remove last X if incomplete last codon */
		aa = (Uint1)BSGetByte(bs);
		if ((aa == 'X') && (BSLen(bs)))
		{
			BSSeek(bs, -1, SEEK_END);
			BSDelete(bs, 1);
			BSSeek(bs, -1, SEEK_END);
		}
	}
	if ((! got_stop) && remove_trailingX)   /* only remove trailing X on partial CDS */
	{
		BSSeek(bs, -1, SEEK_END);  /* back up to last residue */
		aa = (Uint1)BSGetByte(bs);
		while ((aa == 'X') && (BSLen(bs)))
		{
			BSSeek(bs, -1, SEEK_END);
			BSDelete(bs, 1);
			BSSeek(bs, -1, SEEK_END);
			aa = (Uint1)BSGetByte(bs);
		}
	}

	if (! BSLen(bs)) goto erret;

ret:
	/* common exit: release the work areas, return whatever is in bs */
	SeqPortFree(spp);
	MemFree(the_breaks);
	MemFree(the_residues);
	return bs;
erret:
	/* failure: drop any partial result, then share the cleanup above */
	bs = BSFree(bs);
	goto ret;
}

/*****************************************************************************
*
*   Uint1 AAForCodon (Uint1Ptr codon, CharPtr codes)
*   	codon is 3 values in ncbi4na code
*       codes is the genetic code array to use
*          MUST have 'X' as unknown amino acid
*
*       Every concrete codon allowed by the (possibly ambiguous) bases is
*       looked up in codes. If they all give the same entry that entry is
*       returned; as soon as two differ, or an entry is 'X', 'X' is
*       returned. Returns 0 if no lookup took place (a base with none of
*       the four bits set).
*
*****************************************************************************/
NLM_EXTERN Uint1 AAForCodon (Uint1Ptr codon, CharPtr codes)
{
	static Uint1 base_bit[4] = { 8,     /* T in ncbi4na */
	                             2,     /* C */
	                             1,     /* A */
	                             4 };   /* G */
	Uint1 result = 0, candidate;
	int first, second, third;

	for (first = 0; first < 4; first++)
	{
		if (! (codon[0] & base_bit[first]))
			continue;

		for (second = 0; second < 4; second++)
		{
			if (! (codon[1] & base_bit[second]))
				continue;

			for (third = 0; third < 4; third++)
			{
				if (! (codon[2] & base_bit[third]))
					continue;

				candidate = codes[(first * 16) + (second * 4) + third];
				if (! result)
					result = candidate;
				else if (candidate != result)
					result = 'X';       /* expansions disagree */

				if (result == 'X')      /* nothing can change it anymore */
					return result;
			}
		}
	}

	return result;
}

/* Indexed by an ncbi2na base value (entries annotated A, C, G, T in that
   order); each entry is that base's position in the T, C, A, G ordering used
   for genetic code tables (the same ordering as mapping[] in AAForCodon).
   Used in both directions by IndexForCodon() and CodonForIndex(). */
static	Uint1 codon_xref [4] = {   /* mapping from NCBI2na to codon codes */
		2,  /* A */
		1,  /* C */
		3,  /* G */
		0 }; /* T */

/*****************************************************************************
*
*   Uint1 IndexForCodon (codon, code)
*   	returns index into genetic codes codon array, given 3 bases of the
*       codon in any alphabet
*       codon - array of 3 residues expressed in alphabet code
*       returns INVALID_RESIDUE on failure: codon is NULL, no map table
*       between code and ncbi2na is found, or a base does not convert to
*       one of the four ncbi2na values
*   
*****************************************************************************/
NLM_EXTERN Uint1 IndexForCodon (Uint1Ptr codon, Uint1 code)
{
	Int2 i, j;
	SeqMapTablePtr smtp;
	Uint1 residue, index = 0;

	if (codon == NULL) return INVALID_RESIDUE;   /* same guard as CodonForIndex */

	smtp = SeqMapTableFind(Seq_code_ncbi2na, code);
	if (smtp == NULL) return INVALID_RESIDUE;

	/* the three bases are base-4 digits weighted 16, 4 and 1 */
	for (i=0, j=16; i < 3; i++, j /= 4)
	{
		residue = SeqMapTableConvert(smtp, codon[i]);
		if (residue > 3) return INVALID_RESIDUE;   /* ambiguous or unmapped */
		residue = codon_xref[residue];
		index += (Uint1)(residue * j);
	}

	return index;
}

/*****************************************************************************
*
*   Boolean CodonForIndex (index, code, codon)
*   	Fills codon (3 Uint1 array) with codon corresponding to index,
*       in sequence alphabet code.
*       Index is the Genetic code index.
*       returns TRUE on success; FALSE if codon is NULL, index is above 63,
*       or no map table between ncbi2na and code is found.
*
*****************************************************************************/
NLM_EXTERN Boolean CodonForIndex (Uint1 index, Uint1 code, Uint1Ptr codon)
{
	SeqMapTablePtr map_table;
	Int2 pos, weight, base;
	Uint1 digit;

	if ((codon == NULL) || (index > 63))
		return FALSE;

	map_table = SeqMapTableFind(code, Seq_code_ncbi2na);
	if (map_table == NULL)
		return FALSE;

	/* peel off the three base-4 digits of index, most significant first */
	weight = 16;
	for (pos = 0; pos < 3; pos++)
	{
		digit = (Uint1)((Int2)index / weight);
		index -= (Uint1)(digit * weight);

		/* reverse lookup in codon_xref: which base carries this digit? */
		for (base = 0; base < 4; base++)
		{
			if (codon_xref[base] == digit)
			{
				digit = (Uint1)base;
				break;
			}
		}

		codon[pos] = SeqMapTableConvert(map_table, digit);
		weight /= 4;
	}

	return TRUE;
}

/*----------- GetFrameFromLoc()-----------------*/

/*****************************************************************************
*
*   Int2 GetFrameFromLoc (slp)
*   	returns 1,2,3 if can find the frame
*   	0 if not
*
*       Frame 1 is returned when the first piece of slp carries no fuzz at
*       its start. Otherwise the frame is derived from SeqLocLen(slp) % 3,
*       but only if the last piece carries no fuzz at its end.
*       0 is returned for a NULL slp, a location with no pieces, or pieces
*       that are neither intervals nor points.
*
*****************************************************************************/
NLM_EXTERN Int2 GetFrameFromLoc (SeqLocPtr slp)
{
	Int2 frame = 0;
	SeqLocPtr curr, last;
	Boolean is_partial;
	SeqIntPtr sip;
	SeqPntPtr spp;

	if (slp == NULL)
		return frame;

	curr = SeqLocFindNext(slp, NULL);
	if (curr == NULL)      /* nothing to look at: frame unknown */
		return frame;

	/* is the first piece partial at its start? */
	is_partial = FALSE;
	switch (curr->choice)
	{
		case SEQLOC_INT:
			sip = (SeqIntPtr)curr->data.ptrvalue;
			if (sip->strand == Seq_strand_minus)
			{
				if (sip->if_to != NULL)
					is_partial = TRUE;
			}
			else if (sip->if_from != NULL)
				is_partial = TRUE;
			break;
		case SEQLOC_PNT:
			spp = (SeqPntPtr)curr->data.ptrvalue;
			if (spp->fuzz != NULL)
				is_partial = TRUE;
			break;
		default:
			return frame;
	}

	if (! is_partial)
		return (Int2) 1;    /* complete 5' end, it's frame 1 */

	/* walk to the last piece */
	last = curr;
	while ((curr = SeqLocFindNext(slp, last)) != NULL)
		last = curr;

	/* a partial end as well leaves nothing to anchor the frame on */
	switch (last->choice)
	{
		case SEQLOC_INT:
			sip = (SeqIntPtr) last->data.ptrvalue;
			if (sip->strand == Seq_strand_minus)
			{
				if (sip->if_from != NULL)
					return frame;
			}
			else if (sip->if_to != NULL)
				return frame;
			break;
		case SEQLOC_PNT:
			spp = (SeqPntPtr) last->data.ptrvalue;
			if (spp->fuzz != NULL)
				return frame;
			break;
		default:
			return frame;
	}

	/* have complete last codon, get frame from length */
	frame = (Int2)(SeqLocLen(slp) % 3);
	if (frame == 0)
		frame = 1;
	else if (frame == 1)
		frame = 2;
	else
		frame = 3;

	return frame;
}

/*****************************************************************************
*
*   add_fuzziness_to_loc(slp, less)
*       attaches a new IntFuzz (choice 4, a = 2 if less, else 1) to slp
*       for a point it becomes the point's fuzz; for an interval it is
*       stored in if_from when less is TRUE, otherwise in if_to
*       (choice 4 / a = 2 or 1 presumably are the Int-fuzz "lim" codes for
*       less-than / greater-than -- confirm against the ASN.1 spec)
*       any fuzz already stored in that slot is overwritten, not freed
*       returns FALSE if slp is NULL, is neither an interval nor a point,
*       has no data, or the IntFuzz cannot be allocated
*
*****************************************************************************/
static Boolean add_fuzziness_to_loc (SeqLocPtr slp, Boolean less)
{
	IntFuzzPtr ifp;
	SeqIntPtr sint;
	SeqPntPtr spnt;	

	sint = NULL;
	spnt = NULL;

	if (slp == NULL)
		return FALSE;

	if(slp->choice == SEQLOC_INT)
		sint = (SeqIntPtr) slp->data.ptrvalue;
	else
	{
		if(slp->choice == SEQLOC_PNT)
			spnt = (SeqPntPtr) slp->data.ptrvalue;
		else
			return FALSE;
	}
	if ((sint == NULL) && (spnt == NULL))   /* location without data */
		return FALSE;

	ifp = IntFuzzNew();
	if (ifp == NULL)                        /* allocation failed */
		return FALSE;
	ifp->choice = 4;
	ifp->a = less ? 2 : 1;

	if(spnt != NULL)
		spnt->fuzz = ifp;
	else
	{
		if(less)
			sint->if_from = ifp;
		else
			sint->if_to = ifp;
	}

	return TRUE;
}


/*****************************************************************************
*
*   load_fuzz_to_DNA(dnaLoc, aaLoc, first)
*       looks at the fuzz on one end of aaLoc (first selects which end) and,
*       if it has choice 4 with the value expected for that end (2 for the
*       leading end, 1 for the trailing end), adds the matching fuzz to
*       dnaLoc through add_fuzziness_to_loc()
*       returns TRUE if fuzz was transferred, FALSE otherwise
*
*****************************************************************************/
static Boolean load_fuzz_to_DNA(SeqLocPtr dnaLoc, SeqLocPtr aaLoc, Boolean first)
{
	Uint1 aa_strand;
	IntFuzzPtr fuzz = NULL;
	Int2 wanted = 0;
	Boolean use_from;
	Boolean less;

	aa_strand = SeqLocStrand(aaLoc);

	switch (aaLoc->choice)
	{
		case SEQLOC_INT:
			/* the leading end is "from" on plus and "to" on minus */
			use_from = (Boolean)((first && aa_strand != Seq_strand_minus ) ||
				(!first && aa_strand == Seq_strand_minus));
			if (use_from)
			{
				fuzz = ((SeqIntPtr) aaLoc->data.ptrvalue)->if_from;
				wanted = 2;
			}
			else
			{
				fuzz = ((SeqIntPtr) aaLoc->data.ptrvalue)->if_to;
				wanted = 1;
			}
			break;

		case SEQLOC_PNT:
			fuzz = ((SeqPntPtr) aaLoc->data.ptrvalue)->fuzz;
			wanted = first ? 2 : 1;
			break;

		default:
			break;
	}

	if ((fuzz == NULL) || (fuzz->choice != 4) || (fuzz->a != wanted))
		return FALSE;

	/* on the minus strand the leading end is the high coordinate */
	if(SeqLocStrand(dnaLoc) == Seq_strand_minus)
		less = (first == FALSE);
	else
		less = first;
	add_fuzziness_to_loc (dnaLoc, less);

	return TRUE;
}	

/******************************************************************
*
*	aaLoc_to_dnaLoc(sfp, aa_loc)
*	map a SeqLoc on the amino acid sequence
*       to a Seq-loc in the	DNA sequence
*       through a CdRegion feature
*
*	Returns NULL if sfp or aa_loc is NULL, sfp is not a CdRegion
*	feature (data.choice != 3), sfp has no product, or aa_loc is not
*	on the same Bioseq as sfp->product.
*	A bond is mapped one point at a time; with two points a NULL
*	location is placed between the two mapped pieces.
*
******************************************************************/
NLM_EXTERN SeqLocPtr LIBCALL aaLoc_to_dnaLoc(SeqFeatPtr sfp, SeqLocPtr aa_loc)
{
	SeqLocPtr head = NULL, slp, tmp, next;
	Int4 aa_start, aa_stop;
	SeqBondPtr sbp;
	ValNode vn;


	if ((sfp == NULL) || (aa_loc == NULL)) return head;
	if (sfp->data.choice != 3) return head;
	if (sfp->product == NULL) return head;
	if (! (SeqIdForSameBioseq(SeqLocId(aa_loc), SeqLocId(sfp->product))))
		return head;

	if (aa_loc->choice == SEQLOC_BOND)   /* fake this one in */
	{
		sbp = (SeqBondPtr)(aa_loc->data.ptrvalue);
		if ((sbp == NULL) || (sbp->a == NULL))   /* malformed bond */
			return head;

		tmp = aaInterval_to_dnaIntervals(sfp, sbp->a->point, sbp->a->point);
		if (sbp->b == NULL)  /* one point in bond */
			return tmp;

		SeqLocAdd(&head, tmp, TRUE, FALSE);
		tmp = aaInterval_to_dnaIntervals(sfp, sbp->b->point, sbp->b->point);
		if (tmp == NULL)
			return head;

		vn.choice = SEQLOC_NULL;   /* make a mix with an internal NULL */
		vn.next = NULL;
		vn.data.ptrvalue = NULL;

		SeqLocAdd(&head, &vn, TRUE, TRUE);  /* copy it in */
		SeqLocAdd(&head, tmp, TRUE, FALSE); /* put real 3 base int in */

		goto ret;
	}

	slp = NULL;
	while ((slp = SeqLocFindNext(aa_loc, slp)) != NULL)
	{
		aa_start = SeqLocStart(slp);
		aa_stop = SeqLocStop(slp);
		if ((aa_start >= 0) && (aa_stop >= 0))
		{
			tmp = aaInterval_to_dnaIntervals(sfp, aa_start, aa_stop);
			if(tmp != NULL)      /* leading-end fuzz goes on the first piece */
				load_fuzz_to_DNA(tmp, slp, TRUE);
			while (tmp != NULL)
			{
				/* unlink each piece before adding it to head */
				next = tmp->next;
				tmp->next = NULL;
				if(next == NULL)  /* trailing-end fuzz goes on the last piece */
					load_fuzz_to_DNA(tmp, slp, FALSE);
				SeqLocAdd(&head, tmp, TRUE, FALSE);
				tmp = next;
			}
		}
	}
ret:			   
	return SeqLocPackage(head);
}

/******************************************************************
*
*       aaFeatLoc_to_dnaFeatLoc(sfp, aa_loc)
*       map a SeqLoc on the amino acid sequence
*       to a Seq-loc in the     DNA sequence
*       through a CdRegion feature
*
*       uses aaLoc_to_dnaLoc() but does additional checks to
*       extend dnaLoc at either end to compensate for positions in
*       the dna which do not correspond to the amino acid sequence
*       (partial codons which are not translated).
*
*       The extension is only attempted when sfp->partial is set, and
*       only when both the feature piece and the mapped piece at that
*       end are intervals.
*
******************************************************************/
NLM_EXTERN SeqLocPtr LIBCALL aaFeatLoc_to_dnaFeatLoc(SeqFeatPtr sfp,
                                                     SeqLocPtr aa_loc)
{
	SeqLocPtr dnaLoc = NULL;
	Uint2 dnaPartial;
	Int4 aaPos;
	SeqLocPtr tmp1 = NULL, tmp2 = NULL, tmp;
	SeqIdPtr sip;
	CdRegionPtr crp;
	SeqIntPtr sp1, sp2;
	BioseqPtr bsp;

	dnaLoc = aaLoc_to_dnaLoc(sfp, aa_loc);
	if (dnaLoc == NULL) return dnaLoc;

	if (! sfp->partial)  /* no partial checks needed */
		return dnaLoc;

	crp = (CdRegionPtr)(sfp->data.value.ptrvalue);

	aaPos = SeqLocStart(aa_loc);
	if ((! aaPos) && (crp != NULL) && (crp->frame > 1))   /* using first amino acid */
	{
		tmp1 = SeqLocFindNext(sfp->location, NULL);
		tmp2 = SeqLocFindNext(dnaLoc, NULL);

		if ((tmp1 != NULL) && (tmp2 != NULL) &&
		    (tmp1->choice == SEQLOC_INT) &&
		    (tmp2->choice == SEQLOC_INT))
		{
			sp1 = (SeqIntPtr)(tmp1->data.ptrvalue);
			sp2 = (SeqIntPtr)(tmp2->data.ptrvalue);
			if (sp1->strand ==  Seq_strand_minus)
			{
				sp2->to = sp1->to;  /* add partial codon */
			}
			else
			{
				sp2->from = sp1->from;
			}
		}
	}

	dnaPartial = SeqLocPartialCheck(sfp->location);
	if (dnaPartial & SLP_STOP)   /* missing 3' end of cdregion */
	{
		sip = SeqLocId(aa_loc);
		bsp = BioseqFindCore(sip);
		if (bsp != NULL)
		{
			aaPos = SeqLocStop(aa_loc);
			if (aaPos == (bsp->length - 1)) /* last amino acid */
			{
				/* find the last piece of the feature and of the result;
				   start from NULL so an empty walk is detectable */
				tmp1 = NULL;
				tmp2 = NULL;
				tmp = NULL;
				while ((tmp = SeqLocFindNext(sfp->location,tmp)) != NULL)
				{
					tmp1 = tmp;
				}
				tmp = NULL;
				while ((tmp = SeqLocFindNext(dnaLoc,tmp)) != NULL)
				{
					tmp2 = tmp;
				}
			
				if ((tmp1 != NULL) && (tmp2 != NULL) &&
					(tmp1->choice == SEQLOC_INT) &&
					(tmp2->choice == SEQLOC_INT))
				{
					sp1 = (SeqIntPtr)(tmp1->data.ptrvalue);
					sp2 = (SeqIntPtr)(tmp2->data.ptrvalue);
					if (sp1->strand ==  Seq_strand_minus)
					{
						sp2->from = sp1->from;  /* add partial codon */
					}
					else
					{
						sp2->to = sp1->to;
					}
				}
			}
	
		}
	}
	return dnaLoc;
}

/******************************************************************
*
*	aaInterval_to_dnaIntervals(sfp, aa_start, aa_stop)
*	map the amino acid sequence to a chain of Seq-locs in the 
*	DNA sequence through a CdRegion feature
*
*	sfp must be a CdRegion feature (data.choice == 3) with non-NULL
*	data, otherwise NULL is returned. sfp itself is not checked
*	for NULL.
*	aa_start and aa_stop are protein positions; the comparisons
*	below treat them as an inclusive range.
*	Returns the chain built with SeqLocAdd(), one interval for each
*	piece of sfp->location that contributes, or NULL if no piece
*	contributes.
*
******************************************************************/
NLM_EXTERN SeqLocPtr LIBCALL aaInterval_to_dnaIntervals(SeqFeatPtr sfp, Int4 aa_start, Int4 
aa_stop)
{
  Int4 frame_offset, start_offset;	/*for determine the reading frame*/
  SeqLocPtr slp = NULL;
  CdRegionPtr crp;
  SeqLocPtr dna_loc, loc;			/*for the dna location*/

  Boolean is_end;			/**is the end for process reached?**/
  Int4 p_start=0, p_stop=0;		/**protein start & stop in defined
					corresponding CdRegion Seq-loc**/
  Int4 cur_pos;			/**current protein position in process**/
  Int4 cd_len;		/**length of the cDNA for the coding region**/

  Boolean is_new;		/**Is cur_pos at the begin of new exon?**/
  Int4 end_partial;		/*the end of aa is a partial codon*/
  Int4 d_start, d_stop;		/*the start and the stop of the DNA sequence*/
  Int4 offset;			/*offset from the start of the current exon*/
  Int4 aa_len;
  Uint1 strand;
  Int4 p_end_pos;	/*the end of the protein sequence in the current loc*/
  Int4 first_partial;	/*first codon is a partial*/




   if(sfp->data.choice !=3)
	return NULL;


   crp = (CdRegionPtr) sfp->data.value.ptrvalue;
   if(!crp)
	return NULL;
   /* frame 2 or 3 gives 1 or 2 bases to skip; frame 0 is treated like 1 */
   if(crp->frame>0)
	frame_offset = crp->frame-1;
   else
	frame_offset = 0;
   /* start_offset keeps the initial value; frame_offset is recomputed
      at the bottom of every pass through the loop */
   start_offset = frame_offset;


   cur_pos= aa_start;
   cd_len = 0;
   is_end = FALSE;
   p_start = 0;
   first_partial = 0;
   slp = NULL;
   dna_loc= NULL;
   /* walk the pieces of the coding region in order */
   while(!is_end && ((slp = SeqLocFindNext(sfp->location, slp))!=NULL))
   {
	/* cd_len: bases in all pieces seen so far, this one included */
	cd_len += SeqLocLen(slp);
	/* end_partial: bases left after the last whole codon ending here */
	end_partial = ((cd_len - start_offset)%3);
	/* p_stop: last protein position touching this piece; a trailing
	   partial codon counts as one more position */
	p_stop = (cd_len - start_offset)/3 -1;
	if(end_partial != 0)
	   ++p_stop;
	p_end_pos = p_stop;

	if(p_stop > aa_stop || (p_stop == aa_stop && end_partial == 0))
	{
	   p_stop = aa_stop;		/**check if the end is reached**/
	   is_end = TRUE;
	}

	if(p_stop >= cur_pos)	/*get the exon*/
	{
		is_new = (p_start == cur_pos);	/*start a new exon?*/
		if(is_new)	/**special case of the first partial**/
		   offset = 0;
		else
		{
		   if(frame_offset && p_start >0)
			++p_start;
		   offset = 3*(cur_pos - p_start) + frame_offset;
		}
		/* on the minus strand the piece is walked from its stop
		   coordinate downwards */
		strand = SeqLocStrand(slp);
		if(strand == Seq_strand_minus)
		   d_start = SeqLocStop(slp) - offset;
		else
		   d_start = SeqLocStart(slp) + offset;

		d_stop = d_start;
		/*first codon*/
		if(is_new && cd_len == SeqLocLen(slp))
		{
			if(strand == Seq_strand_minus)
				d_stop -= frame_offset;
			else
				d_stop += frame_offset;
		}
		/* aa_len: whole codons to take from this piece; codons shared
		   with a neighbouring piece are taken out here and added back
		   as bases through end_partial / first_partial below */
		aa_len = MIN(p_stop, aa_stop) - cur_pos +1;
		if(end_partial != 0 && (p_end_pos >= aa_start && p_end_pos <= 
aa_stop))
			--aa_len;
		if(first_partial > 0)
			--aa_len;
		if(strand == Seq_strand_minus)
		{
			if(aa_len >= 0)
				d_stop -= (3*aa_len - 1);
			else
				++d_stop;
			if(first_partial >0)
				d_stop -= first_partial;
				
			first_partial = 0;
			if (end_partial > 0 && (p_end_pos >= aa_start && 
p_end_pos <= aa_stop)) {
				d_stop -= end_partial;
				/* remainder of the split codon, for the next piece */
				first_partial = 3 - end_partial;
			}
			
			/* never run past the low end of this piece */
			d_stop = MAX(d_stop, SeqLocStart(slp));
			loc = SeqLocIntNew(d_stop, d_start, strand, 
SeqLocId(slp));
		}
		else
		{
			if(aa_len >= 0)
				d_stop += (3*aa_len - 1);
			else
				--d_stop;
				
			if(first_partial > 0)
				d_stop += first_partial;
			first_partial = 0;
			if (end_partial> 0 && (p_end_pos >= aa_start && 
p_end_pos <= aa_stop)) {
				d_stop += end_partial;
				/* remainder of the split codon, for the next piece */
				first_partial = 3 - end_partial;
			}
			/* never run past the high end of this piece */
			d_stop = MIN(d_stop, SeqLocStop(slp));
			loc = SeqLocIntNew(d_start, d_stop, strand, 
SeqLocId(slp));
		}
		SeqLocAdd(&dna_loc, loc, TRUE, FALSE);

		/* a codon split across the boundary continues in the next
		   piece, so stay on the same protein position */
		if(end_partial != 0)
			cur_pos = p_stop;
		else
			cur_pos = p_stop+1;
	}



	/* first protein position that touches the next piece */
	if(end_partial != 0)
	{
	    p_start = p_stop;
	}
	else
	{
	    p_start = p_stop +1;
	}
	

	/* bases at the start of the next piece that finish the codon left
	   incomplete at the end of this one */
	frame_offset = (cd_len - start_offset)%3;
	 if(frame_offset >0)
	    frame_offset = 3-frame_offset;

   }/**end of while(slp && !is_end) **/

   return dna_loc;

}

/* Prototype for load_fuzz_to_DNA(), which dnaLoc_to_aaLoc() below calls.
   The function is already defined earlier in this file, so this
   declaration is redundant there but harmless. */
static Boolean load_fuzz_to_DNA PROTO((SeqLocPtr dnaLoc, SeqLocPtr aaLoc, 
Boolean first));
/******************************************************************
*
*	dnaLoc_to_aaLoc(sfp, dna_loc, merge, frame)
*	map a SeqLoc on the DNA sequence
*       to a Seq-loc in the	protein sequence
*       through a CdRegion feature
*   if (merge) adjacent intervals on the amino acid sequence
*      are merged into one. This should be the usual case.
*   if (frame != NULL) *frame receives (offset % 3) of the left end of
*      the last overlapping piece within the coding region, or 0 when
*      no piece of the feature overlaps dna_loc.
*
*   Returns NULL if sfp or dna_loc is NULL, sfp is not a CdRegion
*   feature with a product, dna_loc is not equal to or contained in
*   sfp->location, or the product Bioseq cannot be locked.
*
******************************************************************/
NLM_EXTERN SeqLocPtr LIBCALL dnaLoc_to_aaLoc(SeqFeatPtr sfp, SeqLocPtr dna_loc, Boolean merge, Int4Ptr frame)
{
	SeqLocPtr aa_loc = NULL, loc;
	CdRegionPtr crp;
	Int4 cd_len, end_pos, frame_offset;
	GatherRange gr;
	/* a_left starts at 0 so *frame is defined even if nothing overlaps */
	Int4 a_left = 0, a_right = 0, last_aa = -20, aa_from, aa_to;
	SeqLocPtr slp;
	Int2 cmpval;
	SeqIdPtr aa_sip;
	BioseqPtr bsp;

	if ((sfp == NULL) || (dna_loc == NULL)) return aa_loc;
	if (sfp->data.choice != 3) return aa_loc;
	if (sfp->product == NULL) return aa_loc;

	crp = (CdRegionPtr) sfp->data.value.ptrvalue;
	if(crp == NULL) return aa_loc;

	           /* dna_loc must be equal or contained in feature */
	cmpval = SeqLocCompare(dna_loc, sfp->location);
	if (! ((cmpval == SLC_A_IN_B) || (cmpval == SLC_A_EQ_B)))
		return aa_loc;

	aa_sip = SeqLocId(sfp->product);
	if (aa_sip == NULL) return aa_loc;
	bsp = BioseqLockById(aa_sip);
	if (bsp == NULL) return aa_loc;
	end_pos = bsp->length - 1;      /* last valid protein position */
	BioseqUnlock(bsp);

	if(crp->frame == 0)
		frame_offset = 0;
	else
		frame_offset = (Int4)crp->frame-1;

	slp = NULL;
	cd_len = 0;                     /* bases in the pieces already passed */
	loc = NULL;                     /* newest interval, not yet added */
	while ((slp = SeqLocFindNext(sfp->location, slp))!=NULL)
	{
		if (SeqLocOffset(dna_loc, slp, &gr, 0))
		{
			SeqLocOffset(slp, dna_loc, &gr, 0);

			a_left = gr.left + cd_len - frame_offset;
			a_right = gr.right + cd_len - frame_offset;

			aa_from = a_left / 3;
			aa_to = a_right / 3;

			if (aa_from < 0)
				aa_from = 0;
			if (aa_to > end_pos)
				aa_to = end_pos;

			if (merge)
			{
				if (aa_from <= last_aa)  /* overlap due to codons */
					aa_from = last_aa+1;  /* set up to merge */
			}

			if (aa_from <= aa_to)
			{
				/* the previous interval is only added once a newer one
				   exists, so the last one can get its end fuzz below */
				if(loc != NULL)
				{
					if(aa_loc == NULL)
						load_fuzz_to_DNA(loc, dna_loc, TRUE);
					SeqLocAdd(&aa_loc, loc, merge, FALSE);
				}
				loc = SeqLocIntNew(aa_from, aa_to, 0, aa_sip);
				last_aa = aa_to;
			}
		}

		cd_len += SeqLocLen(slp);
	}

	if(loc != NULL)
	{
		if(aa_loc == NULL)
			load_fuzz_to_DNA(loc, dna_loc, TRUE);
		load_fuzz_to_DNA(loc, dna_loc, FALSE);
		SeqLocAdd(&aa_loc, loc, merge, FALSE);
	}
	if (frame != NULL)
	    *frame = a_left % 3;

	return SeqLocPackage(aa_loc);
}

/*****************************************************************************
*
*   BioseqHash(bsp)
*   	Computes a (almost) unique hash code for a bioseq
*       The sequence is read through a SeqPort (iupacna for nucleic acids,
*       ncbieaa otherwise) and every value returned before SEQPORT_EOF is
*       folded into the hash.
*       Returns 0 if bsp is NULL or the SeqPort cannot be opened.
*
*****************************************************************************/
NLM_EXTERN Uint4 BioseqHash (BioseqPtr bsp)
{
	SeqPortPtr port;
	Uint4 hash = 0;
	Int2 value;
	Uint1 alphabet;

	if (bsp == NULL)
		return hash;

	alphabet = ISA_na(bsp->mol) ? Seq_code_iupacna : Seq_code_ncbieaa;

	port = SeqPortNew(bsp, 0, -1, 0, alphabet);
	if (port == NULL)
		return hash;

	for (;;)
	{
		value = SeqPortGetResidue(port);
		if (value == SEQPORT_EOF)
			break;

		hash = hash * 1103515245;
		hash = hash + ((Uint4)value + 12345);
	}

	SeqPortFree(port);

	return hash;
}


/*-------------- BioseqRevComp () ---------------------------*/
/***********************************************************************
*   BioseqRevComp:   Takes the nucleic acid sequence from Bioseq
*	Entry and gives the reverse complement sequence in place
*       Does not change features.
*       Reverses first; the complement step is only run if the
*       reversal succeeded. Returns the result of the last step run.
************************************************************************/
NLM_EXTERN Boolean LIBCALL BioseqRevComp (BioseqPtr bsp)
{
	Boolean reversed = BioseqReverse (bsp);

	if (! reversed)
		return reversed;

	return BioseqComplement(bsp);
}

/*-------------- BioseqComplement () ---------------------------*/
/***********************************************************************
*   BioseqComplement:   Takes the nucleic acid sequence from Bioseq
*	Entry and gives the complement sequence in place
*       Does not change features.
*       Works on raw ncbi2na or ncbi4na data only. Every residue slot
*       of every stored byte is complemented.
*       Returns FALSE (after posting an error) for a NULL bsp, a
*       non-raw Bioseq, missing data or a missing code table, and FALSE
*       without a message for any other encoding.
************************************************************************/
NLM_EXTERN Boolean LIBCALL BioseqComplement (BioseqPtr bsp)
{
	SeqCodeTablePtr comp_table;
	ByteStorePtr    store;
	long            offset, nbytes;
	Uint1           encoding;
	Uint1           per_byte;      /* residues packed in one byte */
	Uint1           width;         /* bits per residue */
	Uint1           down;          /* shift that brings the top residue to bit 0 */
	Uint1           top_mask;      /* selects the top residue of a byte */
	Uint1           in_byte, out_byte, base, left;

	if (bsp == NULL)
	{
		ErrPostEx(SEV_ERROR,0,0, "Error: not a BioseqPtr\n");
		return FALSE;
	}

	if (bsp->repr != Seq_repr_raw)
	{
		ErrPostEx(SEV_ERROR,0,0, "Error: not a raw sequence\n");
		return FALSE;
	}

	if (bsp->seq_data == NULL)
	{
		ErrPostEx(SEV_ERROR,0,0, "Error:  no sequence data\n");
		return FALSE;
	}

	encoding = bsp->seq_data_type;
	comp_table = SeqCodeTableFind (encoding);
	if (comp_table == NULL)
	{
		ErrPostEx(SEV_ERROR,0,0, "Can't open table\n");
		return FALSE;
	}

	if (encoding == Seq_code_ncbi2na)
	{
		per_byte = 4;
		down = 6;
		width = 2;
		top_mask = 192;
	}
	else if (encoding == Seq_code_ncbi4na)
	{
		per_byte = 2;
		down = 4;
		width = 4;
		top_mask = 240;
	}
	else                        /* ignore amino acid */
	{
		return FALSE;
	}

	store = bsp->seq_data;
	nbytes = BSLen(store);

	for (offset = 0; offset != nbytes; offset++)
	{
		BSSeek (store, offset, SEEK_SET);
		in_byte = (Uint1)BSGetByte (store);
		out_byte = 0;

		/* take residues from the top of in_byte and push their
		   complements into out_byte, so the order is preserved */
		for (left = per_byte; left; left--)
		{
			base = in_byte & top_mask;
			base >>= down;
			in_byte <<= width;

			out_byte <<= width;
			out_byte |= SeqCodeTableComp (comp_table, base);
		}

		/* reading moved the position; go back and overwrite */
		BSSeek (store, offset, SEEK_SET);
		BSPutByte (store, out_byte);
	}
	return TRUE;

} /* BioseqComplement */

           
/*-------------- BioseqReverse () ---------------------------*/
/***********************************************************************
*   BioseqReverse:   Takes nucleic acid sequence from Bioseq Entry and 
*	reverses the whole sequence in place
*       Does not change features.
*       Works on raw ncbi2na or ncbi4na data only.
*       Returns FALSE (after posting an error) for a NULL bsp, a
*       non-raw Bioseq, missing data or a failed copy of the data, and
*       FALSE without a message for any other encoding.
*       Returns TRUE without doing anything if the ByteStore is empty.
*
*       The data is read from a private copy (BSDup) and written back
*       into bsp->seq_data. When the last byte is only partly filled,
*       every output byte is assembled from the residues left over from
*       one input byte plus the trailing residues of the byte before
*       it. The unused slots of the new last byte are written as 0.
************************************************************************/
NLM_EXTERN Boolean LIBCALL BioseqReverse (BioseqPtr bsp)
{
	ByteStorePtr 	bysp1 = NULL;    /* destination: bsp->seq_data */
	ByteStorePtr 	bysp2 = NULL;    /* untouched copy used as source */
	long 		readbyte, bslen = 0, total;
	Int4 		seqlen, count = 0;
	Uint1 		seqtype, byte, byte2, newbyte = 0;
	Uint1		newbyte2, finalbyte, residue, residue2, bitctr, bc2 = 0;
	Uint1 		bitctr2, mask, mask2, lshift, rshift, bc = 0;

	if (bsp == NULL)
	{
		ErrPostEx(SEV_ERROR,0,0, "Error: not a BioseqPtr\n");
		return FALSE;
	}

	if (bsp->repr != Seq_repr_raw)
	{
		ErrPostEx(SEV_ERROR,0,0, "Error: not a raw sequence\n");
		return FALSE;
	}

	if (bsp->seq_data == NULL)
	{
		ErrPostEx(SEV_ERROR,0,0, "Error:  No sequence data\n");
		return FALSE;
	}

	/* bc  = residues used in the last byte,
	   bc2 = residues borrowed from the preceding byte to fill a byte */
	seqlen = bsp->length;
	seqtype = bsp->seq_data_type;
	switch (seqtype){
		case Seq_code_ncbi2na:		/*bitshifts needed*/
			mask = 192;
			mask2 = 3;
			lshift = 2;
			rshift = 6;

			switch (seqlen%4)	/*change if jagged last byte*/
			{
				case 1:
					bc = 1;
					bc2 = 3;
					break;
				case 2:
					bc = 2;
					bc2 = 2;
					break;
				case 3:
					bc = 3;
					bc2 = 1;
					break;
				default:
					bc = 4;
					bc2 = 0;
					break;
			}
			break;
		case Seq_code_ncbi4na:
			mask = 240;
			mask2 = 15;
			lshift = 4;
			rshift = 4;

			switch (seqlen%2)
			{
				case 1:
					bc = 1;
					bc2 = 1;
					break;
				default:
					bc = 2;
					bc2 = 0;
					break;
			}
			break;
		default:		/*ignores amino acid sequence*/
			return FALSE;
	}

	bysp1 = bsp->seq_data;
	total = BSLen (bysp1);
	if (total <= 0)                 /* nothing stored, nothing to reverse */
		return TRUE;

	bysp2 = BSDup(bysp1);
	if (bysp2 == NULL)
	{
		ErrPostEx(SEV_ERROR,0,0, "Error:  Can't copy sequence data\n");
		return FALSE;
	}

	if (!(seqlen%4))			/*no jagged last byte*/
	{
		/* output byte k is input byte (total-1-k) with its residues
		   in reverse order */
		for (readbyte = 0; readbyte < total; readbyte++)
		{
			BSSeek (bysp2, total - 1 - readbyte, SEEK_SET);
			byte = (Uint1)BSGetByte (bysp2);
			newbyte = 0;
			count = rshift;

			for (bitctr = bc; bitctr; bitctr--)
			{
				residue = byte & mask;
				residue >>= count;
				byte <<= lshift;
				count = count - lshift;

				newbyte |= residue;
			}

			BSSeek (bysp1, readbyte, SEEK_SET);
			BSPutByte (bysp1, newbyte);
		}
	}
	else				/*jagged last byte*/
	{
		/* Note: an ncbi4na sequence of even length that is not a
		   multiple of 4 also comes here, with bc == 2 and bc2 == 0;
		   each byte is then simply reversed on its own. */

		/* Prime with the last byte and, if there is one, the byte
		   before it. A one-byte sequence has no predecessor. */
		byte = byte2 = 0;
		if (total >= 2)
		{
			BSSeek (bysp2, total-2, SEEK_SET);
			byte2 = (Uint1) BSGetByte (bysp2);	/*byte closer to beginning*/
			byte = (Uint1) BSGetByte (bysp2);
		}
		else
		{
			BSSeek (bysp2, 0, SEEK_SET);
			byte = (Uint1) BSGetByte (bysp2);
		}
		bslen = total - 2;          /* index of the byte held in byte2 */

		for (readbyte = 0; readbyte < total; readbyte++)
		{
			if (readbyte > 0)       /* fetch the next byte toward the start */
			{
				byte2 = 0;          /* past the start: pad with zeros */
				if (bslen > 0)
				{
					BSSeek (bysp2, --bslen, SEEK_SET);
					byte2 = (Uint1) BSGetByte (bysp2);
				}
			}

			newbyte = newbyte2 = 0;
			count = rshift;
			for (bitctr = bc; bitctr; bitctr--)
			{
				residue = byte & mask;	    /*reverses 1st byte*/
				residue >>= count;
				byte <<= lshift;
				newbyte |= residue;
				count = count - lshift;
			}
			for (bitctr2 = bc2; bitctr2; bitctr2--)
			{
				residue2 = byte2 & mask2;   /*reverses 2nd */
				byte2 >>= lshift;	    /*partially to join*/
				newbyte2 <<= lshift;	    /*with the 1st*/
				newbyte2 |= residue2;
			}
			newbyte <<= (8 - (bc*lshift));	/*joins 1st & 2nd bytes*/
			finalbyte = (newbyte | newbyte2);

			/* what is left of byte2 moves to the top and becomes the
			   "1st byte" of the next round */
			byte2 <<= (bc2 * lshift);
			byte = byte2;

			BSSeek (bysp1, readbyte, SEEK_SET);
			BSPutByte (bysp1, finalbyte);
		}
	}
	BSFree(bysp2);
	return TRUE;
} /* BioseqReverse */

/*****************************************************************************
*
*  SPCompressNew(void); - allocates memory for SPCompress structure
*        The structure gets a buffer of SPC_BUFF_CHUNK bytes.
*        Returns NULL if either allocation fails.
*        Release the result with SPCompressFree().
*
*****************************************************************************/
NLM_EXTERN SPCompressPtr SPCompressNew(void)
{
    SPCompressPtr spc;
    
    spc = (SPCompressPtr) MemNew(sizeof(SPCompress));
    if (spc == NULL)
        return NULL;

    spc->buffer = (Uint1Ptr) MemNew(SPC_BUFF_CHUNK);
    if (spc->buffer == NULL) {      /* do not hand out a half-built object */
        MemFree(spc);
        return NULL;
    }
    spc->allocated = SPC_BUFF_CHUNK;
    spc->residues = 0;
    spc->lbytes = NULL;
    
    return spc;
}
/*****************************************************************************
*
*  SPCompressFree(SPCompressPtr spc); -  free SPCompress structure
*        Frees the buffer, the lbytes array and the structure itself.
*        A NULL spc is ignored.
*
*****************************************************************************/
NLM_EXTERN void SPCompressFree(SPCompressPtr spc)
{

  if (spc == NULL)
    return;

  MemFree(spc->buffer);
  MemFree(spc->lbytes);
  MemFree(spc);

}
/*****************************************************************************
*
*  Int4 SPCompressRead (Pointer data, Uint1Ptr buf, Int4 length);
*        Hook read-function for SPCompressDNA()
*
*****************************************************************************/
static Int4 SPCompressRead (Pointer data, Uint1Ptr buf, Int4 length);
static Int4 SPCompressRead (Pointer data, Uint1Ptr buf, Int4 length)
{
  /*
   * data   - the SeqPortPtr to pull residues from
   * buf    - output buffer, receives two residues per byte
   * length - capacity of buf in bytes
   *
   * Returns the number of residues stored, or -1 if the port
   * delivers a value that is neither a residue nor SEQPORT_VIRT.
   */
  SeqPortPtr port = (SeqPortPtr) data;
  Uint1 code = 0;
  Int4 stored = 0;
  Int4 pos = 0;
  Boolean low_half = FALSE;   /* TRUE when the next residue completes a byte */

  MemSet(buf, 0, length);     /* bytes are built up by addition below */

  for (;;) {
    /* test for room first so no residue is consumed once buf is full */
    if (pos >= length)
      break;

    code = SeqPortGetResidue(port);
    if (code == SEQPORT_EOF)
      break;

    if (!IS_residue(code)) {
      if (code == SEQPORT_VIRT)
        continue;             /* skipped: neither stored nor counted */

      ErrPostEx(SEV_WARNING, 0, 0,"[Bad residue]\n");
      return -1;
    }

    if (low_half) {
      /* second residue of the pair goes in as-is and finishes the byte */
      buf[pos] += code;
      pos++;
      low_half = FALSE;
    } else {
      /* first residue of the pair is shifted into the upper four bits */
      code <<= 4;
      buf[pos] += code;
      low_half = TRUE;
    }
    stored++;
  }

  return stored;
}

/*****************************************************************************
*
*  Int4 SPCompressWrite (Pointer data, Uint1Ptr buf, Int4 length);
*        Hook write-function for SPCompressDNA()
*
*****************************************************************************/
static Int4 SPCompressWrite (Pointer data, Uint1Ptr buf, Int4 length);
static Int4 SPCompressWrite (Pointer data, Uint1Ptr buf, Int4 length)
{
  /*
   * data   - the SPCompressPtr whose buffer receives the bytes
   * buf    - bytes to append
   * length - number of bytes in buf
   *
   * Appends buf to spc->buffer, enlarging the buffer in multiples of
   * SPC_BUFF_CHUNK as required.
   *
   * Returns length on success, or -1 on invalid arguments or when the
   * buffer cannot be enlarged; in the failure case the existing buffer
   * and its bookkeeping are left untouched.
   */
  SPCompressPtr spc;
  Uint1Ptr grown;
  Int4 new_size;

  spc = (SPCompressPtr) data;
  if (spc == NULL || length < 0)
    return -1;
  if (length == 0)
    return 0;
  if (buf == NULL)
    return -1;

  if((spc->used + length) >= spc->allocated) {
    /* one chunk is not always enough: keep adding chunks until the
       pending write fits (same ">=" slack rule as the trigger above) */
    new_size = spc->allocated;
    while ((spc->used + length) >= new_size)
      new_size += SPC_BUFF_CHUNK;

    /* keep the old pointer until the reallocation is known to work */
    grown = (Uint1Ptr)Realloc(spc->buffer, new_size);
    if (grown == NULL)
      return -1;

    spc->buffer = grown;
    spc->allocated = new_size;
  }

  MemCpy(spc->buffer + spc->used, buf, length);
  spc->used += length;

  return length;
}

/*****************************************************************************
*
*   SPRebuildDNA(SPCompressPtr spc);
*       translates the spc ncbi2na encoding buffer into an
*       spc ncbi4na encoding buffer with rebuilt ambiguities
*
*       spc - must be valid SPCompress structure returned
*       from SPCompressDNA() function in ncbi2na encoding
*
*****************************************************************************/
NLM_EXTERN Boolean SPRebuildDNA(SPCompressPtr spc)
{
    /*
     * Replaces the ncbi2na contents of spc->buffer with the ncbi4na
     * form, applying the ambiguity data in spc->lbytes.
     *
     * Returns TRUE on success.  Returns FALSE if spc is NULL, is not
     * in ncbi2na encoding, holds no data, or if a conversion or
     * allocation step fails; spc is left unmodified in those cases.
     */
    ByteStorePtr bsp, bsp_plain;
    Uint1Ptr expanded;
    Int4 residues, nbytes;

    if(spc == NULL || spc->type != Seq_code_ncbi2na)
        return FALSE;

    /* the residue count is decoded from the last used byte, so an
       empty or missing buffer cannot be processed */
    if(spc->buffer == NULL || spc->used <= 0)
        return FALSE;

    /* four residues for every byte but the last; the low two bits of
       the last byte supply the remainder */
    residues = (spc->used-1)*4 + (spc->buffer[spc->used-1] & 0x3);
    nbytes = residues/2 + 1;

    bsp = BSNew(spc->used);
    if(bsp == NULL)
        return FALSE;
    BSWrite(bsp, spc->buffer, spc->used);

    /* NOTE(review): ownership of bsp after this call (on success and
       on failure) is decided by BSConvertSeq - confirm against its
       contract before adding a BSFree(bsp) here */
    if((bsp_plain = BSConvertSeq(bsp, Seq_code_ncbi4na,
                                 Seq_code_ncbi2na, residues)) == NULL) {
        return FALSE;
    }

    BSRebuildDNA_4na(bsp_plain, spc->lbytes);

    /* resize through a temporary so the old buffer survives a failure */
    expanded = (Uint1Ptr) Realloc(spc->buffer, nbytes);
    if(expanded == NULL) {
        BSFree(bsp_plain);
        return FALSE;
    }
    spc->buffer = expanded;
    spc->allocated = nbytes;    /* record the real size of the buffer */

    /* read from the start regardless of where the steps above left
       the byte store position */
    BSSeek(bsp_plain, 0, SEEK_SET);
    BSRead(bsp_plain, spc->buffer, nbytes);

    spc->type = Seq_code_ncbi4na;
    spc->residues = residues;
    BSFree(bsp_plain);

    return TRUE;
}

/*****************************************************************************
*
*   SPCompressDNA(SeqPortPtr spp);
*       converts an ncbi4na sequence taken from spp into an ncbi2na
*       buffer stored inside an SPCompress structure together
*       with ambiguity information
*       returns a pointer to the SPCompress structure, or NULL on error
*
*       NOTE: This function does not know the length of the sequence
*             to compress. The termination flag for this function
*             is SEQPORT_EOF returned from spp.
*
*****************************************************************************/
NLM_EXTERN SPCompressPtr SPCompressDNA(SeqPortPtr spp)
{
  /*
   * spp - sequence port; must be non-NULL and deliver ncbi4na
   *       (spp->newcode == Seq_code_ncbi4na)
   *
   * Returns a newly allocated SPCompress structure marked as ncbi2na,
   * or NULL on bad input, allocation failure or compression failure.
   * The caller releases the result with SPCompressFree().
   */
  SPCompressPtr spc;

  if (spp == NULL || spp->newcode != Seq_code_ncbi4na)
    return NULL;

  spc = SPCompressNew();
  if (spc == NULL)
    return NULL;

  if(!GenericCompressDNA((VoidPtr) spp, (VoidPtr) spc,
                         (Uint4) -1, /* Length of sequence unknown */
                         SPCompressRead,
                         SPCompressWrite,
                         &spc->lbytes
                         )) {
    /* do not leak the partially filled structure on failure;
       NOTE(review): assumes GenericCompressDNA leaves spc->lbytes
       either NULL or validly allocated when it fails - confirm */
    SPCompressFree(spc);
    return NULL;
  }
  spc->type = Seq_code_ncbi2na;
  return spc;
}
