// MM1UTIL.H : various utility classes for "all atoms"-models.

// Copyright (C) 1998 Tommi Hassinen.

// This program is free software; you can redistribute it and/or modify it
// under the terms of the license (GNU GPL) which comes with this package.

/*################################################################################################*/

#include "config.h"	// this is target-dependent...

#ifndef MM1UTIL_H
#define MM1UTIL_H

// Forward declarations of classes defined further down in this header, so
// that they can refer to each other regardless of the definition order.

// simple storage classes

class mm1_cr;

class mm1_atom;
class mm1_bond;

class mm1_chn_info;

// typerule classes

class mm1_typerule;
struct mm1_tr_subrule;

// sequence-builder classes

class mm1_sb_atm;
class mm1_sb_bnd;
class mm1_sb_res;

class mm1_sequencebuilder;

/*################################################################################################*/

// Classes that are only used through pointers in this header; the trailing
// comments name the headers that define them.

class mm1_mdl;		// mm1mdl.h
class mm1_eng;		// mm1eng.h

#include "define.h"
#include "utility.h"

#include <list>
#include <vector>
using namespace std;

/// A vector of mm1_atom pointers; used by mm1_sequencebuilder for its reference/path bookkeeping.
typedef vector<mm1_atom *> atmr_vector;

/*################################################################################################*/

/**	A connectivity record class for mm1_mdl.
	Holds one atom pointer together with one bond pointer.
*/

class mm1_cr
{
	public:
	
	// The data members are public (not protected) on purpose,
	// because they are heavily used by other code.
	
	mm1_atom * atmr;
	mm1_bond * bndr;
	
	mm1_cr();
	mm1_cr(mm1_atom * atom, mm1_bond * bond);
	~mm1_cr();
	
	// There is intentionally no operator<; a dummy one that existed
	// "just for STL" has been disabled.
	
	bool operator==(const mm1_cr & other) const;
};

/*################################################################################################*/

/**	An atom class for mm1_mdl.
	Used to store all essential information about atoms, including the coordinates.
*/

class mm1_atom
{
//	protected:
	public:		// the data members are public because they are heavily used...
	
	/// The element of this atom. "atmtp" is presumably the atom type number -- TODO confirm.
	
	element el; i32s atmtp;
	
	/// Connectivity records...
	
	list<mm1_cr> cr_list;
	
	/// The actual coordinate data...
	
	vector<a3_fGL> crd_vector;
	
	/**	For thermodynamics stuff several "topologies" (and therefore also several 
		elements, atom types etc...) are needed. COMMON topology???? will affect typerules!!!
		
		NOT REALLY USED YET!
	*/
	
	i32s topology;
	
	/**	The molecule, chain and residue numbers, and an atom ID-number which is totally 
		builder-dependent. mm1_mdl::GatherGroups() will maintain molecule numbers, 
		and sequence builders will handle the rest.
		
		NOTE(review): the order of the four entries in "id", and the role of the 
		separate "res_id", are not visible in this header -- confirm in the implementation.
	*/
	
	i32s id[4];
	i32s res_id;
	
	/**	"index" is the index of this entry in mm1_mdl::atom_list. 
		Updated by mm1_mdl::UpdateIndex().
	*/
	
	i32s index;
	
	/**	GUI's will use this "selected"-flag. 
		Are more flags needed??? Use a vector<bool> instead???
	*/
	
	bool selected;
	
	/// The charge of this atom (presumably a partial charge; units are not stated here -- TODO confirm).
	
	f64 charge;
	
	public:
	
	// NOTE(review): the three-argument constructor presumably takes the element, a pointer to
	// the initial coordinates and the number of coordinate sets -- confirm in the implementation.
	
	mm1_atom(void);
	mm1_atom(element, fGL *, i32u);
	mm1_atom(const mm1_atom &);
	~mm1_atom(void);
	
	bool operator<(const mm1_atom &) const;		// using id-numbers...
	bool operator==(const mm1_atom &) const;	// using pointers...
};

/*################################################################################################*/

// NOTE(review): NFLAGS is presumably the number of entries in mm1_bond::flags -- confirm in the implementation.

#define NFLAGS 2	// the number of flags can be increased if needed...

/**	A bond class for mm1_mdl. 
	Used to store all essential information about bonds (atoms, bondtype).
*/

class mm1_bond
{
//	protected:
	public:		// the data members are public because they are heavily used...
	
	/// Pointers to the two atoms of this bond, and the type of the bond.
	
	mm1_atom * atmr[2];
	bondtype bt;
	
	/// General-purpose flags; their meaning is not defined in this header.
	
	vector<bool> flags;
	
	/**	"index" is the index of this entry in mm1_mdl::bond_list.
		used by the energy terms...
	*/
	
	i32s index;
	
	public:
	
	mm1_bond(void);
	mm1_bond(mm1_atom *, mm1_atom *, bondtype);
	mm1_bond(const mm1_bond &);
	~mm1_bond(void);
	
//	bool operator<(const mm1_bond &) const { return false; }	// just for STL...
	bool operator==(const mm1_bond &) const;			// using atom pointers...
};

/*################################################################################################*/

/**	A "chain info"-class for mm1_mdl. 
	This is used in the context of peptides/proteins and nucleic acids...
*/

class mm1_chn_info
{
//	protected:
	public:		// mm2-param
	
	/// The kind of chain that this record describes.
	
	enum chn_type
	{
		not_defined = 0, amino_acid = 1, nucleic_acid = 2
	};
	
	// NOTE(review): "description", "sequence" and "state" are raw char pointers; who allocates
	// and frees them is decided in the implementation (the class declares its own copy
	// constructor and destructor) -- confirm before copying or reassigning them.
	
	char * description;
	
	chn_type type;
	i32s length;
	
	// Presumably "length" characters each (one per residue) -- TODO confirm.
	
	char * sequence;
	char * state;
	
	// With every member above being public, the friend declarations
	// below currently grant no additional access.
	
	friend class mm1_sequencebuilder;
	friend class mm1_ribbon;
	
	friend class glut_mm1_docv;
	friend class gnome_mm1_docv;
	
	friend class mm2_mdl;
	
	friend void DefineSecondaryStructure(mm1_mdl *);
	friend f64 HBondEnergy(mm1_mdl *, i32s *, i32s *);
	
	public:
	
	// NOTE(review): the three-argument constructor takes a chain type and an i32s that is
	// presumably the length; the meaning of the bool is not visible here -- confirm.
	
	mm1_chn_info(void);
	mm1_chn_info(chn_type, i32s, bool);
	mm1_chn_info(const mm1_chn_info &);
	~mm1_chn_info(void);
};

/*################################################################################################*/

// Shorthand names for the list iterators of atoms, bonds and connectivity records.

typedef list<mm1_atom>::iterator iter_mm1al;		// al = atom list
typedef list<mm1_bond>::iterator iter_mm1bl;		// bl = bond list

typedef list<mm1_cr>::iterator iter_mm1cl;		// cl = connectivity list

/*################################################################################################*/

/**	A typerule class for atom types, for mm1_mdl.

	Will parse, store and check atom type rulesets written as text. For example a typerule 
	for formaldehyde carbon is written (=O,-H,-H) which means "bonded to oxygen with a 
	double bond and to 2 hydrogens with single bonds". The trick is that typerules can be 
	recursive: for the acetaldehyde carbon 2 we can write a rule (=O,-C(-H,-H,-H),-H) which 
	means "bonded to oxygen with a double bond, and with single bonds to a hydrogen and a 
	carbon, which is further bonded to 3 hydrogens with single bonds". Typerules can also 
	contain other rules like number of bonds, ring sizes and rings. For example we can write 
	the following typerule for phenolic hydrogen (-O(-C([~C~C~C~C~C~]))) which means 
	"single-bonded to oxygen, which is single-bonded to carbon, which is part of a 
	6-member carbon ring connected with aromatic bonds". It is also possible to use 
	wildcards for both atoms and bonds.
	
	The "wildcard"-mark for elements is '*' and for bondtypes '?'. Does it work for bonds??? 
	More exact rules must be tested before less exact rules; (-C,-*) but NOT (-*,-C)!!!
	
	Here is no test for the element of the atom itself; just test that separately...
*/

class mm1_typerule
{
	protected:
	
	/// The value returned by GetFirst(); presumably the index of the first subrule in sr_vector -- TODO confirm.
	
	i32s first;
	
	/// A fixed-size character buffer (presumably scratch space for parsing -- TODO confirm).
	
	char buffer[256];
	
	/// Storage for the ring descriptions and for the subrules of this typerule.
	
	vector<signed char *> ring_vector;
	vector<mm1_tr_subrule> sr_vector;
	
	public:
	
	// NOTE(review): the stream constructor presumably parses the rule text from the istream
	// and writes diagnostics to the ostream -- confirm in the implementation.
	
	mm1_typerule(void);
	mm1_typerule(istream *, ostream *);
	mm1_typerule(const mm1_typerule &);
	~mm1_typerule(void);
	
	/// Returns "first". Const-qualified, so that it can also be called through a
	/// const reference (like the one operator<< below receives).
	
	i32s GetFirst(void) const { return first; }
	
	/// Checks this typerule; the meaning of the i32s argument is not visible in this header.
	
	bool Check(mm1_mdl *, mm1_atom *, i32s);
	friend ostream & operator<<(ostream &, const mm1_typerule &);
	
	private:
	
	// Internal helpers for reading, checking and printing the subrules.
	
	i32s ReadSubRule(istream *, ostream *);
	bool CheckRules(mm1_mdl *, mm1_atom *, i32s, i32s);
	
	void PrintSubRules(ostream &, i32s) const;
	void PrintRing(ostream &, signed char *) const;
};

/// A single subrule record of mm1_typerule (stored in mm1_typerule::sr_vector).

struct mm1_tr_subrule
{
	/// The kind of this subrule; the comments show the matching rule text.
	
	enum
	{
		BondedTo = 0,
		NumAllBonds = 1,	// b1=??? (the count of all bonds)
		NumHABonds = 2,		// b2=??? (the count of heavy-atom bonds)
		RingSize = 3,		// rs=???
		Ring = 4		// [???]
	} type;
	
	/// The bondtype and element of this subrule (presumably relevant for BondedTo only -- TODO confirm).
	
	bondtype bt;
	element el;
	
	// NOTE(review): "data" presumably holds the numeric value of the rule, and "next"/"sub"
	// presumably link to the following subrule and to the nested subrule list as indices
	// into mm1_typerule::sr_vector -- confirm in the implementation.
	
	i32s data;
	i32s next;
	i32s sub;
};

/*################################################################################################*/

/// An atom entry used by the sequence builder; it can be read from and written to a stream.

class mm1_sb_atm
{
	protected:
	
	// NOTE(review): "id" and "prev" presumably hold the ID-numbers of this atom and of the
	// previous atoms that it is defined relative to -- confirm in the implementation.
	
	i32s id[2];
	i32s prev[3];
	
	element el;
	bondtype bt[2];
	
	// NOTE(review): raw pointer; ownership is decided in the implementation (the class
	// declares its own copy constructor and destructor) -- confirm.
	
	mm1_typerule * typerule;
	
	// NOTE(review): "ic" presumably stands for internal coordinates -- confirm.
	
	f64 ic1[3];
	i32s ic2;
	
	friend class mm1_sequencebuilder;
	
	public:
	
	mm1_sb_atm(void);
	mm1_sb_atm(const mm1_sb_atm &);
	~mm1_sb_atm(void);
	
	// Stream input and output; note that the output operator takes a non-const reference.
	
	friend istream & operator>>(istream &, mm1_sb_atm &);
	friend ostream & operator<<(ostream &, mm1_sb_atm &);
};

/*################################################################################################*/

/// A bond entry used by the sequence builder: two i32s atom references and a bondtype.

class mm1_sb_bnd
{
	// The sequence builder reads the protected data directly.
	
	friend class mm1_sequencebuilder;
	
	protected:
	
	i32s atm[2];
	bondtype bt;
	
	public:
	
	mm1_sb_bnd();
	~mm1_sb_bnd();
	
	/// Stream input for a bond entry.
	
	friend istream & operator>>(istream & in, mm1_sb_bnd & bnd);
};

/*################################################################################################*/

/// A residue entry used by the sequence builder: an id, a one-character symbol, a description and the atom/bond entries.

class mm1_sb_res
{
	protected:
	
	i32s id;
	
	char symbol;
	
	// NOTE(review): raw pointer; ownership is decided in the implementation (the class
	// declares its own copy constructor and destructor) -- confirm.
	
	char * description;
	
	/// The atom and bond entries of this residue.
	
	vector<mm1_sb_atm> atm_vector;
	vector<mm1_sb_bnd> bnd_vector;

	friend class mm1_sequencebuilder;
	friend class mm2_mdl;
	
	public:
	
	mm1_sb_res(void);
	mm1_sb_res(const mm1_sb_res &);
	~mm1_sb_res(void);
	
	/// Reads a "modification" from the stream; the exact format is defined in the implementation.
	
	void ReadModification(istream &);
	
	friend istream & operator>>(istream &, mm1_sb_res &);
};

/*################################################################################################*/

/// A template data record; vectors of these are passed to
/// mm1_sequencebuilder::BuildTemplate() and mm1_sequencebuilder::CheckTemplate().

struct mm1_sb_tdata
{
	element el;
	bondtype bt;
	
	i32s id[2];
	mm1_atom * ref;
};

/*################################################################################################*/

/**	A generic sequence builder class. This same code will handle both peptides/proteins 
	and nucleic acids. Only the input file read in ctor is different. The sequence builders 
	can both build sequences and identify them.
	
	Sequence builders will handle only heavy atoms. You must add the hydrogens separately. 
	At least for peptides/proteins this is a complicated (and pH-dependent) job...
	
	How to handle the histidine case with various tautomeric forms???
*/

class mm1_sequencebuilder
{
	protected:
	
	/// The chain type (amino acid / nucleic acid) that this builder handles.
	
	mm1_chn_info::chn_type type;
	
	/// The atom entries of the main chain (presumably -- TODO confirm), and the known residues.
	
	vector<mm1_sb_atm> main_vector;
	vector<mm1_sb_res> residue_vector;	// res_vector???
	
	/// Typerules for the head and the tail of a chain (presumably used by Identify() -- TODO confirm).
	
	vector<mm1_typerule> head_vector;
	vector<mm1_typerule> tail_vector;
	
	// NOTE(review): three raw residue pointers, presumably the "modifications" read with
	// mm1_sb_res::ReadModification(); ownership is decided in the implementation -- confirm.
	
	mm1_sb_res * mod[3];
	
	char buffer[256];
	
	// Working storage for the build/identify operations.
	
	vector<i32s> id_vector;
	atmr_vector ref_vector;
	
	atmr_vector temporary_vector;
	vector<atmr_vector> path_vector;
	
	friend class mm2_mdl;
	
	public:
	
	// NOTE(review): the char pointer is presumably the name of the input file mentioned
	// in the class description -- confirm in the implementation.
	
	mm1_sequencebuilder(mm1_chn_info::chn_type, char *);
	~mm1_sequencebuilder(void);
	
	// NOTE(review): Build() presumably takes the sequence as text and an array of
	// f64 values that control the geometry -- confirm in the implementation.
	
	void Build(mm1_mdl *, char *, f64 *);
	void Identify(mm1_mdl *);
	
	private:
	
	// Internal helpers of Build() and Identify().
	
	void Build(mm1_mdl *, mm1_sb_res *, f64 *);
	void Convert(mm1_atom *[], f64 *, fGL *);
	
	void FindPath(mm1_mdl *, mm1_atom *, mm1_atom *);
	void BuildTemplate(vector<mm1_sb_tdata> &, i32s, bool, bool);
	void BuildTemplate(vector<mm1_sb_tdata> &, vector<mm1_sb_atm> &);
	bool CheckTemplate(vector<mm1_sb_tdata> &, i32s);
};

/*################################################################################################*/

// TODO: why not merge these two functions into mm1_mdl !?!?!?!?!?
// Both of them are declared as friends of mm1_chn_info.

/**	This will find the secondary structure HB patterns.

	Kabsch W, Sander C : "Dictionary of Protein Secondary Structure: Pattern Recognition 
	of Hydrogen-Bonded and Geometrical Features" Biopolymers 22, 2577-2637, (1983)
	
	This assumes that all identified sequences are peptides/proteins??? 
	Might run in trouble if you have identified also some nucleic acids...
*/

void DefineSecondaryStructure(mm1_mdl *);

// NOTE(review): going by the name this returns a hydrogen bond energy; the two
// i32s pointers presumably identify the two residues involved -- confirm in the implementation.

f64 HBondEnergy(mm1_mdl *, i32s *, i32s *);

/*################################################################################################*/

#endif	// MM1UTIL_H

// eof
