
/**************************************************************************
 *                                                                        *
 *   Copyright (C) 2001 Grub, Inc.                                        *
 *                                                                        *
 *   This program is free software; you can redistribute it and/or modify *
 *   it under the terms of the GNU General Public License as published by *
 *   the Free Software Foundation; either version 1, or (at your option)  *
 *   any later version.                                                   *
 *                                                                        *
 *   This program is distributed in the hope that it will be useful,      *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of       *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        *
 *   GNU General Public License for more details.                         *
 *                                                                        *
 *   You should have received a copy of the GNU General Public License    *
 *   along with this program; if not, write to the Free Software          *
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.            *
 *                                                                        *
 *                                                                        *
 **************************************************************************/
/* Project: GRUB
 * <http://www.grub.org>
 * module: ClientDB
 * Author: Kosta Damevski (Kosta42@hotmail.com), 
 *	   (1) Ledio Ago (lajesus@grub.org)
 * Last revision: 20 Jan, 2001
 *		 (1) 06 March, 2002
 * Files: ClientDB.cpp ClientDB.h ClientDBRecord.cpp ClientDBRecord.h
 * 	  dbl_list.c dbl_list.h ClientDBHandler.h
 * Added(1): ServerDb.cpp ServerDb.h CrawlerDb.cpp CrawlerDb.h UtilDb.cpp
 *	  UtilDb.h
 * Removed(1): ClientDBRecord.cpp ClientDBRecord.h dbl_list.c dbl_list.h
 *        ClientDBHandler.h
 * 
 * The ClientDB classes provide the functionality of temporary storage
 * to the grub client. The ClientDB class itself coordinates all other
 * classes and provides an interface to the rest of the grub client
 * which is specific to its purpose. The 4 methods: GetInsert(), GetRetrieve(),
 * CrawlInsert(), and CrawlRetrieve() are always performed continuously and in
 * the order listed to provide storage for a single URL and its additional data
 * as well as its contents.
 * 
 * Note: In revision (1) we completely changed the way we archive the
 * URLs and their information for the client.  From now on we can store
 * binary data.  The new archive, or Client Database (CDB), uses the Mk4
 * library.  The CDB is based on two data files, CrawlerDb.dat and
 * ServerDb.dat.
 * ServerDb.dat holds the initial URLs that come from the server.  As
 * the URLs are pulled from the file they are crawled by the crawlers.
 * CrawlerDb.dat holds the information about the crawled URLs, such as the
 * content, CRC, MIME, size, status, and redirURL.
 */

#ifndef _ClientDB_H_
#define _ClientDB_H_

#include <stdio.h>
#include <assert.h>

/* Error Codes -- all negative, occupying the range -55 .. -59. */
#define OUT_OF_MEM_CDB -55
/* Per the CrawlRetrieve() notes in this header: the crawled_urls table is empty. */
#define CRAWLED_URLS_TBL_EMPTY -56
#define URL_HANDLE_FAILED_CDB -57
#define BAD_HANDLER_CDB -58
#define FAIL_ALLOC_MEM -59
/* Success codes */
/*
 * NOTE(review): the method comments in this header describe success as a
 * return value of 0, while this success code is 55 -- confirm against the
 * implementation which value callers should actually test for.
 */
#define SUCCESS_CDB 55

#include "CrawlerDB.h"
#include "ServerDB.h"
#include <clog.h>

/*
 * Names of the data files; presumably the two files backing the CDB that
 * the file-header comment describes.
 * NOTE(review): the file-header comment calls the crawler data file
 * "CrawlerDb.dat", but CRAWLER_DB_DAT expands to "ClientDb.dat" -- confirm
 * which name is intended before changing either one.
 */
#define CRAWLER_DB_DAT "ClientDb.dat"
#define SERVER_DB_DAT "ServerDb.dat"

//***********************************************************
class ClientDB {
public:
	/* Constructor and destructor; only declared in this header. */
	ClientDB();
	~ClientDB();

	/*
	 * Method:	GetInsert()
	 * Caller:	GET PROTOCOL
	 * Description:	Stores a URL and its previously known attributes
	 *		before crawling starts. Intended to be invoked
	 *		while the GET part of the protocol is running.
	 * Input:	URL  -- the URL to store
	 *		size -- previous size of the page, in bytes
	 *		CRC  -- previously calculated checksum of the contents
	 * Returns:	An error code if an error occurred, otherwise 0.
	 *		NOTE(review): SUCCESS_CDB (55) is also defined in
	 *		this header -- confirm the actual success value.
	 */
	int GetInsert(const char *URL, unsigned long size, unsigned long CRC);

	/*
	 * Method:	GetRetrieve()
	 * Caller:	CRAWLER
	 * Description:	Hands the crawler a pointer to a record that was
	 *		previously stored through GetInsert(), so the
	 *		crawler can use it for crawling.
	 * Returns:	Pointer to a URLHandler structure, or NULL if an
	 *		error was encountered.
	 *		NOTE(review): ownership of the returned structure
	 *		is not stated here -- confirm who releases it.
	 */
	URLHandler *GetRetrieve();

	/*
	 * Method:	CrawlInsert()
	 * Caller:	CRAWLER
	 * Description:	Lets the crawler modules store the data obtained
	 *		by crawling a URL.
	 * Input:	URL      -- the URL to store
	 *		contents -- contents retrieved for the URL
	 *		size     -- size of the page, in bytes
	 *		CRC      -- calculated checksum of the contents
	 *		status   -- see protocol specs and ClientDBRecord.h
	 *		MIME     -- MIME of the examined document
	 *		redirURL -- a URL that the site redirects to
	 * Returns:	NOTE(review): the return value is not documented in
	 *		this header -- confirm against the implementation.
	 * Throws:	Declared as able to throw GrubExp.
	 */
	int CrawlInsert(
		const char *URL,
		const char *contents,
		unsigned long size,
		unsigned long CRC,
		status_t status,
		const char *MIME,
		const char *redirURL
	) throw (GrubExp);

	/*
	 * Method:	CrawlRetrieve()
	 * Caller:	PUT PROTOCOL
	 * Description:	Retrieves data from the DB once crawling has been
	 *		completed and its results stored.
	 * Input:	handle -- URLHandler structure holding the URL
	 *		information; the original notes list the URL
	 *		itself, its contents, size in bytes, checksum of
	 *		the contents, and status (see protocol specs and
	 *		ClientDBRecord.h).
	 *		NOTE(review): being a URLHandler **, this is
	 *		presumably an output parameter -- confirm.
	 * Returns:	An error code if an error occurred, otherwise 0.
	 *			- CRAWLED_URLS_TBL_EMPTY - the crawled_urls
	 *			  table is empty.
	 * Throws:	Declared as able to throw GrubExp.
	 */
	int CrawlRetrieve(URLHandler **handle) throw (GrubExp);

	/*
	 * Method:	recordCount()
	 * Description:	Reports how many records the crawled_urls table
	 *		holds.
	 */
	int recordCount();

	/*
	 * Method:	emptyArchive()
	 * Description:	Deletes the records of both databases, the served
	 *		urls and the crawled urls.
	 */
	void emptyArchive();

	/*
	 * Method:	deleteArchive()
	 * Description:	Completely deletes the CDB data files.
	 * Returns:	NOTE(review): the meaning of the int result is not
	 *		documented in this header -- confirm against the
	 *		implementation.
	 */
	int deleteArchive();

private:
	/* Pointer to the server database object (ServerDB). */
	ServerDB *server_db;

	/* Pointer to the crawler database object (CrawlerDB). */
	CrawlerDB *crawler_db;
};

#endif


