/*
    libmaus2
    Copyright (C) 2009-2013 German Tischler
    Copyright (C) 2011-2013 Genome Research Limited

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
#if ! defined(LIBMAUS2_GAMMA_SPARSEGAMMAGAPBLOCKENCODER_HPP)
#define LIBMAUS2_GAMMA_SPARSEGAMMAGAPBLOCKENCODER_HPP

#include <libmaus2/gamma/GammaEncoder.hpp>
#include <libmaus2/gamma/GammaDecoder.hpp>
#include <libmaus2/aio/OutputStreamInstance.hpp>
#include <libmaus2/aio/SynchronousGenericOutput.hpp>
#include <libmaus2/aio/SynchronousGenericInput.hpp>
#include <libmaus2/util/GetFileSize.hpp>
#include <libmaus2/util/TempFileRemovalContainer.hpp>

namespace libmaus2
{
	namespace gamma
	{

		/**
		 * Encoder for sorted (key,value) sequences. For each pair the gap
		 * (key - previous key - 1) and the value are written through a
		 * GammaEncoder. Every blocksize pairs an index record (key and the
		 * encoder offset at that point) is written to a separate index file;
		 * term() appends that index and a short trailer to the data stream.
		 *
		 * NOTE: the declaration order of the data members is significant, as the
		 * constructor initialiser lists rely on it (SGOout may bind to *SGOCOS,
		 * SGO is built on SGOout, genc is built on SGO).
		 **/
		template<typename _data_type>
		struct SparseGammaGapBlockEncoderTemplate
		{
			typedef _data_type data_type;
			typedef SparseGammaGapBlockEncoderTemplate<data_type> this_type;

			typedef std::unique_ptr<this_type> unique_ptr_type;
			typedef std::shared_ptr<this_type> shared_ptr_type;

			typedef libmaus2::aio::SynchronousGenericOutput<data_type> stream_type;

			// owned data output stream; only set by the constructor taking a file name
			libmaus2::aio::OutputStreamInstance::unique_ptr_type SGOCOS;
			// name of the index file
			std::string const indexfilename;
			// output stream for the index file; released (null) once term() has run
			libmaus2::aio::OutputStreamInstance::unique_ptr_type indexUP;

			// data output stream (either supplied by the caller or *SGOCOS)
			std::ostream & SGOout;

			// word output layered on SGOout
			stream_type SGO;

			// key passed to the most recent encode() call (or the initial value)
			int64_t prevkey;
			// gamma encoder writing to SGO
			libmaus2::gamma::GammaEncoder<stream_type> genc;

			// number of encode() calls covered by one index record
			uint64_t const blocksize;
			// number of encode() calls left before the next index record is written
			uint64_t blockleft;

			// number of index records written so far
			uint64_t indexentries;

			/**
			 * Construct an encoder writing to a stream supplied by the caller.
			 *
			 * @param out data output stream (must outlive this object, only a reference is kept)
			 * @param rindexfilename name of the index file, which is opened for writing here
			 * @param rprevkey initial previous key; the first gap is computed relative to it
			 * @param rblocksize number of pairs per index record
			 **/
			SparseGammaGapBlockEncoderTemplate(
				std::ostream & out,
				std::string const & rindexfilename,
				int64_t const rprevkey = -1,
				uint64_t const rblocksize = 64*1024
			)
			:
			  indexfilename(rindexfilename),
			  indexUP(new libmaus2::aio::OutputStreamInstance(indexfilename)),
			  SGOout(out),
			  SGO(SGOout,8*1024),
			  prevkey(rprevkey),
			  genc(SGO),
			  blocksize(rblocksize),
			  blockleft(0),
			  indexentries(0)
			{
			}

			/**
			 * Construct an encoder which opens and owns its data output file.
			 * The initial previous key is -1.
			 *
			 * @param filename name of the data output file
			 * @param rindexfilename name of the index file, which is opened for writing here
			 * @param rblocksize number of pairs per index record
			 **/
			SparseGammaGapBlockEncoderTemplate(
				std::string const & filename,
				std::string const & rindexfilename,
				uint64_t const rblocksize = 64*1024
			)
			: SGOCOS(new libmaus2::aio::OutputStreamInstance(filename)),
			  indexfilename(rindexfilename),
			  indexUP(new libmaus2::aio::OutputStreamInstance(indexfilename)),
			  SGOout(*SGOCOS),
			  SGO(SGOout,8*1024),
			  prevkey(-1),
			  genc(SGO),
			  blocksize(rblocksize),
			  blockleft(0),
			  indexentries(0)
			{
			}

			/**
			 * Encode one (key,value) pair.
			 *
			 * The gap written is key - prevkey - 1, so the computation expects
			 * key to be larger than the previously encoded key (the encodeArray
			 * helpers guarantee this by sorting and merging equal values).
			 *
			 * @param key key of the pair
			 * @param val value of the pair, checked to be non-zero by assert
			 **/
			void encode(uint64_t const key, uint64_t const val)
			{
				// start of next block: record key and current encoder offset in the index
				if ( ! blockleft )
				{
					uint64_t const ikey = key;
					uint64_t const ibitoff = genc.getOffset();

					libmaus2::util::NumberSerialisation::serialiseNumber(*indexUP,ikey);
					libmaus2::util::NumberSerialisation::serialiseNumber(*indexUP,ibitoff);
					indexentries++;

					blockleft = blocksize;
				}

				int64_t const dif = (static_cast<int64_t>(key)-prevkey)-1;
				genc.encode(dif);
				prevkey = key;
				assert ( val );
				genc.encode(val);
				--blockleft;
			}

			/**
			 * Finish the output: write two zero codes, flush the encoder and the
			 * streams, append the contents of the index file to the data stream
			 * and finally write the highest key (0 if nothing was encoded) and
			 * the number of index records.
			 *
			 * The index stream is released by this call. Calling term() again
			 * is a no-op, as the index stream is no longer available and the
			 * trailer has already been written.
			 **/
			void term()
			{
				// already terminated: indexUP was released by a previous call
				if ( ! indexUP.get() )
					return;

				genc.encode(0);
				genc.encode(0);
				genc.flush();
				SGO.flush();
				SGOout.flush();
				indexUP->flush();
				indexUP.reset();

				// copy index (two numbers of sizeof(uint64_t) bytes per record) behind the data
				libmaus2::aio::InputStreamInstance indexin(indexfilename);
				libmaus2::util::GetFileSize::copy(indexin,SGOout,2*sizeof(uint64_t)*indexentries);
				libmaus2::util::NumberSerialisation::serialiseNumber(SGOout,indexentries ? prevkey : 0); // highest key in file
				libmaus2::util::NumberSerialisation::serialiseNumber(SGOout,indexentries);

				SGOout.flush();
			}

			/**
			 * Sort the range [ita,ite) in place and encode each distinct value
			 * as key together with its number of occurrences as value.
			 *
			 * @param ita start of input range (random access, sorted in place)
			 * @param ite end of input range
			 * @param out data output stream
			 * @param indexfilename name of the index file used while encoding
			 **/
			template<typename it>
			static void encodeArray(it const ita, it const ite, std::ostream & out, std::string const & indexfilename)
			{
				std::sort(ita,ite);

				this_type enc(out,indexfilename);

				it itl = ita;

				while ( itl != ite )
				{
					// find end of run of values equal to *itl
					it ith = itl;

					while ( ith != ite && *ith == *itl )
						++ith;

					enc.encode(*itl,ith-itl);

					itl = ith;
				}

				enc.term();
				out.flush();
			}

			/**
			 * Sort the range [gita,gite) in place and encode it into several
			 * files named fnprefix_NNNNNN (six digit zero padded part number).
			 * Part boundaries are moved forward so that a run of equal values
			 * is never split between two files; consequently the number of
			 * files produced can be smaller than tparts. Each part after the
			 * first starts with the last value of the preceding part as its
			 * previous key.
			 *
			 * @param gita start of input range (random access, sorted in place)
			 * @param gite end of input range
			 * @param fnprefix prefix for the names of the output files
			 * @param tparts target number of parts; 0 is treated as 1
			 * @param blocksize number of pairs per index record
			 * @return names of the files written
			 **/
			template<typename it>
			static std::vector<std::string> encodeArray(it const gita, it const gite, std::string const & fnprefix, uint64_t const tparts, uint64_t const blocksize = 64*1024)
			{
				std::sort(gita,gite);

				// guard against division by zero for tparts == 0
				uint64_t const nparts = tparts ? tparts : 1;
				uint64_t const partsize = (gite-gita+nparts-1)/(nparts);

				std::vector<uint64_t> partstarts;
				it gitc = gita;
				while ( gitc != gite )
				{
					// do not start a part in the middle of a run of equal values
					while ( gitc != gita && gitc != gite && (*(gitc-1)) == *gitc )
						++gitc;

					assert ( gitc == gita || gitc == gite || ((*(gitc-1)) != (*gitc)) );

					if ( gitc != gite )
						partstarts.push_back(gitc-gita);

					gitc += std::min(partsize,static_cast<uint64_t>(gite-gitc));
				}

				uint64_t const parts = partstarts.size();
				std::vector<std::string> partfn(parts);
				// sentinel so that partstarts[p+1] is valid for the last part
				partstarts.push_back(gite-gita);

				for ( uint64_t p = 0; p < parts; ++p )
				{
					std::ostringstream fnostr;
					fnostr << fnprefix << "_" << std::setw(6) << std::setfill('0') << p;
					std::string const fn = fnostr.str();
					partfn[p] = fn;
					std::string const indexfn = fn+".idx";
					libmaus2::util::TempFileRemovalContainer::addTempFile(indexfn);

					libmaus2::aio::OutputStreamInstance COS(fn);

					// previous key for this part: -1 for the first part,
					// otherwise the last value of the preceding part
					int64_t partprevkey = -1;
					if ( p != 0 )
					{
						uint64_t const lastkey = gita[partstarts[p]-1];
						partprevkey = static_cast<int64_t>(lastkey);
					}

					this_type enc(
						COS,indexfn,
						partprevkey,
						blocksize
					);

					it itl = gita + partstarts[p];
					it ite = gita + partstarts[p+1];

					while ( itl != ite )
					{
						// find end of run of values equal to *itl
						it ith = itl;

						while ( ith != ite && *ith == *itl )
							++ith;

						enc.encode(*itl,ith-itl);

						itl = ith;
					}

					enc.term();
					COS.flush();

					// index has been appended to the data file by term()
					libmaus2::aio::FileRemoval::removeFile(indexfn);
				}

				return partfn;
			}

			/**
			 * Sort the range [ita,ite) in place and encode it into the single
			 * file fn. The index file fn.idx is registered as a temporary file
			 * and removed after encoding.
			 *
			 * @param ita start of input range (random access, sorted in place)
			 * @param ite end of input range
			 * @param fn name of the output file
			 **/
			template<typename it>
			static void encodeArray(it const ita, it const ite, std::string const & fn)
			{
				libmaus2::aio::OutputStreamInstance COS(fn);
				std::string const indexfn = fn+".idx";
				libmaus2::util::TempFileRemovalContainer::addTempFile(indexfn);

				encodeArray(ita,ite,COS,indexfn);

				libmaus2::aio::FileRemoval::removeFile(indexfn);
			}
		};

		// encoder instantiation using uint64_t as the data type of the output stream
		using SparseGammaGapBlockEncoder = SparseGammaGapBlockEncoderTemplate<uint64_t>;
		// encoder instantiation using libmaus2::math::UnsignedInteger<4> as the data type of the output stream
		using SparseGammaGapBlockEncoder2 = SparseGammaGapBlockEncoderTemplate< libmaus2::math::UnsignedInteger<4> >;
	}
}
#endif
