/*
 * color-ed3.cc --
 *
 *      Implementation of Floyd-Steinberg style error diffusion dithering.
 *
 * Copyright (c) 1993-2002 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * A. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * B. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * C. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef lint
/* RCS identification string for this file; omitted when 'lint' is defined. */
static char rcsid[] =
    "@(#) $Header: /usr/mash/src/repository/mash/mash-1/render/color-ed3.cc,v 1.5 2002/02/03 04:15:14 lim Exp $";
#endif

/*
 * Implementation of Floyd-Steinberg style error diffusion dithering.
 * The diffusion is a simple one-dimensional pattern.  We restrict it
 * to one dimension so that we can have an efficient loop with the
 * current error always contained in a register (i.e., no need to
 * write and read back pixel errors from memory).
 *
 * This serpentine pattern yields reasonable results (considering we're
 * restricted to 1D):
 *
 *		. .-. .-. .-. .-.
 *		| | | | | | | | |
 *		.-. .-. .-. .-. .
 *				|
 *		.-. .-. .-. .-. .
 *		| | | | | | | | |
 *		. .-. .-. .-. .-
 *              |
 *              ...
 *
 * Since we dither on a block basis, we begin each dither run with
 * random error to avoid artifacts induced by boundaries.
 *
 * The dither is carried out in the YUV, rather than the RGB, color space.
 * Color dithering relies on the fact that the three color planes
 * can be dithered independently.  This works because of additivity
 * of the primaries.  However, the YUV space is not a cube and
 * the three planes cannot be dithered independently (in theory).
 * This is because a valid dithering "jump" along one axis can cause
 * the current point to fall far outside the color space.  This
 * happens frequently, especially at low and high luminances where
 * the U-V plane is small.  The dithering jumps are large because
 * the available number of colors is small and the distance between
 * lattice points in the allocated color grid is large.
 * Fortunately, this happens only at low or high luminance levels,
 * where color fidelity is less perceptible, so in practice
 * we're okay.  One problem is at edges.  A large color error
 * can accumulate in a dark region, so at the boundary where
 * the luminance jumps, there can be a large color error which
 * appears as a pastel shadow.  This effect is reduced by
 * scaling down the color errors at low and high luminosities.
 *
 * Another problem is that error-diffusion relies on the placement
 * of color cells on a convex hull of the color space (i.e., so there
 * is always a color on the other side of the input color, with respect to the
 * quantized pixel, to absorb the error).  In RGB, such placement is
 * trivial -- e.g., use the corners of the RGB cube.  But in the YUV
 * space, these points are not obvious.  Instead, we let the dither
 * run outside the color space under the assumption that retaining
 * the error terms is more important than making sure the dither
 * always stays inside the convex hull of available colors.  Of course,
 * the dither is limited to the range of the three planes (i.e., the
 * dither never leaves the "YUV-cube" but it may leave YUV-space,
 * which doesn't fill the whole cube).
 */

#include <stdio.h>
#include <osfcn.h>
#include <math.h>
#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include "color.h"
#include "color-pseudo.h"
#include "bsd-endian.h"

/*
 * Pseudo-color model for the "ed3" error-diffusion dither.
 *
 * In addition to whatever PseudoColorModel maintains, this model owns
 * yuvmap_, a lookup table built by alloc_colors() and read by the
 * renderer through yuvmap().
 */
class ED3ColorModel : public PseudoColorModel {
public:
	ED3ColorModel(Visual*);
	/*
	 * yuvmap_ is allocated with new[] in alloc_colors(); release it
	 * here so that destroying a model does not leak the table.
	 * (Deleting a null pointer is a no-op, so a model whose colors
	 * were never allocated is fine.)
	 */
	~ED3ColorModel() { delete[] yuvmap_; }
	/* (Re)build the colormap and yuvmap_; returns -1 on failure, else 0. */
	virtual int alloc_colors();
	/* Create a renderer for window vw that dithers through this model. */
	WindowRenderer* allocate(VideoWindow* vw, int decimation);
	/* Read-only access to the table; 0 until alloc_colors() has run. */
	const u_int* yuvmap() const { return (yuvmap_); }
private:
	u_int* yuvmap_;		/* owned; array of u_int, or 0 */
};

/*
 * Matcher that recognizes the "ed3" color-model name.  A single
 * file-scope instance, matcher_ed3, is defined right after the class.
 */
class ED3ColorMatcher : public ColorMatcher {
private:
	/* Return a new ED3ColorModel if colormodel names "ed3", else 0. */
	virtual ColorModel* match(const char* colormodel);
};

ED3ColorMatcher matcher_ed3;

/*
 * Build an ED3ColorModel when the requested name is "ed3" (compared
 * without regard to case) and a pseudocolor visual is available.
 * Returns 0 in every other case.
 */
ColorModel* ED3ColorMatcher::match(const char* colormodel)
{
	/* Not our name: decline. */
	if (strcasecmp(colormodel, "ed3") != 0)
		return (0);

	Visual* const pseudo = ColorModel::pseudocolor_visual();
	return (pseudo != 0 ? new ED3ColorModel(pseudo) : 0);
}

class ED3WindowRenderer;

/*
 * Pointer to one of the renderer's const dither routines.  Every routine
 * takes the frame pointer followed by four unsigned values (offset, x,
 * width, height).
 */
typedef void (ED3WindowRenderer::*ED3Method)(const u_char* frm,
					     u_int off, u_int x,
					     u_int width, u_int height) const;

/*
 * Window renderer for the "ed3" error-diffusion dither.
 *
 * render() forwards to whichever dither routine update() last stored
 * in method_.  The dither routines read the lookup table from the
 * color model referenced by cm_.
 */
class ED3WindowRenderer : public PseudoWindowRenderer {
public:
	/*
	 * method_ starts out null so that render() has a well-defined
	 * state to test before update() has selected a routine.
	 */
	ED3WindowRenderer(VideoWindow* vw, int decimation, ED3ColorModel& cm) :
		PseudoWindowRenderer(vw, decimation), cm_(cm), method_(0) { }
	/*
	 * Dither the given region of frame frm using the current method.
	 * Does nothing if no method has been selected yet (update() has
	 * not run); calling through a null member pointer would be
	 * undefined behavior.
	 */
	void render(const u_char* frm, int off, int x, int w, int h) {
		if (method_ == 0)
			return;
		(this->*method_)(frm, off, x, w, h);
	}
protected:
	ED3ColorModel& cm_;	/* color model supplying yuvmap() */
	/* Pick the dither routine from index() and store it in method_. */
	virtual void update();
	ED3Method method_;	/* active dither routine, or 0 */
	void dither_422(const u_char* frm, u_int off, u_int x,
			u_int width, u_int height) const;
	void dither_down2_422(const u_char* frm, u_int off, u_int x,
			      u_int width, u_int height) const;
	void dither_down4_422(const u_char* frm, u_int off, u_int x,
			      u_int width, u_int height) const;
	void dither_down_422(const u_char* frm, u_int off, u_int x,
			     u_int width, u_int height) const;
	void dither_up2_422(const u_char* frm, u_int off, u_int x,
			    u_int width, u_int height) const;
	void dither_411(const u_char* frm, u_int off, u_int x,
			u_int width, u_int height) const;
	void dither_down2_411(const u_char* frm, u_int off, u_int x,
			      u_int width, u_int height) const;
	void dither_down4_411(const u_char* frm, u_int off, u_int x,
			      u_int width, u_int height) const;
	void dither_down_411(const u_char* frm, u_int off, u_int x,
			     u_int width, u_int height) const;
	void dither_up2_411(const u_char* frm, u_int off, u_int x,
			    u_int width, u_int height) const;
};

/*
 * Create a new ED3WindowRenderer for window vw at the given decimation,
 * bound to this color model.  The caller receives the new object.
 */
WindowRenderer* ED3ColorModel::allocate(VideoWindow* vw, int decimation)
{
	ED3WindowRenderer* renderer =
		new ED3WindowRenderer(vw, decimation, *this);
	return (renderer);
}

/*
 * Select the dither routine that matches the renderer's current
 * configuration: index() picks an entry from the table below and the
 * result is stored in method_ for render() to call.
 *
 * Each row of the table holds four entries: the 4:1:1 routine, the
 * 4:2:2 routine, and two gray routines.  The rows are, in order,
 * up2, same-size, down2, down4 and generic down.
 * NOTE(review): the exact meaning of index() is defined by the base
 * class and is not visible here -- confirm it always stays within the
 * 20 entries of the table.
 *
 * Every entry is written as &Class::member.  Standard C++ only forms a
 * pointer to member from a qualified name; the unqualified form is
 * accepted by some old compilers but rejected by conforming ones.  The
 * gray routines are not declared in this class, so naming them through
 * ED3WindowRenderer finds the inherited members.
 */
void ED3WindowRenderer::update()
{
	static const ED3Method methods[] = {
		&ED3WindowRenderer::dither_up2_411,
		&ED3WindowRenderer::dither_up2_422,
			&ED3WindowRenderer::dither_gray_up,
			&ED3WindowRenderer::dither_gray_up,
		&ED3WindowRenderer::dither_411,
		&ED3WindowRenderer::dither_422,
			&ED3WindowRenderer::dither_gray,
			&ED3WindowRenderer::dither_gray,
		&ED3WindowRenderer::dither_down2_411,
		&ED3WindowRenderer::dither_down2_422,
			&ED3WindowRenderer::dither_gray_down,
			&ED3WindowRenderer::dither_gray_down,
		&ED3WindowRenderer::dither_down4_411,
		&ED3WindowRenderer::dither_down4_422,
			&ED3WindowRenderer::dither_gray_down,
			&ED3WindowRenderer::dither_gray_down,
		&ED3WindowRenderer::dither_down_411,
		&ED3WindowRenderer::dither_down_422,
			&ED3WindowRenderer::dither_gray_down,
			&ED3WindowRenderer::dither_gray_down,
	};
	method_ = methods[index()];
}

/*
 * Construct a model for the given visual.  The lookup table pointer
 * starts out null; it is filled in by alloc_colors().
 */
ED3ColorModel::ED3ColorModel(Visual* visual)
	: yuvmap_(0)
{
	/* NOTE(review): eflag_ is an inherited flag; its meaning is not visible here. */
	eflag_ = 1;
	visual_ = visual;
}

/*
 * Color tables defined elsewhere with C linkage.  They are handed to
 * installLUT() by ED3ColorModel::alloc_colors().  The explicit 'extern'
 * keeps these as declarations rather than definitions.
 */
extern "C" {
	extern int ed_ncolor;
	extern u_char ed_cmap[];
	extern u_char ed_lut[];
}


/*
 * Install the error-diffusion colormap and (re)build yuvmap_, the table
 * the dither loop indexes with its packed "pixel + error" word.
 *
 * Returns -1 if installLUT() fails (yuvmap_ is then left untouched),
 * otherwise 0.
 *
 * Table index layout (18 bits, NLUT entries):
 *	bits  0-3	y >> 4
 *	bits  4-9	u >> 2
 *	bit  10		selects the "ua" instead of the "ub" u-error
 *	bits 11-16	v >> 2
 *	bit  17		selects the "va" instead of the "vb" v-error
 *
 * Table entry layout:
 *	bits  0-7	allocated pixel value
 *	bits  8-9	random bits
 *	bits 12-15	set (0xf0 << 8) for the top luminance level only
 *	bits 16-21	u error (multiple of 4, shifted left by 14)
 *	bits 23-28	v error (multiple of 4, shifted left by 21)
 */
int ED3ColorModel::alloc_colors()
{
	if (installLUT(ed_ncolor, ed_cmap, ed_lut) < 0)
		return (-1);

	/*
	 * Map from colormap indices to allocated pixels.
	 */
	u_char* lut = PseudoColorModel::lut_;
#define NLUT (1 << (7+7+4))
	/*
	 * The table is allocated with new[], so it has to be released
	 * with delete[]; plain delete on an array is undefined behavior.
	 */
	delete[] yuvmap_;
	yuvmap_ = new u_int[NLUT];

	for (int y = 0; y < 256; y += 16) {
		int u, v;
		/*
		 * valid[v/2][u/2] holds yuv_to_rgb()'s result for each
		 * chroma pair at a luminance nudged 16 toward mid-scale.
		 * NOTE(review): presumably nonzero means the color lies
		 * inside the displayable space -- confirm.
		 */
		u_char valid[128][128];
		color c;
		c.y = y < 128? y + 16 : (y > 128? y - 16 : y);
		for (v = 0; v < 256; v += 2) {
			c.v = v;
			for (u = 0; u < 256; u += 2) {
				c.u = u;
				valid[v >> 1][u >> 1] = yuv_to_rgb(c);
			}
		}
		int yerr = y>=240? 0xf0 : 0;
		for (v = 0; v < 256; v += 4) {
			for (u = 0; u < 256; u += 4) {
				u_int pix = lut[v << 8 | u << 2 | y >> 4];
				c = *lookup(pix);
				/* Chroma quantization error, kept to a multiple of 4. */
				const int uerr = (u - c.u) & 0xfc;
				const int verr = (v - c.v) & 0xfc;

				/*
				 * An error is only carried forward when the
				 * relevant chroma cell is valid; otherwise
				 * it is dropped (left at zero).
				 */
				int ub = 0, ua = 0, vb = 0, va = 0;
				if (valid[v >> 1][(u >> 1)+1])
					ua = uerr;
				if (valid[(v >> 1)+1][u >> 1])
					va = verr;
				if (valid[v >> 1][u >> 1]) {
					ub = uerr;
					vb = verr;
				}

				u_int ind = v << 9 | u << 2 | y >> 4;
				pix |= yerr << 8;
				/*
				 * The random bits (8-9) never overlap the
				 * u error (bits 16-21), so OR-ing them in
				 * gives the same value as the original
				 * "(u << 14) + random" form while making
				 * the grouping explicit.  random() is still
				 * called once per entry, in the same order.
				 */
				yuvmap_[ind] =
					pix | (vb << 21) | (ub << 14)
					| (random() & (0x3 << 8));
				yuvmap_[ind + (1 << 10)] =
					pix | (vb << 21) | (ua << 14)
					| (random() & (0x3 << 8));
				yuvmap_[ind + (1 << 17)] =
					pix | (va << 21) | (ub << 14)
					| (random() & (0x3 << 8));
				yuvmap_[ind + (1 << 17) + (1 << 10)] =
					pix | (va << 21) | (ua << 14)
					| (random() & (0x3 << 8));
			}
		}
	}
	return (0);
}

/*
 * Declare and initialize variables for ONEPIX macro.
 *
 *	yuvmap	the color model's lookup table, from cm_.yuvmap()
 *	l	scratch for the luminance of the pixel being processed
 *	uv	packed chrominance term; the dither routine assigns it
 *		before invoking ONEPIX
 *	e	the error term of the current pixel, seeded here with a
 *		few random bits selected by the mask below
 *
 * 'e' is the error term of the current pixel.  All three
 * errors are stored as parallel 8-bit quantities.
 * Two extra bits are used to detect overflow and underflow,
 * yielding 10 bits per component.  We only ever add
 * an 8-bit unsigned value (the input pixel) to a signed
 * delta.  The delta, d, is coded as follows:
 *
 *		01 xxxx xxxx  d >= 0
 *		00 xxxx xxxx  d < 0
 *
 * (i.e. "xxxx xxxx" is the 2's complement binary representation of d)
 *
 * Coding the deltas this way makes the under/overflow check cheap.
 * The two high bits will be 01 if there was no under/overflow,
 * i.e., no carry out from a positive delta, or a carry out from
 * a negative delta.  The high bits will be 10 on overflow, and
 * 00 on underflow.  So you can check the over/underflow condition
 * with bit 8, and you can tell which of over/under happened with bit 9.
 *
 * 'omask' is the parallel set of under/overflow check bits.
 *
 * NOTE(review): the 10-bits-per-component layout and 'omask' described
 * above do not obviously match this file: no 'omask' variable is
 * declared by this macro, and the shifts used here (6 and 13) place the
 * chroma fields 7 bits apart.  The description looks inherited from a
 * related dither implementation -- confirm before relying on it.
 */
#define DIFFUSION_SETUP \
	register const u_int* yuvmap = cm_.yuvmap(); \
	u_int l, uv; \
	u_int e = random() & (0x1c << 13 | 0x1c << 6 | 0x7);

/*
 * Process one pixel.  Add in the current pixel to the previous error:
 * both the packed chroma term 'uv' and the luminance 'yval' are added
 * to 'e'.  Look up the closest color in the lookup table, using the
 * sum shifted right by 4 as the index into yuvmap.  Splice the low
 * 8 bits of the table entry into the output word 'out' via SPLICE at
 * bit position (3-off)*8, and compute the next error: the remaining
 * bits of the entry (entry >> 8) plus the low 4 bits of the luminance.
 *
 * Requires yuvmap, l, uv and e (see DIFFUSION_SETUP) to be in scope.
 *
 * NOTE(review): earlier wording said this step checks for overflow,
 * but the macro body contains no explicit test; presumably any carry
 * is absorbed by how the table is indexed -- confirm.
 */
#define ONEPIX(yval, out, off) \
	l = yval; \
	e += uv; \
	e += l; \
	e = yuvmap[e >> 4]; \
	SPLICE((out), e & 0xff, (3-(off))*8); \
	e >>= 8; \
	e += l & 0xf;

void ED3WindowRenderer::dither_422(const u_char* frm, u_int off,
				   u_int x, u_int width, u_int height) const
{
	register u_int iw = inw_;
	register const u_char* yp = frm + off;
	register const u_char* up = frm + size_ + (off >> 1);
	register const u_char* vp = up + (size_ >> 1);
	register u_int* xip = (u_int*)(pixbuf_ + off);
	register int w = width;
	DIFFUSION_SETUP

	for (register int len = w * height; len > 0; len -= 8) {
		register u_int out = 0;

		uv = ((vp[0] & 0xfc) << 13) | ((up[0] & 0xfc) << 6);
		ONEPIX(yp[0], out, 0)
		ONEPIX(yp[1], out, 1)

		uv = ((vp[1] & 0xfc) << 13) | ((up[1] & 0xfc) << 6);
		ONEPIX(yp[2], out, 2)
		ONEPIX(yp[3], out, 3)
		xip[0] = out;

		out = 0;
		uv = ((vp[2] & 0xfc) << 13) | ((up[2] & 0xfc) << 6);
		ONEPIX(yp[4], out, 0)
		ONEPIX(yp[5], out, 1)

		uv = ((vp[3] & 0xfc) << 13) | ((up[3] & 0xfc) << 6);
		ONEPIX(yp[6], out, 2)
		ONEPIX(yp[7], out, 3)
		xip[1] = out;

		xip += 2;
		yp += 8;
		up += 4;
		vp += 4;

		w -= 8;
		if (w <= 0) {
			w = width;
			register int pstride = iw - w;
			register int cstride = pstride >> 1;
			yp += pstride;
			up += cstride;
			vp += cstride;
			xip += (pstride >> 2);
			e = random() & (0x1c << 13 | 0x1c << 6 | 0x7);
		}
	}
}

/*
 * Placeholder: the body is empty, so a call writes nothing.  update()
 * lists this routine in its method table, so it can become the active
 * render method.
 * NOTE(review): presumably meant to dither 4:2:2 input at half size --
 * not implemented.
 */
void ED3WindowRenderer::dither_down2_422(const u_char* frm,
					 u_int off, u_int x,
					 u_int width, u_int height) const
{
}

/*
 * Placeholder: the body is empty, so a call writes nothing.  update()
 * lists this routine in its method table, so it can become the active
 * render method.
 * NOTE(review): presumably meant to dither 4:2:2 input at quarter size
 * -- not implemented.
 */
void ED3WindowRenderer::dither_down4_422(const u_char* frm,
					 u_int off, u_int x,
					 u_int width, u_int height) const
{
}

/*
 * Placeholder: the body is empty, so a call writes nothing.  update()
 * lists this routine in its method table, so it can become the active
 * render method.
 * NOTE(review): presumably meant to dither 4:2:2 input with general
 * down-scaling -- not implemented.
 */
void ED3WindowRenderer::dither_down_422(const u_char* frm,
					u_int off, u_int x,
					u_int width, u_int height) const
{
}

/*
 * Placeholder: the body is empty, so a call writes nothing.  update()
 * lists this routine in its method table, so it can become the active
 * render method.
 * NOTE(review): presumably meant to dither 4:2:2 input at double size
 * -- not implemented.
 */
void ED3WindowRenderer::dither_up2_422(const u_char *frm,
				       u_int off, u_int x,
				       u_int width, u_int height) const
{
}

/*
 * Placeholder: the body is empty, so a call writes nothing.  update()
 * lists this routine in its method table, so it can become the active
 * render method.
 * NOTE(review): presumably meant to dither 4:1:1 input at the same
 * size -- not implemented.
 */
void ED3WindowRenderer::dither_411(const u_char *frm,
				   u_int off, u_int x,
				   u_int width, u_int height) const
{
}

/*
 * Placeholder: the body is empty, so a call writes nothing.  update()
 * lists this routine in its method table, so it can become the active
 * render method.
 * NOTE(review): presumably meant to dither 4:1:1 input at half size --
 * not implemented.
 */
void ED3WindowRenderer::dither_down2_411(const u_char* frm,
					 u_int off, u_int x,
					 u_int width, u_int height) const
{
}

/*
 * Placeholder: the body is empty, so a call writes nothing.  update()
 * lists this routine in its method table, so it can become the active
 * render method.
 * NOTE(review): presumably meant to dither 4:1:1 input at quarter size
 * -- not implemented.
 */
void ED3WindowRenderer::dither_down4_411(const u_char* frm,
					 u_int off, u_int x,
					 u_int width, u_int height) const
{
}

/*
 * Placeholder: the body is empty, so a call writes nothing.  update()
 * lists this routine in its method table, so it can become the active
 * render method.
 * NOTE(review): presumably meant to dither 4:1:1 input with general
 * down-scaling -- not implemented.
 */
void ED3WindowRenderer::dither_down_411(const u_char* frm,
					u_int off, u_int x,
					u_int width, u_int height) const
{
}

/*
 * Placeholder: the body is empty, so a call writes nothing.  update()
 * lists this routine in its method table, so it can become the active
 * render method.
 * NOTE(review): presumably meant to dither 4:1:1 input at double size
 * -- not implemented.
 */
void ED3WindowRenderer::dither_up2_411(const u_char* frm,
				       u_int off, u_int x,
				       u_int width, u_int height) const
{
}
