diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/conf/files.sh3 src/sys/arch/sh3/conf/files.sh3
--- src.orig/sys/arch/sh3/conf/files.sh3	2008-03-20 11:57:41.000000000 +0900
+++ src/sys/arch/sh3/conf/files.sh3	2008-03-20 12:03:40.000000000 +0900
@@ -37,6 +37,9 @@
 file	arch/sh3/sh3/mmu.c
 file	arch/sh3/sh3/mmu_sh3.c		sh3
 file	arch/sh3/sh3/mmu_sh4.c		sh4
+file	arch/sh3/sh3/fpu.c
+file	arch/sh3/sh3/fpu_sh3.c		sh3
+file	arch/sh3/sh3/fpu_sh4.c		sh4
 file	arch/sh3/sh3/clock.c
 file	arch/sh3/sh3/devreg.c		sh3 & sh4
 file	arch/sh3/sh3/interrupt.c
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/files.fpu src/sys/arch/sh3/fpu/files.fpu
--- src.orig/sys/arch/sh3/fpu/files.fpu	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/files.fpu	2007-01-31 11:48:27.000000000 +0900
@@ -0,0 +1,14 @@
+#	$NetBSD$
+
+# FPU emulation files.
+
+file	arch/sh3/fpu/fpu_emu.c
+file	arch/sh3/fpu/fpu_add.c
+file	arch/sh3/fpu/fpu_compare.c
+file	arch/sh3/fpu/fpu_div.c
+file	arch/sh3/fpu/fpu_explode.c
+file	arch/sh3/fpu/fpu_implode.c
+file	arch/sh3/fpu/fpu_mul.c
+file	arch/sh3/fpu/fpu_sqrt.c
+file	arch/sh3/fpu/fpu_subr.c
+
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_add.c src/sys/arch/sh3/fpu/fpu_add.c
--- src.orig/sys/arch/sh3/fpu/fpu_add.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_add.c	2007-01-31 11:48:51.000000000 +0900
@@ -0,0 +1,228 @@
+/*	$NetBSD$ */
+
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu_add.c	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Perform an FPU add (return x + y).
+ *
+ * To subtract, negate y and call add.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/types.h>
+#if defined(DIAGNOSTIC)||defined(DEBUG)
+#include <sys/systm.h>
+#endif
+
+#include <machine/reg.h>
+#include <machine/fpu.h>
+
+#include <sh3/fpu/fpu_arith.h>
+#include <sh3/fpu/fpu_emu.h>
+#include <sh3/fpu/fpu_extern.h>
+
+struct fpn *
+fpu_add(struct fpemu *fe)
+{
+	struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2, *r;
+	u_int r0, r1, r2, r3;
+	int rd;
+
+	/*
+	 * Put the `heavier' operand on the right (see fpu_emu.h).
+	 * Then we will have one of the following cases, taken in the
+	 * following order:
+	 *
+	 *  - y = NaN.  Implied: if only one is a signalling NaN, y is.
+	 *	The result is y.
+	 *  - y = Inf.  Implied: x != NaN (is 0, number, or Inf: the NaN
+	 *    case was taken care of earlier).
+	 *	If x = -y, the result is NaN.  Otherwise the result
+	 *	is y (an Inf of whichever sign).
+	 *  - y is 0.  Implied: x = 0.
+	 *	If x and y differ in sign (one positive, one negative),
+	 *	the result is +0 except when rounding to -Inf.  If same:
+	 *	+0 + +0 = +0; -0 + -0 = -0.
+	 *  - x is 0.  Implied: y != 0.
+	 *	Result is y.
+	 *  - other.  Implied: both x and y are numbers.
+	 *	Do addition a la Hennessey & Patterson.
+	 */
+	DPRINTF(FPE_REG, ("fpu_add:\n"));
+	DUMPFPN(FPE_REG, x);
+	DUMPFPN(FPE_REG, y);
+	DPRINTF(FPE_REG, ("=>\n"));
+	ORDER(x, y);
+	if (ISNAN(y)) {
+		fe->fe_cx |= FPSCR_VXSNAN;
+		DUMPFPN(FPE_REG, y);
+		return (y);
+	}
+	if (ISINF(y)) {
+		if (ISINF(x) && x->fp_sign != y->fp_sign) {
+			fe->fe_cx |= FPSCR_VXISI;
+			return (fpu_newnan(fe));
+		}
+		DUMPFPN(FPE_REG, y);
+		return (y);
+	}
+	rd = ((fe->fe_fpscr) & FPSCR_RN);
+	if (ISZERO(y)) {
+		if (rd != FSR_RD_RM)	/* only -0 + -0 gives -0 */
+			y->fp_sign &= x->fp_sign;
+		else			/* any -0 operand gives -0 */
+			y->fp_sign |= x->fp_sign;
+		DUMPFPN(FPE_REG, y);
+		return (y);
+	}
+	if (ISZERO(x)) {
+		DUMPFPN(FPE_REG, y);
+		return (y);
+	}
+	/*
+	 * We really have two numbers to add, although their signs may
+	 * differ.  Make the exponents match, by shifting the smaller
+	 * number right (e.g., 1.011 => 0.1011) and increasing its
+	 * exponent (2^3 => 2^4).  Note that we do not alter the exponents
+	 * of x and y here.
+	 */
+	r = &fe->fe_f3;
+	r->fp_class = FPC_NUM;
+	if (x->fp_exp == y->fp_exp) {
+		r->fp_exp = x->fp_exp;
+		r->fp_sticky = 0;
+	} else {
+		if (x->fp_exp < y->fp_exp) {
+			/*
+			 * Try to avoid subtract case iii (see below).
+			 * This also guarantees that x->fp_sticky = 0.
+			 */
+			SWAP(x, y);
+		}
+		/* now x->fp_exp > y->fp_exp */
+		r->fp_exp = x->fp_exp;
+		r->fp_sticky = fpu_shr(y, x->fp_exp - y->fp_exp);
+	}
+	r->fp_sign = x->fp_sign;
+	if (x->fp_sign == y->fp_sign) {
+		FPU_DECL_CARRY
+
+		/*
+		 * The signs match, so we simply add the numbers.  The result
+		 * may be `supernormal' (as big as 1.111...1 + 1.111...1, or
+		 * 11.111...0).  If so, a single bit shift-right will fix it
+		 * (but remember to adjust the exponent).
+		 */
+		/* r->fp_mant = x->fp_mant + y->fp_mant */
+		FPU_ADDS(r->fp_mant[3], x->fp_mant[3], y->fp_mant[3]);
+		FPU_ADDCS(r->fp_mant[2], x->fp_mant[2], y->fp_mant[2]);
+		FPU_ADDCS(r->fp_mant[1], x->fp_mant[1], y->fp_mant[1]);
+		FPU_ADDC(r0, x->fp_mant[0], y->fp_mant[0]);
+		if ((r->fp_mant[0] = r0) >= FP_2) {
+			(void) fpu_shr(r, 1);
+			r->fp_exp++;
+		}
+	} else {
+		FPU_DECL_CARRY
+
+		/*
+		 * The signs differ, so things are rather more difficult.
+		 * H&P would have us negate the negative operand and add;
+		 * this is the same as subtracting the negative operand.
+		 * This is quite a headache.  Instead, we will subtract
+		 * y from x, regardless of whether y itself is the negative
+		 * operand.  When this is done one of three conditions will
+		 * hold, depending on the magnitudes of x and y:
+		 *   case i)   |x| > |y|.  The result is just x - y,
+		 *	with x's sign, but it may need to be normalized.
+		 *   case ii)  |x| = |y|.  The result is 0 (maybe -0)
+		 *	so must be fixed up.
+		 *   case iii) |x| < |y|.  We goofed; the result should
+		 *	be (y - x), with the same sign as y.
+		 * We could compare |x| and |y| here and avoid case iii,
+		 * but that would take just as much work as the subtract.
+		 * We can tell case iii has occurred by an overflow.
+		 *
+		 * N.B.: since x->fp_exp >= y->fp_exp, x->fp_sticky = 0.
+		 */
+		/* r->fp_mant = x->fp_mant - y->fp_mant */
+		FPU_SET_CARRY(y->fp_sticky);
+		FPU_SUBCS(r3, x->fp_mant[3], y->fp_mant[3]);
+		FPU_SUBCS(r2, x->fp_mant[2], y->fp_mant[2]);
+		FPU_SUBCS(r1, x->fp_mant[1], y->fp_mant[1]);
+		FPU_SUBC(r0, x->fp_mant[0], y->fp_mant[0]);
+		if (r0 < FP_2) {
+			/* cases i and ii */
+			if ((r0 | r1 | r2 | r3) == 0) {
+				/* case ii */
+				r->fp_class = FPC_ZERO;
+				r->fp_sign = rd == FSR_RD_RM;
+				return (r);
+			}
+		} else {
+			/*
+			 * Oops, case iii.  This can only occur when the
+			 * exponents were equal, in which case neither
+			 * x nor y have sticky bits set.  Flip the sign
+			 * (to y's sign) and negate the result to get y - x.
+			 */
+#ifdef DIAGNOSTIC
+			if (x->fp_exp != y->fp_exp || r->fp_sticky)
+				panic("fpu_add");
+#endif
+			r->fp_sign = y->fp_sign;
+			FPU_SUBS(r3, 0, r3);
+			FPU_SUBCS(r2, 0, r2);
+			FPU_SUBCS(r1, 0, r1);
+			FPU_SUBC(r0, 0, r0);
+		}
+		r->fp_mant[3] = r3;
+		r->fp_mant[2] = r2;
+		r->fp_mant[1] = r1;
+		r->fp_mant[0] = r0;
+		if (r0 < FP_1)
+			fpu_norm(r);
+	}
+	DUMPFPN(FPE_REG, r);
+	return (r);
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_arith.h src/sys/arch/sh3/fpu/fpu_arith.h
--- src.orig/sys/arch/sh3/fpu/fpu_arith.h	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_arith.h	2007-02-02 15:23:05.000000000 +0900
@@ -0,0 +1,178 @@
+/*	$NetBSD$ */
+
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu_arith.h	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Extended-precision arithmetic.
+ *
+ * We hold the notion of a `carry register', which may or may not be a
+ * machine carry bit or register.  On the SPARC, it is just the machine's
+ * carry bit.
+ *
+ * In the worst case, you can compute the carry from x+y as
+ *	(unsigned)(x + y) < (unsigned)x
+ * and from x+y+c as
+ *	((unsigned)(x + y + c) <= (unsigned)x && (y|c) != 0)
+ * for example.
+ */
+
+
+#ifndef FPE_USE_ASM
+
+/* set up for extended-precision arithemtic */
+#define	FPU_DECL_CARRY quad_t fpu_carry, fpu_tmp;
+
+/*
+ * We have three kinds of add:
+ *	add with carry:					  r = x + y + c
+ *	add (ignoring current carry) and set carry:	c'r = x + y + 0
+ *	add with carry and set carry:			c'r = x + y + c
+ * The macros use `C' for `use carry' and `S' for `set carry'.
+ * Note that the state of the carry is undefined after ADDC and SUBC,
+ * so if all you have for these is `add with carry and set carry',
+ * that is OK.
+ *
+ * The same goes for subtract, except that we compute x - y - c.
+ *
+ * Finally, we have a way to get the carry into a `regular' variable,
+ * or set it from a value.  SET_CARRY turns 0 into no-carry, nonzero
+ * into carry; GET_CARRY sets its argument to 0 or 1.
+ */
+#define	FPU_ADDC(r, x, y) \
+	(r) = (x) + (y) + (!!fpu_carry)
+#define	FPU_ADDS(r, x, y) \
+	{ \
+		fpu_tmp = (quad_t)(x) + (quad_t)(y); \
+		(r) = (u_int)fpu_tmp; \
+		fpu_carry = ((fpu_tmp & 0xffffffff00000000LL) != 0); \
+	}
+#define	FPU_ADDCS(r, x, y) \
+	{ \
+		fpu_tmp = (quad_t)(x) + (quad_t)(y) + (!!fpu_carry); \
+		(r) = (u_int)fpu_tmp; \
+		fpu_carry = ((fpu_tmp & 0xffffffff00000000LL) != 0); \
+	}
+#define	FPU_SUBC(r, x, y) \
+	(r) = (x) - (y) - (!!fpu_carry)
+#define	FPU_SUBS(r, x, y) \
+	{ \
+		fpu_tmp = (quad_t)(x) - (quad_t)(y); \
+		(r) = (u_int)fpu_tmp; \
+		fpu_carry = ((fpu_tmp & 0xffffffff00000000LL) != 0); \
+	}
+#define	FPU_SUBCS(r, x, y) \
+	{ \
+		fpu_tmp = (quad_t)(x) - (quad_t)(y) - (!!fpu_carry); \
+		(r) = (u_int)fpu_tmp; \
+		fpu_carry = ((fpu_tmp & 0xffffffff00000000LL) != 0); \
+	}
+
+#define	FPU_GET_CARRY(r) (r) = (!!fpu_carry)
+#define	FPU_SET_CARRY(v) fpu_carry = ((v) != 0)
+
+#else
+/* set up for extended-precision arithemtic */
+#define	FPU_DECL_CARRY
+
+/*
+ * We have three kinds of add:
+ *	add with carry:					  r = x + y + c
+ *	add (ignoring current carry) and set carry:	c'r = x + y + 0
+ *	add with carry and set carry:			c'r = x + y + c
+ * The macros use `C' for `use carry' and `S' for `set carry'.
+ * Note that the state of the carry is undefined after ADDC and SUBC,
+ * so if all you have for these is `add with carry and set carry',
+ * that is OK.
+ *
+ * The same goes for subtract, except that we compute x - y - c.
+ *
+ * Finally, we have a way to get the carry into a `regular' variable,
+ * or set it from a value.  SET_CARRY turns 0 into no-carry, nonzero
+ * into carry; GET_CARRY sets its argument to 0 or 1.
+ */
+#define	FPU_ADDC(r, x, y)						      \
+	__asm volatile(							      \
+		"mov %1, %0						\n\t" \
+		"addc %2, %0						\n\t" \
+	    : "=&r"(r) : "r"(x), "r"(y))
+#define	FPU_ADDS(r, x, y)						      \
+	__asm volatile(							      \
+		"clrt							\n\t" \
+		"mov %1, %0						\n\t" \
+		"addc %2, %0						\n\t" \
+	    : "=&r"(r) : "r"(x), "r"(y))
+#define	FPU_ADDCS(r, x, y)						      \
+	__asm volatile(							      \
+		"mov %1, %0						\n\t" \
+		"addc %2, %0						\n\t" \
+	    : "=&r"(r) : "r"(x), "r"(y))
+#define	FPU_SUBC(r, x, y)						      \
+	__asm volatile(							      \
+		"mov %1, %0						\n\t" \
+		"subc %2, %0						\n\t" \
+	    : "=&r"(r) : "r"(x), "r"(y))
+#define	FPU_SUBS(r, x, y)						      \
+	__asm volatile(							      \
+		"clrt							\n\t" \
+		"mov %1, %0						\n\t" \
+		"subc %2, %0						\n\t" \
+	    : "=&r"(r) : "r"(x), "r"(y))
+#define	FPU_SUBCS(r, x, y)						      \
+	__asm volatile(							      \
+		"mov %1, %0						\n\t" \
+		"subc %2, %0						\n\t" \
+	    : "=&r"(r) : "r"(x), "r"(y))
+
+#define	FPU_GET_CARRY(r)						      \
+	__asm volatile(							      \
+		"movt %0						\n\t" \
+	    : "=r"(r))
+#define	FPU_SET_CARRY(v)						      \
+do {									      \
+	int __tmp;							      \
+	__asm volatile(							      \
+		"clrt							\n\t" \
+		"negc %1, %0						\n\t" \
+	    : "=r"(__tmp) : "r"(v));					      \
+} while (/*CONSTCOND*/0)
+
+#define	FPU_SHL1_BY_ADD	/* shift left 1 faster by ADDC than (a<<1)|(b>>31) */
+#endif
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_compare.c src/sys/arch/sh3/fpu/fpu_compare.c
--- src.orig/sys/arch/sh3/fpu/fpu_compare.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_compare.c	2007-01-31 19:14:16.000000000 +0900
@@ -0,0 +1,159 @@
+/*	$NetBSD$ */
+
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu_compare.c	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * FCMPU and FCMPO instructions.
+ *
+ * These rely on the fact that our internal wide format is achieved by
+ * adding zero bits to the end of narrower mantissas.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/types.h>
+
+#include <machine/reg.h>
+#include <machine/fpu.h>
+
+#include <sh3/fpu/fpu_arith.h>
+#include <sh3/fpu/fpu_emu.h>
+
+/*
+ * Perform a compare instruction (with or without unordered exception).
+ * This updates the fcc field in the fsr.
+ *
+ * If either operand is NaN, the result is unordered.  For ordered, this
+ * causes an NV exception.  Everything else is ordered:
+ *	|Inf| > |numbers| > |0|.
+ * We already arranged for fp_class(Inf) > fp_class(numbers) > fp_class(0),
+ * so we get this directly.  Note, however, that two zeros compare equal
+ * regardless of sign, while everything else depends on sign.
+ *
+ * Incidentally, two Infs of the same sign compare equal (per the 80387
+ * manual---it would be nice if the SPARC documentation were more
+ * complete).
+ */
+int
+fpu_compare(struct fpemu *fe)
+{
+	struct fpn *a, *b, *r;
+	int res;
+
+	a = &fe->fe_f1;
+	b = &fe->fe_f2;
+	r = &fe->fe_f3;
+	res = 0;
+
+	if (ISNAN(a) || ISNAN(b)) {
+		/*
+		 * In any case, we already got an exception for signalling
+		 * NaNs; here we may replace that one with an identical
+		 * exception, but so what?.
+		 */
+		if (ISSNAN(a) || ISSNAN(b))
+			res = FCMP_INVALID;
+		else	/* qNaN */
+			res = FCMP_UO;
+		goto done;
+	}
+
+	/*
+	 * Must handle both-zero early to avoid sign goofs.  Otherwise,
+	 * at most one is 0, and if the signs differ we are done.
+	 */
+	if (ISZERO(a) && ISZERO(b)) {
+		res = FCMP_EQ;
+		goto done;
+	}
+	if (a->fp_sign) {		/* a < 0 (or -0) */
+		if (!b->fp_sign) {	/* b >= 0 (or if a = -0, b > 0) */
+			res = FCMP_LT;
+			goto done;
+		}
+	} else {			/* a > 0 (or +0) */
+		if (b->fp_sign) {	/* b <= -0 (or if a = +0, b < 0) */
+			res = FCMP_GT;
+			goto done;
+		}
+	}
+
+	/*
+	 * Now the signs are the same (but may both be negative).  All
+	 * we have left are these cases:
+	 *
+	 *	|a| < |b|		[classes or values differ]
+	 *	|a| > |b|		[classes or values differ]
+	 *	|a| == |b|		[classes and values identical]
+	 *
+	 * We define `diff' here to expand these as:
+	 *
+	 *	|a| < |b|, a,b >= 0: a < b => LT
+	 *	|a| < |b|, a,b < 0:  a > b => GT
+	 *	|a| > |b|, a,b >= 0: a > b => GT
+	 *	|a| > |b|, a,b < 0:  a < b => LT
+	 */
+#define opposite_cc(cc) ((cc) == FCMP_GT ? FCMP_LT : FCMP_GT)
+#define	diff(magnitude) (a->fp_sign ? opposite_cc(magnitude) :  (magnitude))
+	if (a->fp_class < b->fp_class) {	/* |a| < |b| */
+		res = diff(FCMP_LT);
+		goto done;
+	}
+	if (a->fp_class > b->fp_class) {	/* |a| > |b| */
+		res = diff(FCMP_GT);
+		goto done;
+	}
+	/* now none can be 0: only Inf and numbers remain */
+	if (ISINF(a)) {				/* |Inf| = |Inf| */
+		res = FCMP_EQ;
+		goto done;
+	}
+	fpu_sub(fe);
+	if (ISZERO(r))
+		res = FCMP_EQ;
+	else if (r->fp_sign)
+		res = FCMP_LT;
+	else
+		res = FCMP_GT;
+done:
+	return res;
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_div.c src/sys/arch/sh3/fpu/fpu_div.c
--- src.orig/sys/arch/sh3/fpu/fpu_div.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_div.c	2007-01-31 12:14:44.000000000 +0900
@@ -0,0 +1,294 @@
+/*	$NetBSD$ */
+
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu_div.c	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Perform an FPU divide (return x / y).
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/types.h>
+#if defined(DIAGNOSTIC)||defined(DEBUG)
+#include <sys/systm.h>
+#endif
+
+#include <machine/reg.h>
+#include <machine/fpu.h>
+
+#include <sh3/fpu/fpu_arith.h>
+#include <sh3/fpu/fpu_emu.h>
+
+/*
+ * Division of normal numbers is done as follows:
+ *
+ * x and y are floating point numbers, i.e., in the form 1.bbbb * 2^e.
+ * If X and Y are the mantissas (1.bbbb's), the quotient is then:
+ *
+ *	q = (X / Y) * 2^((x exponent) - (y exponent))
+ *
+ * Since X and Y are both in [1.0,2.0), the quotient's mantissa (X / Y)
+ * will be in [0.5,2.0).  Moreover, it will be less than 1.0 if and only
+ * if X < Y.  In that case, it will have to be shifted left one bit to
+ * become a normal number, and the exponent decremented.  Thus, the
+ * desired exponent is:
+ *
+ *	left_shift = x->fp_mant < y->fp_mant;
+ *	result_exp = x->fp_exp - y->fp_exp - left_shift;
+ *
+ * The quotient mantissa X/Y can then be computed one bit at a time
+ * using the following algorithm:
+ *
+ *	Q = 0;			-- Initial quotient.
+ *	R = X;			-- Initial remainder,
+ *	if (left_shift)		--   but fixed up in advance.
+ *		R *= 2;
+ *	for (bit = FP_NMANT; --bit >= 0; R *= 2) {
+ *		if (R >= Y) {
+ *			Q |= 1 << bit;
+ *			R -= Y;
+ *		}
+ *	}
+ *
+ * The subtraction R -= Y always removes the uppermost bit from R (and
+ * can sometimes remove additional lower-order 1 bits); this proof is
+ * left to the reader.
+ *
+ * This loop correctly calculates the guard and round bits since they are
+ * included in the expanded internal representation.  The sticky bit
+ * is to be set if and only if any other bits beyond guard and round
+ * would be set.  From the above it is obvious that this is true if and
+ * only if the remainder R is nonzero when the loop terminates.
+ *
+ * Examining the loop above, we can see that the quotient Q is built
+ * one bit at a time ``from the top down''.  This means that we can
+ * dispense with the multi-word arithmetic and just build it one word
+ * at a time, writing each result word when it is done.
+ *
+ * Furthermore, since X and Y are both in [1.0,2.0), we know that,
+ * initially, R >= Y.  (Recall that, if X < Y, R is set to X * 2 and
+ * is therefore at in [2.0,4.0).)  Thus Q is sure to have bit FP_NMANT-1
+ * set, and R can be set initially to either X - Y (when X >= Y) or
+ * 2X - Y (when X < Y).  In addition, comparing R and Y is difficult,
+ * so we will simply calculate R - Y and see if that underflows.
+ * This leads to the following revised version of the algorithm:
+ *
+ *	R = X;
+ *	bit = FP_1;
+ *	D = R - Y;
+ *	if (D >= 0) {
+ *		result_exp = x->fp_exp - y->fp_exp;
+ *		R = D;
+ *		q = bit;
+ *		bit >>= 1;
+ *	} else {
+ *		result_exp = x->fp_exp - y->fp_exp - 1;
+ *		q = 0;
+ *	}
+ *	R <<= 1;
+ *	do  {
+ *		D = R - Y;
+ *		if (D >= 0) {
+ *			q |= bit;
+ *			R = D;
+ *		}
+ *		R <<= 1;
+ *	} while ((bit >>= 1) != 0);
+ *	Q[0] = q;
+ *	for (i = 1; i < 4; i++) {
+ *		q = 0, bit = 1 << 31;
+ *		do {
+ *			D = R - Y;
+ *			if (D >= 0) {
+ *				q |= bit;
+ *				R = D;
+ *			}
+ *			R <<= 1;
+ *		} while ((bit >>= 1) != 0);
+ *		Q[i] = q;
+ *	}
+ *
+ * This can be refined just a bit further by moving the `R <<= 1'
+ * calculations to the front of the do-loops and eliding the first one.
+ * The process can be terminated immediately whenever R becomes 0, but
+ * this is relatively rare, and we do not bother.
+ */
+
+struct fpn *
+fpu_div(struct fpemu *fe)
+{
+	struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2;
+	u_int q, bit;
+	u_int r0, r1, r2, r3, d0, d1, d2, d3, y0, y1, y2, y3;
+	FPU_DECL_CARRY
+
+	/*
+	 * Since divide is not commutative, we cannot just use ORDER.
+	 * Check either operand for NaN first; if there is at least one,
+	 * order the signalling one (if only one) onto the right, then
+	 * return it.  Otherwise we have the following cases:
+	 *
+	 *	Inf / Inf = NaN, plus NV exception
+	 *	Inf / num = Inf [i.e., return x]
+	 *	Inf / 0   = Inf [i.e., return x]
+	 *	0 / Inf = 0 [i.e., return x]
+	 *	0 / num = 0 [i.e., return x]
+	 *	0 / 0   = NaN, plus NV exception
+	 *	num / Inf = 0
+	 *	num / num = num (do the divide)
+	 *	num / 0   = Inf, plus DZ exception
+	 */
+	DPRINTF(FPE_REG, ("fpu_div:\n"));
+	DUMPFPN(FPE_REG, x);
+	DUMPFPN(FPE_REG, y);
+	DPRINTF(FPE_REG, ("=>\n"));
+	if (ISNAN(x) || ISNAN(y)) {
+		ORDER(x, y);
+		fe->fe_cx |= FPSCR_VXSNAN;
+		DUMPFPN(FPE_REG, y);
+		return (y);
+	}
+	/*
+	 * Need to split the following out cause they generate different
+	 * exceptions. 
+	 */
+	if (ISINF(x)) {
+		if (x->fp_class == y->fp_class) {
+			fe->fe_cx |= FPSCR_VXIDI;
+			return (fpu_newnan(fe));
+		}
+		DUMPFPN(FPE_REG, x);
+		return (x);
+	}
+	if (ISZERO(x)) {
+		/* 0/Inf and 0/num are exact (see table above); no ZX here */
+		if (x->fp_class == y->fp_class) {
+			fe->fe_cx |= FPSCR_VXZDZ;
+			return (fpu_newnan(fe));
+		}
+		DUMPFPN(FPE_REG, x);
+		return (x);
+	}
+
+	/* all results at this point use XOR of operand signs */
+	x->fp_sign ^= y->fp_sign;
+	if (ISINF(y)) {
+		x->fp_class = FPC_ZERO;
+		DUMPFPN(FPE_REG, x);
+		return (x);
+	}
+	if (ISZERO(y)) {
+		fe->fe_cx |= FPSCR_ZX;
+		x->fp_class = FPC_INF;
+		DUMPFPN(FPE_REG, x);
+		return (x);
+	}
+
+	/*
+	 * Macros for the divide.  See comments at top for algorithm.
+	 * Note that we expand R, D, and Y here.
+	 */
+
+#define	SUBTRACT		/* D = R - Y */ \
+	FPU_SUBS(d3, r3, y3); FPU_SUBCS(d2, r2, y2); \
+	FPU_SUBCS(d1, r1, y1); FPU_SUBC(d0, r0, y0)
+
+#define	NONNEGATIVE		/* D >= 0 */ \
+	((int)d0 >= 0)
+
+#ifdef FPU_SHL1_BY_ADD
+#define	SHL1			/* R <<= 1 */ \
+	FPU_ADDS(r3, r3, r3); FPU_ADDCS(r2, r2, r2); \
+	FPU_ADDCS(r1, r1, r1); FPU_ADDC(r0, r0, r0)
+#else
+#define	SHL1 \
+	r0 = (r0 << 1) | (r1 >> 31), r1 = (r1 << 1) | (r2 >> 31), \
+	r2 = (r2 << 1) | (r3 >> 31), r3 <<= 1
+#endif
+
+#define	LOOP			/* do ... while (bit >>= 1) */ \
+	do { \
+		SHL1; \
+		SUBTRACT; \
+		if (NONNEGATIVE) { \
+			q |= bit; \
+			r0 = d0, r1 = d1, r2 = d2, r3 = d3; \
+		} \
+	} while ((bit >>= 1) != 0)
+
+#define	WORD(r, i)			/* calculate r->fp_mant[i] */ \
+	q = 0; \
+	bit = 1 << 31; \
+	LOOP; \
+	(x)->fp_mant[i] = q
+
+	/* Setup.  Note that we put our result in x. */
+	r0 = x->fp_mant[0];
+	r1 = x->fp_mant[1];
+	r2 = x->fp_mant[2];
+	r3 = x->fp_mant[3];
+	y0 = y->fp_mant[0];
+	y1 = y->fp_mant[1];
+	y2 = y->fp_mant[2];
+	y3 = y->fp_mant[3];
+
+	bit = FP_1;
+	SUBTRACT;
+	if (NONNEGATIVE) {
+		x->fp_exp -= y->fp_exp;
+		r0 = d0, r1 = d1, r2 = d2, r3 = d3;
+		q = bit;
+		bit >>= 1;
+	} else {
+		x->fp_exp -= y->fp_exp + 1;
+		q = 0;
+	}
+	LOOP;
+	x->fp_mant[0] = q;
+	WORD(x, 1);
+	WORD(x, 2);
+	WORD(x, 3);
+	x->fp_sticky = r0 | r1 | r2 | r3;
+
+	DUMPFPN(FPE_REG, x);
+	return (x);
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_emu.c src/sys/arch/sh3/fpu/fpu_emu.c
--- src.orig/sys/arch/sh3/fpu/fpu_emu.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_emu.c	2007-02-02 16:09:53.000000000 +0900
@@ -0,0 +1,665 @@
+/*	$NetBSD$ */
+
+/*
+ * Copyright 2001 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed for the NetBSD Project by
+ *      Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ *    or promote products derived from this software without specific prior
+ *    written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu.c	8.1 (Berkeley) 6/11/93
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include "opt_ddb.h"
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/signal.h>
+#include <sys/systm.h>
+#include <sys/syslog.h>
+#include <sys/signalvar.h>
+
+#include <machine/reg.h>
+#include <machine/fpu.h>
+#ifdef DDB
+#include <machine/db_machdep.h>
+#endif
+
+#include <sh3/fpu/fpu_emu.h>
+#include <sh3/fpu/fpu_extern.h>
+
+#ifdef DEBUG
+int fpe_debug = 0;
+
+/*
+ * Dump a `fpn' structure.
+ */
+void
+fpu_dumpfpn(struct fpn *fp)
+{
+	static const char *class[] = {	/* indexed by fp_class + 2 (FPC_SNAN == -2) */
+		"SNAN", "QNAN", "ZERO", "NUM", "INF"
+	};
+
+	printf("%s %c.%x %x %x %xE%d", class[fp->fp_class + 2],
+		fp->fp_sign ? '-' : ' ',
+		fp->fp_mant[0],	fp->fp_mant[1],
+		fp->fp_mant[2], fp->fp_mant[3], 
+		fp->fp_exp);
+}
+#endif
+
+/*
+ * fpu_execute returns the following error numbers (0 = no error):
+ */
+#define	FPE		1	/* take a floating point exception */
+#define	NOTFPU		2	/* not an FPU instruction */
+#define	FAULT		3
+
+
+/*
+ * Emulate a floating-point instruction.
+ * Return zero for success, else signal number.
+ * (Typically: zero, SIGFPE, SIGILL, SIGSEGV)
+ */
+int
+fpu_emulate(struct trapframe *frame, struct fpreg *fpf)
+{
+	uint16_t insn;
+	struct fpemu fe;
+	static int lastill = 0;
+	int sig;
+
+	/* set up the emulation state around the process's FP registers */
+	fe.fe_fpstate = fpf;
+	fe.fe_cx = 0;
+	fe.fe_rm = fpf->fpr_fpscr & FPSCR_RM;
+
+	/* fetch the faulting instruction from user space */
+
+	if (copyin((void *)frame->tf_spc, &insn, sizeof(insn))) {
+#ifdef DEBUG
+		printf("fpu_emulate: fault reading opcode\n");
+#endif
+		return SIGSEGV;
+	}
+
+	DPRINTF(FPE_EX, ("fpu_emulate: emulating insn %x at %p\n",
+	    insn, (void *)frame->tf_spc));
+
+	sig = 0;
+	switch (fpu_execute(frame, &fe, &insn)) {
+	case 0:
+		DPRINTF(FPE_EX, ("fpu_emulate: success\n"));
+		frame->tf_spc += 2;
+		break;
+
+	case FPE:
+		DPRINTF(FPE_EX, ("fpu_emulate: SIGFPE\n"));
+		sig = SIGFPE;
+		break;
+
+	case FAULT:
+		DPRINTF(FPE_EX, ("fpu_emulate: SIGSEGV\n"));
+		sig = SIGSEGV;
+		break;
+
+	case NOTFPU:
+	default:
+		DPRINTF(FPE_EX, ("fpu_emulate: SIGILL\n"));
+#ifdef DEBUG
+		if (fpe_debug & FPE_EX) {
+			printf("fpu_emulate:  illegal insn %x at %p:",
+			    insn, (void *)frame->tf_spc);
+#ifdef DDB
+			db_disasm((db_addr_t)(frame->tf_spc), 0);
+#endif
+		}
+#endif
+		/*
+		 * XXX retry an illegal insn once due to cache issues.
+		 */
+		if (lastill == frame->tf_spc) {
+			sig = SIGILL;
+#if defined(DEBUG) && defined(DDB)
+			if (fpe_debug & FPE_EX)
+				Debugger();
+#endif
+		}
+		lastill = frame->tf_spc;
+		break;
+	}
+
+	return sig;
+}
+
+/* trapframe register offset table */
+static const int tf_regoff[16] = {
+	/* indexed by the 4-bit register number encoded in the insn */
+	offsetof(struct trapframe, tf_r0),
+	offsetof(struct trapframe, tf_r1),
+	offsetof(struct trapframe, tf_r2),
+	offsetof(struct trapframe, tf_r3),
+	offsetof(struct trapframe, tf_r4),
+	offsetof(struct trapframe, tf_r5),
+	offsetof(struct trapframe, tf_r6),
+	offsetof(struct trapframe, tf_r7),
+	offsetof(struct trapframe, tf_r8),
+	offsetof(struct trapframe, tf_r9),
+	offsetof(struct trapframe, tf_r10),
+	offsetof(struct trapframe, tf_r11),
+	offsetof(struct trapframe, tf_r12),
+	offsetof(struct trapframe, tf_r13),
+	offsetof(struct trapframe, tf_r14),
+	offsetof(struct trapframe, tf_r15)
+};
+#define	tf_reg(tf,r)	*(u_int *)((char *)(tf) + tf_regoff[(r)])
+
+#define	check_xdreg(fe, type, reg)	/* DBL ops need an even reg # */	\
+do {									\
+	if ((type) == FTYPE_DBL) {					\
+		if ((reg) & 1) {					\
+			return NOTFPU;					\
+		}							\
+	}								\
+} while (/*CONSTCOND*/0)
+
+
+/*
+ * Execute an FPU instruction (one that runs entirely in the FPU; not
+ * a plain load/store or branch, for instance).  On return, the FPSCR
+ * copy written back to fe->fe_fpstate reflects what the hardware would
+ * have left.
+ *
+ * Note that we do not catch all illegal opcodes, so you can, for instance,
+ * multiply two integers this way.
+ */
+int
+fpu_execute(struct trapframe *tf, struct fpemu *fe, uint16_t *insn)
+{
+	union {
+		double d;
+		int i;
+		int a[2];
+	} buf;
+	struct fpn *fp;
+	struct fpreg *fs;
+	int *a;
+	vaddr_t addr;
+	int type, size, bank;
+	int ra, rb, rt, real_ra, real_rb;
+	int res, sett, t;
+	int fsr, cx, enable;
+	uint16_t instr = *insn;
+
+#if defined(DDB) && defined(DEBUG)
+	if (fpe_debug & FPE_EX) {
+		db_addr_t loc = tf->tf_spc;
+
+		printf("Trying to emulate: %p ", (void *)loc);
+		db_disasm(loc, 0);
+	}
+#endif
+
+	/* Setup work. */
+	fp = NULL;
+	fs = fe->fe_fpstate;
+	fe->fe_fpscr = fs->fpr_fpscr;
+
+	type = (fe->fe_fpscr & FPSCR_PR) ? FTYPE_DBL : FTYPE_SNG;
+	size = (fe->fe_fpscr & FPSCR_SZ) ? FSIZE_DBL : FSIZE_SNG;
+	bank = (fe->fe_fpscr & FPSCR_FR);
+	real_ra = ra = (instr >> 8) & 0xf;
+	real_rb = rb = (instr >> 4) & 0xf;
+	if (bank) {
+		ra += FPREGS_PER_BANK;
+		rb += FPREGS_PER_BANK;
+	}
+	rt = FPREG_INVALID;
+	sett = 0;
+	t = 0;
+
+	/*
+	 * `Decode' and execute instruction.
+	 */
+
+	switch ((instr >> 12) & 0xf) {
+	case 0x0:
+		switch (instr & 0xff) {
+		case 0x5a:	/* STS FPUL,Rn */
+			tf_reg(tf, real_ra) = fs->fpr_fpul;
+			break;
+		case 0x6a:	/* STS FPSCR,Rn */
+			tf_reg(tf, real_ra) = fe->fe_fpscr;
+			break;
+
+		default:
+			return NOTFPU;
+		}
+		break;
+
+	case 0x4:
+		switch (instr & 0xff) {
+		case 0x52:	/* STS.L FPUL,@-Rn */
+			addr = (vaddr_t)tf_reg(tf, real_ra) - 4;
+			buf.i = fs->fpr_fpul;
+			if (copyout(&buf.i, (void *)addr, sizeof(buf.i)))
+				return FAULT;
+			tf_reg(tf, real_ra) = addr;
+			break;
+		case 0x56:	/* LDS.L @Rm+,FPUL  */
+			addr = (vaddr_t)tf_reg(tf, real_ra);
+			if (copyin((void *)addr, &buf.i, sizeof(buf.i)))
+				return FAULT;
+			fs->fpr_fpul = buf.i;	/* target is FPUL, not FPSCR */
+			tf_reg(tf, real_ra) = addr + 4;
+			break;
+		case 0x5a:	/* LDS Rm,FPUL */
+			fs->fpr_fpul = tf_reg(tf, real_ra);
+			break;
+		case 0x62:	/* STS.L FPSCR,@-Rn */
+			addr = (vaddr_t)tf_reg(tf, real_ra) - 4;
+			buf.i = fe->fe_fpscr;
+			if (copyout(&buf.i, (void *)addr, sizeof(buf.i)))
+				return FAULT;
+			tf_reg(tf, real_ra) = addr;
+			break;
+		case 0x66:	/* LDS.L @Rm+,FPSCR */
+			addr = (vaddr_t)tf_reg(tf, real_ra);
+			if (copyin((void *)addr, &buf.i, sizeof(buf.i)))
+				return FAULT;
+			fe->fe_fpscr = buf.i & FPSCR_MASK;
+			tf_reg(tf, real_ra) = addr + 4;
+			break;
+		case 0x6a:	/* LDS Rm,FPSCR */
+			fe->fe_fpscr = tf_reg(tf, real_ra) & FPSCR_MASK;
+			break;
+
+		default:
+			return NOTFPU;
+		}
+		break;
+
+	case 0xf:
+		switch (instr & 0xf) {
+		case 0x0:	/* FADD FRm,FRn */
+			DPRINTF(FPE_INSN, ("fpu_execute: FADD\n"));
+			check_xdreg(fe, type, ra|rb);
+			fpu_explode(fe, &fe->fe_f1, type, ra);
+			fpu_explode(fe, &fe->fe_f2, type, rb);
+			fp = fpu_add(fe);
+			rt = ra;
+			break;
+		case 0x1:	/* FSUB FRm,FRn / FSUB DRm,DRn */
+			DPRINTF(FPE_INSN, ("fpu_execute: FSUB\n"));
+			check_xdreg(fe, type, ra|rb);
+			fpu_explode(fe, &fe->fe_f1, type, ra);
+			fpu_explode(fe, &fe->fe_f2, type, rb);
+			fp = fpu_sub(fe);
+			rt = ra;
+			break;
+		case 0x2:	/* FMUL FRm,FRn / FMUL DRm,DRn */
+			DPRINTF(FPE_INSN, ("fpu_execute: FMUL\n"));
+			check_xdreg(fe, type, ra|rb);
+			fpu_explode(fe, &fe->fe_f1, type, ra);
+			fpu_explode(fe, &fe->fe_f2, type, rb);
+			fp = fpu_mul(fe);
+			rt = ra;
+			break;
+		case 0x3:	/* FDIV FRm,FRn / FDIV DRm,DRn */
+			DPRINTF(FPE_INSN, ("fpu_execute: FDIV\n"));
+			check_xdreg(fe, type, ra|rb);
+			fpu_explode(fe, &fe->fe_f1, type, ra);
+			fpu_explode(fe, &fe->fe_f2, type, rb);
+			fp = fpu_div(fe);
+			rt = ra;
+			break;
+		case 0x4:	/* FCMP/EQ FRm,FRn / FCMP/EQ DRm,DRn */
+			DPRINTF(FPE_INSN, ("fpu_execute: FCMP/EQ\n"));
+			check_xdreg(fe, type, ra|rb);
+			fpu_explode(fe, &fe->fe_f1, type, ra);
+			fpu_explode(fe, &fe->fe_f2, type, rb);
+			res = fpu_compare(fe);
+			if (res == FCMP_INVALID)
+				fe->fe_cx |= FP_V_BIT;
+			t = (res == FCMP_EQ) ? PSL_TBIT : 0;
+			sett = 1;
+			break;
+		case 0x5:	/* FCMP/GT FRm,FRn / FCMP/GT DRm,DRn */
+			DPRINTF(FPE_INSN, ("fpu_execute: FCMP/GT\n"));
+			check_xdreg(fe, type, ra|rb);
+			fpu_explode(fe, &fe->fe_f1, type, ra);
+			fpu_explode(fe, &fe->fe_f2, type, rb);
+			res = fpu_compare(fe);
+			if (res == FCMP_INVALID || res == FCMP_UO)
+				fe->fe_cx |= FP_V_BIT;
+			t = (res == FCMP_GT) ? PSL_TBIT : 0;
+			sett = 1;
+			break;
+		case 0x6:	/* FMOV.S @(R0,Rm),FRn / FMOV @(R0,Rm),DRn / FMOV @(R0,Rm),XDn */
+			addr = tf->tf_r0 + (vaddr_t)tf_reg(tf, real_rb);
+			if (copyin((void *)addr, &buf, size))
+				return FAULT;
+			if (size == FSIZE_SNG) {
+				fs->fpr_fr[ra] = buf.i;
+			} else {
+				if (ra & 1)
+					ra = (ra ^ FP_BANK_BIT) & ~1; /* XDn */
+				a = &fs->fpr_fr[ra];
+				a[DBL_LOWORD] = buf.a[0];
+				a[DBL_HIWORD] = buf.a[1];
+			}
+			break;
+		case 0x7:	/* FMOV.S FRm,@(R0,Rn) / FMOV DRm,@(R0,Rn) / FMOV XDm,@(R0,Rn) */
+			addr = tf->tf_r0 + (vaddr_t)tf_reg(tf, real_ra);
+			if (size == FSIZE_SNG) {
+				buf.i = fs->fpr_fr[rb];
+			} else {
+				if (rb & 1)
+					rb = (rb ^ FP_BANK_BIT) & ~1; /* XDm */
+				buf.a[0] = fs->fpr_fr[rb + DBL_LOWORD]; /* inverse of load */
+				buf.a[1] = fs->fpr_fr[rb + DBL_HIWORD];
+			}
+			if (copyout(&buf, (void *)addr, size))
+				return FAULT;
+			break;
+		case 0x8:	/* FMOV.S @Rm,FRn / FMOV @Rm,DRn / FMOV @Rm,XDn */
+			addr = (vaddr_t)tf_reg(tf, real_rb);
+			if (copyin((void *)addr, &buf, size))
+				return FAULT;
+			if (size == FSIZE_SNG) {
+				fs->fpr_fr[ra] = buf.i;
+			} else {
+				if (ra & 1)
+					ra = (ra ^ FP_BANK_BIT) & ~1; /* XDn */
+				a = &fs->fpr_fr[ra];
+				a[DBL_LOWORD] = buf.a[0];
+				a[DBL_HIWORD] = buf.a[1];
+			}
+			break;
+		case 0x9:	/* FMOV.S @Rm+,FRn / FMOV @Rm+,DRn / FMOV @Rm+,XDn */
+			addr = (vaddr_t)tf_reg(tf, real_rb);
+			if (copyin((void *)addr, &buf, size))
+				return FAULT;
+			if (size == FSIZE_SNG) {
+				fs->fpr_fr[ra] = buf.i;
+			} else {
+				if (ra & 1)
+					ra = (ra ^ FP_BANK_BIT) & ~1; /* XDn */
+				a = &fs->fpr_fr[ra];
+				a[DBL_LOWORD] = buf.a[0];
+				a[DBL_HIWORD] = buf.a[1];
+			}
+			tf_reg(tf, real_rb) = addr + size; /* 4 or 8 bytes */
+			break;
+		case 0xa:	/* FMOV.S FRm,@Rn / FMOV DRm,@Rn / FMOV XDm,@Rn */
+			addr = (vaddr_t)tf_reg(tf, real_ra);
+			if (size == FSIZE_SNG) {
+				buf.i = fs->fpr_fr[rb];
+			} else {
+				if (rb & 1)
+					rb = (rb ^ FP_BANK_BIT) & ~1; /* XDm */
+				buf.a[0] = fs->fpr_fr[rb + DBL_LOWORD]; /* inverse of load */
+				buf.a[1] = fs->fpr_fr[rb + DBL_HIWORD];
+			}
+			if (copyout(&buf, (void *)addr, size))
+				return FAULT;
+			break;
+		case 0xb:	/* FMOV.S FRm,@-Rn / FMOV DRm,@-Rn / FMOV XDm,@-Rn */
+			addr = (vaddr_t)tf_reg(tf, real_ra) - size; /* 4 or 8 */
+			if (size == FSIZE_SNG) {
+				buf.i = fs->fpr_fr[rb];
+			} else {
+				if (rb & 1)
+					rb = (rb ^ FP_BANK_BIT) & ~1; /* XDm */
+				buf.a[0] = fs->fpr_fr[rb + DBL_LOWORD]; /* inverse of load */
+				buf.a[1] = fs->fpr_fr[rb + DBL_HIWORD];
+			}
+			if (copyout(&buf, (void *)addr, size))
+				return FAULT;
+			tf_reg(tf, real_ra) = addr;
+			break;
+		case 0xc:	/* FMOV FRm,FRn / FMOV DRm,DRn / FMOV XDm,DRn / FMOV DRm,XDn / FMOV XDm,XDn */
+			if (size == FSIZE_SNG) {
+				fs->fpr_fr[ra] = fs->fpr_fr[rb];
+			} else {
+				if (ra & 1)
+					ra = (ra ^ FP_BANK_BIT) & ~1; /* XDn */
+				if (rb & 1)
+					rb = (rb ^ FP_BANK_BIT) & ~1; /* XDm */
+				fs->fpr_fr[ra] = fs->fpr_fr[rb];
+				fs->fpr_fr[ra + 1] = fs->fpr_fr[rb + 1];
+			}
+			break;
+		case 0xd:
+			switch ((instr >> 4) & 0xf) {
+			case 0x0:	/* FSTS FPUL,FRn */
+				fs->fpr_fr[ra] = fs->fpr_fpul;
+				break;
+			case 0x1:	/* FLDS FRm,FPUL */
+				fs->fpr_fpul = fs->fpr_fr[ra];
+				break;
+			case 0x2:	/* FLOAT FPUL,FRn / FLOAT FPUL,DRn */
+				fpu_explode(fe, &fe->fe_f1, FTYPE_INT,
+				    FPREG_FPUL);
+				fp = &fe->fe_f1;
+				rt = ra;
+				break;
+			case 0x3:	/* FTRC FRm,FPUL / FTRC DRm,FPUL */
+				fpu_explode(fe, &fe->fe_f1, type, ra);
+				fp = &fe->fe_f1;
+				rt = FPREG_FPUL;
+				/* FTRC always rounds toward zero */
+				fe->fe_rm = RM_RZ;
+				type = FTYPE_INT;
+				break;
+			case 0x4:	/* FNEG FRn / FNEG DRn */
+				DPRINTF(FPE_INSN, ("fpu_execute: FNEG\n"));
+				check_xdreg(fe, type, ra);
+				fs->fpr_fr[ra] ^= (1 << 31);
+				break;
+			case 0x5:	/* FABS FRn / FABS DRn */
+				DPRINTF(FPE_INSN, ("fpu_execute: FABS\n"));
+				check_xdreg(fe, type, ra);
+				fs->fpr_fr[ra] &= ~(1 << 31);
+				break;
+			case 0x6:	/* FSQRT FRn / FSQRT DRn */
+				DPRINTF(FPE_INSN, ("fpu_execute: FSQRT\n"));
+				check_xdreg(fe, type, ra);
+				/* fpu_sqrt() takes its operand in fe_f2 */
+				fpu_explode(fe, &fe->fe_f2, type, ra);
+				fp = fpu_sqrt(fe);
+				rt = ra;
+				break;
+			case 0x8:	/* FLDI0 FRn */
+				if (type != FTYPE_SNG)
+					return NOTFPU;
+				fs->fpr_fr[ra] = 0x00000000;
+				break;
+			case 0x9:	/* FLDI1 FRn */
+				if (type != FTYPE_SNG)
+					return NOTFPU;
+				fs->fpr_fr[ra] = 0x3f800000;
+				break;
+			case 0xa:	/* FCNVSD FPUL,DRn */
+				if (type != FTYPE_DBL)
+					return NOTFPU;
+				fpu_explode(fe, &fe->fe_f1, FTYPE_SNG,
+				    FPREG_FPUL);
+				fp = &fe->fe_f1;
+				rt = ra;
+				break;
+			case 0xb:	/* FCNVDS DRm,FPUL */
+				if (type != FTYPE_DBL)
+					return NOTFPU;
+				fpu_explode(fe, &fe->fe_f1, type, ra);
+				fp = &fe->fe_f1;
+				rt = FPREG_FPUL;
+				type = FTYPE_SNG;
+				break;
+			case 0xe:	/* FIPR FVm,FVn */
+				if (type != FTYPE_SNG)
+					return NOTFPU;
+				return NOTFPU;	/* XXX */
+				break;
+			case 0xf:
+				switch ((instr >> 8) & 0xf) {
+				case 0x3:	/* FSCHG */
+					fe->fe_fpscr ^= FPSCR_SZ;
+					break;
+				case 0xb:	/* FRCHG */
+					fe->fe_fpscr ^= FPSCR_FR;
+					break;
+
+				case 0x1: case 0x5: case 0x9: case 0xd:
+					/* FTRV XMTRX,FVn */
+					return NOTFPU;	/* XXX */
+					break;
+
+				default:
+					return NOTFPU;
+				}
+				break;
+
+			default:
+				return NOTFPU;
+			}
+			break;
+		case 0xe:	/* FMAC FR0,FRm,FRn */
+			if (type != FTYPE_SNG)
+				return NOTFPU;
+			fpu_explode(fe, &fe->fe_f1, type, bank ? FPREGS_PER_BANK : 0); /* FR0 */
+			fpu_explode(fe, &fe->fe_f2, type, rb);
+			(void)fpu_mul(fe);
+			fe->fe_f1 = fe->fe_f3;
+			fpu_explode(fe, &fe->fe_f2, type, ra);
+			fp = fpu_add(fe);
+			rt = ra;
+			break;
+
+		default:
+			return NOTFPU;
+		}
+		break;
+
+	default:
+		return NOTFPU;
+	}
+
+	if ((fp == NULL) && !sett) {
+		/* no ALU result; still write back FPSCR updates (LDS etc.) */
+		fs->fpr_fpscr = fe->fe_fpscr;
+		return 0;
+	}
+
+	/*
+	 * ALU op is complete.  Collapse the result and check for exceptions;
+	 * if any are enabled, leave the destination register unmodified.
+	 */
+	if (fp)
+		fpu_implode(fe, fp, type, (u_int *)buf.a);
+
+	fsr = fe->fe_fpscr & ~(FP_CAUSE_MASK << FP_CAUSE_SHIFT);
+	enable = FP_E_BIT | ((fsr >> FP_ENABLE_SHIFT) & FP_ENABLE_MASK);
+
+	/* V->E->Z->O/U/I */
+	if (fe->fe_cx & FP_V_BIT)
+		fe->fe_cx = FP_V_BIT;
+	else if (fe->fe_cx & FP_E_BIT)
+		fe->fe_cx = FP_E_BIT;
+	else if (fe->fe_cx & FP_Z_BIT)
+		fe->fe_cx = FP_Z_BIT;
+
+	cx = fe->fe_cx & enable;
+	fsr |= (cx & FP_CAUSE_MASK) << FP_CAUSE_SHIFT;
+	fsr |= (fe->fe_cx & FP_FLAG_MASK) << FP_FLAG_SHIFT;
+	fs->fpr_fpscr = fsr;
+
+	if (cx != 0)
+		return FPE;
+
+	if (sett) {
+		tf->tf_ssr &= ~PSL_TBIT;
+		tf->tf_ssr |= t;
+	}
+
+	if (fp) {
+		if ((rt >= 0) && (rt < 32)) {
+			fs->fpr_fr[rt] = buf.a[0];
+			if (type == FTYPE_DBL)
+				fs->fpr_fr[rt + 1] = buf.a[1];
+		} else if (rt == FPREG_FPUL) {
+			fs->fpr_fpul = buf.a[0];
+		} else {
+			panic("fpu_execute(): rt = %d", rt);
+		}
+	}
+	return 0;	/* success */
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_emu.h src/sys/arch/sh3/fpu/fpu_emu.h
--- src.orig/sys/arch/sh3/fpu/fpu_emu.h	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_emu.h	2007-02-02 15:44:05.000000000 +0900
@@ -0,0 +1,220 @@
+/*	$NetBSD$ */
+
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu_emu.h	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Floating point emulator (tailored for SPARC, but structurally
+ * machine-independent).
+ *
+ * Floating point numbers are carried around internally in an `expanded'
+ * or `unpacked' form consisting of:
+ *	- sign
+ *	- unbiased exponent
+ *	- mantissa (`1.' + 112-bit fraction + guard + round)
+ *	- sticky bit
+ * Any implied `1' bit is inserted, giving a 113-bit mantissa that is
+ * always nonzero.  Additional low-order `guard' and `round' bits are
+ * scrunched in, making the entire mantissa 115 bits long.  This is divided
+ * into four 32-bit words, with `spare' bits left over in the upper part
+ * of the top word (the high bits of fp_mant[0]).  An internal `exploded'
+ * number is thus kept within the half-open interval [1.0,2.0) (but see
+ * the `number classes' below).  This holds even for denormalized numbers:
+ * when we explode an external denorm, we normalize it, introducing low-order
+ * zero bits, so that the rest of the code always sees normalized values.
+ *
+ * Note that a number of our algorithms use the `spare' bits at the top.
+ * The most demanding algorithm---the one for sqrt---depends on two such
+ * bits, so that it can represent values up to (but not including) 8.0,
+ * and then it needs a carry on top of that, so that we need three `spares'.
+ *
+ * The sticky-word is 32 bits so that we can use `OR' operators to goosh
+ * whole words from the mantissa into it.
+ *
+ * All operations are done in this internal extended precision.  According
+ * to Hennesey & Patterson, Appendix A, rounding can be repeated---that is,
+ * it is OK to do a+b in extended precision and then round the result to
+ * single precision---provided single, double, and extended precisions are
+ * `far enough apart' (they always are), but we will try to avoid any such
+ * extra work where possible.
+ */
+struct fpn {
+	int	fp_class;		/* see below */
+	int	fp_sign;		/* 0 => positive, 1 => negative */
+	int	fp_exp;			/* exponent (unbiased) */
+	int	fp_sticky;		/* nonzero bits lost at right end */
+	u_int	fp_mant[4];		/* 115-bit mantissa */
+};
+
+#define	FP_NMANT	115		/* total bits in mantissa (incl g,r) */
+#define	FP_NG		2		/* number of low-order guard bits */
+#define	FP_LG		((FP_NMANT - 1) & 31)	/* log2(1.0) for fp_mant[0] */
+#define	FP_LG2		((FP_NMANT - 1) & 63)	/* log2(1.0) for fp_mant[0] and fp_mant[1] */
+#define	FP_QUIETBIT	(1 << (FP_LG - 1))	/* Quiet bit in NaNs (0.5) */
+#define	FP_1		(1 << FP_LG)		/* 1.0 in fp_mant[0] */
+#define	FP_2		(1 << (FP_LG + 1))	/* 2.0 in fp_mant[0] */
+
+/*
+ * Number classes.  Since zero, Inf, and NaN cannot be represented using
+ * the above layout, we distinguish these from other numbers via a class.
+ * In addition, to make computation easier and to follow Appendix N of
+ * the SPARC Version 8 standard, we give each kind of NaN a separate class.
+ */
+#define	FPC_SNAN	-2		/* signalling NaN (sign irrelevant) */
+#define	FPC_QNAN	-1		/* quiet NaN (sign irrelevant) */
+#define	FPC_ZERO	0		/* zero (sign matters) */
+#define	FPC_NUM		1		/* number (sign matters) */
+#define	FPC_INF		2		/* infinity (sign matters) */
+
+#define	ISSNAN(fp)	((fp)->fp_class == FPC_SNAN)
+#define	ISQNAN(fp)	((fp)->fp_class == FPC_QNAN)
+#define	ISNAN(fp)	((fp)->fp_class < 0)
+#define	ISZERO(fp)	((fp)->fp_class == 0)
+#define	ISINF(fp)	((fp)->fp_class == FPC_INF)
+
+/*
+ * ORDER(x,y) `sorts' a pair of `fpn *'s so that the right operand (y) points
+ * to the `more significant' operand for our purposes.  Appendix N says that
+ * the result of a computation involving two numbers are:
+ *
+ *	If both are SNaN: operand 2, converted to Quiet
+ *	If only one is SNaN: the SNaN operand, converted to Quiet
+ *	If both are QNaN: operand 2
+ *	If only one is QNaN: the QNaN operand
+ *
+ * In addition, in operations with an Inf operand, the result is usually
+ * Inf.  The class numbers are carefully arranged so that if
+ *	(unsigned)class(op1) > (unsigned)class(op2)
+ * then op1 is the one we want; otherwise op2 is the one we want.
+ */
+#define	ORDER(x, y) { \
+	if ((u_int)(x)->fp_class > (u_int)(y)->fp_class) \
+		SWAP(x, y); \
+}
+#define	SWAP(x, y) { \
+	struct fpn *swap; \
+	swap = (x), (x) = (y), (y) = swap; \
+}
+
+/*
+ * Emulator state.
+ */
+struct fpemu {
+	struct	fpreg *fe_fpstate;	/* registers, etc */
+	int	fe_fpscr;		/* fpscr copy (modified during op) */
+	int	fe_cx;			/* keep track of exceptions */
+	int	fe_rm;			/* round mode */
+	struct	fpn fe_f1;		/* operand 1 */
+	struct	fpn fe_f2;		/* operand 2, if required */
+	struct	fpn fe_f3;		/* available storage for result */
+};
+
+/*
+ * Arithmetic functions.
+ * Each of these may modify its inputs (f1,f2) and/or the temporary.
+ * Each returns a pointer to the result and/or sets exceptions.
+ */
+struct	fpn *fpu_add(struct fpemu *);
+#define	fpu_sub(fe) ((fe)->fe_f2.fp_sign ^= 1, fpu_add(fe))
+struct	fpn *fpu_mul(struct fpemu *);
+struct	fpn *fpu_div(struct fpemu *);
+struct	fpn *fpu_sqrt(struct fpemu *);
+
+/*
+ * Other functions.
+ */
+
+/* Perform a compare instruction (with or without unordered exception). */
+int	fpu_compare(struct fpemu *);
+
+/* compare result */
+#define	FCMP_EQ		0
+#define	FCMP_LT		1
+#define	FCMP_GT		2
+#define	FCMP_UO		3	/* Unordered */
+#define	FCMP_INVALID	4
+
+/* Build a new Quiet NaN (sign=0, frac=all 1's). */
+struct	fpn *fpu_newnan(struct fpemu *);
+
+/*
+ * Shift a number right some number of bits, taking care of round/sticky.
+ * Note that the result is probably not a well-formed number (it will lack
+ * the normal 1-bit mant[0]&FP_1).
+ */
+int	fpu_shr(struct fpn *, int);
+
+void	fpu_explode(struct fpemu *, struct fpn *, int, int);
+void	fpu_implode(struct fpemu *, struct fpn *, int, u_int *);
+
+/* FPU data types. */
+#define	FTYPE_INT	0	/* data = 32-bit signed integer */
+#define	FTYPE_SNG	1	/* data = 32-bit float */
+#define	FTYPE_DBL	2	/* data = 64-bit double */
+
+/* FPU fmov size */
+#define	FSIZE_SNG	4	/* 32bit */
+#define	FSIZE_DBL	8	/* 32bit pair (64bit) */
+
+/* FPU register index */
+#define	FPREG_INVALID	(-1)
+#define	FPREG_FPUL	(32)
+
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+#define	DBL_LOWORD	1
+#define	DBL_HIWORD	0
+#else
+#define	DBL_LOWORD	0
+#define	DBL_HIWORD	1
+#endif
+
+#ifdef DEBUG
+#define	FPE_EX		0x1
+#define	FPE_INSN	0x2
+#define	FPE_OP		0x4
+#define	FPE_REG		0x8
+extern int fpe_debug;
+void	fpu_dumpfpn(struct fpn *);
+#define	DPRINTF(x, y)	if (fpe_debug & (x)) printf y
+#define DUMPFPN(x, f)	if (fpe_debug & (x)) fpu_dumpfpn((f))
+#else
+#define	DPRINTF(x, y)
+#define DUMPFPN(x, f)
+#endif
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_explode.c src/sys/arch/sh3/fpu/fpu_explode.c
--- src.orig/sys/arch/sh3/fpu/fpu_explode.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_explode.c	2007-02-02 13:00:30.000000000 +0900
@@ -0,0 +1,243 @@
+/*	$NetBSD$ */
+
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu_explode.c	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * FPU subroutines: `explode' the machine's `packed binary' format numbers
+ * into our internal format.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include <machine/ieee.h>
+#include <machine/reg.h>
+#include <machine/fpu.h>
+
+#include <sh3/fpu/fpu_emu.h>
+#include <sh3/fpu/fpu_extern.h>
+#include <sh3/fpu/fpu_arith.h>
+
+/*
+ * N.B.: in all of the following, we assume the FP format is
+ *
+ *	---------------------------
+ *	| s | exponent | fraction |
+ *	---------------------------
+ *
+ * (which represents -1**s * 1.fraction * 2**exponent), so that the
+ * sign bit is way at the top (bit 31), the exponent is next, and
+ * then the remaining bits mark the fraction.  A zero exponent means
+ * zero or denormalized (0.fraction rather than 1.fraction), and the
+ * maximum possible exponent, 2bias+1, signals inf (fraction==0) or NaN.
+ *
+ * Since the sign bit is always the topmost bit---this holds even for
+ * integers---we set that outside all the *tof functions.  Each function
+ * returns the class code for the new number (but note that we use
+ * FPC_QNAN for all NaNs; fpu_explode will fix this if appropriate).
+ */
+
+/*
+ * int -> fpn.
+ */
+int
+fpu_itof(struct fpn *fp, u_int i)
+{
+
+	if (i == 0)
+		return (FPC_ZERO);
+	/*
+	 * The value FP_1 represents 2^FP_LG, so set the exponent
+	 * there and let normalization fix it up.  Convert negative
+	 * numbers to sign-and-magnitude.  Note that this relies on
+	 * fpu_norm()'s handling of `supernormals'; see fpu_subr.c.
+	 */
+	fp->fp_exp = FP_LG;
+	fp->fp_mant[0] = (int)i < 0 ? -i : i;
+	fp->fp_mant[1] = 0;
+	fp->fp_mant[2] = 0;
+	fp->fp_mant[3] = 0;
+	fpu_norm(fp);
+	return (FPC_NUM);
+}
+
+#define	mask(nbits) ((1L << (nbits)) - 1)
+
+/*
+ * All external floating formats convert to internal in the same manner,
+ * as defined here.  Note that only normals get an implied 1.0 inserted.
+ */
+#define	FP_TOF(exp, expbias, allfrac, f0, f1, f2, f3) \
+	if (exp == 0) { \
+		if (allfrac == 0) \
+			return (FPC_ZERO); \
+		fp->fp_exp = 1 - expbias; \
+		fp->fp_mant[0] = f0; \
+		fp->fp_mant[1] = f1; \
+		fp->fp_mant[2] = f2; \
+		fp->fp_mant[3] = f3; \
+		fpu_norm(fp); \
+		return (FPC_NUM); \
+	} \
+	if (exp == (2 * expbias + 1)) { \
+		if (allfrac == 0) \
+			return (FPC_INF); \
+		fp->fp_mant[0] = f0; \
+		fp->fp_mant[1] = f1; \
+		fp->fp_mant[2] = f2; \
+		fp->fp_mant[3] = f3; \
+		return (FPC_QNAN); \
+	} \
+	fp->fp_exp = exp - expbias; \
+	fp->fp_mant[0] = FP_1 | f0; \
+	fp->fp_mant[1] = f1; \
+	fp->fp_mant[2] = f2; \
+	fp->fp_mant[3] = f3; \
+	return (FPC_NUM)
+
+/*
+ * 32-bit single precision -> fpn.
+ * We assume a single occupies at most (64-FP_LG) bits in the internal
+ * format: i.e., needs at most fp_mant[0] and fp_mant[1].
+ */
+int
+fpu_stof(struct fpn *fp, u_int i)
+{
+	int exp;
+	u_int frac, f0, f1;
+#define SNG_SHIFT (SNG_FRACBITS - FP_LG)
+
+	exp = (i >> (32 - 1 - SNG_EXPBITS)) & mask(SNG_EXPBITS);
+	frac = i & mask(SNG_FRACBITS);
+	f0 = frac >> SNG_SHIFT;
+	f1 = frac << (32 - SNG_SHIFT);
+	FP_TOF(exp, SNG_EXP_BIAS, frac, f0, f1, 0, 0);
+}
+
+/*
+ * 64-bit double -> fpn.
+ * We assume this uses at most (96-FP_LG) bits.
+ */
+int
+fpu_dtof(struct fpn *fp, u_int i, u_int j)
+{
+	int exp;
+	u_int frac, f0, f1, f2;
+#define DBL_SHIFT (DBL_FRACBITS - 32 - FP_LG)
+
+	exp = (i >> (32 - 1 - DBL_EXPBITS)) & mask(DBL_EXPBITS);
+	frac = i & mask(DBL_FRACBITS - 32);
+	f0 = frac >> DBL_SHIFT;
+	f1 = (frac << (32 - DBL_SHIFT)) | (j >> DBL_SHIFT);
+	f2 = j << (32 - DBL_SHIFT);
+	frac |= j;
+	FP_TOF(exp, DBL_EXP_BIAS, frac, f0, f1, f2, 0);
+}
+
+/*
+ * Explode the contents of a register / regpair / regquad.
+ * If the input is a signalling NaN, an NV (invalid) exception
+ * will be set.  (Note that nothing but NV can occur until ALU
+ * operations are performed.)
+ */
+void
+fpu_explode(struct fpemu *fe, struct fpn *fp, int type, int reg)
+{
+	u_int s, *space;
+
+	if (reg >= 0 && reg < 32) {
+		space = (u_int *)&fe->fe_fpstate->fpr_fr[reg];
+	} else if (reg == FPREG_FPUL) {
+		space = (u_int *)&fe->fe_fpstate->fpr_fpul;
+	} else {
+		panic("fpu_explode(): reg = %d", reg);
+		/*NOTREACHED*/
+	}
+	s = space[0];
+
+	fp->fp_sign = s >> 31;
+	fp->fp_sticky = 0;
+	switch (type) {
+	case FTYPE_INT:
+		s = fpu_itof(fp, s);
+		break;
+
+	case FTYPE_SNG:
+		s = fpu_stof(fp, s);
+		break;
+
+	case FTYPE_DBL:
+		if (reg == FPREG_FPUL) {
+			panic("fpu_explode(): FTYPE_DBL: reg == FPREG_FPUL");
+			/*NOTREACHED*/
+		}
+		s = fpu_dtof(fp, s, space[1]);
+		break;
+
+	default:
+		panic("fpu_explode: invalid type %d", type);
+		/*NOTREACHED*/
+	}
+
+	if (s == FPC_QNAN && (fp->fp_mant[0] & FP_QUIETBIT) == 0) {
+		/*
+		 * Input is a signalling NaN.  All operations that return
+		 * an input NaN operand put it through a ``NaN conversion'',
+		 * which basically just means ``turn on the quiet bit''.
+		 * We do this here so that all NaNs internally look quiet
+		 * (we can tell signalling ones by their class).
+		 */
+		fp->fp_mant[0] |= FP_QUIETBIT;
+		fe->fe_cx = FP_V_BIT;	/* assert invalid operand */
+		s = FPC_SNAN;
+	}
+	fp->fp_class = s;
+
+	DPRINTF(FPE_REG, ("fpu_explode: %%%c%d => ",
+		((type == FTYPE_INT) ? 'i' : 
+			((type == FTYPE_SNG) ? 's' :
+				((type == FTYPE_DBL) ? 'd' : '?'))), reg));
+	DUMPFPN(FPE_REG, fp);
+	DPRINTF(FPE_REG, ("\n"));
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_extern.h src/sys/arch/sh3/fpu/fpu_extern.h
--- src.orig/sys/arch/sh3/fpu/fpu_extern.h	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_extern.h	2007-01-31 14:37:50.000000000 +0900
@@ -0,0 +1,81 @@
+/*	$NetBSD$ */
+
+/*-
+ * Copyright (c) 1995 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christos Zoulas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+struct proc;
+struct fpreg;
+struct trapframe;
+struct fpemu;
+struct fpn;
+
+/* fpu.c */
+int fpu_emulate(struct trapframe *, struct fpreg *);
+int fpu_execute(struct trapframe *, struct fpemu *, uint16_t *);
+
+/* fpu_add.c */
+struct fpn *fpu_add(struct fpemu *);
+
+/* fpu_compare.c */
+int fpu_compare(struct fpemu *);
+
+/* fpu_div.c */
+struct fpn *fpu_div(struct fpemu *);
+
+/* fpu_explode.c */
+int fpu_itof(struct fpn *, u_int);
+int fpu_xtof(struct fpn *, u_int64_t);
+int fpu_stof(struct fpn *, u_int);
+int fpu_dtof(struct fpn *, u_int, u_int);
+void fpu_explode(struct fpemu *, struct fpn *, int, int);
+
+/* fpu_implode.c */
+u_int fpu_ftoi(struct fpemu *, struct fpn *);
+u_int fpu_ftox(struct fpemu *, struct fpn *, u_int *);
+u_int fpu_ftos(struct fpemu *, struct fpn *);
+u_int fpu_ftod(struct fpemu *, struct fpn *, u_int *);
+void fpu_implode(struct fpemu *, struct fpn *, int, u_int *);
+
+/* fpu_mul.c */
+struct fpn *fpu_mul(struct fpemu *);
+
+/* fpu_sqrt.c */
+struct fpn *fpu_sqrt(struct fpemu *);
+
+/* fpu_subr.c */
+int fpu_shr(struct fpn *, int);
+void fpu_norm(struct fpn *);
+struct fpn *fpu_newnan(struct fpemu *);
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_implode.c src/sys/arch/sh3/fpu/fpu_implode.c
--- src.orig/sys/arch/sh3/fpu/fpu_implode.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_implode.c	2007-02-01 19:02:05.000000000 +0900
@@ -0,0 +1,375 @@
+/*	$NetBSD$ */
+
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu_implode.c	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * FPU subroutines: `implode' internal format numbers into the machine's
+ * `packed binary' format.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include <machine/ieee.h>
+#include <machine/reg.h>
+#include <machine/fpu.h>
+
+#include <sh3/fpu/fpu_emu.h>
+#include <sh3/fpu/fpu_extern.h>
+#include <sh3/fpu/fpu_arith.h>
+
+static int round(struct fpemu *, struct fpn *);
+static int toinf(struct fpemu *, int);
+
+/*
+ * Round a number (algorithm from Motorola MC68882 manual, modified for
+ * our internal format).  Set inexact exception if rounding is required.
+ * Return true iff we rounded up.
+ *
+ * After rounding, we discard the guard and round bits by shifting right
+ * 2 bits (a la fpu_shr(), but we do not bother with fp->fp_sticky).
+ * This saves effort later.
+ *
+ * Note that we may leave the value 2.0 in fp->fp_mant; it is the caller's
+ * responsibility to fix this if necessary.
+ */
+static int
+round(struct fpemu *fe, struct fpn *fp)
+{
+	u_int m0, m1, m2, m3;
+	int gr, s;
+	FPU_DECL_CARRY;
+
+	m0 = fp->fp_mant[0];
+	m1 = fp->fp_mant[1];
+	m2 = fp->fp_mant[2];
+	m3 = fp->fp_mant[3];
+	gr = m3 & 3;
+	s = fp->fp_sticky;
+
+	/* mant >>= FP_NG */
+	m3 = (m3 >> FP_NG) | (m2 << (32 - FP_NG));
+	m2 = (m2 >> FP_NG) | (m1 << (32 - FP_NG));
+	m1 = (m1 >> FP_NG) | (m0 << (32 - FP_NG));
+	m0 >>= FP_NG;
+
+	if ((gr | s) == 0)	/* result is exact: no rounding needed */
+		goto rounddown;
+
+	fe->fe_cx |= FP_I_BIT;	/* inexact */
+
+	/* Go to rounddown to round down; break to round up. */
+	switch (fe->fe_rm) {
+	case RM_NEAREST:
+		/*
+		 * Round only if guard is set (gr & 2).  If guard is set,
+		 * but round & sticky both clear, then we want to round
+		 * but have a tie, so round to even, i.e., add 1 iff odd.
+		 */
+		if ((gr & 2) == 0)
+			goto rounddown;
+		if ((gr & 1) || fp->fp_sticky || (m3 & 1))
+			break;
+		goto rounddown;
+
+	case RM_ZERO:
+	default:
+		/* Round towards zero, i.e., down. */
+		goto rounddown;
+	}
+
+	FPU_ADDS(m3, m3, 1);
+	FPU_ADDCS(m2, m2, 0);
+	FPU_ADDCS(m1, m1, 0);
+	FPU_ADDC(m0, m0, 0);
+	fp->fp_mant[0] = m0;
+	fp->fp_mant[1] = m1;
+	fp->fp_mant[2] = m2;
+	fp->fp_mant[3] = m3;
+	return (1);
+
+rounddown:
+	fp->fp_mant[0] = m0;
+	fp->fp_mant[1] = m1;
+	fp->fp_mant[2] = m2;
+	fp->fp_mant[3] = m3;
+	return (0);
+}
+
+/*
+ * For overflow: return true if overflow is to go to +/-Inf, according
+ * to the sign of the overflowing result.  If false, overflow is to go
+ * to the largest magnitude value instead.
+ */
+static int
+toinf(struct fpemu *fe, int sign)
+{
+	int inf;
+
+	/* look at rounding direction */
+	switch (fe->fe_rm) {
+	case RM_NEAREST:	/* the nearest value is always Inf */
+		inf = 1;
+		break;
+
+	default:
+	case RM_ZERO:		/* toward 0 => never towards Inf */
+		inf = 0;
+		break;
+	}
+	if (inf)
+		fe->fe_cx |= FP_O_BIT;
+	return (inf);
+}
+
+/*
+ * fpn -> int (int value returned as return value).
+ *
+ * N.B.: this conversion always rounds towards zero (this is a peculiarity
+ * of the SPARC instruction set).
+ */
+u_int
+fpu_ftoi(struct fpemu *fe, struct fpn *fp)
+{
+	u_int i;
+	int sign, exp;
+
+	sign = fp->fp_sign;
+	switch (fp->fp_class) {
+	case FPC_ZERO:
+		return (0);
+
+	case FPC_NUM:
+		/*
+		 * If exp >= 2^32, overflow.  Otherwise shift value right
+		 * into last mantissa word (this will not exceed 0xffffffff),
+		 * shifting any guard and round bits out into the sticky
+		 * bit.  Then ``round'' towards zero, i.e., just set an
+		 * inexact exception if sticky is set (see round()).
+		 * If the result is > 0x80000000, or is positive and equals
+		 * 0x80000000, overflow; otherwise the last fraction word
+		 * is the result.
+		 */
+		if ((exp = fp->fp_exp) >= 32)
+			break;
+		/* NB: the following includes exp < 0 cases */
+		if (fpu_shr(fp, FP_NMANT - 1 - exp) != 0)
+			fe->fe_cx |= FP_U_BIT;
+		i = fp->fp_mant[3];
+		if (i >= ((u_int)0x80000000 + sign))
+			break;
+		return (sign ? -i : i);
+
+	default:		/* Inf, qNaN, sNaN */
+		break;
+	}
+	/* overflow: replace any inexact exception with invalid */
+	fe->fe_cx |= FP_O_BIT;
+	return (0x7fffffff + sign);
+}
+
+/*
+ * fpn -> single (32 bit single returned as return value).
+ * We assume <= 29 bits in a single-precision fraction (1.f part).
+ */
+u_int
+fpu_ftos(struct fpemu *fe, struct fpn *fp)
+{
+	u_int sign = fp->fp_sign << 31;
+	int exp;
+
+#define	SNG_EXP(e)	((e) << SNG_FRACBITS)	/* makes e an exponent */
+#define	SNG_MASK	(SNG_EXP(1) - 1)	/* mask for fraction */
+
+	/* Take care of non-numbers first. */
+	if (ISNAN(fp)) {
+		/*
+		 * Preserve upper bits of NaN, per SPARC V8 appendix N.
+		 * Note that fp->fp_mant[0] has the quiet bit set,
+		 * even if it is classified as a signalling NaN.
+		 */
+		(void) fpu_shr(fp, FP_NMANT - 1 - SNG_FRACBITS);
+		exp = SNG_EXP_INFNAN;
+		goto done;
+	}
+	if (ISINF(fp))
+		return (sign | SNG_EXP(SNG_EXP_INFNAN));
+	if (ISZERO(fp))
+		return (sign);
+
+	/*
+	 * Normals (including subnormals).  Drop all the fraction bits
+	 * (including the explicit ``implied'' 1 bit) down into the
+	 * single-precision range.  If the number is subnormal, move
+	 * the ``implied'' 1 into the explicit range as well, and shift
+	 * right to introduce leading zeroes.  Rounding then acts
+	 * differently for normals and subnormals: the largest subnormal
+	 * may round to the smallest normal (1.0 x 2^minexp), or may
+	 * remain subnormal.  In the latter case, signal an underflow
+	 * if the result was inexact or if underflow traps are enabled.
+	 *
+	 * Rounding a normal, on the other hand, always produces another
+	 * normal (although either way the result might be too big for
+	 * single precision, and cause an overflow).  If rounding a
+	 * normal produces 2.0 in the fraction, we need not adjust that
+	 * fraction at all, since both 1.0 and 2.0 are zero under the
+	 * fraction mask.
+	 *
+	 * Note that the guard and round bits vanish from the number after
+	 * rounding.
+	 */
+	if ((exp = fp->fp_exp + SNG_EXP_BIAS) <= 0) {	/* subnormal */
+		/* -NG for g,r; -SNG_FRACBITS-exp for fraction */
+		(void) fpu_shr(fp, FP_NMANT - FP_NG - SNG_FRACBITS - exp);
+		if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(1))
+			return (sign | SNG_EXP(1) | 0);
+		fe->fe_cx |= FP_U_BIT;
+		return (sign | SNG_EXP(0) | fp->fp_mant[3]);
+	}
+	/* -FP_NG for g,r; -1 for implied 1; -SNG_FRACBITS for fraction */
+	(void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - SNG_FRACBITS);
+#ifdef DIAGNOSTIC
+	if ((fp->fp_mant[3] & SNG_EXP(1 << FP_NG)) == 0)
+		panic("fpu_ftos");
+#endif
+	if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(2))
+		exp++;
+	if (exp >= SNG_EXP_INFNAN) {
+		/* overflow to inf or to max single */
+		if (toinf(fe, sign))
+			return (sign | SNG_EXP(SNG_EXP_INFNAN));
+		return (sign | SNG_EXP(SNG_EXP_INFNAN - 1) | SNG_MASK);
+	}
+done:
+	/* phew, made it */
+	return (sign | SNG_EXP(exp) | (fp->fp_mant[3] & SNG_MASK));
+}
+
+/*
+ * fpn -> double (32 bit high-order result returned; 32-bit low order result
+ * left in res[1]).  Assumes <= 61 bits in double precision fraction.
+ *
+ * This code mimics fpu_ftos; see it for comments.
+ */
+u_int
+fpu_ftod(struct fpemu *fe, struct fpn *fp, u_int *res)
+{
+	u_int sign = fp->fp_sign << 31;
+	int exp;
+
+#define	DBL_EXP(e)	((e) << (DBL_FRACBITS & 31))
+#define	DBL_MASK	(DBL_EXP(1) - 1)
+
+	if (ISNAN(fp)) {
+		(void) fpu_shr(fp, FP_NMANT - 1 - DBL_FRACBITS);
+		exp = DBL_EXP_INFNAN;
+		goto done;
+	}
+	if (ISINF(fp)) {
+		sign |= DBL_EXP(DBL_EXP_INFNAN);
+		goto zero;
+	}
+	if (ISZERO(fp)) {
+zero:		res[1] = 0;
+		return (sign);
+	}
+
+	if ((exp = fp->fp_exp + DBL_EXP_BIAS) <= 0) {
+		(void) fpu_shr(fp, FP_NMANT - FP_NG - DBL_FRACBITS - exp);
+		if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(1)) {
+			res[1] = 0;
+			return (sign | DBL_EXP(1) | 0);
+		}
+		fe->fe_cx |= FP_U_BIT;
+		exp = 0;
+		goto done;
+	}
+	(void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - DBL_FRACBITS);
+	if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(2))
+		exp++;
+	if (exp >= DBL_EXP_INFNAN) {
+		fe->fe_cx |= FP_O_BIT;
+		if (toinf(fe, sign)) {
+			res[1] = 0;
+			return (sign | DBL_EXP(DBL_EXP_INFNAN) | 0);
+		}
+		res[1] = ~0;
+		return (sign | DBL_EXP(DBL_EXP_INFNAN) | DBL_MASK);
+	}
+done:
+	res[1] = fp->fp_mant[3];
+	return (sign | DBL_EXP(exp) | (fp->fp_mant[2] & DBL_MASK));
+}
+
+/*
+ * Implode an fpn, writing the result into the given space.
+ */
+void
+fpu_implode(struct fpemu *fe, struct fpn *fp, int type, u_int *space)
+{
+
+	switch (type) {
+	case FTYPE_INT:
+		space[0] = fpu_ftoi(fe, fp);
+		DPRINTF(FPE_REG, ("fpu_implode: int %x\n",
+			space[0]));
+		break;
+
+	case FTYPE_SNG:
+		space[0] = fpu_ftos(fe, fp);
+		DPRINTF(FPE_REG, ("fpu_implode: single %x\n",
+			space[0]));
+		break;
+
+	case FTYPE_DBL:
+		space[0] = fpu_ftod(fe, fp, space);
+		DPRINTF(FPE_REG, ("fpu_implode: double %x %x\n",
+			space[0], space[1]));
+		break;
+
+	default:
+		panic("fpu_implode: invalid type %d", type);
+		/*NOTREACHED*/
+	}
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_mul.c src/sys/arch/sh3/fpu/fpu_mul.c
--- src.orig/sys/arch/sh3/fpu/fpu_mul.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_mul.c	2007-01-31 12:15:04.000000000 +0900
@@ -0,0 +1,241 @@
+/*	$NetBSD$ */
+
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu_mul.c	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Perform an FPU multiply (return x * y).
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/types.h>
+#if defined(DIAGNOSTIC)||defined(DEBUG)
+#include <sys/systm.h>
+#endif
+
+#include <machine/reg.h>
+#include <machine/fpu.h>
+
+#include <sh3/fpu/fpu_arith.h>
+#include <sh3/fpu/fpu_emu.h>
+
+/*
+ * The multiplication algorithm for normal numbers is as follows:
+ *
+ * The fraction of the product is built in the usual stepwise fashion.
+ * Each step consists of shifting the accumulator right one bit
+ * (maintaining any guard bits) and, if the next bit in y is set,
+ * adding the multiplicand (x) to the accumulator.  Then, in any case,
+ * we advance one bit leftward in y.  Algorithmically:
+ *
+ *	A = 0;
+ *	for (bit = 0; bit < FP_NMANT; bit++) {
+ *		sticky |= A & 1, A >>= 1;
+ *		if (Y & (1 << bit))
+ *			A += X;
+ *	}
+ *
+ * (X and Y here represent the mantissas of x and y respectively.)
+ * The resultant accumulator (A) is the product's mantissa.  It may
+ * be as large as 11.11111... in binary and hence may need to be
+ * shifted right, but at most one bit.
+ *
+ * Since we do not have efficient multiword arithmetic, we code the
+ * accumulator as four separate words, just like any other mantissa.
+ * We use local variables in the hope that this is faster than memory.
+ * We keep x->fp_mant in locals for the same reason.
+ *
+ * In the algorithm above, the bits in y are inspected one at a time.
+ * We will pick them up 32 at a time and then deal with those 32, one
+ * at a time.  Note, however, that we know several things about y:
+ *
+ *    - the guard and round bits at the bottom are sure to be zero;
+ *
+ *    - often many low bits are zero (y is often from a single or double
+ *	precision source);
+ *
+ *    - bit FP_NMANT-1 is set, and FP_1*2 fits in a word.
+ *
+ * We can also test for 32-zero-bits swiftly.  In this case, the center
+ * part of the loop---setting sticky, shifting A, and not adding---will
+ * run 32 times without adding X to A.  We can do a 32-bit shift faster
+ * by simply moving words.  Since zeros are common, we optimize this case.
+ * Furthermore, since A is initially zero, we can omit the shift as well
+ * until we reach a nonzero word.
+ */
+struct fpn *
+fpu_mul(struct fpemu *fe)
+{
+	struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2;
+	u_int a3, a2, a1, a0, x3, x2, x1, x0, bit, m;
+	int sticky;
+	FPU_DECL_CARRY;
+
+	/*
+	 * Put the `heavier' operand on the right (see fpu_emu.h).
+	 * Then we will have one of the following cases, taken in the
+	 * following order:
+	 *
+	 *  - y = NaN.  Implied: if only one is a signalling NaN, y is.
+	 *	The result is y.
+	 *  - y = Inf.  Implied: x != NaN (is 0, number, or Inf: the NaN
+	 *    case was taken care of earlier).
+	 *	If x = 0, the result is NaN.  Otherwise the result
+	 *	is y, with its sign reversed if x is negative.
+	 *  - x = 0.  Implied: y is 0 or number.
+	 *	The result is 0 (with XORed sign as usual).
+	 *  - other.  Implied: both x and y are numbers.
+	 *	The result is x * y (XOR sign, multiply bits, add exponents).
+	 */
+	DPRINTF(FPE_REG, ("fpu_mul:\n"));
+	DUMPFPN(FPE_REG, x);
+	DUMPFPN(FPE_REG, y);
+	DPRINTF(FPE_REG, ("=>\n"));
+
+	ORDER(x, y);
+	if (ISNAN(y)) {
+		y->fp_sign ^= x->fp_sign;
+		fe->fe_cx |= FPSCR_VXSNAN;
+		DUMPFPN(FPE_REG, y);
+		return (y);
+	}
+	if (ISINF(y)) {
+		if (ISZERO(x)) {
+			fe->fe_cx |= FPSCR_VXIMZ;
+			return (fpu_newnan(fe));
+		}
+		y->fp_sign ^= x->fp_sign;
+		DUMPFPN(FPE_REG, y);
+		return (y);
+	}
+	if (ISZERO(x)) {
+		x->fp_sign ^= y->fp_sign;
+		DUMPFPN(FPE_REG, x);
+		return (x);
+	}
+
+	/*
+	 * Setup.  In the code below, the mask `m' will hold the current
+	 * mantissa byte from y.  The variable `bit' denotes the bit
+	 * within m.  We also define some macros to deal with everything.
+	 */
+	x3 = x->fp_mant[3];
+	x2 = x->fp_mant[2];
+	x1 = x->fp_mant[1];
+	x0 = x->fp_mant[0];
+	sticky = a3 = a2 = a1 = a0 = 0;
+
+#define	ADD	/* A += X */ \
+	FPU_ADDS(a3, a3, x3); \
+	FPU_ADDCS(a2, a2, x2); \
+	FPU_ADDCS(a1, a1, x1); \
+	FPU_ADDC(a0, a0, x0)
+
+#define	SHR1	/* A >>= 1, with sticky */ \
+	sticky |= a3 & 1, a3 = (a3 >> 1) | (a2 << 31), \
+	a2 = (a2 >> 1) | (a1 << 31), a1 = (a1 >> 1) | (a0 << 31), a0 >>= 1
+
+#define	SHR32	/* A >>= 32, with sticky */ \
+	sticky |= a3, a3 = a2, a2 = a1, a1 = a0, a0 = 0
+
+#define	STEP	/* each 1-bit step of the multiplication */ \
+	SHR1; if (bit & m) { ADD; }; bit <<= 1
+
+	/*
+	 * We are ready to begin.  The multiply loop runs once for each
+	 * of the four 32-bit words.  Some words, however, are special.
+	 * As noted above, the low order bits of Y are often zero.  Even
+	 * if not, the first loop can certainly skip the guard bits.
+	 * The last word of y has its highest 1-bit in position FP_NMANT-1,
+	 * so we stop the loop when we move past that bit.
+	 */
+	if ((m = y->fp_mant[3]) == 0) {
+		/* SHR32; */			/* unneeded since A==0 */
+	} else {
+		bit = 1 << FP_NG;
+		do {
+			STEP;
+		} while (bit != 0);
+	}
+	if ((m = y->fp_mant[2]) == 0) {
+		SHR32;
+	} else {
+		bit = 1;
+		do {
+			STEP;
+		} while (bit != 0);
+	}
+	if ((m = y->fp_mant[1]) == 0) {
+		SHR32;
+	} else {
+		bit = 1;
+		do {
+			STEP;
+		} while (bit != 0);
+	}
+	m = y->fp_mant[0];		/* definitely != 0 */
+	bit = 1;
+	do {
+		STEP;
+	} while (bit <= m);
+
+	/*
+	 * Done with mantissa calculation.  Get exponent and handle
+	 * 11.111...1 case, then put result in place.  We reuse x since
+	 * it already has the right class (FP_NUM).
+	 */
+	m = x->fp_exp + y->fp_exp;
+	if (a0 >= FP_2) {
+		SHR1;
+		m++;
+	}
+	x->fp_sign ^= y->fp_sign;
+	x->fp_exp = m;
+	x->fp_sticky = sticky;
+	x->fp_mant[3] = a3;
+	x->fp_mant[2] = a2;
+	x->fp_mant[1] = a1;
+	x->fp_mant[0] = a0;
+
+	DUMPFPN(FPE_REG, x);
+	return (x);
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_sqrt.c src/sys/arch/sh3/fpu/fpu_sqrt.c
--- src.orig/sys/arch/sh3/fpu/fpu_sqrt.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_sqrt.c	2007-01-31 12:15:09.000000000 +0900
@@ -0,0 +1,417 @@
+/*	$NetBSD$ */
+
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu_sqrt.c	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Perform an FPU square root (return sqrt(x)).
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/types.h>
+#if defined(DIAGNOSTIC)||defined(DEBUG)
+#include <sys/systm.h>
+#endif
+
+#include <machine/reg.h>
+#include <machine/fpu.h>
+
+#include <sh3/fpu/fpu_arith.h>
+#include <sh3/fpu/fpu_emu.h>
+
+/*
+ * Our task is to calculate the square root of a floating point number x0.
+ * This number x normally has the form:
+ *
+ *		    exp
+ *	x = mant * 2		(where 1 <= mant < 2 and exp is an integer)
+ *
+ * This can be left as it stands, or the mantissa can be doubled and the
+ * exponent decremented:
+ *
+ *			  exp-1
+ *	x = (2 * mant) * 2	(where 2 <= 2 * mant < 4)
+ *
+ * If the exponent `exp' is even, the square root of the number is best
+ * handled using the first form, and is by definition equal to:
+ *
+ *				exp/2
+ *	sqrt(x) = sqrt(mant) * 2
+ *
+ * If exp is odd, on the other hand, it is convenient to use the second
+ * form, giving:
+ *
+ *				    (exp-1)/2
+ *	sqrt(x) = sqrt(2 * mant) * 2
+ *
+ * In the first case, we have
+ *
+ *	1 <= mant < 2
+ *
+ * and therefore
+ *
+ *	sqrt(1) <= sqrt(mant) < sqrt(2)
+ *
+ * while in the second case we have
+ *
+ *	2 <= 2*mant < 4
+ *
+ * and therefore
+ *
+ *	sqrt(2) <= sqrt(2*mant) < sqrt(4)
+ *
+ * so that in any case, we are sure that
+ *
+ *	sqrt(1) <= sqrt(n * mant) < sqrt(4),	n = 1 or 2
+ *
+ * or
+ *
+ *	1 <= sqrt(n * mant) < 2,		n = 1 or 2.
+ *
+ * This root is therefore a properly formed mantissa for a floating
+ * point number.  The exponent of sqrt(x) is either exp/2 or (exp-1)/2
+ * as above.  This leaves us with the problem of finding the square root
+ * of a fixed-point number in the range [1..4).
+ *
+ * Though it may not be instantly obvious, the following square root
+ * algorithm works for any integer x of an even number of bits, provided
+ * that no overflows occur:
+ *
+ *	let q = 0
+ *	for k = NBITS-1 to 0 step -1 do -- for each digit in the answer...
+ *		x *= 2			-- multiply by radix, for next digit
+ *		if x >= 2q + 2^k then	-- if adding 2^k does not
+ *			x -= 2q + 2^k	-- exceed the correct root,
+ *			q += 2^k	-- add 2^k and adjust x
+ *		fi
+ *	done
+ *	sqrt = q / 2^(NBITS/2)		-- (and any remainder is in x)
+ *
+ * If NBITS is odd (so that k is initially even), we can just add another
+ * zero bit at the top of x.  Doing so means that q is not going to acquire
+ * a 1 bit in the first trip around the loop (since x0 < 2^NBITS).  If the
+ * final value in x is not needed, or can be off by a factor of 2, this is
+ * equivalent to moving the `x *= 2' step to the bottom of the loop:
+ *
+ *	for k = NBITS-1 to 0 step -1 do if ... fi; x *= 2; done
+ *
+ * and the result q will then be sqrt(x0) * 2^floor(NBITS / 2).
+ * (Since the algorithm is destructive on x, we will call x's initial
+ * value, for which q is some power of two times its square root, x0.)
+ *
+ * If we insert a loop invariant y = 2q, we can then rewrite this using
+ * C notation as:
+ *
+ *	q = y = 0; x = x0;
+ *	for (k = NBITS; --k >= 0;) {
+ * #if (NBITS is even)
+ *		x *= 2;
+ * #endif
+ *		t = y + (1 << k);
+ *		if (x >= t) {
+ *			x -= t;
+ *			q += 1 << k;
+ *			y += 1 << (k + 1);
+ *		}
+ * #if (NBITS is odd)
+ *		x *= 2;
+ * #endif
+ *	}
+ *
+ * If x0 is fixed point, rather than an integer, we can simply alter the
+ * scale factor between q and sqrt(x0).  As it happens, we can easily arrange
+ * for the scale factor to be 2**0 or 1, so that sqrt(x0) == q.
+ *
+ * In our case, however, x0 (and therefore x, y, q, and t) are multiword
+ * integers, which adds some complication.  But note that q is built one
+ * bit at a time, from the top down, and is not used itself in the loop
+ * (we use 2q as held in y instead).  This means we can build our answer
+ * in an integer, one word at a time, which saves a bit of work.  Also,
+ * since 1 << k is always a `new' bit in q, 1 << k and 1 << (k+1) are
+ * `new' bits in y and we can set them with an `or' operation rather than
+ * a full-blown multiword add.
+ *
+ * We are almost done, except for one snag.  We must prove that none of our
+ * intermediate calculations can overflow.  We know that x0 is in [1..4)
+ * and therefore the square root in q will be in [1..2), but what about x,
+ * y, and t?
+ *
+ * We know that y = 2q at the beginning of each loop.  (The relation only
+ * fails temporarily while y and q are being updated.)  Since q < 2, y < 4.
+ * The sum in t can, in our case, be as much as y+(1<<1) = y+2 < 6.
+ * Furthermore, we can prove with a bit of work that x never exceeds y by
+ * more than 2, so that even after doubling, 0 <= x < 8.  (This is left as
+ * an exercise to the reader, mostly because I have become tired of working
+ * on this comment.)
+ *
+ * If our floating point mantissas (which are of the form 1.frac) occupy
+ * B+1 bits, our largest intermediary needs at most B+3 bits, or two extra.
+ * In fact, we want even one more bit (for a carry, to avoid compares), or
+ * three extra.  There is a comment in fpu_emu.h reminding maintainers of
+ * this, so we have some justification in assuming it.
+ */
+struct fpn *
+fpu_sqrt(struct fpemu *fe)
+{
+	struct fpn *x = &fe->fe_f1;
+	u_int bit, q, tt;
+	u_int x0, x1, x2, x3;
+	u_int y0, y1, y2, y3;
+	u_int d0, d1, d2, d3;
+	int e;
+	FPU_DECL_CARRY;
+
+	/*
+	 * Take care of special cases first.  In order:
+	 *
+	 *	sqrt(NaN) = NaN
+	 *	sqrt(+0) = +0
+	 *	sqrt(-0) = -0
+	 *	sqrt(x < 0) = NaN	(including sqrt(-Inf))
+	 *	sqrt(+Inf) = +Inf
+	 *
+	 * Then all that remains are numbers with mantissas in [1..2).
+	 */
+	DPRINTF(FPE_REG, ("fpu_sqrt:\n"));
+	DUMPFPN(FPE_REG, x);
+	DPRINTF(FPE_REG, ("=>\n"));
+	if (ISNAN(x)) {
+		fe->fe_cx |= FPSCR_VXSNAN;
+		DUMPFPN(FPE_REG, x);
+		return (x);
+	}
+	if (ISZERO(x)) {
+		fe->fe_cx |= FPSCR_ZX;
+		x->fp_class = FPC_INF;
+		DUMPFPN(FPE_REG, x);
+		return (x);
+	}
+	if (x->fp_sign) {
+		return (fpu_newnan(fe));
+	}
+	if (ISINF(x)) {
+		fe->fe_cx |= FPSCR_VXSQRT;
+		DUMPFPN(FPE_REG, 0);
+		return (0);
+	}
+
+	/*
+	 * Calculate result exponent.  As noted above, this may involve
+	 * doubling the mantissa.  We will also need to double x each
+	 * time around the loop, so we define a macro for this here, and
+	 * we break out the multiword mantissa.
+	 */
+#ifdef FPU_SHL1_BY_ADD
+#define	DOUBLE_X { \
+	FPU_ADDS(x3, x3, x3); FPU_ADDCS(x2, x2, x2); \
+	FPU_ADDCS(x1, x1, x1); FPU_ADDC(x0, x0, x0); \
+}
+#else
+#define	DOUBLE_X { \
+	x0 = (x0 << 1) | (x1 >> 31); x1 = (x1 << 1) | (x2 >> 31); \
+	x2 = (x2 << 1) | (x3 >> 31); x3 <<= 1; \
+}
+#endif
+#if (FP_NMANT & 1) != 0
+# define ODD_DOUBLE	DOUBLE_X
+# define EVEN_DOUBLE	/* nothing */
+#else
+# define ODD_DOUBLE	/* nothing */
+# define EVEN_DOUBLE	DOUBLE_X
+#endif
+	x0 = x->fp_mant[0];
+	x1 = x->fp_mant[1];
+	x2 = x->fp_mant[2];
+	x3 = x->fp_mant[3];
+	e = x->fp_exp;
+	if (e & 1)		/* exponent is odd; use sqrt(2mant) */
+		DOUBLE_X;
+	/* THE FOLLOWING ASSUMES THAT RIGHT SHIFT DOES SIGN EXTENSION */
+	x->fp_exp = e >> 1;	/* calculates (e&1 ? (e-1)/2 : e/2) */
+
+	/*
+	 * Now calculate the mantissa root.  Since x is now in [1..4),
+	 * we know that the first trip around the loop will definitely
+	 * set the top bit in q, so we can do that manually and start
+	 * the loop at the next bit down instead.  We must be sure to
+	 * double x correctly while doing the `known q=1.0'.
+	 *
+	 * We do this one mantissa-word at a time, as noted above, to
+	 * save work.  To avoid `(1 << 31) << 1', we also do the top bit
+	 * outside of each per-word loop.
+	 *
+	 * The calculation `t = y + bit' breaks down into `t0 = y0, ...,
+	 * t3 = y3, t? |= bit' for the appropriate word.  Since the bit
+	 * is always a `new' one, this means that three of the `t?'s are
+	 * just the corresponding `y?'; we use `#define's here for this.
+	 * The variable `tt' holds the actual `t?' variable.
+	 */
+
+	/* calculate q0 */
+#define	t0 tt
+	bit = FP_1;
+	EVEN_DOUBLE;
+	/* if (x >= (t0 = y0 | bit)) { */	/* always true */
+		q = bit;
+		x0 -= bit;
+		y0 = bit << 1;
+	/* } */
+	ODD_DOUBLE;
+	while ((bit >>= 1) != 0) {	/* for remaining bits in q0 */
+		EVEN_DOUBLE;
+		t0 = y0 | bit;		/* t = y + bit */
+		if (x0 >= t0) {		/* if x >= t then */
+			x0 -= t0;	/*	x -= t */
+			q |= bit;	/*	q += bit */
+			y0 |= bit << 1;	/*	y += bit << 1 */
+		}
+		ODD_DOUBLE;
+	}
+	x->fp_mant[0] = q;
+#undef t0
+
+	/* calculate q1.  note (y0&1)==0. */
+#define t0 y0
+#define t1 tt
+	q = 0;
+	y1 = 0;
+	bit = 1 << 31;
+	EVEN_DOUBLE;
+	t1 = bit;
+	FPU_SUBS(d1, x1, t1);
+	FPU_SUBC(d0, x0, t0);		/* d = x - t */
+	if ((int)d0 >= 0) {		/* if d >= 0 (i.e., x >= t) then */
+		x0 = d0, x1 = d1;	/*	x -= t */
+		q = bit;		/*	q += bit */
+		y0 |= 1;		/*	y += bit << 1 */
+	}
+	ODD_DOUBLE;
+	while ((bit >>= 1) != 0) {	/* for remaining bits in q1 */
+		EVEN_DOUBLE;		/* as before */
+		t1 = y1 | bit;
+		FPU_SUBS(d1, x1, t1);
+		FPU_SUBC(d0, x0, t0);
+		if ((int)d0 >= 0) {
+			x0 = d0, x1 = d1;
+			q |= bit;
+			y1 |= bit << 1;
+		}
+		ODD_DOUBLE;
+	}
+	x->fp_mant[1] = q;
+#undef t1
+
+	/* calculate q2.  note (y1&1)==0; y0 (aka t0) is fixed. */
+#define t1 y1
+#define t2 tt
+	q = 0;
+	y2 = 0;
+	bit = 1 << 31;
+	EVEN_DOUBLE;
+	t2 = bit;
+	FPU_SUBS(d2, x2, t2);
+	FPU_SUBCS(d1, x1, t1);
+	FPU_SUBC(d0, x0, t0);
+	if ((int)d0 >= 0) {
+		x0 = d0, x1 = d1, x2 = d2;
+		q |= bit;
+		y1 |= 1;		/* now t1, y1 are set in concrete */
+	}
+	ODD_DOUBLE;
+	while ((bit >>= 1) != 0) {
+		EVEN_DOUBLE;
+		t2 = y2 | bit;
+		FPU_SUBS(d2, x2, t2);
+		FPU_SUBCS(d1, x1, t1);
+		FPU_SUBC(d0, x0, t0);
+		if ((int)d0 >= 0) {
+			x0 = d0, x1 = d1, x2 = d2;
+			q |= bit;
+			y2 |= bit << 1;
+		}
+		ODD_DOUBLE;
+	}
+	x->fp_mant[2] = q;
+#undef t2
+
+	/* calculate q3.  y0, t0, y1, t1 all fixed; y2, t2, almost done. */
+#define t2 y2
+#define t3 tt
+	q = 0;
+	y3 = 0;
+	bit = 1 << 31;
+	EVEN_DOUBLE;
+	t3 = bit;
+	FPU_SUBS(d3, x3, t3);
+	FPU_SUBCS(d2, x2, t2);
+	FPU_SUBCS(d1, x1, t1);
+	FPU_SUBC(d0, x0, t0);
+	ODD_DOUBLE;
+	if ((int)d0 >= 0) {
+		x0 = d0, x1 = d1, x2 = d2;
+		q |= bit;
+		y2 |= 1;
+	}
+	while ((bit >>= 1) != 0) {
+		EVEN_DOUBLE;
+		t3 = y3 | bit;
+		FPU_SUBS(d3, x3, t3);
+		FPU_SUBCS(d2, x2, t2);
+		FPU_SUBCS(d1, x1, t1);
+		FPU_SUBC(d0, x0, t0);
+		if ((int)d0 >= 0) {
+			x0 = d0, x1 = d1, x2 = d2;
+			q |= bit;
+			y3 |= bit << 1;
+		}
+		ODD_DOUBLE;
+	}
+	x->fp_mant[3] = q;
+
+	/*
+	 * The result, which includes guard and round bits, is exact iff
+	 * x is now zero; any nonzero bits in x represent sticky bits.
+	 */
+	x->fp_sticky = x0 | x1 | x2 | x3;
+	DUMPFPN(FPE_REG, x);
+	return (x);
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_subr.c src/sys/arch/sh3/fpu/fpu_subr.c
--- src.orig/sys/arch/sh3/fpu/fpu_subr.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_subr.c	2007-02-02 12:31:42.000000000 +0900
@@ -0,0 +1,222 @@
+/*	$NetBSD$ */
+
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu_subr.c	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * FPU subroutines.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/types.h>
+#if defined(DIAGNOSTIC)||defined(DEBUG)
+#include <sys/systm.h>
+#endif
+
+#include <machine/reg.h>
+#include <machine/fpu.h>
+
+#include <sh3/fpu/fpu_arith.h>
+#include <sh3/fpu/fpu_emu.h>
+#include <sh3/fpu/fpu_extern.h>
+
+/*
+ * Shift the given number right rsh bits.  Any bits that `fall off' will get
+ * shoved into the sticky field; we return the resulting sticky.  Note that
+ * shifting NaNs is legal (this will never shift all bits out); a NaN's
+ * sticky field is ignored anyway.
+ */
+int
+fpu_shr(struct fpn *fp, int rsh)
+{
+	u_int m0, m1, m2, m3, s;
+	int lsh;
+
+#ifdef DIAGNOSTIC
+	if (rsh <= 0 || (fp->fp_class != FPC_NUM && !ISNAN(fp)))
+		panic("fpu_rightshift 1");
+#endif
+
+	m0 = fp->fp_mant[0];
+	m1 = fp->fp_mant[1];
+	m2 = fp->fp_mant[2];
+	m3 = fp->fp_mant[3];
+
+	/* If shifting all the bits out, take a shortcut. */
+	if (rsh >= FP_NMANT) {
+#ifdef DIAGNOSTIC
+		if ((m0 | m1 | m2 | m3) == 0)
+			panic("fpu_rightshift 2");
+#endif
+		fp->fp_mant[0] = 0;
+		fp->fp_mant[1] = 0;
+		fp->fp_mant[2] = 0;
+		fp->fp_mant[3] = 0;
+#ifdef notdef
+		if ((m0 | m1 | m2 | m3) == 0)
+			fp->fp_class = FPC_ZERO;
+		else
+#endif
+			fp->fp_sticky = 1;
+		return (1);
+	}
+
+	/* Squish out full words. */
+	s = fp->fp_sticky;
+	if (rsh >= 32 * 3) {
+		s |= m3 | m2 | m1;
+		m3 = m0, m2 = 0, m1 = 0, m0 = 0;
+	} else if (rsh >= 32 * 2) {
+		s |= m3 | m2;
+		m3 = m1, m2 = m0, m1 = 0, m0 = 0;
+	} else if (rsh >= 32) {
+		s |= m3;
+		m3 = m2, m2 = m1, m1 = m0, m0 = 0;
+	}
+
+	/* Handle any remaining partial word. */
+	if ((rsh &= 31) != 0) {
+		lsh = 32 - rsh;
+		s |= m3 << lsh;
+		m3 = (m3 >> rsh) | (m2 << lsh);
+		m2 = (m2 >> rsh) | (m1 << lsh);
+		m1 = (m1 >> rsh) | (m0 << lsh);
+		m0 >>= rsh;
+	}
+	fp->fp_mant[0] = m0;
+	fp->fp_mant[1] = m1;
+	fp->fp_mant[2] = m2;
+	fp->fp_mant[3] = m3;
+	fp->fp_sticky = s;
+	return (s);
+}
+
+/*
+ * Force a number to be normal, i.e., make its fraction have all zero
+ * bits before FP_1, then FP_1, then all 1 bits.  This is used for denorms
+ * and (sometimes) for intermediate results.
+ *
+ * Internally, this may use a `supernormal' -- a number whose fp_mant
+ * is greater than or equal to 2.0 -- so as a side effect you can hand it
+ * a supernormal and it will fix it (provided fp->fp_mant[3] == 0).
+ */
+void
+fpu_norm(struct fpn *fp)
+{
+	u_int m0, m1, m2, m3, top, sup, nrm;
+	int lsh, rsh, exp;
+
+	exp = fp->fp_exp;
+	m0 = fp->fp_mant[0];
+	m1 = fp->fp_mant[1];
+	m2 = fp->fp_mant[2];
+	m3 = fp->fp_mant[3];
+
+	/* Handle severe subnormals with 32-bit moves. */
+	if (m0 == 0) {
+		if (m1)
+			m0 = m1, m1 = m2, m2 = m3, m3 = 0, exp -= 32;
+		else if (m2)
+			m0 = m2, m1 = m3, m2 = 0, m3 = 0, exp -= 2 * 32;
+		else if (m3)
+			m0 = m3, m1 = 0, m2 = 0, m3 = 0, exp -= 3 * 32;
+		else {
+			fp->fp_class = FPC_ZERO;
+			return;
+		}
+	}
+
+	/* Now fix any supernormal or remaining subnormal. */
+	nrm = FP_1;
+	sup = nrm << 1;
+	if (m0 >= sup) {
+		/*
+		 * We have a supernormal number.  We need to shift it right.
+		 * We may assume m3==0.
+		 */
+		for (rsh = 1, top = m0 >> 1; top >= sup; rsh++)	/* XXX slow */
+			top >>= 1;
+		exp += rsh;
+		lsh = 32 - rsh;
+		m3 = m2 << lsh;
+		m2 = (m2 >> rsh) | (m1 << lsh);
+		m1 = (m1 >> rsh) | (m0 << lsh);
+		m0 = top;
+	} else if (m0 < nrm) {
+		/*
+		 * We have a regular denorm (a subnormal number), and need
+		 * to shift it left.
+		 */
+		for (lsh = 1, top = m0 << 1; top < nrm; lsh++)	/* XXX slow */
+			top <<= 1;
+		exp -= lsh;
+		rsh = 32 - lsh;
+		m0 = top | (m1 >> rsh);
+		m1 = (m1 << lsh) | (m2 >> rsh);
+		m2 = (m2 << lsh) | (m3 >> rsh);
+		m3 <<= lsh;
+	}
+
+	fp->fp_exp = exp;
+	fp->fp_mant[0] = m0;
+	fp->fp_mant[1] = m1;
+	fp->fp_mant[2] = m2;
+	fp->fp_mant[3] = m3;
+}
+
+/*
+ * Concoct a `fresh' Quiet NaN per Appendix N.
+ * As a side effect, we set NV (invalid) for the current exceptions.
+ */
+struct fpn *
+fpu_newnan(struct fpemu *fe)
+{
+	struct fpn *fp;
+
+	fp = &fe->fe_f3;
+	fp->fp_class = FPC_QNAN;
+	fp->fp_sign = 0;
+	fp->fp_mant[0] = FP_1 - 1;
+	fp->fp_mant[1] = fp->fp_mant[2] = fp->fp_mant[3] = ~0;
+	DUMPFPN(FPE_REG, fp);
+	return (fp);
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/Makefile src/sys/arch/sh3/include/Makefile
--- src.orig/sys/arch/sh3/include/Makefile	2007-02-10 06:55:12.000000000 +0900
+++ src/sys/arch/sh3/include/Makefile	2007-02-10 11:17:30.000000000 +0900
@@ -7,7 +7,7 @@
 	cdefs.h coff_machdep.h cpu.h cputypes.h \
 	disklabel.h \
 	elf_machdep.h endian.h endian_machdep.h \
-	float.h frame.h \
+	float.h fpu.h frame.h \
 	ieee.h ieeefp.h \
 	int_const.h int_fmtio.h int_limits.h int_mwgwtypes.h int_types.h \
 	intr.h \
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/cpu.h src/sys/arch/sh3/include/cpu.h
--- src.orig/sys/arch/sh3/include/cpu.h	2008-03-23 13:30:39.000000000 +0900
+++ src/sys/arch/sh3/include/cpu.h	2008-03-23 13:48:11.000000000 +0900
@@ -60,6 +60,7 @@
 	int	ci_mtx_oldspl;
 	int	ci_want_resched;
 	int	ci_idepth;
+	struct lwp *ci_fpulwp;		/* current owner of FPU */
 };
 
 extern struct cpu_info cpu_info_store;
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/cputypes.h src/sys/arch/sh3/include/cputypes.h
--- src.orig/sys/arch/sh3/include/cputypes.h	2008-04-29 11:39:26.000000000 +0900
+++ src/sys/arch/sh3/include/cputypes.h	2008-04-29 11:48:25.000000000 +0900
@@ -48,13 +48,14 @@
 #define	CPU_PRODUCT_7708R	3
 #define	CPU_PRODUCT_7709	4
 #define	CPU_PRODUCT_7709A	5
+#define	CPU_PRODUCT_7706	6
 
 /* SH4 series */
-#define	CPU_PRODUCT_7750	6
-#define	CPU_PRODUCT_7750S	7
-#define	CPU_PRODUCT_7750R	8
-#define	CPU_PRODUCT_7751	9
-#define	CPU_PRODUCT_7751R	10
+#define	CPU_PRODUCT_7750	7
+#define	CPU_PRODUCT_7750S	8
+#define	CPU_PRODUCT_7750R	9
+#define	CPU_PRODUCT_7751	10
+#define	CPU_PRODUCT_7751R	11
 
 
 #ifndef _LOCORE
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/fpu.h src/sys/arch/sh3/include/fpu.h
--- src.orig/sys/arch/sh3/include/fpu.h	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/include/fpu.h	2007-02-04 00:36:05.000000000 +0900
@@ -0,0 +1,169 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SH3_FPU_H_
+#define	_SH3_FPU_H_
+
+#if defined(_KERNEL)
+
+struct lwp;
+struct ksiginfo;
+struct trapframe;
+
+void sh_fpu_init(void);
+
+#ifdef SH3
+void sh3_fpu_enable(void);
+void sh3_fpu_save_lwp(struct lwp *, int);
+int sh3_fpu_exception(struct lwp *, struct trapframe *, struct ksiginfo *);
+#endif
+
+#ifdef SH4
+void sh4_fpu_enable(void);
+void sh4_fpu_save_lwp(struct lwp *, int);
+int sh4_fpu_exception(struct lwp *, struct trapframe *, struct ksiginfo *);
+#endif
+
+#ifdef SH4FPE
+void sh4fpe_fpu_enable(void);
+void sh4fpe_fpu_save_lwp(struct lwp *, int);
+int sh4fpe_fpu_exception(struct lwp *, struct trapframe *, struct ksiginfo *);
+#endif
+
+#if defined(SH3) && defined(SH4)
+extern void (*__sh_fpu_enable)(void);
+extern void (*__sh_fpu_save_lwp)(struct lwp *, int);
+extern int (*__sh_fpu_exception)(struct lwp *, struct trapframe *,
+    struct ksiginfo *);
+
+#define	sh_fpu_enable()		(*__sh_fpu_enable)()
+#define	sh_fpu_save_lwp(l, d)	(*__sh_fpu_save_lwp)(l, d)
+#define	sh_fpu_exception(l,t,s)	(*__sh_fpu_exception)(l,t,s)
+#define	CPU_HAS_FPU		(CPU_IS_SH4)
+
+#elif defined(SH3)
+
+#define	sh_fpu_enable()		sh3_fpu_enable()
+#define	sh_fpu_save_lwp(l, d)	sh3_fpu_save_lwp(l, d)
+#define	sh_fpu_exception(l,t,s)	sh3_fpu_exception(l,t,s)
+#define	CPU_HAS_FPU		(0)	/* XXX: SH3E */
+
+#elif defined(SH4)
+
+#define	sh_fpu_enable()		sh4_fpu_enable()
+#define	sh_fpu_save_lwp(l, d)	sh4_fpu_save_lwp(l, d)
+#define	sh_fpu_exception(l,t,s)	sh4_fpu_exception(l,t,s)
+#define	CPU_HAS_FPU		(1)
+
+#endif	/* SH3 && SH4 */
+
+#endif	/* _KERNEL */
+
+#if !defined(__ASSEMBLER__)
+/* FPU control register access */
+static __inline int __unused
+get_fpscr(void)
+{
+	int r;
+
+	__asm volatile ("sts fpscr, %0" : "=r"(r));
+
+	return r;
+}
+
+static __inline void __unused
+set_fpscr(int r)
+{
+
+	__asm volatile ("lds %0, fpscr" :: "r"(r));
+}
+
+static __inline int __unused
+get_fpul(void)
+{
+	int r;
+
+	__asm volatile ("sts fpul, %0" : "=r"(r));
+
+	return r;
+}
+
+static __inline void __unused
+set_fpul(int r)
+{
+
+	__asm volatile ("lds %0, fpul" :: "r"(r));
+}
+#endif	/* !__ASSEMBLER__ */
+
+/*
+ * FPU register definition
+ */
+#define	FPREGS_PER_BANK	0x10
+#define	FP_BANK_BIT	0x10
+
+/* fpscr bit */
+#define	FPSCR_RM	(0x03 << 0)	/* Round mode */
+#define	 RM_NEAREST	(0x00 << 0)	/* nearest (SH4 only) */
+#define	 RM_ZERO	(0x01 << 0)	/* round to zero */
+#define	FPSCR_FLAG	(0x1f << 2)	/* FPU exception flag: VZOUI */
+#define	FPSCR_ENABLE	(0x1f << 7)	/* FPU exception enable: VZOUI */
+#define	FPSCR_CAUSE	(0x3f << 12)	/* FPU exception cause: EVZOUI */
+#define	FPSCR_DN	(0x01 << 18)	/* Denormal mode: 0=denormal (SH4 only), 1=flush to zero */
+#define	FPSCR_PR	(0x01 << 19)	/* precision (SH4 only): 0=float, 1=double */
+#define	FPSCR_SZ	(0x01 << 20)	/* fmov size (SH4 only): 0=32, 1=64 */
+#define	FPSCR_FR	(0x01 << 21)	/* register bank (SH4 only) */
+#define	FPSCR_MASK	(0x003fffff)
+
+/* FPU exception flag/enable/cause bit */
+#define	FP_I_BIT	(1 << 0)	/* inexact result (SH4 only) */
+#define	FP_U_BIT	(1 << 1)	/* underflow (SH4 only) */
+#define	FP_O_BIT	(1 << 2)	/* overflow (SH4 only) */
+#define	FP_Z_BIT	(1 << 3)	/* divide by zero */
+#define	FP_V_BIT	(1 << 4)	/* invalid operation */
+#define	FP_E_BIT	(1 << 5)	/* FPU error (SH4 only) */
+#define	FP_ALL_BIT	(FP_I_BIT|FP_U_BIT|FP_O_BIT|FP_Z_BIT|FP_V_BIT)
+
+/* FPU exception flag/enable/cause shift bits */
+#define	FP_FLAG_SHIFT	2
+#define	FP_ENABLE_SHIFT	7
+#define	FP_CAUSE_SHIFT	12
+
+#define	FP_FLAG_MASK	FP_ALL_BIT
+#define	FP_ENABLE_MASK	FP_ALL_BIT
+#define	FP_CAUSE_MASK	(FP_ALL_BIT|FP_E_BIT)
+
+#define	FP_FLAG(r)	(((r) >> FP_FLAG_SHIFT) & FP_FLAG_MASK)
+#define	FP_ENABLE(r)	(((r) >> FP_ENABLE_SHIFT) & FP_ENABLE_MASK)
+#define	FP_CAUSE(r)	(((r) >> FP_CAUSE_SHIFT) & FP_CAUSE_MASK)
+
+/* fpscr initial value */
+#define	SH3_FPSCR_INIT	(RM_ZERO|FPSCR_DN)
+#define	SH4_FPSCR_INIT	(RM_NEAREST)
+
+#endif /* !_SH3_FPU_H_ */
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/mcontext.h src/sys/arch/sh3/include/mcontext.h
--- src.orig/sys/arch/sh3/include/mcontext.h	2008-04-29 11:39:26.000000000 +0900
+++ src/sys/arch/sh3/include/mcontext.h	2008-04-29 11:48:26.000000000 +0900
@@ -65,9 +65,43 @@
 /* Convenience synonym */
 #define	_REG_SP		_REG_R15
 
+#define	_REG_FPSCR	0
+#define	_REG_FPUL	1
+#define	_REG_FR0	2
+#define	_REG_FR1	3
+#define	_REG_FR2	4
+#define	_REG_FR3	5
+#define	_REG_FR4	6
+#define	_REG_FR5	7
+#define	_REG_FR6	8
+#define	_REG_FR7	9
+#define	_REG_FR8	10
+#define	_REG_FR9	11
+#define	_REG_FR10	12
+#define	_REG_FR11	13
+#define	_REG_FR12	14
+#define	_REG_FR13	15
+#define	_REG_FR14	16
+#define	_REG_FR15	17
+#define	_REG_FR16	18
+#define	_REG_FR17	19
+#define	_REG_FR18	20
+#define	_REG_FR19	21
+#define	_REG_FR20	22
+#define	_REG_FR21	23
+#define	_REG_FR22	24
+#define	_REG_FR23	25
+#define	_REG_FR24	26
+#define	_REG_FR25	27
+#define	_REG_FR26	28
+#define	_REG_FR27	29
+#define	_REG_FR28	30
+#define	_REG_FR29	31
+#define	_REG_FR30	32
+#define	_REG_FR31	33
+
 /*
  * FPU state description.
- * XXX: kernel doesn't support FPU yet, so this is just a placeholder.
  */
 typedef struct {
 	int		__fpr_fpscr;
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/pcb.h src/sys/arch/sh3/include/pcb.h
--- src.orig/sys/arch/sh3/include/pcb.h	2008-04-29 11:39:26.000000000 +0900
+++ src/sys/arch/sh3/include/pcb.h	2008-04-29 11:48:26.000000000 +0900
@@ -33,12 +33,17 @@
 #define	_SH3_PCB_H_
 
 #include <sh3/frame.h>
+#include <sh3/mcontext.h>
 
 struct pcb {
 	struct switchframe pcb_sf;	/* kernel context for resume */
 	void *	pcb_onfault;		/* for copyin/out fault */
 	int	pcb_faultbail;		/* bail out before call uvm_fault. */
+	struct cpu_info * volatile pcb_fpcpu; /* CPU with our FP state */
+	__fpregset_t pcb_fpu;		/* floating point processor state */
+	int pcb_fpu_flags;		/* floating point denormal flag */
 };
 
 extern struct pcb *curpcb;
+
 #endif /* !_SH3_PCB_H_ */
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/proc.h src/sys/arch/sh3/include/proc.h
--- src.orig/sys/arch/sh3/include/proc.h	2008-02-16 07:22:38.000000000 +0900
+++ src/sys/arch/sh3/include/proc.h	2008-02-16 07:28:39.000000000 +0900
@@ -57,7 +57,7 @@
 };
 
 /* md_flags */
-#define	MDP_USEDFPU	0x0001	/* has used the FPU */
+#define	MDL_USEDFPU	0x0001	/* has used the FPU */
 
 struct lwp;
 
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/psl.h src/sys/arch/sh3/include/psl.h
--- src.orig/sys/arch/sh3/include/psl.h	2008-01-05 11:05:53.000000000 +0900
+++ src/sys/arch/sh3/include/psl.h	2008-01-05 11:22:22.000000000 +0900
@@ -45,16 +45,17 @@
 #define	PSL_IMASK	0x000000f0	/* Interrupt Mask bit */
 #define	PSL_QBIT	0x00000100	/* Q bit */
 #define	PSL_MBIT	0x00000200	/* M bit */
+#define	PSL_FDBIT	0x00008000	/* FD bit (SH4 only) */
 #define	PSL_BL		0x10000000	/* Exception Block bit */
 #define	PSL_RB		0x20000000	/* Register Bank bit */
 #define	PSL_MD		0x40000000	/* Processor Mode bit */
                                         /* 1 = kernel, 0 = user */
 
 #define	PSL_MBO		0x00000000	/* must be one bits */
-#define	PSL_MBZ		0x8ffffc0c	/* must be zero bits */
+#define	PSL_MBZ		0x8fff7c0c	/* must be zero bits */
 
 #define	PSL_USERSET	0
-#define	PSL_USERSTATIC	(PSL_BL|PSL_RB|PSL_MD|PSL_IMASK|PSL_MBO|PSL_MBZ)
+#define	PSL_USERSTATIC	(PSL_BL|PSL_RB|PSL_MD|PSL_FDBIT|PSL_IMASK|PSL_MBO|PSL_MBZ)
 
 #define	KERNELMODE(sr)		((sr) & PSL_MD)
 
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/ptrace.h src/sys/arch/sh3/include/ptrace.h
--- src.orig/sys/arch/sh3/include/ptrace.h	2008-01-25 23:45:35.000000000 +0900
+++ src/sys/arch/sh3/include/ptrace.h	2008-02-02 23:48:22.000000000 +0900
@@ -36,8 +36,12 @@
 
 #define	PT_GETREGS	(PT_FIRSTMACH + 1)
 #define	PT_SETREGS	(PT_FIRSTMACH + 2)
+#define	PT_GETFPREGS	(PT_FIRSTMACH + 3)
+#define	PT_SETFPREGS	(PT_FIRSTMACH + 4)
 
 #define PT_MACHDEP_STRINGS \
 	"(unused)", \
 	"PT_GETREGS", \
-	"PT_SETREGS",
+	"PT_SETREGS", \
+	"PT_GETFPREGS", \
+	"PT_SETFPREGS",
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/reg.h src/sys/arch/sh3/include/reg.h
--- src.orig/sys/arch/sh3/include/reg.h	2005-12-11 21:18:58.000000000 +0900
+++ src/sys/arch/sh3/include/reg.h	2007-01-30 23:51:20.000000000 +0900
@@ -99,4 +99,10 @@
 	int r_r0;
 };
 
+struct fpreg {
+	int fpr_fpscr;
+	int fpr_fpul;
+	int fpr_fr[32];
+};
+
 #endif /* !_SH3_REG_H_ */
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/cache_sh3.c src/sys/arch/sh3/sh3/cache_sh3.c
--- src.orig/sys/arch/sh3/sh3/cache_sh3.c	2008-04-29 11:39:27.000000000 +0900
+++ src/sys/arch/sh3/sh3/cache_sh3.c	2008-04-29 11:48:26.000000000 +0900
@@ -79,6 +79,9 @@
 	case CPU_PRODUCT_7709A:
 		cache_size = 16 * 1024;
 		break;
+	case CPU_PRODUCT_7706:
+		cache_size = 16 * 1024;
+		break;
 	}
 
 	r = _reg_read_4(SH3_CCR);
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/exception.c src/sys/arch/sh3/sh3/exception.c
--- src.orig/sys/arch/sh3/sh3/exception.c	2008-02-23 10:46:34.000000000 +0900
+++ src/sys/arch/sh3/sh3/exception.c	2008-02-23 10:52:46.000000000 +0900
@@ -101,6 +101,7 @@
 #include <uvm/uvm_extern.h>
 
 #include <sh3/cpu.h>
+#include <sh3/fpu.h>
 #include <sh3/mmu.h>
 #include <sh3/exception.h>
 #include <sh3/userret.h>
@@ -208,6 +209,17 @@
 		ksi.ksi_addr = (void *)tf->tf_spc;
 		goto trapsignal;
 
+	case EXPEVT_FPU_DISABLE | EXP_USER: /* FALLTHROUGH */
+	case EXPEVT_FPU_SLOT_DISABLE | EXP_USER:
+		sh_fpu_enable();
+		break;
+
+	case EXPEVT_FPU | EXP_USER:
+		KSI_INIT_TRAP(&ksi);
+		if (sh_fpu_exception(l, tf, &ksi))
+			goto do_panic;
+		goto trapsignal;
+
 	default:
 		goto do_panic;
 	}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/fpu.c src/sys/arch/sh3/sh3/fpu.c
--- src.orig/sys/arch/sh3/sh3/fpu.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/sh3/fpu.c	2007-02-01 15:46:34.000000000 +0900
@@ -0,0 +1,70 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <sh3/fpu.h>
+
+#if defined(SH3) && defined(SH4)
+void (*__sh_fpu_enable)(void);
+void (*__sh_fpu_save_lwp)(struct lwp *, int);
+int (*__sh_fpu_exception)(struct lwp *, struct trapframe *, struct ksiginfo *);
+#endif /* SH3 && SH4 */
+
+void
+sh_fpu_init(void)
+{
+
+	/*
+	 * Assign function hooks but only if both SH3 and SH4 are defined.
+	 * They are called directly otherwise.  See <sh3/fpu.h>.
+	 */
+#if defined(SH3) && defined(SH4)
+	if (CPU_IS_SH3) {
+		__sh_fpu_enable = sh3_fpu_enable;
+		__sh_fpu_save_lwp = sh3_fpu_save_lwp;
+		__sh_fpu_exception = sh3_fpu_exception;
+	} else if (CPU_IS_SH4) {
+		__sh_fpu_enable = sh4_fpu_enable;
+		__sh_fpu_save_lwp = sh4_fpu_save_lwp;
+		__sh_fpu_exception = sh4_fpu_exception;
+	} else
+		panic("sh_fpu_init: unknown CPU type");
+#endif /* SH3 && SH4 */
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/fpu_sh3.c src/sys/arch/sh3/sh3/fpu_sh3.c
--- src.orig/sys/arch/sh3/sh3/fpu_sh3.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/sh3/fpu_sh3.c	2007-02-01 15:47:11.000000000 +0900
@@ -0,0 +1,76 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/siginfo.h>
+
+#include <sh3/frame.h>
+#include <sh3/fpu.h>
+
+/*
+ * SH3E FPU
+ */
+
+void
+sh3_fpu_enable(void)
+{
+
+	/* Nothing to do. */
+}
+
+/*ARGSUSED*/
+void
+sh3_fpu_save_lwp(struct lwp *l, int discard)
+{
+
+	panic("sh3_fpu_save_lwp: not supported");
+}
+
+int
+sh3_fpu_exception(struct lwp *l, struct trapframe *tf, struct ksiginfo *ksi)
+{
+
+	(void)l;	/* "l" unused; __unused is an attribute macro, not a statement */
+
+	ksi->ksi_signo = SIGFPE;
+	ksi->ksi_code = FPE_FLTINV;
+	ksi->ksi_addr = (void *)tf->tf_spc;
+
+	return 0;	/* trapsignal */
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/fpu_sh4.c src/sys/arch/sh3/sh3/fpu_sh4.c
--- src.orig/sys/arch/sh3/sh3/fpu_sh4.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/sh3/fpu_sh4.c	2007-02-04 18:49:14.000000000 +0900
@@ -0,0 +1,325 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/user.h>
+
+#include <sh3/fpu.h>
+
+static inline int
+get_sr(void)
+{
+	int r;
+
+	__asm volatile ("stc sr, %0" : "=r"(r));
+
+	return r;
+}
+
+static inline void
+set_sr(int r)
+{
+
+	__asm volatile ("ldc %0, sr" :: "r"(r));
+}
+
+static void
+sh4_fpu_save_regs(__fpregset_t *fp)
+{
+	int fpscr;
+
+	fpscr = get_fpscr();
+
+	/* save FPU register */
+	set_fpscr((fpscr | FPSCR_FR) & ~FPSCR_SZ);
+	__asm volatile (
+		"mov	%0, r4\n\t"
+		"fmov.s	fr15, @-r4\n\t"
+		"fmov.s	fr14, @-r4\n\t"
+		"fmov.s	fr13, @-r4\n\t"
+		"fmov.s	fr12, @-r4\n\t"
+		"fmov.s	fr11, @-r4\n\t"
+		"fmov.s	fr10, @-r4\n\t"
+		"fmov.s	fr9, @-r4\n\t"
+		"fmov.s	fr8, @-r4\n\t"
+		"fmov.s	fr7, @-r4\n\t"
+		"fmov.s	fr6, @-r4\n\t"
+		"fmov.s	fr5, @-r4\n\t"
+		"fmov.s	fr4, @-r4\n\t"
+		"fmov.s	fr3, @-r4\n\t"
+		"fmov.s	fr2, @-r4\n\t"
+		"fmov.s	fr1, @-r4\n\t"
+		"fmov.s	fr0, @-r4\n\t"
+		"frchg\n\t"
+		"fmov.s	fr15, @-r4\n\t"
+		"fmov.s	fr14, @-r4\n\t"
+		"fmov.s	fr13, @-r4\n\t"
+		"fmov.s	fr12, @-r4\n\t"
+		"fmov.s	fr11, @-r4\n\t"
+		"fmov.s	fr10, @-r4\n\t"
+		"fmov.s	fr9, @-r4\n\t"
+		"fmov.s	fr8, @-r4\n\t"
+		"fmov.s	fr7, @-r4\n\t"
+		"fmov.s	fr6, @-r4\n\t"
+		"fmov.s	fr5, @-r4\n\t"
+		"fmov.s	fr4, @-r4\n\t"
+		"fmov.s	fr3, @-r4\n\t"
+		"fmov.s	fr2, @-r4\n\t"
+		"fmov.s	fr1, @-r4\n\t"
+		"fmov.s	fr0, @-r4"
+	    : : "r"(&fp->__fpr_regs[32]) : "r4", "memory");
+
+	/* save FPU control register */
+	fp->__fpr_fpul = get_fpul();
+	fp->__fpr_fpscr = fpscr;
+
+	set_fpscr(fpscr);
+}
+
+static void
+sh4_fpu_load_regs(__fpregset_t *fp)
+{
+	int fpscr;
+
+	fpscr = get_fpscr();
+
+	/* load FPU registers */
+	set_fpscr(fpscr & ~(FPSCR_FR|FPSCR_SZ));
+	__asm volatile (
+		"mov	%0, r4\n\t"
+		"fmov.s	@r4+, fr0\n\t"
+		"fmov.s	@r4+, fr1\n\t"
+		"fmov.s	@r4+, fr2\n\t"
+		"fmov.s	@r4+, fr3\n\t"
+		"fmov.s	@r4+, fr4\n\t"
+		"fmov.s	@r4+, fr5\n\t"
+		"fmov.s	@r4+, fr6\n\t"
+		"fmov.s	@r4+, fr7\n\t"
+		"fmov.s	@r4+, fr8\n\t"
+		"fmov.s	@r4+, fr9\n\t"
+		"fmov.s	@r4+, fr10\n\t"
+		"fmov.s	@r4+, fr11\n\t"
+		"fmov.s	@r4+, fr12\n\t"
+		"fmov.s	@r4+, fr13\n\t"
+		"fmov.s	@r4+, fr14\n\t"
+		"fmov.s	@r4+, fr15\n\t"
+		"frchg\n\t"
+		"fmov.s	@r4+, fr0\n\t"
+		"fmov.s	@r4+, fr1\n\t"
+		"fmov.s	@r4+, fr2\n\t"
+		"fmov.s	@r4+, fr3\n\t"
+		"fmov.s	@r4+, fr4\n\t"
+		"fmov.s	@r4+, fr5\n\t"
+		"fmov.s	@r4+, fr6\n\t"
+		"fmov.s	@r4+, fr7\n\t"
+		"fmov.s	@r4+, fr8\n\t"
+		"fmov.s	@r4+, fr9\n\t"
+		"fmov.s	@r4+, fr10\n\t"
+		"fmov.s	@r4+, fr11\n\t"
+		"fmov.s	@r4+, fr12\n\t"
+		"fmov.s	@r4+, fr13\n\t"
+		"fmov.s	@r4+, fr14\n\t"
+		"fmov.s	@r4+, fr15\n\t"
+	    : : "r"(&fp->__fpr_regs[0]) : "r4", "memory");
+
+	/* load FPU control register */
+	set_fpul(fp->__fpr_fpul);
+	set_fpscr(fp->__fpr_fpscr);
+}
+
+static void
+sh4_fpu_save_cpu(void)
+{
+	struct cpu_info *ci = curcpu();
+	struct lwp *l;
+	struct pcb *pcb;
+	int s;
+	int sr;
+
+	s = _cpu_intr_suspend();
+	sr = get_sr();
+	set_sr(sr & ~PSL_FDBIT);
+
+	l = ci->ci_fpulwp;
+	if (l == NULL)
+		goto out;
+
+	pcb = &l->l_addr->u_pcb;
+
+	sh4_fpu_save_regs(&pcb->pcb_fpu);
+
+	pcb->pcb_fpcpu = NULL;
+	ci->ci_fpulwp = NULL;
+out:
+	set_sr(sr);
+	_cpu_intr_resume(s);
+}
+
+void
+sh4_fpu_enable(void)
+{
+	struct cpu_info *ci = curcpu();
+	struct lwp *l = curlwp;
+	struct pcb *pcb = &l->l_addr->u_pcb;
+	struct trapframe *tf = l->l_md.md_regs;
+	int s;
+	int sr;
+
+	KASSERT(pcb->pcb_fpcpu == NULL);
+	if ((l->l_md.md_flags & MDL_USEDFPU) == 0) {
+		memset(&pcb->pcb_fpu, 0, sizeof(pcb->pcb_fpu));
+		pcb->pcb_fpu.__fpr_fpscr = SH4_FPSCR_INIT;
+		l->l_md.md_flags |= MDL_USEDFPU;
+	}
+
+	/*
+	 * If we own the CPU but FP is disabled, simply enable it and return.
+	 */
+	if (ci->ci_fpulwp == l) {
+		tf->tf_ssr &= ~PSL_FDBIT;
+		return;
+	}
+
+	s = _cpu_intr_suspend();
+	sr = get_sr();
+	set_sr(sr & ~PSL_FDBIT);
+
+	if (ci->ci_fpulwp != NULL) {
+		sh4_fpu_save_cpu();
+	}
+	KASSERT(ci->ci_fpulwp == NULL);
+
+	sh4_fpu_load_regs(&pcb->pcb_fpu);
+
+	tf->tf_ssr &= ~PSL_FDBIT;
+	ci->ci_fpulwp = l;
+	pcb->pcb_fpcpu = ci;
+
+	set_sr(sr);
+	_cpu_intr_resume(s);
+}
+
+void
+sh4_fpu_save_lwp(struct lwp *l, int discard)
+{
+	struct pcb *pcb = &l->l_addr->u_pcb;
+	struct cpu_info * const ci = curcpu();
+
+	/*
+	 * If it's already in the PCB, there's nothing to do.
+	 */
+	if (pcb->pcb_fpcpu == NULL)
+		return;
+
+	/*
+	 * If we simply need to discard the information, then we
+	 * don't need to save anything.
+	 */
+	if (discard) {
+#ifndef MULTIPROCESSOR
+		KASSERT(ci == pcb->pcb_fpcpu);
+#endif
+		KASSERT(l == pcb->pcb_fpcpu->ci_fpulwp);
+		pcb->pcb_fpcpu->ci_fpulwp = NULL;
+		pcb->pcb_fpcpu = NULL;
+		return;
+	}
+
+	/*
+	 * If the state is in the current CPU,
+	 * just flush the current CPU's state.
+	 */
+	if (ci->ci_fpulwp == l) {
+		sh4_fpu_save_cpu();
+		return;
+	}
+
+#ifdef MULTIPROCESSOR
+	/*
+	 * It must be on another CPU, flush it from there.
+	 */
+	/* XXX */
+#endif
+}
+
+int
+sh4_fpu_exception(struct lwp *l, struct trapframe *tf, struct ksiginfo *ksi)
+{
+	static const int cause2sigcode[6] = {
+		FPE_FLTRES,	/* FP_I_BIT */
+		FPE_FLTUND,	/* FP_U_BIT */
+		FPE_FLTOVF,	/* FP_O_BIT */
+		FPE_FLTDIV,	/* FP_Z_BIT */
+		FPE_FLTINV,	/* FP_V_BIT */
+		FPE_FLTRES	/* FP_E_BIT */
+	};
+	struct pcb *pcb = &l->l_addr->u_pcb;
+	int fpscr;
+	int cause;
+	int i;
+
+	fpscr = get_fpscr();
+
+	cause = FP_CAUSE(fpscr);
+	cause &= FP_ENABLE(fpscr) | FP_E_BIT;
+
+#ifdef DEBUG
+	printf("fpscr = %x, cause = %x\n", fpscr, cause);
+#endif
+
+	ksi->ksi_signo = SIGFPE;
+	ksi->ksi_addr = (void *)tf->tf_spc;
+
+	for (i = 0; i < __arraycount(cause2sigcode); i++) {
+		if (cause & (1 << i)) {
+			ksi->ksi_code = cause2sigcode[i];
+			if (i == 5) {
+				/* FP_E_BIT: denormal exception */
+				pcb->pcb_fpu_flags |= FP_E_BIT;
+			}
+			break;
+		}
+	}
+	if (i == __arraycount(cause2sigcode)) {
+		ksi->ksi_code = FPE_FLTINV;
+	}
+
+	return 0;
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/interrupt.c src/sys/arch/sh3/sh3/interrupt.c
--- src.orig/sys/arch/sh3/sh3/interrupt.c	2008-04-29 11:39:27.000000000 +0900
+++ src/sys/arch/sh3/sh3/interrupt.c	2008-04-29 11:48:26.000000000 +0900
@@ -76,6 +76,7 @@
 #ifdef SH3
 	case CPU_PRODUCT_7709:
 	case CPU_PRODUCT_7709A:
+	case CPU_PRODUCT_7706:
 		_reg_write_2(SH7709_IPRC, 0);
 		_reg_write_2(SH7709_IPRD, 0);
 		_reg_write_2(SH7709_IPRE, 0);
@@ -127,7 +128,7 @@
 	/* Priority */
 	intc_intr_priority(evtcode, level);
 
-	/* Sense select (SH7709, SH7709A only) XXX notyet */
+	/* Sense select (SH7709, SH7709A, SH7706 only) XXX notyet */
 
 	return (ih);
 }
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/process_machdep.c src/sys/arch/sh3/sh3/process_machdep.c
--- src.orig/sys/arch/sh3/sh3/process_machdep.c	2007-03-04 15:00:41.000000000 +0900
+++ src/sys/arch/sh3/sh3/process_machdep.c	2007-03-05 09:16:46.000000000 +0900
@@ -110,6 +110,7 @@
 #include <sys/vnode.h>
 #include <sys/ptrace.h>
 
+#include <sh3/fpu.h>
 #include <machine/psl.h>
 #include <machine/reg.h>
 
@@ -189,6 +190,46 @@
 }
 
 int
+process_read_fpregs(struct lwp *l, struct fpreg *fpregs)
+{
+	__fpregset_t *fp;
+	int i;
+
+	if (CPU_HAS_FPU) {
+		fp = &l->l_md.md_pcb->pcb_fpu;	/* NOTE(review): with lazy FPU, live state may still be in the FPU registers -- should this sh_fpu_save_lwp(l, 0) first? confirm */
+
+		fpregs->fpr_fpscr = fp->__fpr_fpscr;
+		fpregs->fpr_fpul = fp->__fpr_fpul;
+		for (i = 0; i < __arraycount(fpregs->fpr_fr); i++) {
+			fpregs->fpr_fr[i] = fp->__fpr_regs[i];
+		}
+	} else {
+		memset(fpregs, 0, sizeof(struct fpreg));
+	}
+
+	return (0);
+}
+
+int
+process_write_fpregs(struct lwp *l, const struct fpreg *fpregs)
+{
+	__fpregset_t *fp;
+	int i;
+
+	if (CPU_HAS_FPU) {
+		fp = &l->l_md.md_pcb->pcb_fpu;	/* NOTE(review): should discard live FPU state (sh_fpu_save_lwp(l, 1)) so a later lazy save cannot clobber this write -- confirm */
+
+		fp->__fpr_fpscr = fpregs->fpr_fpscr;
+		fp->__fpr_fpul = fpregs->fpr_fpul;
+		for (i = 0; i < __arraycount(fp->__fpr_regs); i++) {
+			fp->__fpr_regs[i] = fpregs->fpr_fr[i];
+		}
+	}
+
+	return (0);
+}
+
+int
 process_sstep(struct lwp *l, int sstep)
 {
 
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/sh3_machdep.c src/sys/arch/sh3/sh3/sh3_machdep.c
--- src.orig/sys/arch/sh3/sh3/sh3_machdep.c	2008-04-29 11:39:28.000000000 +0900
+++ src/sys/arch/sh3/sh3/sh3_machdep.c	2008-04-29 11:48:27.000000000 +0900
@@ -100,6 +100,7 @@
 #include <sh3/cache.h>
 #include <sh3/clock.h>
 #include <sh3/exception.h>
+#include <sh3/fpu.h>
 #include <sh3/locore.h>
 #include <sh3/mmu.h>
 #include <sh3/intr.h>
@@ -160,6 +161,9 @@
 	/* MMU access ops. */
 	sh_mmu_init();
 
+	/* FPU access ops. */
+	sh_fpu_init();
+
 	/* Hardclock, RTC initialize. */
 	machine_clock_init();
 
@@ -579,6 +583,7 @@
 	unsigned int *flags;
 {
 	const struct trapframe *tf = l->l_md.md_regs;
+	const struct pcb *pcb = &l->l_addr->u_pcb;
 	__greg_t *gr = mcp->__gregs;
 	__greg_t ras_pc;
 
@@ -612,8 +617,12 @@
 
 	*flags |= _UC_CPU;
 
-	/* FPU context is currently not handled by the kernel. */
-	memset(&mcp->__fpregs, 0, sizeof (mcp->__fpregs));
+	if ((l->l_md.md_flags & MDL_USEDFPU) != 0) {
+		sh_fpu_save_lwp(l, 0);
+		memcpy(&mcp->__fpregs, &pcb->pcb_fpu, sizeof(mcp->__fpregs));
+		*flags |= _UC_FPU;
+	} else
+		memset(&mcp->__fpregs, 0, sizeof (mcp->__fpregs));
 }
 
 int
@@ -623,6 +632,7 @@
 	unsigned int flags;
 {
 	struct trapframe *tf = l->l_md.md_regs;
+	struct pcb *pcb = &l->l_addr->u_pcb;
 	const __greg_t *gr = mcp->__gregs;
 	struct proc *p = l->l_proc;
 
@@ -656,12 +666,11 @@
 		tf->tf_r15    = gr[_REG_R15];
 	}
 
-#if 0
-	/* XXX: FPU context is currently not handled by the kernel. */
-	if (flags & _UC_FPU) {
-		/* TODO */;
+	if ((flags & _UC_FPU) != 0) {
+		sh_fpu_save_lwp(l, 1);
+		memcpy(&pcb->pcb_fpu, &mcp->__fpregs, sizeof(pcb->pcb_fpu));
+		l->l_md.md_flags |= MDL_USEDFPU;
 	}
-#endif
 
 	mutex_enter(p->p_lock);
 	if (flags & _UC_SETSTACK)
@@ -681,7 +689,7 @@
 {
 	struct trapframe *tf;
 
-	l->l_md.md_flags &= ~MDP_USEDFPU;
+	l->l_md.md_flags &= ~MDL_USEDFPU;
 
 	tf = l->l_md.md_regs;
 
@@ -702,6 +710,8 @@
 	tf->tf_r14 = 0;
 	tf->tf_spc = pack->ep_entry;
 	tf->tf_ssr = PSL_USERSET;
+	if (CPU_IS_SH4)
+		tf->tf_ssr |= PSL_FDBIT;	/* disable FPU */
 	tf->tf_r15 = stack;
 }
 
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/csu/sh3_elf/crt0.c src/lib/csu/sh3_elf/crt0.c
--- src.orig/lib/csu/sh3_elf/crt0.c	2004-08-27 06:16:41.000000000 +0900
+++ src/lib/csu/sh3_elf/crt0.c	2007-02-04 00:15:16.000000000 +0900
@@ -92,3 +92,12 @@
 #endif /* LIBC_SCCS and not lint */
 
 #include "common.c"
+
+#ifdef __SH4__
+#include <sh3/fpu.h>
+
+int __fpscr_values[2] = {
+	SH4_FPSCR_INIT & ~(FPSCR_PR | FPSCR_SZ),	/* float */
+	(SH4_FPSCR_INIT | FPSCR_PR) & ~(FPSCR_SZ)	/* double */
+};
+#endif
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/Makefile.inc src/lib/libc/arch/sh3/Makefile.inc
--- src.orig/lib/libc/arch/sh3/Makefile.inc	2006-06-18 03:04:23.000000000 +0900
+++ src/lib/libc/arch/sh3/Makefile.inc	2007-02-26 22:57:38.000000000 +0900
@@ -2,5 +2,7 @@
 
 SRCS+=	__sigaction14_sigtramp.c __sigtramp2.S
 
+#.if "${MKSOFTFLOAT}" != "no"
 CPPFLAGS+=      -DSOFTFLOAT # -DSOFTFLOAT_NEED_FIXUNS
 .include <softfloat/Makefile.inc>
+#.endif
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/Makefile.inc src/lib/libc/arch/sh3/gen/Makefile.inc
--- src.orig/lib/libc/arch/sh3/gen/Makefile.inc	2006-06-28 23:46:32.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/Makefile.inc	2007-02-26 22:57:38.000000000 +0900
@@ -18,6 +18,11 @@
 
 SRCS+=	nanf.c
 
+#.if "${MKSOFTFLOAT}" == "no"
+#SRCS+=	fpgetmask.c fpgetround.c fpgetsticky.c \
+#	fpsetmask.c fpsetround.c fpsetsticky.c
+#.endif
+
 SRCS.sh3.gen=	Lint__setjmp.c Lint___setjmp14.c Lint___sigsetjmp14.c \
 		Lint_swapcontext.c
 
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/fabs.c src/lib/libc/arch/sh3/gen/fabs.c
--- src.orig/lib/libc/arch/sh3/gen/fabs.c	2006-05-21 11:51:15.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/fabs.c	2007-02-04 00:35:25.000000000 +0900
@@ -35,12 +35,26 @@
  * fabs(x) returns the absolute value of x.
  */
 
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD$");
+#endif /* LIBC_SCCS and not lint */
+
+#include <sh3/fpu.h>
+
 double fabs(double x);
 
 double
 fabs(double x)
 {
+#if defined(SOFTFLOAT) || !defined(__SH4__)
 	if (x < 0)
 		x = -x;
+#else	/* !SOFTFLOAT && __SH4__ */
+	extern int __fpscr_values[2];
+
+	set_fpscr(__fpscr_values[1]);
+	__asm volatile ("fabs %0" : "=f"(x) : "0"(x));
+#endif	/* SOFTFLOAT || !__SH4__*/
 	return(x);
 }
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/fpgetmask.c src/lib/libc/arch/sh3/gen/fpgetmask.c
--- src.orig/lib/libc/arch/sh3/gen/fpgetmask.c	1970-01-01 09:00:00.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/fpgetmask.c	2007-01-30 22:09:10.000000000 +0900
@@ -0,0 +1,78 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD$");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+
+#include <ieeefp.h>
+#include <sh3/fpu.h>
+
+#ifdef __weak_alias
+__weak_alias(fpgetmask,_fpgetmask)
+#endif
+
+fp_except
+fpgetmask(void)
+{
+	fp_except mask = 0;
+	int r, e;
+
+	r = get_fpscr();
+
+#ifdef	__SH4__
+	if ((r & FPSCR_DN) == 0)
+		mask |= FP_X_DNML;
+#endif
+
+	e = (r >> FP_ENABLE_SHIFT) & FP_ENABLE_MASK;
+#ifdef	__SH4__
+	if (e & FP_I_BIT)
+		mask |= FP_X_IMP;
+	if (e & FP_U_BIT)
+		mask |= FP_X_UFL;
+	if (e & FP_O_BIT)
+		mask |= FP_X_OFL;
+#endif
+	if (e & FP_Z_BIT)
+		mask |= FP_X_DZ;
+	if (e & FP_V_BIT)
+		mask |= FP_X_INV;
+
+	return mask;
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/fpgetround.c src/lib/libc/arch/sh3/gen/fpgetround.c
--- src.orig/lib/libc/arch/sh3/gen/fpgetround.c	1970-01-01 09:00:00.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/fpgetround.c	2007-01-30 22:09:13.000000000 +0900
@@ -0,0 +1,63 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD$");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+
+#include <ieeefp.h>
+#include <sh3/fpu.h>
+
+#ifdef __weak_alias
+__weak_alias(fpgetround,_fpgetround)
+#endif
+
+fp_rnd
+fpgetround(void)
+{
+#ifdef	__SH4__
+	int r;
+
+	r = get_fpscr();
+
+	r &= FPSCR_RM;
+	if (r == RM_NEAREST)
+		return FP_RN;
+#endif
+	return FP_RZ;
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/fpgetsticky.c src/lib/libc/arch/sh3/gen/fpgetsticky.c
--- src.orig/lib/libc/arch/sh3/gen/fpgetsticky.c	1970-01-01 09:00:00.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/fpgetsticky.c	2007-02-01 16:16:58.000000000 +0900
@@ -0,0 +1,78 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD$");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+
+#include <ieeefp.h>
+#include <sh3/fpu.h>
+
+#ifdef __weak_alias
+__weak_alias(fpgetsticky,_fpgetsticky)
+#endif
+
+fp_except
+fpgetsticky(void)
+{
+	fp_except flags = 0;
+	int r, f;
+
+	r = get_fpscr();
+
+#if 0	/* SH does not have a denormal flag in fpscr. */
+	if (0)
+		flags |= FP_X_DNML;
+#endif
+
+	f = (r >> FP_FLAG_SHIFT) & FP_FLAG_MASK;
+#ifdef	__SH4__
+	if (f & FP_I_BIT)
+		flags |= FP_X_IMP;
+	if (f & FP_U_BIT)
+		flags |= FP_X_UFL;
+	if (f & FP_O_BIT)
+		flags |= FP_X_OFL;
+#endif
+	if (f & FP_Z_BIT)
+		flags |= FP_X_DZ;
+	if (f & FP_V_BIT)
+		flags |= FP_X_INV;
+
+	return flags;
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/fpsetmask.c src/lib/libc/arch/sh3/gen/fpsetmask.c
--- src.orig/lib/libc/arch/sh3/gen/fpsetmask.c	1970-01-01 09:00:00.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/fpsetmask.c	2007-02-04 00:38:15.000000000 +0900
@@ -0,0 +1,111 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD$");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+
+#include <ieeefp.h>
+#include <sh3/fpu.h>
+
+#ifdef __weak_alias
+__weak_alias(fpsetmask,_fpsetmask)
+#endif
+
+void __set_fpscr(int fpscr);
+
+fp_except
+fpsetmask(mask)
+	fp_except mask;
+{
+	fp_except old = 0;
+	int o, n, e;
+	int b = 0;
+
+	o = get_fpscr();
+
+	/* new mask */
+	n = o;
+	n &= ~(FP_ENABLE_MASK << FP_ENABLE_SHIFT);
+
+#ifdef	__SH4__
+	if (mask & FP_X_DNML)
+		n &= ~FPSCR_DN;
+	else
+#endif
+		n |= FPSCR_DN;
+
+#ifdef	__SH4__
+	if (mask & FP_X_IMP)
+		b |= FP_I_BIT;
+	if (mask & FP_X_UFL)
+		b |= FP_U_BIT;
+	if (mask & FP_X_OFL)
+		b |= FP_O_BIT;
+#endif
+	if (mask & FP_X_DZ)
+		b |= FP_Z_BIT;
+	if (mask & FP_X_INV)
+		b |= FP_V_BIT;
+	n |= (b << FP_ENABLE_SHIFT);	/* enable FPU exception */
+	n &= ~(b << FP_FLAG_SHIFT);	/* clear FPU exception flags */
+
+	__set_fpscr(n);
+
+	/* old mask */
+#ifdef	__SH4__
+	if ((o & FPSCR_DN) == 0)	/* DN */
+		old |= FP_X_DNML;
+#endif
+
+	e = (o >> FP_ENABLE_SHIFT) & FP_ENABLE_MASK;
+#ifdef	__SH4__
+	if (e & FP_I_BIT)
+		old |= FP_X_IMP;
+	if (e & FP_U_BIT)
+		old |= FP_X_UFL;
+	if (e & FP_O_BIT)
+		old |= FP_X_OFL;
+#endif
+	if (e & FP_Z_BIT)
+		old |= FP_X_DZ;
+	if (e & FP_V_BIT)
+		old |= FP_X_INV;
+
+	return old;
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/fpsetround.c src/lib/libc/arch/sh3/gen/fpsetround.c
--- src.orig/lib/libc/arch/sh3/gen/fpsetround.c	1970-01-01 09:00:00.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/fpsetround.c	2007-02-04 00:38:38.000000000 +0900
@@ -0,0 +1,88 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD$");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+
+#include <ieeefp.h>
+#include <sh3/fpu.h>
+
+#ifdef __weak_alias
+__weak_alias(fpsetround,_fpsetround)
+#endif
+
+void __set_fpscr(int fpscr);
+
+fp_rnd
+fpsetround(rnd_dir)
+	fp_rnd rnd_dir;
+{
+	int old, new;
+	int r;
+
+	old = get_fpscr();
+
+	/* new dir */
+	new = old & ~FPSCR_RM;
+
+	switch (rnd_dir) {
+	case FP_RN:	/* FALLTHROUGH on SH3: only round-to-zero is supported */
+#ifdef	__SH4__
+		r = RM_NEAREST;
+		break;
+#endif
+	case FP_RZ:
+	case FP_RM:
+	case FP_RP:
+	default:
+		r = RM_ZERO;
+		break;
+	}
+	new |= r;
+
+	__set_fpscr(new);
+
+	/* old dir */
+#ifdef	__SH4__
+	old &= FPSCR_RM;
+	if (old == RM_NEAREST)
+		return FP_RN;
+#endif
+	return FP_RZ;
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/fpsetsticky.c src/lib/libc/arch/sh3/gen/fpsetsticky.c
--- src.orig/lib/libc/arch/sh3/gen/fpsetsticky.c	1970-01-01 09:00:00.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/fpsetsticky.c	2007-02-04 00:38:25.000000000 +0900
@@ -0,0 +1,103 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD$");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+
+#include <ieeefp.h>
+#include <sh3/fpu.h>
+
+#ifdef __weak_alias
+__weak_alias(fpsetsticky,_fpsetsticky)
+#endif
+
+void __set_fpscr(int fpscr);
+
+fp_except
+fpsetsticky(sticky)
+	fp_except sticky;
+{
+	fp_except old = 0;
+	int o, n, f;
+	int b = 0;
+
+	o = get_fpscr();
+
+	/* new flags */
+	n = o;
+
+#ifdef	__SH4__
+	if (sticky & FP_X_IMP)
+		b |= FP_I_BIT;
+	if (sticky & FP_X_UFL)
+		b |= FP_U_BIT;
+	if (sticky & FP_X_OFL)
+		b |= FP_O_BIT;
+#endif
+	if (sticky & FP_X_DZ)
+		b |= FP_Z_BIT;
+	if (sticky & FP_X_INV)
+		b |= FP_V_BIT;
+
+	n &= ~(b << FP_FLAG_SHIFT);
+
+	__set_fpscr(n);
+
+	/* old flags */
+#if 0	/* SH doesn't have a Denormal flag in fpscr. */
+	if (0)
+		old |= FP_X_DNML;
+#endif
+
+	f = (o >> FP_FLAG_SHIFT) & FP_FLAG_MASK;
+#ifdef	__SH4__
+	if (f & FP_I_BIT)
+		old |= FP_X_IMP;
+	if (f & FP_U_BIT)
+		old |= FP_X_UFL;
+	if (f & FP_O_BIT)
+		old |= FP_X_OFL;
+#endif
+	if (f & FP_Z_BIT)
+		old |= FP_X_DZ;
+	if (f & FP_V_BIT)
+		old |= FP_X_INV;
+
+	return old;
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/nanf.c src/lib/libc/arch/sh3/gen/nanf.c
--- src.orig/lib/libc/arch/sh3/gen/nanf.c	2005-04-16 07:39:11.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/nanf.c	2007-01-29 17:14:01.000000000 +0900
@@ -10,8 +10,16 @@
 
 /* bytes for quiet NaN (IEEE single precision) */
 const union __float_u __nanf =
+#if defined(SOFTFLOAT)
 #if BYTE_ORDER == BIG_ENDIAN
 		{ { 0x7f, 0xa0,    0,    0 } };
 #else
 		{ {    0,    0, 0xa0, 0x7f } };
 #endif
+#else	/* !SOFTFLOAT */
+#if BYTE_ORDER == BIG_ENDIAN
+		{ { 0x7f, 0xbf, 0xff, 0xff } };
+#else
+		{ { 0xff, 0xff, 0xbf, 0x7f } };
+#endif
+#endif	/* SOFTFLOAT */
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libpthread/arch/sh3/_context_u.S src/lib/libpthread/arch/sh3/_context_u.S
--- src.orig/lib/libpthread/arch/sh3/_context_u.S	2006-01-05 02:44:53.000000000 +0900
+++ src/lib/libpthread/arch/sh3/_context_u.S	2007-02-04 00:39:44.000000000 +0900
@@ -36,6 +36,75 @@
  */
 #define NOTREACHED	trapa #0xc3
 
+/*
+ * FPU context save/restore
+ */
+#ifdef __SH4__
+#define	FPUFLAG	_UC_FPU
+#define	GETFP(uc)							  \
+	mov.l	.L_uc_fpregs_offset, r1					; \
+	add	uc, r1		/* uc->uc_mcontext.__fpregs */		; \
+									  \
+	sts	fpscr, r0						; \
+	sts	fpul, r2						; \
+	mov.l	r0, @((_REG_FPSCR * 4), r1)				; \
+	mov.l	r2, @((_REG_FPUL * 4), r1)				; \
+									  \
+	mov	#8, r2							; \
+	swap.w	r2, r2				              		; \
+	shll2	r2				/* FPSCR_FR */		; \
+	tst	r2, r0							; \
+	bf/s	1f							; \
+	 add	#(_REG_FR0 * 4), r1					; \
+	add	#(16 * 4), r1			/* bank1 */		; \
+1:	shlr	r2				/* FPSCR_SZ */		; \
+	not	r2, r2							; \
+	and	r0, r2							; \
+	lds	r2, fpscr						; \
+	fmov.s	fr12, @((12 * 4), r1)					; \
+	fmov.s	fr13, @((13 * 4), r1)					; \
+	fmov.s	fr14, @((14 * 4), r1)					; \
+	fmov.s	fr15, @((15 * 4), r1)					; \
+	lds	r0, fpscr
+
+#define	SETFP(uc)							  \
+	mov.l	@(UC_FLAGS, uc), r0					; \
+	mov.l	.L_uc_fpuflags, r2					; \
+	tst	r2, r0							; \
+	bf	.Lsetfp_end						; \
+									  \
+	mov.l	.L_uc_fpregs_offset, r1					; \
+	add	uc, r1		/* uc->uc_mcontext.__fpregs */		; \
+									  \
+	mov.l	@((_REG_FPSCR * 4), r1), r0				; \
+	mov.l	@((_REG_FPUL * 4), r1), r2				; \
+	lds	r2, fpul						; \
+									  \
+	mov	#8, r2							; \
+	swap.w	r2, r2				              		; \
+	shll2	r2				/* FPSCR_FR */		; \
+	tst	r2, r0							; \
+	bf/s	2f							; \
+	 add	#(_REG_FR0 * 4), r1					; \
+	add	#(16 * 4), r1			/* bank1 */		; \
+2:	shlr	r2				/* FPSCR_SZ */		; \
+	not	r2, r2							; \
+	and	r0, r2							; \
+	lds	r2, fpscr						; \
+	fmov.s	@((12 * 4), r1), fr12					; \
+	fmov.s	@((13 * 4), r1), fr13					; \
+	fmov.s	@((14 * 4), r1), fr14					; \
+	fmov.s	@((15 * 4), r1), fr15					; \
+	lds	r0, fpscr						; \
+									  \
+	mov.l	.L_uc_gregs_offset, r1					; \
+	add	uc, r1		/* uc->uc_mcontext.__gregs */		; \
+.Lsetfp_end:
+#else	/* !__SH4__ */
+#define	FPUFLAG	0
+#define	GETFP(uc)
+#define	SETFP(uc)
+#endif	/* __SH4__ */
 
 /*
  * Only save/restore registers that are callee saved, i.e for which
@@ -64,13 +133,12 @@
 	sts	macl, r0						; \
 	mov.l	r0, @((_REG_MACL * 4), r1)				; \
 									  \
-	mov.l	.L_uc_flags, r0						; \
-									  \
 	add	#(_REG_R15 * 4), r1					; \
 	mov.l	r15, @r1						; \
 									  \
-	/* XXX:	FP registers fr12..fr15? */				  \
+	GETFP(uc)							; \
 									  \
+	mov.l	.L_uc_flags, r0						; \
 	mov.l	r0, @(UC_FLAGS, uc)
 
 
@@ -99,7 +167,7 @@
 	lds	r0, mach						; \
 	lds	r2, macl						; \
 									  \
-	/* XXX:	FP registers fr12..fr15? */				  \
+	SETFP(uc)							; \
 									  \
 	mov.l	@((_REG_PR * 4), r1), r0				; \
 	mov.l	@((_REG_PC * 4), r1), r2				; \
@@ -186,8 +254,12 @@
 
 	.align	2
 .L_uc_gregs_offset:	.long	UC_REGS
-.L_uc_flags:		.long	_UC_USER | _UC_CPU
+.L_uc_fpregs_offset:	.long	UC_FPREGS
+.L_uc_flags:		.long	_UC_USER | _UC_CPU | FPUFLAG
 .L_uc_user:		.long	_UC_USER
+#ifdef __SH4__
+.L_uc_fpuflags:		.long	FPUFLAG
+#endif
 #ifndef PIC
 .L_setcontext:		.long	_C_LABEL(setcontext)
 #endif
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libpthread/arch/sh3/genassym.cf src/lib/libpthread/arch/sh3/genassym.cf
--- src.orig/lib/libpthread/arch/sh3/genassym.cf	2008-04-29 11:37:22.000000000 +0900
+++ src/lib/libpthread/arch/sh3/genassym.cf	2008-04-29 11:46:38.000000000 +0900
@@ -72,3 +72,38 @@
 define	_REG_R0		_REG_R0
 define	_REG_R15	_REG_R15
 define	_REG_SP		_REG_SP
+
+define	_REG_FPSCR	_REG_FPSCR
+define	_REG_FPUL	_REG_FPUL
+define	_REG_FR0	_REG_FR0
+define	_REG_FR1	_REG_FR1
+define	_REG_FR2	_REG_FR2
+define	_REG_FR3	_REG_FR3
+define	_REG_FR4	_REG_FR4
+define	_REG_FR5	_REG_FR5
+define	_REG_FR6	_REG_FR6
+define	_REG_FR7	_REG_FR7
+define	_REG_FR8	_REG_FR8
+define	_REG_FR9	_REG_FR9
+define	_REG_FR10	_REG_FR10
+define	_REG_FR11	_REG_FR11
+define	_REG_FR12	_REG_FR12
+define	_REG_FR13	_REG_FR13
+define	_REG_FR14	_REG_FR14
+define	_REG_FR15	_REG_FR15
+define	_REG_FR16	_REG_FR16
+define	_REG_FR17	_REG_FR17
+define	_REG_FR18	_REG_FR18
+define	_REG_FR19	_REG_FR19
+define	_REG_FR20	_REG_FR20
+define	_REG_FR21	_REG_FR21
+define	_REG_FR22	_REG_FR22
+define	_REG_FR23	_REG_FR23
+define	_REG_FR24	_REG_FR24
+define	_REG_FR25	_REG_FR25
+define	_REG_FR26	_REG_FR26
+define	_REG_FR27	_REG_FR27
+define	_REG_FR28	_REG_FR28
+define	_REG_FR29	_REG_FR29
+define	_REG_FR30	_REG_FR30
+define	_REG_FR31	_REG_FR31
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libpthread/arch/sh3/pthread_md.h src/lib/libpthread/arch/sh3/pthread_md.h
--- src.orig/lib/libpthread/arch/sh3/pthread_md.h	2008-02-16 07:21:54.000000000 +0900
+++ src/lib/libpthread/arch/sh3/pthread_md.h	2008-02-16 07:27:56.000000000 +0900
@@ -76,7 +76,7 @@
 	(uc)->uc_flags = ((uc)->uc_flags | _UC_CPU) & ~_UC_USER;	\
 	} while (/*CONSTCOND*/0)
 
-#if 0 /* no struct fpreg!!! */
+#ifdef __SH4__
 #define PTHREAD_UCONTEXT_TO_FPREG(freg, uc)       			\
 	memcpy((freg), &(uc)->uc_mcontext.__fpregs, sizeof(*(freg)));
 
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/libexec/ld.elf_so/arch/sh3/rtld_start.S src/libexec/ld.elf_so/arch/sh3/rtld_start.S
--- src.orig/libexec/ld.elf_so/arch/sh3/rtld_start.S	2008-04-29 11:37:25.000000000 +0900
+++ src/libexec/ld.elf_so/arch/sh3/rtld_start.S	2008-04-29 11:46:43.000000000 +0900
@@ -30,6 +30,7 @@
  */
 
 #include <machine/asm.h>
+#include <sh3/fpu.h>
 
 	.text
 	.align	2
@@ -93,6 +94,25 @@
 	mov.l	r7,@-r15
 	sts.l	mach,@-r15
 	sts.l	macl,@-r15
+#if defined(__SH4__)
+	sts	fpscr,r4
+	mov	#8,r5
+	swap.w	r5,r5
+	shll	r5		/* FPSCR_SZ */
+	not	r5,r5
+	and	r4,r5
+	lds	r5,fpscr
+	fmov.s	fr11,@-r15
+	fmov.s	fr10,@-r15
+	fmov.s	fr9,@-r15
+	fmov.s	fr8,@-r15
+	fmov.s	fr7,@-r15
+	fmov.s	fr6,@-r15
+	fmov.s	fr5,@-r15
+	fmov.s	fr4,@-r15
+	sts.l	fpul,@-r15
+	mov.l	r4,@-r15
+#endif
 	sts.l	pr,@-r15
 
 	mov	r0,r4		/* copy of obj */
@@ -101,6 +121,26 @@
 	 mov	r1,r5		/* copy of reloff */
 4:
 	lds.l	@r15+,pr	/* restore registers */
+#if defined(__SH4__)
+	mov.l	@r15+,r4
+	lds	r4,fpscr
+	mov	#8,r5
+	swap.w	r5,r5
+	shll	r5		/* FPSCR_SZ */
+	not	r5,r5
+	and	r4,r5
+	lds	r5,fpscr
+	lds.l	@r15+,fpul
+	fmov.s	@r15+,fr4
+	fmov.s	@r15+,fr5
+	fmov.s	@r15+,fr6
+	fmov.s	@r15+,fr7
+	fmov.s	@r15+,fr8
+	fmov.s	@r15+,fr9
+	fmov.s	@r15+,fr10
+	fmov.s	@r15+,fr11
+	lds	r4,fpscr
+#endif
 	lds.l	@r15+,macl
 	lds.l	@r15+,mach
 	mov.l	@r15+,r7
--- src.orig/gnu/dist/gcc4/gcc/config/sh/lib1funcs.asm	2006-04-20 18:49:36.000000000 +0900
+++ src/gnu/dist/gcc4/gcc/config/sh/lib1funcs.asm	2007-01-21 01:10:31.000000000 +0900
@@ -41,8 +41,13 @@
 #ifdef __ELF__
 #define LOCAL(X)	.L_##X
 #define FUNC(X)		.type X,@function
+#ifdef PIC
 #define HIDDEN_FUNC(X)	FUNC(X); .hidden X
 #define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X)
+#else
+#define HIDDEN_FUNC(X)	FUNC(X);
+#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y);
+#endif
 #define ENDFUNC0(X)	.Lfe_##X: .size X,.Lfe_##X-X
 #define ENDFUNC(X)	ENDFUNC0(X)
 #else
@@ -1035,7 +1040,7 @@
 #ifdef L_sdivsi3
 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
    sh2e/sh3e code.  */
-#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
+#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) || defined (__NetBSD__)
 !!
 !! Steve Chamberlain
 !! sac@cygnus.com
@@ -1486,7 +1491,7 @@
 #ifdef L_udivsi3
 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
    sh2e/sh3e code.  */
-#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
+#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) || defined(__NetBSD__)
 
 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
 	.global	GLOBAL(udivsi3)
--- src.orig/distrib/sets/lists/comp/ad.sh3	2007-02-10 07:22:32.000000000 +0900
+++ src/distrib/sets/lists/comp/ad.sh3	2007-02-10 11:15:00.000000000 +0900
@@ -17,6 +17,7 @@
 ./usr/include/sh3/endian.h			comp-c-include
 ./usr/include/sh3/endian_machdep.h		comp-c-include
 ./usr/include/sh3/float.h			comp-c-include
+./usr/include/sh3/fpu.h				comp-c-include
 ./usr/include/sh3/frame.h			comp-c-include
 ./usr/include/sh3/ieee.h			comp-c-include
 ./usr/include/sh3/ieeefp.h			comp-c-include
