// file kernel/n/x86-64/montgomery.S: Montgomery modular exponentiation
/*-----------------------------------------------------------------------+
 |  Copyright 2005-2006, Michel Quercia (michel.quercia@prepas.org)      |
 |                                                                       |
 |  This file is part of Numerix. Numerix is free software; you can      |
 |  redistribute it and/or modify it under the terms of the GNU Lesser   |
 |  General Public License as published by the Free Software Foundation; |
 |  either version 2.1 of the License, or (at your option) any later     |
 |  version.                                                             |
 |                                                                       |
 |  The Numerix Library is distributed in the hope that it will be       |
 |  useful, but WITHOUT ANY WARRANTY; without even the implied warranty  |
 |  of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU  |
 |  Lesser General Public License for more details.                      |
 |                                                                       |
 |  You should have received a copy of the GNU Lesser General Public     |
 |  License along with the GNU MP Library; see the file COPYING. If not, |
 |  write to the Free Software Foundation, Inc., 59 Temple Place -       |
 |  Suite 330, Boston, MA 02111-1307, USA.                               |
 +-----------------------------------------------------------------------+
 |                                                                       |
 |                   Exponentiation modulaire de Montgomery              |
 |                                                                       |
 +-----------------------------------------------------------------------*/

                          # +----------------------+
                          # |  Division modulaire  |
                          # +----------------------+


# void xn(mgdiv_n2)(chiffre *a, chiffre *c, chiffre d, long n)
#
# Montgomery modular division: divides a by BASE^n modulo c, in place.
#
# Inputs:
# a = natural number of length 2n+1
# c = natural number of length n
# d = -1/c mod BASE
#
# Constraints:
# n >= 2, a[0..2n-1] <= (BASE^n - 1)^2, a and c distinct
#
# Output:
# a[n..2n-1] <- a[0..2n-1]/BASE^n mod c, not normalized
#
# Argument registers as read by the code below:
# rdi = a, rsi = c, rdx = d, rcx = n

#ifdef assembly_sn_mgdiv_n2
#undef L
#define L(x) .Lsn_mgdiv_##x
        
ENTER(sn_mgdiv_n2)

	# NOTE(review): rbx, rbp, r12, r13 and r14 are written below; the
	# ENTER and RETURN_WITH_SP macros are presumably responsible for
	# saving and restoring them - confirm against the macro definitions.

	# set up the registers
	movq   %rdx,   %r10             # r10 <- d
	movq   %rcx,   %r11             # r11 <- n (number of passes left)
	leaq -8(%rdi,%rcx,8), %rdi      # rdi <- &a[n-1]
	leaq   (%rsi,%rcx,8), %rsi      # rsi <- &c[n]
	negq   %rcx                     # rcx <- -n
	incq   %rcx                     # rcx <- 1-n
	movq   %rcx,   %r12             # r12 <- 1-n (negative digit index)

	# r14 <- entry point inside the multiply-add loop:
	#        .Lsn_mul_add_loop + 23*((1-n) mod 8)
	# NOTE(review): that loop is defined outside this block; the factor 23
	# presumably is the byte size of one of its 8 unrolled steps - any
	# change to that loop must keep this stride in sync.
        leaq   .Lsn_mul_add_loop(%rip), %r14    # r14 <- base address of the loop
        andq   $7,     %rcx             # rcx <- (1 - n) mod 8
        leaq   (%rcx,%rcx,2), %rdx      # rdx <- 3*rcx
        leaq   (%r14,%rdx,8), %r14      # r14 <- base + 24*rcx
        subq   %rcx,   %r14             # r14 <- base + 23*rcx (jump address)
	xorq   %rbx,   %rbx             # rbx <- 0 (for carries); also clears CF
	 
	# a <- a - c*(a/c mod BASE^n)
	#
	# One pass per digit: m = d*(lowest live digit of a) mod BASE, then
	# m*c is added to a. With d = -1/c mod BASE this should zero the
	# lowest live digit, after which rdi moves up by one digit.
	#
	# Carry convention: on arrival at the loop label CF holds the carry
	# out of the top digit from the previous pass (0 on the first pass,
	# thanks to the xorq above). decq and jnz at the bottom of the loop
	# do not modify CF, so the value survives the back edge.
	# NOTE(review): assumes the alignment macro emits only padding that
	# leaves the flags untouched.
        ALIGN(16)
L(loop):
        setc   %r13b                 	# save the carry on a[n] (bit 0 of r13)
        movq   (%rdi,%r12,8), %rax      # rax <- lowest live digit (a[0] on first pass)
	mulq   %r10                     # rax <- low word of digit*d
	movq   %rax,   %rbp             # rbp <- m = d*a[0] mod BASE
        mulq -8(%rsi,%r12,8)      	# rdx:rax <- m*c[0] (first product)
        movq   %rax,   %r8          	# seed the carry registers with its low word
        movq   %rax,   %r9
        movq   %r12,    %rcx         	# rcx <- 1-n
        andq   $-8,    %rcx             # rcx <- -8*ceil((n-1)/8); andq clears CF
        setc   %bl                  	# bl <- 0 since CF is clear (rbx <- 0)
	# NOTE(review): the register contract of the called code (rbp = m,
	# r8/r9/rdx/rbx = running carries, rcx = block index, results left
	# in r9, rdx and CF) is inferred from the surrounding lines only -
	# verify against the definition of .Lsn_mul_add_loop.
        call   *%r14                	# a += m*c
        addq   %r9,   (%rdi)         	# store the last two digits: first r9 ...
        leaq   8(%rdi), %rdi          	# a++ (leaq keeps CF from the addq)
        adcq   %rbx,   %rdx             # rdx <- rdx + rbx + CF
        bt     $0,    %r13          	# CF <- saved carry on a[n]
        adcq   %rdx,  (%rdi)            # ... then rdx plus saved carry; CF = new carry
        decq   %r11                 	# passes left--, CF preserved
	jnz    L(loop)
		
        # when the last pass left a carry, subtract c from a[n..2n-1]
	# (rdi = &a[2n-1] here after n increments, r12 is still 1-n)
	jnc    L(done)                  # no carry: nothing to subtract
	clc                             # start the subtraction with no borrow
	ALIGN(8)
1:
	movq   -8(%rsi,%r12,8), %rax    # rax <- c[j], j = 0..n-1
	sbbq   %rax, (%rdi,%r12,8)      # a[n+j] -= c[j] + borrow
	incq   %r12                     # next digit; incq leaves CF (borrow) intact
	jle    1b                       # repeat while r12 <= 0, n rounds in total
L(done):
	RETURN_WITH_SP

#endif /* assembly_sn_mgdiv_n2 */
