#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

#ifdef	__arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

.section	".text",#alloc,#execinstr

#ifdef	__PIC__
SPARC_PIC_THUNK(%g1)
#endif
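! The bn_mul_mont_t4_NUM routines below perform Montgomery multiplication
! for fixed operand sizes of NUM 64-bit limbs (8, 16, 24 and 32, i.e.
! 512- to 2048-bit moduli) using the SPARC T4 montmul/montsqr instructions.
! Those instructions take their operands from the register windows, so each
! routine stages a[], n[] and b[] across several save-d windows (larger
! sizes park the overflow limbs in even-numbered %f registers) before
! issuing the hand-encoded opcode.  Each routine returns 1 on success and
! 0 when it cannot deliver a result, in which case the caller is expected
! to fall back to a generic Montgomery multiplication path.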
.globl	bn_mul_mont_t4_8
.align	32
bn_mul_mont_t4_8:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9cap_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5
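! %g5 now acts as a sentinel: on 32-bit builds its upper 32 bits are all
! ones (plus the stack-bias bit of %sp), and it is OR-ed into every frame
! pointer created below.  The later "and %fp,%g5" checks verify that the
! mark survived: a register window spilled and refilled through a 32-bit
! trap handler may lose the upper half of %fp, in which case the staged
! operands are unusable and the routine unwinds with a 0 return.  On
! 64-bit builds these checks are compiled out.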

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	mov	%i2,%g3
	mov	%i3,%g4
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
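! Stage a[0..7] in the local/out registers of this fresh window.  Each
! 64-bit limb is assembled from two 32-bit halves (low word at offset +0,
! high word at +4) with ld/sllx/or; the same idiom is used for n[] and,
! on the non-squaring path, for b[] further below.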
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_8
	nop
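! b == a: take the dedicated squaring path, which issues montsqr instead
! of montmul and does not load b[] a second time.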
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
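! Hand-encoded T4 montmul instruction (older assemblers do not know the
! opcode).  It consumes the operands staged in the register windows above,
! with n0 waiting in %f60.  The %fcc3 test below catches a multiplication
! that did not run to completion, and on 32-bit builds the %fp sentinel is
! re-checked as well; either condition branches to .Lmabort_8, which
! unwinds the windows and returns 0.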
	.word	0x81b02920+8-1	! montmul	8-1
.Lmresume_8:
	fbu,pn	%fcc3,.Lmabort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Lmabort_8
#endif
	nop
#ifdef	__arch64__
	restore
	restore
	restore
	restore
	restore
#else
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_8
	restore
#endif
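! Success: the 8 result limbs are in %l0-%l7.  Move them into double FP
! registers (movxtod, also hand-encoded) so they can be written out as
! 32-bit halves below.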
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_8
	mov	0,%i0		! return failure
#endif
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	mov	1,%i0		! return success
.Lmdone_8:
	ret
	restore

.Lmabort_8:
	restore
	restore
	restore
	restore
	restore
.Lmabort1_8:
	restore

	mov	0,%i0		! return failure
	ret
	restore

.align	32
.Lmsquare_8:
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word   0x81b02940+8-1	! montsqr	8-1
	ba	.Lmresume_8
	nop
.type	bn_mul_mont_t4_8, #function
.size	bn_mul_mont_t4_8, .-bn_mul_mont_t4_8
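! bn_mul_mont_t4_16: same flow as bn_mul_mont_t4_8, widened to 16 limbs
! (1024 bits); a[14..15] are parked in %f24/%f26 because the integer
! registers of their window are exhausted.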
.globl	bn_mul_mont_t4_16
.align	32
bn_mul_mont_t4_16:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9cap_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	mov	%i2,%g3
	mov	%i3,%g4
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g2+8*8+0],%o1
	ld	[%g2+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g2+9*8+0],%o2
	ld	[%g2+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g2+10*8+0],%o3
	ld	[%g2+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g2+11*8+0],%o4
	ld	[%g2+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g2+12*8+0],%o5
	ld	[%g2+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g2+13*8+0],%o7
	ld	[%g2+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	ld	[%g2+14*8+0],%f5
	ld	[%g2+14*8+4],%f4
	.word	0xb1b00f04 !fsrc2	%f0,%f4,%f24
	ld	[%g2+15*8+0],%f7
	ld	[%g2+15*8+4],%f6
	.word	0xb5b00f06 !fsrc2	%f0,%f6,%f26
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+8*8+0],%o1
	ld	[%g4+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+9*8+0],%o2
	ld	[%g4+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g4+10*8+0],%o3
	ld	[%g4+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g4+11*8+0],%o4
	ld	[%g4+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g4+12*8+0],%o5
	ld	[%g4+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g4+13*8+0],%o7
	ld	[%g4+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+14*8+0],%l1
	ld	[%g4+14*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+15*8+0],%l2
	ld	[%g4+15*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_16
	nop
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+8*8+0],%l3
	ld	[%g3+8*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+9*8+0],%l4
	ld	[%g3+9*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g3+10*8+0],%l5
	ld	[%g3+10*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g3+11*8+0],%l6
	ld	[%g3+11*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g3+12*8+0],%l7
	ld	[%g3+12*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g3+13*8+0],%o7
	ld	[%g3+13*8+4],%l7
	sllx	%l7,32,%l7
	or	%o7,%l7,%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+14*8+0],%i1
	ld	[%g3+14*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+15*8+0],%o7
	ld	[%g3+15*8+4],%i1
	sllx	%i1,32,%i1
	or	%o7,%i1,%i1
	.word	0x81b02920+16-1	! montmul	16-1
.Lmresume_16:
	fbu,pn	%fcc3,.Lmabort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Lmabort_16
#endif
	nop
#ifdef	__arch64__
	restore
	restore
	restore
	restore
	restore
#else
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_16
	restore
#endif
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_16
	mov	0,%i0		! return failure
#endif
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	st	%f17,[%g1+8*8+0]
	st	%f16,[%g1+8*8+4]
	st	%f19,[%g1+9*8+0]
	st	%f18,[%g1+9*8+4]
	st	%f21,[%g1+10*8+0]
	st	%f20,[%g1+10*8+4]
	st	%f23,[%g1+11*8+0]
	st	%f22,[%g1+11*8+4]
	.word	0x81b00f1d !fsrc2	%f0,%f60,%f0
	st	%f1,[%g1+12*8+0]
	st	%f0,[%g1+12*8+4]
	.word	0x85b00f1f !fsrc2	%f0,%f62,%f2
	st	%f3,[%g1+13*8+0]
	st	%f2,[%g1+13*8+4]
	.word	0x89b00f18 !fsrc2	%f0,%f24,%f4
	st	%f5,[%g1+14*8+0]
	st	%f4,[%g1+14*8+4]
	.word	0x8db00f1a !fsrc2	%f0,%f26,%f6
	st	%f7,[%g1+15*8+0]
	st	%f6,[%g1+15*8+4]
	mov	1,%i0		! return success
.Lmdone_16:
	ret
	restore

.Lmabort_16:
	restore
	restore
	restore
	restore
	restore
.Lmabort1_16:
	restore

	mov	0,%i0		! return failure
	ret
	restore

.align	32
.Lmsquare_16:
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word   0x81b02940+16-1	! montsqr	16-1
	ba	.Lmresume_16
	nop
.type	bn_mul_mont_t4_16, #function
.size	bn_mul_mont_t4_16, .-bn_mul_mont_t4_16
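! bn_mul_mont_t4_24: as above, widened to 24 limbs (1536 bits); a[14..23]
! are parked in %f24-%f42.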
.globl	bn_mul_mont_t4_24
.align	32
bn_mul_mont_t4_24:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9cap_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	mov	%i2,%g3
	mov	%i3,%g4
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g2+8*8+0],%o1
	ld	[%g2+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g2+9*8+0],%o2
	ld	[%g2+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g2+10*8+0],%o3
	ld	[%g2+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g2+11*8+0],%o4
	ld	[%g2+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g2+12*8+0],%o5
	ld	[%g2+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g2+13*8+0],%o7
	ld	[%g2+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	ld	[%g2+14*8+0],%f5
	ld	[%g2+14*8+4],%f4
	.word	0xb1b00f04 !fsrc2	%f0,%f4,%f24
	ld	[%g2+15*8+0],%f7
	ld	[%g2+15*8+4],%f6
	.word	0xb5b00f06 !fsrc2	%f0,%f6,%f26
	ld	[%g2+16*8+0],%f1
	ld	[%g2+16*8+4],%f0
	.word	0xb9b00f00 !fsrc2	%f0,%f0,%f28
	ld	[%g2+17*8+0],%f3
	ld	[%g2+17*8+4],%f2
	.word	0xbdb00f02 !fsrc2	%f0,%f2,%f30
	ld	[%g2+18*8+0],%f5
	ld	[%g2+18*8+4],%f4
	.word	0x83b00f04 !fsrc2	%f0,%f4,%f32
	ld	[%g2+19*8+0],%f7
	ld	[%g2+19*8+4],%f6
	.word	0x87b00f06 !fsrc2	%f0,%f6,%f34
	ld	[%g2+20*8+0],%f1
	ld	[%g2+20*8+4],%f0
	.word	0x8bb00f00 !fsrc2	%f0,%f0,%f36
	ld	[%g2+21*8+0],%f3
	ld	[%g2+21*8+4],%f2
	.word	0x8fb00f02 !fsrc2	%f0,%f2,%f38
	ld	[%g2+22*8+0],%f5
	ld	[%g2+22*8+4],%f4
	.word	0x93b00f04 !fsrc2	%f0,%f4,%f40
	ld	[%g2+23*8+0],%f7
	ld	[%g2+23*8+4],%f6
	.word	0x97b00f06 !fsrc2	%f0,%f6,%f42
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+8*8+0],%o1
	ld	[%g4+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+9*8+0],%o2
	ld	[%g4+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g4+10*8+0],%o3
	ld	[%g4+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g4+11*8+0],%o4
	ld	[%g4+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g4+12*8+0],%o5
	ld	[%g4+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g4+13*8+0],%o7
	ld	[%g4+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+14*8+0],%l1
	ld	[%g4+14*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+15*8+0],%l2
	ld	[%g4+15*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+16*8+0],%l3
	ld	[%g4+16*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+17*8+0],%l4
	ld	[%g4+17*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+18*8+0],%l5
	ld	[%g4+18*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+19*8+0],%l6
	ld	[%g4+19*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+20*8+0],%l7
	ld	[%g4+20*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+21*8+0],%o0
	ld	[%g4+21*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+22*8+0],%o1
	ld	[%g4+22*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+23*8+0],%o2
	ld	[%g4+23*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_24
	nop
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+8*8+0],%l3
	ld	[%g3+8*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+9*8+0],%l4
	ld	[%g3+9*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g3+10*8+0],%l5
	ld	[%g3+10*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g3+11*8+0],%l6
	ld	[%g3+11*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g3+12*8+0],%l7
	ld	[%g3+12*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g3+13*8+0],%o7
	ld	[%g3+13*8+4],%l7
	sllx	%l7,32,%l7
	or	%o7,%l7,%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+14*8+0],%i1
	ld	[%g3+14*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+15*8+0],%i2
	ld	[%g3+15*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+16*8+0],%i3
	ld	[%g3+16*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+17*8+0],%i4
	ld	[%g3+17*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+18*8+0],%i5
	ld	[%g3+18*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+19*8+0],%l0
	ld	[%g3+19*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+20*8+0],%l1
	ld	[%g3+20*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+21*8+0],%l2
	ld	[%g3+21*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+22*8+0],%l3
	ld	[%g3+22*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+23*8+0],%o7
	ld	[%g3+23*8+4],%l3
	sllx	%l3,32,%l3
	or	%o7,%l3,%l3
	.word	0x81b02920+24-1	! montmul	24-1
.Lmresume_24:
	fbu,pn	%fcc3,.Lmabort_24
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Lmabort_24
#endif
	nop
#ifdef	__arch64__
	restore
	restore
	restore
	restore
	restore
#else
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_24
	restore
#endif
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_24
	mov	0,%i0		! return failure
#endif
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	st	%f17,[%g1+8*8+0]
	st	%f16,[%g1+8*8+4]
	st	%f19,[%g1+9*8+0]
	st	%f18,[%g1+9*8+4]
	st	%f21,[%g1+10*8+0]
	st	%f20,[%g1+10*8+4]
	st	%f23,[%g1+11*8+0]
	st	%f22,[%g1+11*8+4]
	.word	0x81b00f1d !fsrc2	%f0,%f60,%f0
	st	%f1,[%g1+12*8+0]
	st	%f0,[%g1+12*8+4]
	.word	0x85b00f1f !fsrc2	%f0,%f62,%f2
	st	%f3,[%g1+13*8+0]
	st	%f2,[%g1+13*8+4]
	.word	0x89b00f18 !fsrc2	%f0,%f24,%f4
	st	%f5,[%g1+14*8+0]
	st	%f4,[%g1+14*8+4]
	.word	0x8db00f1a !fsrc2	%f0,%f26,%f6
	st	%f7,[%g1+15*8+0]
	st	%f6,[%g1+15*8+4]
	.word	0x81b00f1c !fsrc2	%f0,%f28,%f0
	st	%f1,[%g1+16*8+0]
	st	%f0,[%g1+16*8+4]
	.word	0x85b00f1e !fsrc2	%f0,%f30,%f2
	st	%f3,[%g1+17*8+0]
	st	%f2,[%g1+17*8+4]
	.word	0x89b00f01 !fsrc2	%f0,%f32,%f4
	st	%f5,[%g1+18*8+0]
	st	%f4,[%g1+18*8+4]
	.word	0x8db00f03 !fsrc2	%f0,%f34,%f6
	st	%f7,[%g1+19*8+0]
	st	%f6,[%g1+19*8+4]
	.word	0x81b00f05 !fsrc2	%f0,%f36,%f0
	st	%f1,[%g1+20*8+0]
	st	%f0,[%g1+20*8+4]
	.word	0x85b00f07 !fsrc2	%f0,%f38,%f2
	st	%f3,[%g1+21*8+0]
	st	%f2,[%g1+21*8+4]
	.word	0x89b00f09 !fsrc2	%f0,%f40,%f4
	st	%f5,[%g1+22*8+0]
	st	%f4,[%g1+22*8+4]
	.word	0x8db00f0b !fsrc2	%f0,%f42,%f6
	st	%f7,[%g1+23*8+0]
	st	%f6,[%g1+23*8+4]
	mov	1,%i0		! return success
.Lmdone_24:
	ret
	restore

.Lmabort_24:
	restore
	restore
	restore
	restore
	restore
.Lmabort1_24:
	restore

	mov	0,%i0		! return failure
	ret
	restore

.align	32
.Lmsquare_24:
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word   0x81b02940+24-1	! montsqr	24-1
	ba	.Lmresume_24
	nop
.type	bn_mul_mont_t4_24, #function
.size	bn_mul_mont_t4_24, .-bn_mul_mont_t4_24
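! bn_mul_mont_t4_32: as above, widened to 32 limbs (2048 bits); a[14..31]
! are parked in %f24-%f58.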
.globl	bn_mul_mont_t4_32
.align	32
bn_mul_mont_t4_32:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9cap_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	mov	%i2,%g3
	mov	%i3,%g4
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g2+8*8+0],%o1
	ld	[%g2+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g2+9*8+0],%o2
	ld	[%g2+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g2+10*8+0],%o3
	ld	[%g2+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g2+11*8+0],%o4
	ld	[%g2+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g2+12*8+0],%o5
	ld	[%g2+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g2+13*8+0],%o7
	ld	[%g2+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	ld	[%g2+14*8+0],%f5
	ld	[%g2+14*8+4],%f4
	.word	0xb1b00f04 !fsrc2	%f0,%f4,%f24
	ld	[%g2+15*8+0],%f7
	ld	[%g2+15*8+4],%f6
	.word	0xb5b00f06 !fsrc2	%f0,%f6,%f26
	ld	[%g2+16*8+0],%f1
	ld	[%g2+16*8+4],%f0
	.word	0xb9b00f00 !fsrc2	%f0,%f0,%f28
	ld	[%g2+17*8+0],%f3
	ld	[%g2+17*8+4],%f2
	.word	0xbdb00f02 !fsrc2	%f0,%f2,%f30
	ld	[%g2+18*8+0],%f5
	ld	[%g2+18*8+4],%f4
	.word	0x83b00f04 !fsrc2	%f0,%f4,%f32
	ld	[%g2+19*8+0],%f7
	ld	[%g2+19*8+4],%f6
	.word	0x87b00f06 !fsrc2	%f0,%f6,%f34
	ld	[%g2+20*8+0],%f1
	ld	[%g2+20*8+4],%f0
	.word	0x8bb00f00 !fsrc2	%f0,%f0,%f36
	ld	[%g2+21*8+0],%f3
	ld	[%g2+21*8+4],%f2
	.word	0x8fb00f02 !fsrc2	%f0,%f2,%f38
	ld	[%g2+22*8+0],%f5
	ld	[%g2+22*8+4],%f4
	.word	0x93b00f04 !fsrc2	%f0,%f4,%f40
	ld	[%g2+23*8+0],%f7
	ld	[%g2+23*8+4],%f6
	.word	0x97b00f06 !fsrc2	%f0,%f6,%f42
	ld	[%g2+24*8+0],%f1
	ld	[%g2+24*8+4],%f0
	.word	0x9bb00f00 !fsrc2	%f0,%f0,%f44
	ld	[%g2+25*8+0],%f3
	ld	[%g2+25*8+4],%f2
	.word	0x9fb00f02 !fsrc2	%f0,%f2,%f46
	ld	[%g2+26*8+0],%f5
	ld	[%g2+26*8+4],%f4
	.word	0xa3b00f04 !fsrc2	%f0,%f4,%f48
	ld	[%g2+27*8+0],%f7
	ld	[%g2+27*8+4],%f6
	.word	0xa7b00f06 !fsrc2	%f0,%f6,%f50
	ld	[%g2+28*8+0],%f1
	ld	[%g2+28*8+4],%f0
	.word	0xabb00f00 !fsrc2	%f0,%f0,%f52
	ld	[%g2+29*8+0],%f3
	ld	[%g2+29*8+4],%f2
	.word	0xafb00f02 !fsrc2	%f0,%f2,%f54
	ld	[%g2+30*8+0],%f5
	ld	[%g2+30*8+4],%f4
	.word	0xb3b00f04 !fsrc2	%f0,%f4,%f56
	ld	[%g2+31*8+0],%f7
	ld	[%g2+31*8+4],%f6
	.word	0xb7b00f06 !fsrc2	%f0,%f6,%f58
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+8*8+0],%o1
	ld	[%g4+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+9*8+0],%o2
	ld	[%g4+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g4+10*8+0],%o3
	ld	[%g4+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g4+11*8+0],%o4
	ld	[%g4+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g4+12*8+0],%o5
	ld	[%g4+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g4+13*8+0],%o7
	ld	[%g4+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+14*8+0],%l1
	ld	[%g4+14*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+15*8+0],%l2
	ld	[%g4+15*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+16*8+0],%l3
	ld	[%g4+16*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+17*8+0],%l4
	ld	[%g4+17*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+18*8+0],%l5
	ld	[%g4+18*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+19*8+0],%l6
	ld	[%g4+19*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+20*8+0],%l7
	ld	[%g4+20*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+21*8+0],%o0
	ld	[%g4+21*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+22*8+0],%o1
	ld	[%g4+22*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+23*8+0],%o2
	ld	[%g4+23*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g4+24*8+0],%o3
	ld	[%g4+24*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g4+25*8+0],%o4
	ld	[%g4+25*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g4+26*8+0],%o5
	ld	[%g4+26*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g4+27*8+0],%o7
	ld	[%g4+27*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+28*8+0],%l1
	ld	[%g4+28*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+29*8+0],%l2
	ld	[%g4+29*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+30*8+0],%l3
	ld	[%g4+30*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+31*8+0],%o7
	ld	[%g4+31*8+4],%l3
	sllx	%l3,32,%l3
	or	%o7,%l3,%l3
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_32
	nop
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+8*8+0],%l3
	ld	[%g3+8*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+9*8+0],%l4
	ld	[%g3+9*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g3+10*8+0],%l5
	ld	[%g3+10*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g3+11*8+0],%l6
	ld	[%g3+11*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g3+12*8+0],%l7
	ld	[%g3+12*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g3+13*8+0],%o7
	ld	[%g3+13*8+4],%l7
	sllx	%l7,32,%l7
	or	%o7,%l7,%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+14*8+0],%i1
	ld	[%g3+14*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+15*8+0],%i2
	ld	[%g3+15*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+16*8+0],%i3
	ld	[%g3+16*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+17*8+0],%i4
	ld	[%g3+17*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+18*8+0],%i5
	ld	[%g3+18*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+19*8+0],%l0
	ld	[%g3+19*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+20*8+0],%l1
	ld	[%g3+20*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+21*8+0],%l2
	ld	[%g3+21*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+22*8+0],%l3
	ld	[%g3+22*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+23*8+0],%l4
	ld	[%g3+23*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g3+24*8+0],%l5
	ld	[%g3+24*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g3+25*8+0],%l6
	ld	[%g3+25*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g3+26*8+0],%l7
	ld	[%g3+26*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g3+27*8+0],%o0
	ld	[%g3+27*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g3+28*8+0],%o1
	ld	[%g3+28*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g3+29*8+0],%o2
	ld	[%g3+29*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g3+30*8+0],%o3
	ld	[%g3+30*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g3+31*8+0],%o7
	ld	[%g3+31*8+4],%o3
	sllx	%o3,32,%o3
	or	%o7,%o3,%o3
	.word	0x81b02920+32-1	! montmul	32-1
.Lmresume_32:
	fbu,pn	%fcc3,.Lmabort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Lmabort_32
#endif
	nop
#ifdef	__arch64__
	restore
	restore
	restore
	restore
	restore
#else
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_32
	restore
#endif
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_32
	mov	0,%i0		! return failure
#endif
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	st	%f17,[%g1+8*8+0]
	st	%f16,[%g1+8*8+4]
	st	%f19,[%g1+9*8+0]
	st	%f18,[%g1+9*8+4]
	st	%f21,[%g1+10*8+0]
	st	%f20,[%g1+10*8+4]
	st	%f23,[%g1+11*8+0]
	st	%f22,[%g1+11*8+4]
	.word	0x81b00f1d !fsrc2	%f0,%f60,%f0
	st	%f1,[%g1+12*8+0]
	st	%f0,[%g1+12*8+4]
	.word	0x85b00f1f !fsrc2	%f0,%f62,%f2
	st	%f3,[%g1+13*8+0]
	st	%f2,[%g1+13*8+4]
	.word	0x89b00f18 !fsrc2	%f0,%f24,%f4
	st	%f5,[%g1+14*8+0]
	st	%f4,[%g1+14*8+4]
	.word	0x8db00f1a !fsrc2	%f0,%f26,%f6
	st	%f7,[%g1+15*8+0]
	st	%f6,[%g1+15*8+4]
	.word	0x81b00f1c !fsrc2	%f0,%f28,%f0
	st	%f1,[%g1+16*8+0]
	st	%f0,[%g1+16*8+4]
	.word	0x85b00f1e !fsrc2	%f0,%f30,%f2
	st	%f3,[%g1+17*8+0]
	st	%f2,[%g1+17*8+4]
	.word	0x89b00f01 !fsrc2	%f0,%f32,%f4
	st	%f5,[%g1+18*8+0]
	st	%f4,[%g1+18*8+4]
	.word	0x8db00f03 !fsrc2	%f0,%f34,%f6
	st	%f7,[%g1+19*8+0]
	st	%f6,[%g1+19*8+4]
	.word	0x81b00f05 !fsrc2	%f0,%f36,%f0
	st	%f1,[%g1+20*8+0]
	st	%f0,[%g1+20*8+4]
	.word	0x85b00f07 !fsrc2	%f0,%f38,%f2
	st	%f3,[%g1+21*8+0]
	st	%f2,[%g1+21*8+4]
	.word	0x89b00f09 !fsrc2	%f0,%f40,%f4
	st	%f5,[%g1+22*8+0]
	st	%f4,[%g1+22*8+4]
	.word	0x8db00f0b !fsrc2	%f0,%f42,%f6
	st	%f7,[%g1+23*8+0]
	st	%f6,[%g1+23*8+4]
	.word	0x81b00f0d !fsrc2	%f0,%f44,%f0
	st	%f1,[%g1+24*8+0]
	st	%f0,[%g1+24*8+4]
	.word	0x85b00f0f !fsrc2	%f0,%f46,%f2
	st	%f3,[%g1+25*8+0]
	st	%f2,[%g1+25*8+4]
	.word	0x89b00f11 !fsrc2	%f0,%f48,%f4
	st	%f5,[%g1+26*8+0]
	st	%f4,[%g1+26*8+4]
	.word	0x8db00f13 !fsrc2	%f0,%f50,%f6
	st	%f7,[%g1+27*8+0]
	st	%f6,[%g1+27*8+4]
	.word	0x81b00f15 !fsrc2	%f0,%f52,%f0
	st	%f1,[%g1+28*8+0]
	st	%f0,[%g1+28*8+4]
	.word	0x85b00f17 !fsrc2	%f0,%f54,%f2
	st	%f3,[%g1+29*8+0]
	st	%f2,[%g1+29*8+4]
	.word	0x89b00f19 !fsrc2	%f0,%f56,%f4
	st	%f5,[%g1+30*8+0]
	st	%f4,[%g1+30*8+4]
	.word	0x8db00f1b !fsrc2	%f0,%f58,%f6
	st	%f7,[%g1+31*8+0]
	st	%f6,[%g1+31*8+4]
	mov	1,%i0		! return success
.Lmdone_32:
	ret
	restore

.Lmabort_32:
	restore
	restore
	restore
	restore
	restore
.Lmabort1_32:
	restore

	mov	0,%i0		! return failure
	ret
	restore

.align	32
.Lmsquare_32:
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word   0x81b02940+32-1	! montsqr	32-1
	ba	.Lmresume_32
	nop
.type	bn_mul_mont_t4_32, #function
.size	bn_mul_mont_t4_32, .-bn_mul_mont_t4_32
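! bn_pwr5_mont_t4_8 performs one run of fixed-window (5-bit) Montgomery
! exponentiation on 8-limb operands: it walks the exponent word passed in
! %i4 downward in 5-bit windows (the value in %i5 tracks the current bit
! position and the loop ends once it goes negative), squaring the
! accumulator five times with montsqr and then montmul-ing in the matching
! entry of the power table, which is gathered in constant time.  The
! argument roles appear to be: %i0 accumulator, %i1 modulus, %i2 n0,
! %i3 power table.  Like the routines above it returns 1 on success and
! 0 on failure.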
.globl	bn_pwr5_mont_t4_8
.align	32
bn_pwr5_mont_t4_8:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9cap_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	ld	[%i2+0],%f1	! load *n0
	ld	[%i2+4],%f0
	mov	%i3,%g3
	srl	%i4,%g0,%i4	! pack last arguments
	sllx	%i5,32,%g4
	or	%i4,%g4,%g4
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g1+0*8],%l0
	ldx	[%g1+1*8],%l1
	ldx	[%g1+2*8],%l2
	ldx	[%g1+3*8],%l3
	ldx	[%g1+4*8],%l4
	ldx	[%g1+5*8],%l5
	ldx	[%g1+6*8],%l6
	ldx	[%g1+7*8],%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+0*8],%l0
	ldx	[%g2+1*8],%l1
	ldx	[%g2+2*8],%l2
	ldx	[%g2+3*8],%l3
	ldx	[%g2+4*8],%l4
	ldx	[%g2+5*8],%l5
	ldx	[%g2+6*8],%l6
	ldx	[%g2+7*8],%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp

	srlx	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%o7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%o7,	%o7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
	wr	%o4,	%g0,	%ccr
	b	.Lstride_8
	nop
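! Constant-time gather of one power-table entry: the low two bits of the
! 5-bit window select a 64-bit slot within the 32-byte stride (already
! folded into the table pointer), while the high three bits were turned
! into a one-hot mask in %ccr.  All candidates at offsets 0*32..15*32 are
! loaded unconditionally and the selected one is kept via conditional
! moves on the %icc/%xcc flags, so the memory access pattern does not
! depend on the secret window value.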
.align	16
.Lstride_8:
	ldx	[%o7+0*32],	%i0
	ldx	[%o7+8*32],	%i1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i1
	ldx	[%o7+0*32],	%i2
	ldx	[%o7+8*32],	%i3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i3
	ldx	[%o7+0*32],	%i4
	ldx	[%o7+8*32],	%i5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i5
	ldx	[%o7+0*32],	%l0
	ldx	[%o7+8*32],	%l1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	srax	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%i7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%i7,	%i7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
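! One window step: five consecutive montsqr raise the accumulator to the
! 32nd power, then the table entry gathered above is multiplied in with
! montmul.  After each hand-encoded instruction the %fcc3/sentinel checks
! catch an operation that did not complete and bail out through .Labort_8.
! The %ccr write just before the montmul stages the one-hot selector that
! the next iteration's gather will use.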
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	wr	%o4,	%g0,	%ccr
	.word	0x81b02920+8-1	! montmul	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif

	srax	%g4,	32,	%o4
#ifdef	__arch64__
	brgez	%o4,.Lstride_8
	restore
	restore
	restore
	restore
	restore
#else
	brgez	%o4,.Lstride_8
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Labort1_8
	restore
#endif
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Ldone_8
	mov	0,%i0		! return failure
#endif
	std	%f0,[%g1+0*8]
	std	%f2,[%g1+1*8]
	std	%f4,[%g1+2*8]
	std	%f6,[%g1+3*8]
	std	%f8,[%g1+4*8]
	std	%f10,[%g1+5*8]
	std	%f12,[%g1+6*8]
	std	%f14,[%g1+7*8]
	mov	1,%i0		! return success
.Ldone_8:
	ret
	restore

.Labort_8:
	restore
	restore
	restore
	restore
	restore
.Labort1_8:
	restore

	mov	0,%i0		! return failure
	ret
	restore
.type	bn_pwr5_mont_t4_8, #function
.size	bn_pwr5_mont_t4_8, .-bn_pwr5_mont_t4_8
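! bn_pwr5_mont_t4_16: same windowed-exponentiation flow as
! bn_pwr5_mont_t4_8, widened to 16 limbs.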
.globl	bn_pwr5_mont_t4_16
.align	32
bn_pwr5_mont_t4_16:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9cap_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	ld	[%i2+0],%f1	! load *n0
	ld	[%i2+4],%f0
	mov	%i3,%g3
	srl	%i4,%g0,%i4	! pack last arguments
	sllx	%i5,32,%g4
	or	%i4,%g4,%g4
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g1+0*8],%l0
	ldx	[%g1+1*8],%l1
	ldx	[%g1+2*8],%l2
	ldx	[%g1+3*8],%l3
	ldx	[%g1+4*8],%l4
	ldx	[%g1+5*8],%l5
	ldx	[%g1+6*8],%l6
	ldx	[%g1+7*8],%l7
	ldx	[%g1+8*8],%o0
	ldx	[%g1+9*8],%o1
	ldx	[%g1+10*8],%o2
	ldx	[%g1+11*8],%o3
	ldx	[%g1+12*8],%o4
	ldx	[%g1+13*8],%o5
	ldd	[%g1+14*8],%f24
	ldd	[%g1+15*8],%f26
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+0*8],%l0
	ldx	[%g2+1*8],%l1
	ldx	[%g2+2*8],%l2
	ldx	[%g2+3*8],%l3
	ldx	[%g2+4*8],%l4
	ldx	[%g2+5*8],%l5
	ldx	[%g2+6*8],%l6
	ldx	[%g2+7*8],%l7
	ldx	[%g2+8*8],%o0
	ldx	[%g2+9*8],%o1
	ldx	[%g2+10*8],%o2
	ldx	[%g2+11*8],%o3
	ldx	[%g2+12*8],%o4
	ldx	[%g2+13*8],%o5
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+14*8],%l0
	ldx	[%g2+15*8],%l1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp

	srlx	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%o7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%o7,	%o7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
	wr	%o4,	%g0,	%ccr
	b	.Lstride_16
	nop
.align	16
.Lstride_16:
	ldx	[%o7+0*32],	%i0
	ldx	[%o7+8*32],	%i1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i1
	ldx	[%o7+0*32],	%i2
	ldx	[%o7+8*32],	%i3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i3
	ldx	[%o7+0*32],	%i4
	ldx	[%o7+8*32],	%i5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i5
	ldx	[%o7+0*32],	%l0
	ldx	[%o7+8*32],	%l1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l1
	ldx	[%o7+0*32],	%l2
	ldx	[%o7+8*32],	%l3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l3
	ldx	[%o7+0*32],	%l4
	ldx	[%o7+8*32],	%l5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l5
	ldx	[%o7+0*32],	%l6
	ldx	[%o7+8*32],	%l7
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l6
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l7
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l6
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l7
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l6
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l7
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l6
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l7
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l6
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l7
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l6
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l7
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l6
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%i7+0*32],	%i0
	ldx	[%i7+8*32],	%i1
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%i1
	srax	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%i7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%i7,	%i7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	wr	%o4,	%g0,	%ccr
	.word	0x81b02920+16-1	! montmul	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif

	srax	%g4,	32,	%o4
#ifdef	__arch64__
	brgez	%o4,.Lstride_16
	restore
	restore
	restore
	restore
	restore
#else
	brgez	%o4,.Lstride_16
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Labort1_16
	restore
#endif
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Ldone_16
	mov	0,%i0		! return failure
#endif
	std	%f0,[%g1+0*8]
	std	%f2,[%g1+1*8]
	std	%f4,[%g1+2*8]
	std	%f6,[%g1+3*8]
	std	%f8,[%g1+4*8]
	std	%f10,[%g1+5*8]
	std	%f12,[%g1+6*8]
	std	%f14,[%g1+7*8]
	std	%f16,[%g1+8*8]
	std	%f18,[%g1+9*8]
	std	%f20,[%g1+10*8]
	std	%f22,[%g1+11*8]
	std	%f60,[%g1+12*8]
	std	%f62,[%g1+13*8]
	std	%f24,[%g1+14*8]
	std	%f26,[%g1+15*8]
	mov	1,%i0		! return success
.Ldone_16:
	ret
	restore

.Labort_16:
	restore
	restore
	restore
	restore
	restore
.Labort1_16:
	restore

	mov	0,%i0		! return failure
	ret
	restore
.type	bn_pwr5_mont_t4_16, #function
.size	bn_pwr5_mont_t4_16, .-bn_pwr5_mont_t4_16
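! bn_pwr5_mont_t4_24: same flow again, widened to 24 limbs; limbs that do
! not fit in the integer windows are parked in %f24-%f42.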
.globl	bn_pwr5_mont_t4_24
.align	32
bn_pwr5_mont_t4_24:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9cap_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	ld	[%i2+0],%f1	! load *n0
	ld	[%i2+4],%f0
	mov	%i3,%g3
	srl	%i4,%g0,%i4	! pack last arguments
	sllx	%i5,32,%g4
	or	%i4,%g4,%g4
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g1+0*8],%l0
	ldx	[%g1+1*8],%l1
	ldx	[%g1+2*8],%l2
	ldx	[%g1+3*8],%l3
	ldx	[%g1+4*8],%l4
	ldx	[%g1+5*8],%l5
	ldx	[%g1+6*8],%l6
	ldx	[%g1+7*8],%l7
	ldx	[%g1+8*8],%o0
	ldx	[%g1+9*8],%o1
	ldx	[%g1+10*8],%o2
	ldx	[%g1+11*8],%o3
	ldx	[%g1+12*8],%o4
	ldx	[%g1+13*8],%o5
	ldd	[%g1+14*8],%f24
	ldd	[%g1+15*8],%f26
	ldd	[%g1+16*8],%f28
	ldd	[%g1+17*8],%f30
	ldd	[%g1+18*8],%f32
	ldd	[%g1+19*8],%f34
	ldd	[%g1+20*8],%f36
	ldd	[%g1+21*8],%f38
	ldd	[%g1+22*8],%f40
	ldd	[%g1+23*8],%f42
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+0*8],%l0
	ldx	[%g2+1*8],%l1
	ldx	[%g2+2*8],%l2
	ldx	[%g2+3*8],%l3
	ldx	[%g2+4*8],%l4
	ldx	[%g2+5*8],%l5
	ldx	[%g2+6*8],%l6
	ldx	[%g2+7*8],%l7
	ldx	[%g2+8*8],%o0
	ldx	[%g2+9*8],%o1
	ldx	[%g2+10*8],%o2
	ldx	[%g2+11*8],%o3
	ldx	[%g2+12*8],%o4
	ldx	[%g2+13*8],%o5
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+14*8],%l0
	ldx	[%g2+15*8],%l1
	ldx	[%g2+16*8],%l2
	ldx	[%g2+17*8],%l3
	ldx	[%g2+18*8],%l4
	ldx	[%g2+19*8],%l5
	ldx	[%g2+20*8],%l6
	ldx	[%g2+21*8],%l7
	ldx	[%g2+22*8],%o0
	ldx	[%g2+23*8],%o1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp

	srlx	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%o7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%o7,	%o7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
	wr	%o4,	%g0,	%ccr
	b	.Lstride_24
	nop
.align	16
.Lstride_24:
	ldx	[%o7+0*32],	%i0
	ldx	[%o7+8*32],	%i1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i1
	ldx	[%o7+0*32],	%i2
	ldx	[%o7+8*32],	%i3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i3
	ldx	[%o7+0*32],	%i4
	ldx	[%o7+8*32],	%i5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i5
	ldx	[%o7+0*32],	%l0
	ldx	[%o7+8*32],	%l1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l1
	ldx	[%o7+0*32],	%l2
	ldx	[%o7+8*32],	%l3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l3
	ldx	[%o7+0*32],	%l4
	ldx	[%o7+8*32],	%l5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l5
	ldx	[%o7+0*32],	%l6
	ldx	[%o7+8*32],	%l7
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l6
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l7
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l6
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l7
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l6
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l7
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l6
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l7
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l6
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l7
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l6
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l7
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l6
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%i7+0*32],	%i0
	ldx	[%i7+8*32],	%i1
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%i1
	ldx	[%i7+0*32],	%i2
	ldx	[%i7+8*32],	%i3
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%i2
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%i3
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%i2
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%i3
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%i2
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%i3
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%i2
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%i3
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%i2
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%i3
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%i2
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%i3
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%i2
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%i3
	ldx	[%i7+0*32],	%i4
	ldx	[%i7+8*32],	%i5
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%i4
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%i5
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%i4
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%i5
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%i4
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%i5
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%i4
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%i5
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%i4
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%i5
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%i4
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%i5
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%i4
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%i5
	ldx	[%i7+0*32],	%l0
	ldx	[%i7+8*32],	%l1
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%l0
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%l1
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%l0
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%l1
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%l0
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%l1
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%l0
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%l1
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%l0
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%l1
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%l0
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%l1
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%l0
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%l1
	ldx	[%i7+0*32],	%l2
	ldx	[%i7+8*32],	%l3
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%l2
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%l3
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%l2
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%l3
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%l2
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%l3
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%l2
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%l3
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%l2
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%l3
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%l2
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%l3
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%l2
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%l3
	srax	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%i7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%i7,	%i7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
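! Five back-to-back Montgomery squarings followed by one Montgomery
! multiplication by the table entry gathered above consume the current
! 5-bit window.  After each hardware op, the fbu on %fcc3 appears to
! catch the instruction reporting failure, and on 32-bit builds the
! %g5/%fp sentinel test catches a register window that was spilled in
! the meantime; either case aborts to .Labort_24 so the caller can
! fall back to the generic code path.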
	.word	0x81b02940+24-1	! montsqr	24-1
	fbu,pn	%fcc3,.Labort_24
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_24
#endif
	nop
	.word	0x81b02940+24-1	! montsqr	24-1
	fbu,pn	%fcc3,.Labort_24
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_24
#endif
	nop
	.word	0x81b02940+24-1	! montsqr	24-1
	fbu,pn	%fcc3,.Labort_24
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_24
#endif
	nop
	.word	0x81b02940+24-1	! montsqr	24-1
	fbu,pn	%fcc3,.Labort_24
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_24
#endif
	nop
	.word	0x81b02940+24-1	! montsqr	24-1
	fbu,pn	%fcc3,.Labort_24
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_24
#endif
	nop
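! Install the freshly computed one-hot selector into %ccr; it steers the
! conditional-move gather at the top of the next .Lstride_24 pass (the
! montmul below still uses the entry gathered with the previous selector).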
	wr	%o4,	%g0,	%ccr
	.word	0x81b02920+24-1	! montmul	24-1
	fbu,pn	%fcc3,.Labort_24
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_24
#endif

	srax	%g4,	32,	%o4
#ifdef	__arch64__
	brgez	%o4,.Lstride_24
	restore
	restore
	restore
	restore
	restore
#else
	brgez	%o4,.Lstride_24
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Labort1_24
	restore
#endif
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Ldone_24
	mov	0,%i0		! return failure
#endif
	std	%f0,[%g1+0*8]
	std	%f2,[%g1+1*8]
	std	%f4,[%g1+2*8]
	std	%f6,[%g1+3*8]
	std	%f8,[%g1+4*8]
	std	%f10,[%g1+5*8]
	std	%f12,[%g1+6*8]
	std	%f14,[%g1+7*8]
	std	%f16,[%g1+8*8]
	std	%f18,[%g1+9*8]
	std	%f20,[%g1+10*8]
	std	%f22,[%g1+11*8]
	std	%f60,[%g1+12*8]
	std	%f62,[%g1+13*8]
	std	%f24,[%g1+14*8]
	std	%f26,[%g1+15*8]
	std	%f28,[%g1+16*8]
	std	%f30,[%g1+17*8]
	std	%f32,[%g1+18*8]
	std	%f34,[%g1+19*8]
	std	%f36,[%g1+20*8]
	std	%f38,[%g1+21*8]
	std	%f40,[%g1+22*8]
	std	%f42,[%g1+23*8]
	mov	1,%i0		! return success
.Ldone_24:
	ret
	restore

.Labort_24:
	restore
	restore
	restore
	restore
	restore
.Labort1_24:
	restore

	mov	0,%i0		! return failure
	ret
	restore
.type	bn_pwr5_mont_t4_24, #function
.size	bn_pwr5_mont_t4_24, .-bn_pwr5_mont_t4_24
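! Presumed prototype (inferred from the register usage below):
! int bn_pwr5_mont_t4_32(u64 *tp, const u64 *np, const u64 *n0,
!                        const void *pwrtbl, int pwr, int bits);
!
! 32-limb (2048-bit) variant of bn_pwr5_mont_t4_24 above: per 5-bit
! window it gathers a multiplier from the power table, performs five
! Montgomery squarings and one Montgomery multiplication with the T4
! montsqr/montmul instructions, and returns 1 on success or 0 if it
! had to abort and leave the work to the generic code path.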
.globl	bn_pwr5_mont_t4_32
.align	32
bn_pwr5_mont_t4_32:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	ld	[%i2+0],%f1	! load *n0
	ld	[%i2+4],%f0
	mov	%i3,%g3
	srl	%i4,%g0,%i4	! pack last arguments
	sllx	%i5,32,%g4
	or	%i4,%g4,%g4
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g1+0*8],%l0
	ldx	[%g1+1*8],%l1
	ldx	[%g1+2*8],%l2
	ldx	[%g1+3*8],%l3
	ldx	[%g1+4*8],%l4
	ldx	[%g1+5*8],%l5
	ldx	[%g1+6*8],%l6
	ldx	[%g1+7*8],%l7
	ldx	[%g1+8*8],%o0
	ldx	[%g1+9*8],%o1
	ldx	[%g1+10*8],%o2
	ldx	[%g1+11*8],%o3
	ldx	[%g1+12*8],%o4
	ldx	[%g1+13*8],%o5
	ldd	[%g1+14*8],%f24
	ldd	[%g1+15*8],%f26
	ldd	[%g1+16*8],%f28
	ldd	[%g1+17*8],%f30
	ldd	[%g1+18*8],%f32
	ldd	[%g1+19*8],%f34
	ldd	[%g1+20*8],%f36
	ldd	[%g1+21*8],%f38
	ldd	[%g1+22*8],%f40
	ldd	[%g1+23*8],%f42
	ldd	[%g1+24*8],%f44
	ldd	[%g1+25*8],%f46
	ldd	[%g1+26*8],%f48
	ldd	[%g1+27*8],%f50
	ldd	[%g1+28*8],%f52
	ldd	[%g1+29*8],%f54
	ldd	[%g1+30*8],%f56
	ldd	[%g1+31*8],%f58
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+0*8],%l0
	ldx	[%g2+1*8],%l1
	ldx	[%g2+2*8],%l2
	ldx	[%g2+3*8],%l3
	ldx	[%g2+4*8],%l4
	ldx	[%g2+5*8],%l5
	ldx	[%g2+6*8],%l6
	ldx	[%g2+7*8],%l7
	ldx	[%g2+8*8],%o0
	ldx	[%g2+9*8],%o1
	ldx	[%g2+10*8],%o2
	ldx	[%g2+11*8],%o3
	ldx	[%g2+12*8],%o4
	ldx	[%g2+13*8],%o5
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+14*8],%l0
	ldx	[%g2+15*8],%l1
	ldx	[%g2+16*8],%l2
	ldx	[%g2+17*8],%l3
	ldx	[%g2+18*8],%l4
	ldx	[%g2+19*8],%l5
	ldx	[%g2+20*8],%l6
	ldx	[%g2+21*8],%l7
	ldx	[%g2+22*8],%o0
	ldx	[%g2+23*8],%o1
	ldx	[%g2+24*8],%o2
	ldx	[%g2+25*8],%o3
	ldx	[%g2+26*8],%o4
	ldx	[%g2+27*8],%o5
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+28*8],%l0
	ldx	[%g2+29*8],%l1
	ldx	[%g2+30*8],%l2
	ldx	[%g2+31*8],%l3
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
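! The accumulator (loaded from %g1) and the modulus (loaded from %g2)
! have now been spread over a stack of register windows, with the upper
! accumulator limbs in %f24..%f58; this appears to be the operand layout
! the T4 montsqr/montmul instructions consume.  On 32-bit builds every
! %fp carries the %g5 sentinel so that a window which gets spilled and
! refilled can be detected afterwards.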

	srlx	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%o7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%o7,	%o7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
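! %o7 now points into the cache line that holds the wanted table entry
! (the low two bits of the 5-bit window select the 8-byte slot), and
! %o4 holds a one-hot flag built from the upper three bits; the wr
! below moves it into %ccr to steer the conditional-move gather in
! .Lstride_32.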
	wr	%o4,	%g0,	%ccr
	b	.Lstride_32
	nop
.align	16
.Lstride_32:
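! Gather the next multiplier from the power table: for each limb all
! the candidate cache lines are loaded and the wanted 64-bit entry is
! picked with conditional moves keyed on the one-hot bit in %ccr, so
! the access pattern does not depend on the secret window value at
! cache-line granularity.  The gathered limbs are staged across
! register windows just as the operands were above.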
	ldx	[%o7+0*32],	%i0
	ldx	[%o7+8*32],	%i1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i1
	ldx	[%o7+0*32],	%i2
	ldx	[%o7+8*32],	%i3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i3
	ldx	[%o7+0*32],	%i4
	ldx	[%o7+8*32],	%i5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i5
	ldx	[%o7+0*32],	%l0
	ldx	[%o7+8*32],	%l1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l1
	ldx	[%o7+0*32],	%l2
	ldx	[%o7+8*32],	%l3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l3
	ldx	[%o7+0*32],	%l4
	ldx	[%o7+8*32],	%l5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l5
	ldx	[%o7+0*32],	%l6
	ldx	[%o7+8*32],	%l7
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l6
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l7
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l6
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l7
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l6
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l7
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l6
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l7
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l6
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l7
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l6
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l7
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l6
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%i7+0*32],	%i0
	ldx	[%i7+8*32],	%i1
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%i1
	ldx	[%i7+0*32],	%i2
	ldx	[%i7+8*32],	%i3
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%i2
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%i3
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%i2
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%i3
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%i2
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%i3
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%i2
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%i3
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%i2
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%i3
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%i2
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%i3
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%i2
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%i3
	ldx	[%i7+0*32],	%i4
	ldx	[%i7+8*32],	%i5
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%i4
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%i5
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%i4
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%i5
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%i4
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%i5
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%i4
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%i5
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%i4
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%i5
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%i4
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%i5
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%i4
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%i5
	ldx	[%i7+0*32],	%l0
	ldx	[%i7+8*32],	%l1
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%l0
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%l1
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%l0
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%l1
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%l0
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%l1
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%l0
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%l1
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%l0
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%l1
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%l0
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%l1
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%l0
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%l1
	ldx	[%i7+0*32],	%l2
	ldx	[%i7+8*32],	%l3
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%l2
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%l3
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%l2
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%l3
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%l2
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%l3
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%l2
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%l3
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%l2
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%l3
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%l2
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%l3
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%l2
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%l3
	ldx	[%i7+0*32],	%l4
	ldx	[%i7+8*32],	%l5
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%l4
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%l5
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%l4
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%l5
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%l4
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%l5
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%l4
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%l5
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%l4
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%l5
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%l4
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%l5
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%l4
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%l5
	ldx	[%i7+0*32],	%l6
	ldx	[%i7+8*32],	%l7
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%l6
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%l7
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%l6
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%l7
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%l6
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%l7
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%l6
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%l7
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%l6
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%l7
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%l6
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%l7
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%l6
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%l7
	ldx	[%i7+0*32],	%o0
	ldx	[%i7+8*32],	%o1
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%o0
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%o1
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%o0
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%o1
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%o0
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%o1
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%o0
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%o1
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%o0
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%o1
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%o0
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%o1
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%o0
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%o1
	ldx	[%i7+0*32],	%o2
	ldx	[%i7+8*32],	%o3
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%o2
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%o3
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%o2
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%o3
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%o2
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%o3
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%o2
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%o3
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%o2
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%o3
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%o2
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%o3
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%o2
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%o3
	srax	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%i7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%i7,	%i7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
	.word	0x81b02940+32-1	! montsqr	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1	! montsqr	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1	! montsqr	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1	! montsqr	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1	! montsqr	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	wr	%o4,	%g0,	%ccr
	.word	0x81b02920+32-1	! montmul	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif

	srax	%g4,	32,	%o4
#ifdef	__arch64__
	brgez	%o4,.Lstride_32
	restore
	restore
	restore
	restore
	restore
#else
	brgez	%o4,.Lstride_32
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Labort1_32
	restore
#endif
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Ldone_32
	mov	0,%i0		! return failure
#endif
	std	%f0,[%g1+0*8]
	std	%f2,[%g1+1*8]
	std	%f4,[%g1+2*8]
	std	%f6,[%g1+3*8]
	std	%f8,[%g1+4*8]
	std	%f10,[%g1+5*8]
	std	%f12,[%g1+6*8]
	std	%f14,[%g1+7*8]
	std	%f16,[%g1+8*8]
	std	%f18,[%g1+9*8]
	std	%f20,[%g1+10*8]
	std	%f22,[%g1+11*8]
	std	%f60,[%g1+12*8]
	std	%f62,[%g1+13*8]
	std	%f24,[%g1+14*8]
	std	%f26,[%g1+15*8]
	std	%f28,[%g1+16*8]
	std	%f30,[%g1+17*8]
	std	%f32,[%g1+18*8]
	std	%f34,[%g1+19*8]
	std	%f36,[%g1+20*8]
	std	%f38,[%g1+21*8]
	std	%f40,[%g1+22*8]
	std	%f42,[%g1+23*8]
	std	%f44,[%g1+24*8]
	std	%f46,[%g1+25*8]
	std	%f48,[%g1+26*8]
	std	%f50,[%g1+27*8]
	std	%f52,[%g1+28*8]
	std	%f54,[%g1+29*8]
	std	%f56,[%g1+30*8]
	std	%f58,[%g1+31*8]
	mov	1,%i0		! return success
.Ldone_32:
	ret
	restore

.Labort_32:
	restore
	restore
	restore
	restore
	restore
.Labort1_32:
	restore

	mov	0,%i0		! return failure
	ret
	restore
.type	bn_pwr5_mont_t4_32, #function
.size	bn_pwr5_mont_t4_32, .-bn_pwr5_mont_t4_32
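! int bn_mul_mont_t4(u64 *rp, const u64 *ap, const u64 *bp,
!                    const u64 *np, const u64 *n0, int num);
!
! Generic-length Montgomery multiplication, rp = ap*bp/2^(64*num) mod np,
! built on the T4/VIS3 carry-chain instructions (umulxhi, addxc, addxccc,
! emitted as .word with the mnemonic in a comment).  n0 points at the
! constant -np[0]^-1 mod 2^64 stored as two 32-bit words, least
! significant word first; the temporary tp[] lives on the stack and the
! function always returns 1.  (Prototype inferred from the register
! usage below; treat it as illustrative.)
!
! The #if 0 block below is a hedged C sketch of the word-serial
! (CIOS-style) algorithm the assembly implements; it is illustrative
! only and is never compiled.  bn_mul_mont_ref, u128, the uint32_t view
! of n0 and the VLA temporary are inventions of the sketch; the real
! code keeps tp on the stack and does the final selection with
! conditional moves (.Lcopy) instead of a branch.
#if 0
#include <stdint.h>

typedef unsigned __int128 u128;	/* relies on a 64-bit compiler */

static int bn_mul_mont_ref(uint64_t *rp, const uint64_t *ap,
                           const uint64_t *bp, const uint64_t *np,
                           const uint32_t *n0p, int num)
{
    uint64_t n0 = (uint64_t)n0p[0] | ((uint64_t)n0p[1] << 32);
    uint64_t tp[num];                   /* temporary, kept on the stack below */
    uint64_t ovf = 0, borrow = 0;
    int i, j;

    for (j = 0; j < num; j++)
        tp[j] = 0;

    for (i = 0; i < num; i++) {         /* .Louter (first pass is .L1st) */
        /* m is chosen so that tp[0] + ap[0]*bp[i] + m*np[0] == 0 mod 2^64 */
        u128 t = (u128)ap[0] * bp[i] + tp[0];
        uint64_t m = (uint64_t)t * n0;
        u128 u = (u128)np[0] * m + (uint64_t)t;
        uint64_t ahi = (uint64_t)(t >> 64), nhi = (uint64_t)(u >> 64);

        for (j = 1; j < num; j++) {     /* .Linner */
            t = (u128)ap[j] * bp[i] + tp[j] + ahi;
            u = (u128)np[j] * m + (uint64_t)t + nhi;
            ahi = (uint64_t)(t >> 64);
            nhi = (uint64_t)(u >> 64);
            tp[j - 1] = (uint64_t)u;    /* result shifted down one limb */
        }
        t = (u128)ahi + nhi + ovf;      /* top limb + upmost overflow bit */
        tp[num - 1] = (uint64_t)t;
        ovf = (uint64_t)(t >> 64);
    }

    for (j = 0; j < num; j++) {         /* .Lsub: rp = tp - np */
        u128 d = (u128)tp[j] - np[j] - borrow;
        rp[j] = (uint64_t)d;
        borrow = (uint64_t)(d >> 64) & 1;
    }
    if (borrow > ovf)                   /* tp < np and no overflow: keep tp */
        for (j = 0; j < num; j++)       /* .Lcopy does this branch-free */
            rp[j] = tp[j];
    return 1;
}
#endif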
.globl	bn_mul_mont_t4
.align	32
bn_mul_mont_t4:
	add	%sp,	STACK_BIAS,	%g4	! real top of stack
	sll	%o5,	3,	%o5		! size in bytes
	add	%o5,	63,	%g1
	andn	%g1,	63,	%g1		! buffer size rounded up to 64 bytes
	sub	%g4,	%g1,	%g1
	andn	%g1,	63,	%g1		! align at 64 byte
	sub	%g1,	STACK_FRAME,	%g1	! new top of stack
	sub	%g1,	%g4,	%g1

	save	%sp,	%g1,	%sp
	ld	[%i4+0],	%l0	! pull n0[0..1] value
	ld	[%i4+4],	%l1
	add	%sp, STACK_BIAS+STACK_FRAME, %l5
	ldx	[%i2+0],	%g2	! m0=bp[0]
	sllx	%l1,	32,	%g1
	add	%i2,	8,	%i2
	or	%l0,	%g1,	%g1

	ldx	[%i1+0],	%o2	! ap[0]

	mulx	%o2,	%g2,	%g4	! ap[0]*bp[0]
	.word	0x8bb282c2 !umulxhi	%o2,%g2,%g5

	ldx	[%i1+8],	%o2	! ap[1]
	add	%i1,	16,	%i1
	ldx	[%i3+0],	%o4	! np[0]

	mulx	%g4,	%g1,	%g3	! "tp[0]"*n0

	mulx	%o2,	%g2,	%o3	! ap[1]*bp[0]
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj

	mulx	%o4,	%g3,	%o0	! np[0]*m1
	.word	0x93b302c3 !umulxhi	%o4,%g3,%o1

	ldx	[%i3+8],	%o4	! np[1]

	addcc	%g4,	%o0,	%o0
	add	%i3,	16,	%i3
	.word	0x93b00229 !addxc	%g0,%o1,%o1

	mulx	%o4,	%g3,	%o5	! np[1]*m1
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj

	ba	.L1st
	sub	%i5,	24,	%l4	! cnt=num-3

.align	16
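! First pass (bp[0]): tp[] does not exist yet, so only ap[j]*bp[0] and
! np[j]*m1 are accumulated; the results become the initial tp[].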
.L1st:
	addcc	%o3,	%g5,	%g4
	.word	0x8bb28220 !addxc	%o2,%g0,%g5

	ldx	[%i1+0],	%o2	! ap[j]
	addcc	%o5,	%o1,	%o0
	add	%i1,	8,	%i1
	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj

	ldx	[%i3+0],	%o4	! np[j]
	mulx	%o2,	%g2,	%o3	! ap[j]*bp[0]
	add	%i3,	8,	%i3
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj

	mulx	%o4,	%g3,	%o5	! np[j]*m1
	addcc	%g4,	%o0,	%o0	! np[j]*m1+ap[j]*bp[0]
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stxa	%o0,	[%l5]0xe2	! tp[j-1]
	add	%l5,	8,	%l5	! tp++

	brnz,pt	%l4,	.L1st
	sub	%l4,	8,	%l4	! j--
!.L1st
	addcc	%o3,	%g5,	%g4
	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj

	addcc	%o5,	%o1,	%o0
	.word	0x93b30220 !addxc	%o4,%g0,%o1
	addcc	%g4,	%o0,	%o0	! np[j]*m1+ap[j]*bp[0]
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stxa	%o0,	[%l5]0xe2	! tp[j-1]
	add	%l5,	8,	%l5

	addcc	%g5,	%o1,	%o1
	.word	0xa1b00220 !addxc	%g0,%g0,%l0	! upmost overflow bit
	stxa	%o1,	[%l5]0xe2
	add	%l5,	8,	%l5

	ba	.Louter
	sub	%i5,	16,	%l1	! i=num-2

.align	16
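! Remaining passes: for each bp[i], add ap[]*bp[i] and tp[], derive the
! Montgomery factor m1 from the low limb, fold in np[]*m1 and shift the
! running sum down one limb; the carry out of the top limb is kept in
! %l0 ("upmost overflow bit").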
.Louter:
	ldx	[%i2+0],	%g2	! m0=bp[i]
	add	%i2,	8,	%i2

	sub	%i1,	%i5,	%i1	! rewind
	sub	%i3,	%i5,	%i3
	sub	%l5,	%i5,	%l5

	ldx	[%i1+0],	%o2	! ap[0]
	ldx	[%i3+0],	%o4	! np[0]

	mulx	%o2,	%g2,	%g4	! ap[0]*bp[i]
	ldx	[%l5],		%o7	! tp[0]
	.word	0x8bb282c2 !umulxhi	%o2,%g2,%g5
	ldx	[%i1+8],	%o2	! ap[1]
	addcc	%g4,	%o7,	%g4	! ap[0]*bp[i]+tp[0]
	mulx	%o2,	%g2,	%o3	! ap[1]*bp[i]
	.word	0x8bb00225 !addxc	%g0,%g5,%g5
	mulx	%g4,	%g1,	%g3	! tp[0]*n0
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
	mulx	%o4,	%g3,	%o0	! np[0]*m1
	add	%i1,	16,	%i1
	.word	0x93b302c3 !umulxhi	%o4,%g3,%o1
	ldx	[%i3+8],	%o4	! np[1]
	add	%i3,	16,	%i3
	addcc	%o0,	%g4,	%o0
	mulx	%o4,	%g3,	%o5	! np[1]*m1
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj

	ba	.Linner
	sub	%i5,	24,	%l4	! cnt=num-3
.align	16
.Linner:
	addcc	%o3,	%g5,	%g4
	ldx	[%l5+8],	%o7	! tp[j]
	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
	ldx	[%i1+0],	%o2	! ap[j]
	add	%i1,	8,	%i1
	addcc	%o5,	%o1,	%o0
	mulx	%o2,	%g2,	%o3	! ap[j]*bp[i]
	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
	ldx	[%i3+0],	%o4	! np[j]
	add	%i3,	8,	%i3
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
	addcc	%g4,	%o7,	%g4	! ap[j]*bp[i]+tp[j]
	mulx	%o4,	%g3,	%o5	! np[j]*m1
	.word	0x8bb00225 !addxc	%g0,%g5,%g5
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
	addcc	%o0,	%g4,	%o0	! np[j]*m1+ap[j]*bp[i]+tp[j]
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stx	%o0,	[%l5]		! tp[j-1]
	add	%l5,	8,	%l5
	brnz,pt	%l4,	.Linner
	sub	%l4,	8,	%l4
!.Linner
	ldx	[%l5+8],	%o7	! tp[j]
	addcc	%o3,	%g5,	%g4
	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
	addcc	%g4,	%o7,	%g4	! ap[j]*bp[i]+tp[j]
	.word	0x8bb00225 !addxc	%g0,%g5,%g5

	addcc	%o5,	%o1,	%o0
	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
	addcc	%o0,	%g4,	%o0	! np[j]*m1+ap[j]*bp[i]+tp[j]
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stx	%o0,	[%l5]		! tp[j-1]

	subcc	%g0,	%l0,	%g0	! move upmost overflow to CCR.xcc
	.word	0x93b24265 !addxccc	%o1,%g5,%o1
	.word	0xa1b00220 !addxc	%g0,%g0,%l0
	stx	%o1,	[%l5+8]
	add	%l5,	16,	%l5

	brnz,pt	%l1,	.Louter
	sub	%l1,	8,	%l1

	sub	%i1,	%i5,	%i1	! rewind
	sub	%i3,	%i5,	%i3
	sub	%l5,	%i5,	%l5
	ba	.Lsub
	subcc	%i5,	8,	%l4	! cnt=num-1 and clear CCR.xcc

.align	16
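! Compare tp with np by subtracting: the borrow rides the condition-code
! carry, so each 64-bit limb is handled as two 32-bit halves, stored
! high word at the lower address (the usual big-endian 64-bit image).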
.Lsub:
	ldx	[%l5],		%o7
	add	%l5,	8,	%l5
	ldx	[%i3+0],	%o4
	add	%i3,	8,	%i3
	subccc	%o7,	%o4,	%l2	! tp[j]-np[j]
	srlx	%o7,	32,	%o7
	srlx	%o4,	32,	%o4
	subccc	%o7,	%o4,	%l3
	add	%i0,	8,	%i0
	st	%l2,	[%i0-4]		! reverse order
	st	%l3,	[%i0-8]
	brnz,pt	%l4,	.Lsub
	sub	%l4,	8,	%l4

	sub	%i3,	%i5,	%i3	! rewind
	sub	%l5,	%i5,	%l5
	sub	%i0,	%i5,	%i0

	subccc	%l0,	%g0,	%l0	! handle upmost overflow bit
	ba	.Lcopy
	sub	%i5,	8,	%l4

.align	16
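! Branch-free selection of the reduced result: if the subtraction above
! borrowed and there was no upmost overflow bit, the conditional moves
! put the original tp[] back into rp[]; otherwise rp[] keeps tp - np.
! The stack temporary is zeroed ("zap") as it is consumed.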
.Lcopy:					! conditional copy
	ldx	[%l5],		%o7
	ldx	[%i0+0],	%l2
	stx	%g0,	[%l5]		! zap
	add	%l5,	8,	%l5
	movcs	%icc,	%o7,	%l2
	stx	%l2,	[%i0+0]
	add	%i0,	8,	%i0
	brnz	%l4,	.Lcopy
	sub	%l4,	8,	%l4

	mov	1,	%o0
	ret
	restore
.type	bn_mul_mont_t4, #function
.size	bn_mul_mont_t4, .-bn_mul_mont_t4
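! Presumed prototype (inferred from the code; treat as illustrative):
! int bn_mul_mont_gather5_t4(u64 *rp, const u64 *ap, const void *pwrtbl,
!                            const u64 *np, const u64 *n0, int num,
!                            int power);
!
! Same algorithm as bn_mul_mont_t4 above, except that the multiplier is
! not a plain array: each of its limbs is gathered on the fly from the
! 32-entry power table (laid out by bn_flip_n_scatter5_t4 below), using
! cache-line-covering loads plus conditional moves keyed on %ccr and
! selected by the 7th argument, "power".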
.globl	bn_mul_mont_gather5_t4
.align	32
bn_mul_mont_gather5_t4:
	add	%sp,	STACK_BIAS,	%g4	! real top of stack
	sll	%o5,	3,	%o5		! size in bytes
	add	%o5,	63,	%g1
	andn	%g1,	63,	%g1		! buffer size rounded up to 64 bytes
	sub	%g4,	%g1,	%g1
	andn	%g1,	63,	%g1		! align at 64 byte
	sub	%g1,	STACK_FRAME,	%g1	! new top of stack
	sub	%g1,	%g4,	%g1
	LDPTR	[%sp+STACK_7thARG],	%g4	! load power, 7th argument

	save	%sp,	%g1,	%sp
	srl	%g4,	2,	%o4
	and	%g4,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%i2,	%i2	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%l7
	wr	%l7,	%g0,	%ccr
	ldx	[%i2+0*32],	%g2
	ldx	[%i2+1*32],	%o4
	ldx	[%i2+2*32],	%o5
	movvs	%icc,	%o4,	%g2
	ldx	[%i2+3*32],	%o4
	move	%icc,	%o5,	%g2
	ldx	[%i2+4*32],	%o5
	movneg	%icc,	%o4,	%g2
	ldx	[%i2+5*32],	%o4
	movcs	%xcc,	%o5,	%g2
	ldx	[%i2+6*32],	%o5
	movvs	%xcc,	%o4,	%g2
	ldx	[%i2+7*32],	%o4
	move	%xcc,	%o5,	%g2
	add	%i2,8*32,	%i2
	movneg	%xcc,	%o4,	%g2
	ld	[%i4+0],	%l0	! pull n0[0..1] value
	ld	[%i4+4],	%l1
	add	%sp, STACK_BIAS+STACK_FRAME, %l5
	sllx	%l1,	32,	%g1
	or	%l0,	%g1,	%g1

	ldx	[%i1+0],	%o2	! ap[0]

	mulx	%o2,	%g2,	%g4	! ap[0]*bp[0]
	.word	0x8bb282c2 !umulxhi	%o2,%g2,%g5

	ldx	[%i1+8],	%o2	! ap[1]
	add	%i1,	16,	%i1
	ldx	[%i3+0],	%o4	! np[0]

	mulx	%g4,	%g1,	%g3	! "tp[0]"*n0

	mulx	%o2,	%g2,	%o3	! ap[1]*bp[0]
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj

	mulx	%o4,	%g3,	%o0	! np[0]*m1
	.word	0x93b302c3 !umulxhi	%o4,%g3,%o1

	ldx	[%i3+8],	%o4	! np[1]

	addcc	%g4,	%o0,	%o0
	add	%i3,	16,	%i3
	.word	0x93b00229 !addxc	%g0,%o1,%o1

	mulx	%o4,	%g3,	%o5	! np[1]*m1
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj

	ba	.L1st_g5
	sub	%i5,	24,	%l4	! cnt=num-3

.align	16
.L1st_g5:
	addcc	%o3,	%g5,	%g4
	.word	0x8bb28220 !addxc	%o2,%g0,%g5

	ldx	[%i1+0],	%o2	! ap[j]
	addcc	%o5,	%o1,	%o0
	add	%i1,	8,	%i1
	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj

	ldx	[%i3+0],	%o4	! np[j]
	mulx	%o2,	%g2,	%o3	! ap[j]*bp[0]
	add	%i3,	8,	%i3
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj

	mulx	%o4,	%g3,	%o5	! np[j]*m1
	addcc	%g4,	%o0,	%o0	! np[j]*m1+ap[j]*bp[0]
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stxa	%o0,	[%l5]0xe2	! tp[j-1]
	add	%l5,	8,	%l5	! tp++

	brnz,pt	%l4,	.L1st_g5
	sub	%l4,	8,	%l4	! j--
!.L1st_g5
	addcc	%o3,	%g5,	%g4
	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj

	addcc	%o5,	%o1,	%o0
	.word	0x93b30220 !addxc	%o4,%g0,%o1
	addcc	%g4,	%o0,	%o0	! np[j]*m1+ap[j]*bp[0]
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stxa	%o0,	[%l5]0xe2	! tp[j-1]
	add	%l5,	8,	%l5

	addcc	%g5,	%o1,	%o1
	.word	0xa1b00220 !addxc	%g0,%g0,%l0	! upmost overflow bit
	stxa	%o1,	[%l5]0xe2
	add	%l5,	8,	%l5

	ba	.Louter_g5
	sub	%i5,	16,	%l1	! i=num-2

.align	16
.Louter_g5:
	wr	%l7,	%g0,	%ccr
	ldx	[%i2+0*32],	%g2
	ldx	[%i2+1*32],	%o4
	ldx	[%i2+2*32],	%o5
	movvs	%icc,	%o4,	%g2
	ldx	[%i2+3*32],	%o4
	move	%icc,	%o5,	%g2
	ldx	[%i2+4*32],	%o5
	movneg	%icc,	%o4,	%g2
	ldx	[%i2+5*32],	%o4
	movcs	%xcc,	%o5,	%g2
	ldx	[%i2+6*32],	%o5
	movvs	%xcc,	%o4,	%g2
	ldx	[%i2+7*32],	%o4
	move	%xcc,	%o5,	%g2
	add	%i2,8*32,	%i2
	movneg	%xcc,	%o4,	%g2
	sub	%i1,	%i5,	%i1	! rewind
	sub	%i3,	%i5,	%i3
	sub	%l5,	%i5,	%l5

	ldx	[%i1+0],	%o2	! ap[0]
	ldx	[%i3+0],	%o4	! np[0]

	mulx	%o2,	%g2,	%g4	! ap[0]*bp[i]
	ldx	[%l5],		%o7	! tp[0]
	.word	0x8bb282c2 !umulxhi	%o2,%g2,%g5
	ldx	[%i1+8],	%o2	! ap[1]
	addcc	%g4,	%o7,	%g4	! ap[0]*bp[i]+tp[0]
	mulx	%o2,	%g2,	%o3	! ap[1]*bp[i]
	.word	0x8bb00225 !addxc	%g0,%g5,%g5
	mulx	%g4,	%g1,	%g3	! tp[0]*n0
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
	mulx	%o4,	%g3,	%o0	! np[0]*m1
	add	%i1,	16,	%i1
	.word	0x93b302c3 !umulxhi	%o4,%g3,%o1
	ldx	[%i3+8],	%o4	! np[1]
	add	%i3,	16,	%i3
	addcc	%o0,	%g4,	%o0
	mulx	%o4,	%g3,	%o5	! np[1]*m1
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj

	ba	.Linner_g5
	sub	%i5,	24,	%l4	! cnt=num-3
.align	16
.Linner_g5:
	addcc	%o3,	%g5,	%g4
	ldx	[%l5+8],	%o7	! tp[j]
	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
	ldx	[%i1+0],	%o2	! ap[j]
	add	%i1,	8,	%i1
	addcc	%o5,	%o1,	%o0
	mulx	%o2,	%g2,	%o3	! ap[j]*bp[i]
	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
	ldx	[%i3+0],	%o4	! np[j]
	add	%i3,	8,	%i3
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
	addcc	%g4,	%o7,	%g4	! ap[j]*bp[i]+tp[j]
	mulx	%o4,	%g3,	%o5	! np[j]*m1
	.word	0x8bb00225 !addxc	%g0,%g5,%g5
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
	addcc	%o0,	%g4,	%o0	! np[j]*m1+ap[j]*bp[i]+tp[j]
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stx	%o0,	[%l5]		! tp[j-1]
	add	%l5,	8,	%l5
	brnz,pt	%l4,	.Linner_g5
	sub	%l4,	8,	%l4
!.Linner_g5
	ldx	[%l5+8],	%o7	! tp[j]
	addcc	%o3,	%g5,	%g4
	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
	addcc	%g4,	%o7,	%g4	! ap[j]*bp[i]+tp[j]
	.word	0x8bb00225 !addxc	%g0,%g5,%g5

	addcc	%o5,	%o1,	%o0
	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
	addcc	%o0,	%g4,	%o0	! np[j]*m1+ap[j]*bp[i]+tp[j]
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stx	%o0,	[%l5]		! tp[j-1]

	subcc	%g0,	%l0,	%g0	! move upmost overflow to CCR.xcc
	.word	0x93b24265 !addxccc	%o1,%g5,%o1
	.word	0xa1b00220 !addxc	%g0,%g0,%l0
	stx	%o1,	[%l5+8]
	add	%l5,	16,	%l5

	brnz,pt	%l1,	.Louter_g5
	sub	%l1,	8,	%l1

	sub	%i1,	%i5,	%i1	! rewind
	sub	%i3,	%i5,	%i3
	sub	%l5,	%i5,	%l5
	ba	.Lsub_g5
	subcc	%i5,	8,	%l4	! cnt=num-1 and clear CCR.xcc

.align	16
.Lsub_g5:
	ldx	[%l5],		%o7
	add	%l5,	8,	%l5
	ldx	[%i3+0],	%o4
	add	%i3,	8,	%i3
	subccc	%o7,	%o4,	%l2	! tp[j]-np[j]
	srlx	%o7,	32,	%o7
	srlx	%o4,	32,	%o4
	subccc	%o7,	%o4,	%l3
	add	%i0,	8,	%i0
	st	%l2,	[%i0-4]		! reverse order
	st	%l3,	[%i0-8]
	brnz,pt	%l4,	.Lsub_g5
	sub	%l4,	8,	%l4

	sub	%i3,	%i5,	%i3	! rewind
	sub	%l5,	%i5,	%l5
	sub	%i0,	%i5,	%i0

	subccc	%l0,	%g0,	%l0	! handle upmost overflow bit
	ba	.Lcopy_g5
	sub	%i5,	8,	%l4

.align	16
.Lcopy_g5:				! conditional copy
	ldx	[%l5],		%o7
	ldx	[%i0+0],	%l2
	stx	%g0,	[%l5]		! zap
	add	%l5,	8,	%l5
	movcs	%icc,	%o7,	%l2
	stx	%l2,	[%i0+0]
	add	%i0,	8,	%i0
	brnz	%l4,	.Lcopy_g5
	sub	%l4,	8,	%l4

	mov	1,	%o0
	ret
	restore
.type	bn_mul_mont_gather5_t4, #function
.size	bn_mul_mont_gather5_t4, .-bn_mul_mont_gather5_t4
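! bn_flip_t4 (dst in %o0, src in %o1, count of 64-bit words in %o2):
! copies the given number of 64-bit words, swapping the two 32-bit
! halves of each word; presumably used to convert between the 32-bit
! word order of the rest of the library and the 64-bit limb order the
! T4 routines operate on.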
.globl	bn_flip_t4
.align	32
bn_flip_t4:
.Loop_flip:
	ld	[%o1+0],	%o4
	sub	%o2,	1,	%o2
	ld	[%o1+4],	%o5
	add	%o1,	8,	%o1
	st	%o5,	[%o0+0]
	st	%o4,	[%o0+4]
	brnz	%o2,	.Loop_flip
	add	%o0,	8,	%o0
	retl
	nop
.type	bn_flip_t4, #function
.size	bn_flip_t4, .-bn_flip_t4

.globl	bn_flip_n_scatter5_t4
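! Flip-and-scatter companion of bn_gather5_t4 below (inp in %o0, number
! of 32-bit input words in %o1, power table in %o2, power index in %o3):
! each pair of 32-bit words is combined into a 64-bit limb and limb i is
! stored at pwrtbl[pwr + 32*i], i.e. the table interleaves the limbs of
! 32 powers with a 32-entry (256-byte) stride.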
.align	32
bn_flip_n_scatter5_t4:
	sll	%o3,	3,	%o3
	srl	%o1,	1,	%o1
	add	%o3,	%o2,	%o2	! &pwrtbl[pwr]
	sub	%o1,	1,	%o1
.Loop_flip_n_scatter5:
	ld	[%o0+0],	%o4	! inp[i]
	ld	[%o0+4],	%o5
	add	%o0,	8,	%o0
	sllx	%o5,	32,	%o5
	or	%o4,	%o5,	%o5
	stx	%o5,	[%o2]
	add	%o2,	32*8,	%o2
	brnz	%o1,	.Loop_flip_n_scatter5
	sub	%o1,	1,	%o1
	retl
	nop
.type	bn_flip_n_scatter5_t4, #function
.size	bn_flip_n_scatter5_t4, .-bn_flip_n_scatter5_t4

.globl	bn_gather5_t4
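! Constant-time gather (out in %o0, number of 64-bit words in %o1, power
! table in %o2, power index in %o3): for every limb all four cache lines
! of the 32-entry group are touched and the wanted entry is selected
! with conditional moves keyed on the one-hot bit written to %ccr, so
! the access pattern does not depend on the secret index at cache-line
! granularity.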
.align	32
bn_gather5_t4:
	srl	%o3,	2,	%o4
	and	%o3,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%o2,	%o2	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%g1
	wr	%g1,	%g0,	%ccr
	sub	%o1,	1,	%o1
.Loop_gather5:
	ldx	[%o2+0*32],	%g1
	ldx	[%o2+1*32],	%o4
	ldx	[%o2+2*32],	%o5
	movvs	%icc,	%o4,	%g1
	ldx	[%o2+3*32],	%o4
	move	%icc,	%o5,	%g1
	ldx	[%o2+4*32],	%o5
	movneg	%icc,	%o4,	%g1
	ldx	[%o2+5*32],	%o4
	movcs	%xcc,	%o5,	%g1
	ldx	[%o2+6*32],	%o5
	movvs	%xcc,	%o4,	%g1
	ldx	[%o2+7*32],	%o4
	move	%xcc,	%o5,	%g1
	add	%o2,8*32,	%o2
	movneg	%xcc,	%o4,	%g1
	stx	%g1,	[%o0]
	add	%o0,	8,	%o0
	brnz	%o1,	.Loop_gather5
	sub	%o1,	1,	%o1

	retl
	nop
.type	bn_gather5_t4, #function
.size	bn_gather5_t4, .-bn_gather5_t4

.asciz	"Montgomery Multiplication for SPARC T4, David S. Miller, Andy Polyakov"
.align	4