; File:   trigf.s
; Author: Mikael Kalms <mikael@kalms.org>
; Date:   12 Mar 2000
; Title:  Trigonometric function library (FPU based)
;
; Description:
;   Contains sin()/cos()/tan() and their arcus equivalents,
;    faster than 68881/68882 hardware implementation and most
;    68040/68060 software emulations
;   Accuracy is wildly guessed to be:
;    for sin(), cos(), asin(), acos(), 14 decimal bits
;    for tan() and atan(), 14 significant bits
;
; History:
;   2000-03-12  Bugfixes! asinf() and acosf() sometimes produced
;                bogus results
;		Changed to work on radians rather than 'leaps', as this
;                confused people and was bad in general
;   1999-01-20  Initial version
;
; Note: Trig functions are 'adapted' CPU-only routines, and use
;       the same integer LUTs -- they could probably be better implemented
;
; float	sinf(angle a);
; float	cosf(angle a);
; float,float sincosf(angle a);
; float	tanf(angle a);
; angle	asinf(float x);
; angle	acosf(float x);
; angle	atanf(float x);
; angle	atan2f(float x, float y);


	section	code,code

; sinf -- sine by table lookup with linear interpolation
;
; in	fp0	angle (radians)
; out	fp0	sin(angle)
; regs	d0-d1 are overwritten; d2/a0 are saved and restored
;
; The angle is rescaled so that one full turn equals $10000. The low word
; of the integer result then splits into a 10-bit table index (bits 15..6)
; and a 6-bit interpolation fraction (bits 5..0).

sinf
	movem.l	d2/a0,-(sp)
	fmul.s	#$4622f983,fp0		; $10000 / (2*PI)
	lea	sintabl,a0
	fmove.l	fp0,d0			; d0.w = angle, $10000 per turn
	move.w	d0,d2
	and.w	#$3f,d0			; d0.w = fraction within the step (0..63)
	lsr.w	#6,d2			; d2.w = table index (0..1023)
	lea	(a0,d2.w*4),a0		; a0 -> entry [index]
	move.l	(a0)+,d1		; d1 = entry [index]
	move.l	(a0),d2			; d2 = entry [index+1]
	sub.l	d1,d2			; d2 = step to the next entry
	muls.w	d2,d0			; step * fraction (16x16 -> 32 bits)
	asr.l	#6,d0			; ... / 64
	add.l	d1,d0			; d0 = interpolated table value
	fmove.l	d0,fp0
	movem.l	(sp)+,d2/a0
	fmul.s	#$37800000,fp0		; 1 / $10000
	rts


; cosf -- cosine by table lookup with linear interpolation
;
; in	fp0	angle (radians)
; out	fp0	cos(angle)
; regs	d0-d1 are overwritten; d2/a0 are saved and restored
;
; Uses the sine table with the angle advanced by a quarter turn ($4000 out
; of $10000); the 16-bit addition wraps, so the index stays inside 0..1023.

cosf
	movem.l	d2/a0,-(sp)
	fmul.s	#$4622f983,fp0		; $10000 / (2*PI)
	lea	sintabl,a0
	fmove.l	fp0,d0			; d0.w = angle, $10000 per turn
	move.w	d0,d2
	and.w	#$3f,d0			; d0.w = fraction (the offset below leaves bits 5..0 alone)
	add.w	#$4000,d2		; quarter turn ahead: cos(a) = sin(a + PI/2)
	lsr.w	#6,d2			; d2.w = table index (0..1023)
	lea	(a0,d2.w*4),a0		; a0 -> entry [index]
	move.l	(a0)+,d1		; d1 = entry [index]
	move.l	(a0),d2			; d2 = entry [index+1]
	sub.l	d1,d2			; d2 = step to the next entry
	muls.w	d2,d0			; step * fraction (16x16 -> 32 bits)
	asr.l	#6,d0			; ... / 64
	add.l	d1,d0			; d0 = interpolated table value
	fmove.l	d0,fp0
	movem.l	(sp)+,d2/a0
	fmul.s	#$37800000,fp0		; 1 / $10000
	rts


; sincosf -- sine and cosine of the same angle in one call
;
; in	fp0	angle (radians)
; out	fp0	sin(angle)
;	fp1	cos(angle)
; regs	d0-d1 are overwritten; d2-d3/a0 are saved and restored
;
; The index/fraction split is done once and reused for both tables.
; Unlike cosf, the cosine is read through costabl with the unmodified
; index rather than by offsetting the angle.

sincosf
	movem.l	d2-d3/a0,-(sp)
	fmul.s	#$4622f983,fp0		; $10000 / (2*PI)
	fmove.l	fp0,d0			; d0.w = angle, $10000 per turn
	move.w	d0,d3
	and.w	#$3f,d0			; d0.w = fraction within the step (0..63)
	lsr.w	#6,d3			; d3.w = table index (0..1023)

	lea	sintabl,a0		; --- sine -> d2
	lea	(a0,d3.w*4),a0
	move.l	(a0)+,d1		; d1 = sin entry [index]
	move.l	(a0),d2			; d2 = sin entry [index+1]
	sub.l	d1,d2
	muls.w	d0,d2			; step * fraction
	asr.l	#6,d2
	add.l	d1,d2			; d2 = interpolated sine

	lea	costabl,a0		; --- cosine -> d1
	lea	(a0,d3.w*4),a0
	move.l	(a0)+,d1		; d1 = cos entry [index]
	move.l	(a0),d3			; d3 = cos entry [index+1] (index no longer needed)
	sub.l	d1,d3
	muls.w	d0,d3			; step * fraction
	asr.l	#6,d3
	add.l	d3,d1			; d1 = interpolated cosine

	move.l	d2,d0			; d0 = sine, so d2 can be restored
	fmove.l	d1,fp1
	fmove.l	d0,fp0
	movem.l	(sp)+,d2-d3/a0
	fmul.s	#$37800000,fp0		; 1 / $10000
	fmul.s	#$37800000,fp1		; 1 / $10000
	rts


; tanf -- tangent by table lookup with linear interpolation
;
; in	fp0	angle (radians)
; out	fp0	tan(angle)
; regs	d0-d1 are overwritten; d2/a0 are saved and restored
;
; Same index/fraction split as sinf (10-bit index, 6-bit fraction), but the
; interpolation is done with a full 32-bit step: close to the poles two
; neighbouring tangent entries differ by more than a signed word can hold,
; so a 16-bit multiply would use a truncated (possibly sign-flipped) slope.
;
; NOTE(review): in the single table step that contains a pole the step may
; still be too large for step*fraction to fit in 32 bits; what the table
; stores at the pole itself is not visible from this file.

tanf
	fmul.s	#$4622f983,fp0		; $10000 / (2*PI)
	fmove.l	fp0,d0			; d0.w = angle, $10000 per turn
	movem.l	d2/a0,-(sp)
	lea	tantabl,a0
	move.w	d0,d2
	lsr.w	#6,d2			; d2.w = table index (0..1023)
	and.l	#$3f,d0			; d0.l = fraction (0..63), upper bits cleared for muls.l
	move.l	(a0,d2.w*4),d1		; d1 = entry [index]
	move.l	4(a0,d2.w*4),d2		; d2 = entry [index+1]
	sub.l	d1,d2			; d2 = step to the next entry (32 bits)
	muls.l	d2,d0			; step * fraction (32x32 -> low 32 bits)
	asr.l	#6,d0			; ... / 64
	add.l	d1,d0			; d0 = interpolated table value
	movem.l	(sp)+,d2/a0
	fmove.l	d0,fp0
	fmul.s	#$37800000,fp0		; 1 / $10000
	rts


; asinf -- arc sine by table lookup with linear interpolation
;
; in	fp0	sin(angle)
; out	fp0	angle (-PI/2 <= x <= PI/2)
; regs	d0 is overwritten; d1 is overwritten unless the input was clamped;
;	d2/a0 are saved and restored
;
; The input is converted to 16.16 fixed point. Values at or beyond +/-1.0
; bypass the table and return +/-PI/2 directly. Otherwise bits 31..8 form
; a signed table index (-256..255, asintabl points at the middle of the
; table) and bits 7..0 the interpolation fraction.

asinf
	fmul.s	#$47800000,fp0		; $10000
	fmove.l	fp0,d0			; d0 = input in 16.16 fixed point
	cmp.l	#-$10000,d0
	ble.s	.lowlim			; input <= -1.0
	cmp.l	#$10000,d0
	bge.s	.highlim		; input >= +1.0
	movem.l	d2/a0,-(sp)
	lea	asintabl,a0
	move.l	d0,d2
	and.w	#$ff,d0			; d0.w = fraction within the step (0..255)
	asr.l	#8,d2			; d2.l = signed table index
	lea	(a0,d2.l*4),a0		; a0 -> entry [index]
	move.l	(a0)+,d1		; d1 = entry [index]
	move.l	(a0),d2			; d2 = entry [index+1]
	sub.l	d1,d2			; d2 = step to the next entry
	muls.w	d2,d0			; step * fraction (16x16 -> 32 bits)
	asr.l	#8,d0			; ... / 256
	add.l	d1,d0			; d0 = angle, $10000 per turn
	movem.l	(sp)+,d2/a0
.done	fmove.l	d0,fp0
	fmul.s	#$38c90fdb,fp0		; (2*PI) / $10000
	rts
.highlim
	move.l	#$4000,d0		; +quarter turn = +PI/2
	bra.s	.done
.lowlim	move.l	#-$4000,d0		; -quarter turn = -PI/2
	bra.s	.done


; acosf -- arc cosine by table lookup with linear interpolation
;
; in	fp0	cos(angle)
; out	fp0	angle (0 <= x <= PI)
; regs	d0 is overwritten; d1 is overwritten unless the input was clamped;
;	d2/a0 are saved and restored
;
; The input is converted to 16.16 fixed point. Values at or beyond +/-1.0
; bypass the table: acos(+1) = 0 and acos(-1) = PI (half a turn, $8000).
; Otherwise bits 31..8 form a signed table index (-256..255, acostabl
; points at the middle of the table) and bits 7..0 the fraction.

acosf
	fmul.s	#$47800000,fp0		; $10000
	fmove.l	fp0,d0			; d0 = input in 16.16 fixed point
	cmp.l	#$10000,d0
	bge.s	.up			; input >= +1.0
	cmp.l	#-$10000,d0
	ble.s	.down			; input <= -1.0
	movem.l	d2/a0,-(sp)
	lea	acostabl,a0
	move.l	d0,d2
	asr.l	#8,d2			; d2.l = signed table index
	and.w	#$ff,d0			; d0.w = fraction within the step (0..255)
	move.l	(a0,d2.l*4),d1		; d1 = entry [index]
	move.l	4(a0,d2.l*4),d2		; d2 = entry [index+1]
	sub.l	d1,d2			; d2 = step to the next entry
	muls.w	d2,d0			; step * fraction (16x16 -> 32 bits)
	asr.l	#8,d0			; ... / 256
	add.l	d1,d0			; d0 = angle, $10000 per turn
	movem.l	(sp)+,d2/a0
.ok	fmove.l	d0,fp0
	fmul.s	#$38c90fdb,fp0		; (2*PI) / $10000
	rts
.up	moveq	#0,d0			; acos(x >= 1) = 0
	bra.s	.ok
.down	move.l	#$8000,d0		; acos(x <= -1) = PI (half a turn)
	bra.s	.ok


; atanf -- arc tangent by table lookup with linear interpolation
;
; in	fp0	tan(angle) == dy/dx
; out	fp0	angle (-PI/2 <= x <= PI/2)
; regs	d0-d1 are overwritten; d2/a0 are saved and restored
;
; Works on |x| in 16.16 fixed point and restores the sign at the end.
;   |x| <= 1.0 : direct lookup, index = bits 16..8, fraction = bits 7..0
;   |x| >  1.0 : atan(|x|) = PI/2 - atan(1/|x|), with 1/|x| = 2^32 / |x|
;
; The magnitude is treated as UNSIGNED throughout. A very negative input
; makes the float->integer conversion saturate at $80000000, which neg.l
; cannot make positive; a signed compare would then send it down the
; small-argument path and return 0 instead of -PI/2.

atanf
	fmul.s	#$47800000,fp0		; $10000
	fmove.l	fp0,d0			; d0 = x in 16.16 fixed point
	movem.l	d2/a0,-(sp)
	move.l	d0,-(sp)		; keep signed x for the final sign fix (sets N too)
	bpl.s	.pos
	neg.l	d0			; d0 = |x| as an unsigned value
.pos	lea	atantabl,a0
	cmp.l	#$10000,d0
	bhi.s	.more45			; |x| > 1.0 (unsigned compare, see header)
	move.l	d0,d2
	lsr.l	#8,d2			; d2 = table index (0..$100)
	and.w	#$ff,d0			; d0.w = fraction within the step (0..255)
	move.l	(a0,d2.w*4),d1		; d1 = entry [index]
	move.l	4(a0,d2.w*4),d2		; d2 = entry [index+1]
	sub.l	d1,d2			; d2 = step to the next entry
	muls.w	d2,d0			; step * fraction (16x16 -> 32 bits)
	asr.l	#8,d0			; ... / 256
	add.l	d1,d0			; d0 = atan(|x|), $10000 per turn
.sign	tst.l	(sp)+			; was x negative?
	bpl.s	.done
	neg.l	d0			; atan(-x) = -atan(x)
.done	movem.l	(sp)+,d2/a0
	fmove.l	d0,fp0
	fmul.s	#$38c90fdb,fp0		; (2*PI) / $10000
	rts
.more45
	moveq	#0,d2
	moveq	#1,d1			; d1:d2 = $1:00000000 = 1.0 in 32.32
	divu.l	d0,d1:d2		; d2 = 1/|x| in 16.16 (2..$ffff, as |x| > $10000)
	move.w	d2,d0
	lsr.w	#8,d2			; d2.w = table index (0..$ff)
	and.w	#$ff,d0			; d0.w = fraction within the step (0..255)
	move.l	(a0,d2.w*4),d1		; d1 = entry [index]
	move.l	4(a0,d2.w*4),d2		; d2 = entry [index+1]
	sub.l	d1,d2			; d2 = step to the next entry
	muls.w	d2,d0			; step * fraction (16x16 -> 32 bits)
	asr.l	#8,d0			; ... / 256
	add.l	d1,d0			; d0 = atan(1/|x|)
	sub.l	#$10000/4,d0
	neg.l	d0			; d0 = quarter turn - atan(1/|x|)
	bra.s	.sign


; atan2f -- full-circle arc tangent
;
; in	fp0	x
;	fp1	y
; out	fp0	angle (0 <= x < PI*2) == atan2(y, x)
; regs	d0-d1 and fp1 are overwritten; d2-d4/a0 are saved and restored
;
; Method:
;   1. x and y are converted to 16.16 integers, used for the zero tests,
;      the octant decision and the final quadrant fix-up.
;   2. The smaller magnitude is divided by the larger one in floating
;      point, giving a ratio in 0..1.0 ($10000), which is looked up in the
;      atan table (index = bits 16..8, fraction = bits 7..0).  When
;      |y| > |x| the result is reflected: quarter turn - atan(|x|/|y|).
;   3. The first-quadrant angle is mirrored into the proper quadrant.
;
; The ratio is handled as a LONG and clamped to $10000.  With word-sized
; operations a ratio of exactly 1.0 ($10000) wraps to 0, so |x| == |y|
; would yield 0 instead of PI/4.  The clamp is needed because the octant
; decision uses the rounded integers while the division uses the unrounded
; floats, so the quotient can end up marginally above 1.0.

atan2f
	fmul.s	#$47800000,fp0		; $10000
	fmul.s	#$47800000,fp1		; $10000
	fmove.l	fp0,d0			; d0 = x in 16.16 fixed point
	fmove.l	fp1,d1			; d1 = y in 16.16 fixed point
	movem.l	d2-d4/a0,-(sp)
	tst.l	d1
	beq	.yz			; y == 0: on the x axis
	tst.l	d0
	beq	.xz			; x == 0: on the y axis
	move.l	d0,d2
	bpl.s	.ok1
	neg.l	d2			; d2 = |x|
.ok1	move.l	d1,d3
	bpl.s	.ok2
	neg.l	d3			; d3 = |y|
.ok2	lea	atantabl,a0
	cmp.l	d2,d3
	bhi.s	.steep			; |y| > |x| (unsigned: magnitudes)

	fabs	fp0			; --- |y| <= |x|: angle = atan(|y|/|x|)
	fabs	fp1
	fmul.s	#$47800000,fp1		; $10000
	fdiv	fp0,fp1
	fmove.l	fp1,d3			; d3 = |y|/|x| in 16.16
	cmp.l	#$10000,d3
	bls.s	.r1
	move.l	#$10000,d3		; clamp to 1.0 (see header)
.r1	move.l	d3,d4
	lsr.l	#8,d3			; d3 = table index (0..$100)
	and.w	#$ff,d4			; d4.w = fraction within the step (0..255)
	move.l	(a0,d3.w*4),d2		; d2 = entry [index]
	move.l	4(a0,d3.w*4),d3		; d3 = entry [index+1]
	sub.l	d2,d3			; d3 = step to the next entry
	muls.w	d4,d3			; step * fraction (16x16 -> 32 bits)
	asr.l	#8,d3			; ... / 256
	add.l	d3,d2			; d2 = first-quadrant angle
	bra.s	.quad

.steep	fabs	fp0			; --- |y| > |x|: angle = PI/2 - atan(|x|/|y|)
	fabs	fp1
	fmul.s	#$47800000,fp0		; $10000
	fdiv	fp1,fp0
	fmove.l	fp0,d3			; d3 = |x|/|y| in 16.16
	cmp.l	#$10000,d3
	bls.s	.r2
	move.l	#$10000,d3		; clamp to 1.0 (see header)
.r2	move.l	d3,d4
	lsr.l	#8,d3			; d3 = table index (0..$100)
	and.w	#$ff,d4			; d4.w = fraction within the step (0..255)
	move.l	(a0,d3.w*4),d2		; d2 = entry [index]
	move.l	4(a0,d3.w*4),d3		; d3 = entry [index+1]
	sub.l	d2,d3			; d3 = step to the next entry
	muls.w	d4,d3			; step * fraction (16x16 -> 32 bits)
	asr.l	#8,d3			; ... / 256
	add.l	d3,d2
	sub.l	#$10000/4,d2
	neg.l	d2			; d2 = quarter turn - atan(|x|/|y|)

.quad	tst.l	d1			; d2 = angle a within the first quadrant
	bmi.s	.yneg
	tst.l	d0
	bpl.s	.done			; x > 0, y > 0:  a
	sub.l	#$10000/2,d2
	neg.l	d2			; x < 0, y > 0:  PI - a
	bra.s	.done
.yneg	sub.l	#$10000,d2
	neg.l	d2			; x > 0, y < 0:  2*PI - a
	tst.l	d0
	bpl.s	.done
	sub.l	#$10000+$10000/2,d2
	neg.l	d2			; x < 0, y < 0:  PI + a
.done	move.l	d2,d0
	andi.l	#$ffff,d0		; wrap a full turn ($10000) back to 0
	movem.l	(sp)+,d2-d4/a0
	fmove.l	d0,fp0
	fmul.s	#$38c90fdb,fp0		; (2*PI) / $10000
	rts
.yz	tst.l	d0			; y == 0
	bpl.s	.yz2
	move.l	#$10000/2,d2		; negative x axis: PI
	bra.s	.done
.yz2	moveq	#0,d2			; positive x axis (or x == y == 0): 0
	bra.s	.done
.xz	tst.l	d1			; x == 0, y != 0
	bpl.s	.xz2
	move.l	#$10000*3/4,d2		; negative y axis: 3*PI/2
	bra.s	.done
.xz2	move.l	#$10000/4,d2		; positive y axis: PI/2
	bra.s	.done


	section	data,data

; Lookup tables, read by the routines above as arrays of longwords.
;
; sin/cos/tan tables are indexed with a 10-bit index (1024 steps per turn)
; and their entries are multiplied by 1/$10000 after interpolation.
; asin/acos/atan tables are indexed by a fixed-point argument shifted right
; by 8, and their entries are angles with $10000 units per full turn.
;
; NOTE(review): every routine interpolates between entry [i] and entry
; [i+1], so the highest index also reads one entry past it -- confirm that
; each .bin file carries the extra trailing entry (or that whatever follows
; it in memory is the intended value).

sintabl	incbin	dat/trig/sincos1024l65536.bin
; Cosine view of the same data: 256 entries (a quarter of 1024) further on.
costabl EQU	sintabl+256*4
tantabl	incbin	dat/trig/tan1024l65536.bin

; asintabl/acostabl point 256 entries INTO the data that follows, so that
; asinf/acosf can use a signed index (-256..255) directly.
asintabl EQU	*+256*4
	incbin	dat/trig/asin512l65536.bin
acostabl EQU	*+256*4
	incbin	dat/trig/acos512l65536.bin
; atanf/atan2f use indices 0..$100 for arguments 0..1.0.
atantabl incbin	dat/trig/atan256l65536.bin