; ************************************
; * 16-bit Mul test  by PEK '2000
; * With Atmels optimized unsigned
; * routines.
; ************************************


.include "8515def.inc"


; ************************************
; * Register and Variable Definitions             
; ************************************
.cseg
.def	temp	=r16

.equ	DATA	=946

; ************************************
; * Start of Code                    
; ************************************
.cseg
.org 0
	rjmp	Start


;***************************************************************************
;*
;* "mpy16u" - 16x16 Bit Unsigned Multiplication
;*
;* This subroutine multiplies the two 16-bit register variables 
;* mp16uH:mp16uL and mc16uH:mc16uL.
;* The result is placed in m16u3:m16u2:m16u1:m16u0.
;*  
;* Number of words	:14 + return
;* Number of cycles	:153 + return
;* Low registers used	:None
;* High registers used  :7 (mp16uL,mp16uH,mc16uL/m16u0,mc16uH/m16u1,m16u2,
;*                          m16u3,mcnt16u)	
;*
;***************************************************************************

;***** Subroutine Register Variables

.def	mc16uL	=r16		;multiplicand low byte
.def	mc16uH	=r17		;multiplicand high byte
.def	mp16uL	=r18		;multiplier low byte
.def	mp16uH	=r19		;multiplier high byte
.def	m16u0	=r18		;result byte 0 (LSB)
.def	m16u1	=r19		;result byte 1
.def	m16u2	=r20		;result byte 2
.def	m16u3	=r21		;result byte 3 (MSB)
.def	mcnt16u	=r22		;loop counter

;***** Code

mpy16u:	clr	m16u3		;clear 2 highest bytes of result
	clr	m16u2
	ldi	mcnt16u,16	;init loop counter
	lsr	mp16uH
	ror	mp16uL

m16u_1:	brcc	noad8		;if bit 0 of multiplier set
	add	m16u2,mc16uL	;add multiplicand Low to byte 2 of res
	adc	m16u3,mc16uH	;add multiplicand high to byte 3 of res
noad8:	ror	m16u3		;shift right result byte 3
	ror	m16u2		;rotate right result byte 2
	ror	m16u1		;rotate result byte 1 and multiplier High
	ror	m16u0		;rotate result byte 0 and multiplier Low
	dec	mcnt16u		;decrement loop counter
	brne	m16u_1		;if not done, loop more
	ret


; ** Start of code *******************
Start:	ldi	temp,low(RAMEND)
	out	SPL,temp		; Set stack pointer to last internal RAM location
	ldi	temp,high(RAMEND)
	out	SPH,temp

	ldi	mc16uL,low(360)		; Angle = DATA*360 / 1024
	ldi	mc16uH,high(360)
	ldi	mp16uL,low(DATA)
	ldi	mp16uH,high(DATA)

	rcall	mpy16u			; Do the Multiplication...
	lsr	m16u2			; and the Division by 1024
	ror	m16u1
	lsr	m16u2
	ror	m16u1			; The result is in m16u2:m16u1

Continue_WD:
	rjmp	Continue_WD