// guest openmusiclabs 6.8.11
// taken from http://mekonik.wordpress.com/2009/03/18/arduino-avr-gcc-multiplication/

// MultiU16X16to32(longRes, intIn1, intIn2)
//   longRes = intIn1 * intIn2      (unsigned 16 x unsigned 16 -> 32 bit)
//
// longRes : 32-bit lvalue that receives the product (bytes %A0..%D0).
// intIn1, intIn2 : 16-bit operands, low byte %A / high byte %B.
//
// The product is built from four 8x8 hardware multiplies; each `mul`
// leaves its 16-bit result in r1:r0:
//   A1*A2 -> bytes 0..1,  B1*B2 -> bytes 2..3,
//   B2*A1 and B1*A2 are added in at byte 1 with carry rippling upwards.
// uses:
//   r26 to store 0 (lets "adc reg, r26" add just the carry)
//   r0/r1 as the multiplier result; r1 is cleared again at the end because
//   avr-gcc expects it to hold zero.
// The output is early-clobber ("=&r") because it is written before the
// inputs have been read for the last time.
#define MultiU16X16to32(longRes, intIn1, intIn2) \
asm volatile ( \
  "clr r26 \n\t"        /* r26 = 0, carry-only addend           */ \
  "mul %A1, %A2 \n\t"   /* low  * low                            */ \
  "movw %A0, r0 \n\t"   /* -> result bytes 0..1                  */ \
  "mul %B1, %B2 \n\t"   /* high * high                           */ \
  "movw %C0, r0 \n\t"   /* -> result bytes 2..3                  */ \
  "mul %B2, %A1 \n\t"   /* first cross product, weight 2^8       */ \
  "add %B0, r0 \n\t" \
  "adc %C0, r1 \n\t" \
  "adc %D0, r26 \n\t"   /* propagate carry into byte 3           */ \
  "mul %B1, %A2 \n\t"   /* second cross product, weight 2^8      */ \
  "add %B0, r0 \n\t" \
  "adc %C0, r1 \n\t" \
  "adc %D0, r26 \n\t" \
  "clr r1 \n\t"         /* restore the compiler's zero register  */ \
  : \
  "=&r" (longRes) \
  : \
  "a" (intIn1), \
  "a" (intIn2) \
  : \
  "r26" \
)

// MultiU16X16toH16(intRes, intIn1, intIn2)
//   intRes = (intIn1 * intIn2) >> 16   (unsigned, upper 16 bits of the product)
//
// intRes : 16-bit lvalue that receives bytes 2..3 of the 32-bit product.
// intIn1, intIn2 : 16-bit operands, low byte %A / high byte %B.
//
// Same four partial products as the full 32-bit multiply, but byte 0 is
// never stored and byte 1 is only kept in a scratch register so that its
// carries reach the returned upper half.
// uses:
//   r26 to store 0
//   r27 to store the byte 1 of the 32bit result
//   r0/r1 as the multiplier result; r1 is cleared again at the end because
//   avr-gcc expects it to hold zero.
#define MultiU16X16toH16(intRes, intIn1, intIn2) \
asm volatile ( \
  "clr r26 \n\t"        /* r26 = 0, carry-only addend            */ \
  "mul %A1, %A2 \n\t"   /* low * low                              */ \
  "mov r27, r1 \n\t"    /* keep byte 1, drop byte 0               */ \
  "mul %B1, %B2 \n\t"   /* high * high                            */ \
  "movw %A0, r0 \n\t"   /* -> product bytes 2..3 (the result)     */ \
  "mul %B2, %A1 \n\t"   /* first cross product, weight 2^8        */ \
  "add r27, r0 \n\t" \
  "adc %A0, r1 \n\t" \
  "adc %B0, r26 \n\t" \
  "mul %B1, %A2 \n\t"   /* second cross product, weight 2^8       */ \
  "add r27, r0 \n\t" \
  "adc %A0, r1 \n\t" \
  "adc %B0, r26 \n\t" \
  "clr r1 \n\t"         /* restore the compiler's zero register   */ \
  : \
  "=&r" (intRes) \
  : \
  "a" (intIn1), \
  "a" (intIn2) \
  : \
  "r26" , "r27" \
)

// MultiU16X16toH16Round(intRes, intIn1, intIn2)
//   intRes = (intIn1 * intIn2 + 0x8000) >> 16
//   i.e. the upper 16 bits of the unsigned product, rounded using bit 15
//   (the MSB of the lower 16 bits).
//
// intRes : 16-bit lvalue that receives the rounded upper half.
// intIn1, intIn2 : 16-bit operands, low byte %A / high byte %B.
//
// uses:
//   r26 to store 0
//   r27 to store the byte 1 of the 32bit result
//   r0/r1 as the multiplier result; r1 is cleared again at the end because
//   avr-gcc expects it to hold zero.
// 21 cycles
#define MultiU16X16toH16Round(intRes, intIn1, intIn2) \
asm volatile ( \
  "clr r26 \n\t"        /* r26 = 0, carry-only addend            */ \
  "mul %A1, %A2 \n\t"   /* low * low                              */ \
  "mov r27, r1 \n\t"    /* keep byte 1, drop byte 0               */ \
  "mul %B1, %B2 \n\t"   /* high * high                            */ \
  "movw %A0, r0 \n\t"   /* -> product bytes 2..3 (the result)     */ \
  "mul %B2, %A1 \n\t"   /* first cross product, weight 2^8        */ \
  "add r27, r0 \n\t" \
  "adc %A0, r1 \n\t" \
  "adc %B0, r26 \n\t" \
  "mul %B1, %A2 \n\t"   /* second cross product, weight 2^8       */ \
  "add r27, r0 \n\t" \
  "adc %A0, r1 \n\t" \
  "adc %B0, r26 \n\t" \
  "lsl r27 \n\t"        /* carry = bit 15 of the product          */ \
  "adc %A0, r26 \n\t"   /* round: add that bit to the upper half  */ \
  "adc %B0, r26 \n\t" \
  "clr r1 \n\t"         /* restore the compiler's zero register   */ \
  : \
  "=&r" (intRes) \
  : \
  "a" (intIn1), \
  "a" (intIn2) \
  : \
  "r26" , "r27" \
)

// MultiS16X16to32(longRes, intIn1, intIn2)
//   longRes = intIn1 * intIn2      (signed 16 x signed 16 -> signed 32 bit)
//
// longRes : 32-bit lvalue that receives the product (bytes %A0..%D0).
// intIn1, intIn2 : signed 16-bit operands, low byte %A / high byte %B.
//
// Partial products: low*low is unsigned (`mul`), high*high is signed
// (`muls`), and the two cross terms are signed-high * unsigned-low
// (`mulsu`). After each `mulsu` the carry flag holds the sign of that
// partial product, so "sbc %D0, r26" subtracts it from the top byte to
// sign-extend the cross term before it is added in.
// The "a" constraint keeps both inputs in r16..r23, which `mulsu` requires.
// uses:
//   r26 to store 0
//   r0/r1 as the multiplier result; r1 is cleared again at the end because
//   avr-gcc expects it to hold zero.
// 22 cycles (figure from the original note; not re-measured)
#define MultiS16X16to32(longRes, intIn1, intIn2) \
asm volatile ( \
  "clr r26 \n\t"         /* r26 = 0, carry-only operand           */ \
  "mul %A1, %A2 \n\t"    /* low * low (unsigned)                   */ \
  "movw %A0, r0 \n\t"    /* -> result bytes 0..1                   */ \
  "muls %B1, %B2 \n\t"   /* high * high (signed)                   */ \
  "movw %C0, r0 \n\t"    /* -> result bytes 2..3                   */ \
  "mulsu %B2, %A1 \n\t"  /* signed high(in2) * unsigned low(in1)   */ \
  "sbc %D0, r26 \n\t"    /* sign-extend the cross term             */ \
  "add %B0, r0 \n\t" \
  "adc %C0, r1 \n\t" \
  "adc %D0, r26 \n\t" \
  "mulsu %B1, %A2 \n\t"  /* signed high(in1) * unsigned low(in2)   */ \
  "sbc %D0, r26 \n\t"    /* sign-extend the cross term             */ \
  "add %B0, r0 \n\t" \
  "adc %C0, r1 \n\t" \
  "adc %D0, r26 \n\t" \
  "clr r1 \n\t"          /* restore the compiler's zero register   */ \
  : \
  "=&r" (longRes) \
  : \
  "a" (intIn1), \
  "a" (intIn2) \
  : \
  "r26" \
)

// MultiS16X16toH16(intRes, intIn1, intIn2)
//   intRes = (intIn1 * intIn2) >> 16   (signed 16 x signed 16, upper 16 bits)
//
// intRes : 16-bit lvalue that receives bytes 2..3 of the signed 32-bit product.
// intIn1, intIn2 : signed 16-bit operands, low byte %A / high byte %B.
//
// Same partial products as the full signed 32-bit multiply; byte 0 is
// dropped and byte 1 lives only in a scratch register so its carries
// reach the returned upper half. After each `mulsu` the carry flag holds
// the sign of the cross term, and "sbc %B0, r26" subtracts it from the
// top byte to sign-extend that term.
// uses:
//   r26 to store 0
//   r27 to store the byte 1 of the 32bit result
//   r0/r1 as the multiplier result; r1 is cleared again at the end because
//   avr-gcc expects it to hold zero.
#define MultiS16X16toH16(intRes, intIn1, intIn2) \
asm volatile ( \
  "clr r26 \n\t"         /* r26 = 0, carry-only operand           */ \
  "mul %A1, %A2 \n\t"    /* low * low (unsigned)                   */ \
  "mov r27, r1 \n\t"     /* keep byte 1, drop byte 0               */ \
  "muls %B1, %B2 \n\t"   /* high * high (signed)                   */ \
  "movw %A0, r0 \n\t"    /* -> product bytes 2..3 (the result)     */ \
  "mulsu %B2, %A1 \n\t"  /* signed high(in2) * unsigned low(in1)   */ \
  "sbc %B0, r26 \n\t"    /* sign-extend the cross term             */ \
  "add r27, r0 \n\t" \
  "adc %A0, r1 \n\t" \
  "adc %B0, r26 \n\t" \
  "mulsu %B1, %A2 \n\t"  /* signed high(in1) * unsigned low(in2)   */ \
  "sbc %B0, r26 \n\t"    /* sign-extend the cross term             */ \
  "add r27, r0 \n\t" \
  "adc %A0, r1 \n\t" \
  "adc %B0, r26 \n\t" \
  "clr r1 \n\t"          /* restore the compiler's zero register   */ \
  : \
  "=&r" (intRes) \
  : \
  "a" (intIn1), \
  "a" (intIn2) \
  : \
  "r26", "r27" \
)

// MultiSU16X16to32(longRes, intIn1, intIn2)
//   multiplies a signed and an unsigned 16 bit int with a 32 bit result:
//   longRes = intIn1 * intIn2, where intIn1 is signed and intIn2 is unsigned.
//
// longRes : 32-bit lvalue that receives the signed product (bytes %A0..%D0).
// intIn1  : signed 16-bit operand   (its high byte %B1 is the `mulsu` signed side).
// intIn2  : unsigned 16-bit operand.
//
// Partial products: low*low and high(in2)*low(in1) are unsigned (`mul`);
// the two terms involving the high byte of intIn1 use `mulsu`. After the
// second `mulsu` the carry flag holds the sign of that cross term, and
// "sbc %D0, r26" subtracts it from the top byte to sign-extend it.
// uses:
//   r26 to store 0
//   r0/r1 as the multiplier result; r1 is cleared again at the end because
//   avr-gcc expects it to hold zero.
#define MultiSU16X16to32(longRes, intIn1, intIn2) \
asm volatile ( \
  "clr r26 \n\t"         /* r26 = 0, carry-only operand           */ \
  "mul %A1, %A2 \n\t"    /* low * low (unsigned)                   */ \
  "movw %A0, r0 \n\t"    /* -> result bytes 0..1                   */ \
  "mulsu %B1, %B2 \n\t"  /* signed high(in1) * unsigned high(in2)  */ \
  "movw %C0, r0 \n\t"    /* -> result bytes 2..3                   */ \
  "mul %B2, %A1 \n\t"    /* unsigned cross term, weight 2^8        */ \
  "add %B0, r0 \n\t" \
  "adc %C0, r1 \n\t" \
  "adc %D0, r26 \n\t" \
  "mulsu %B1, %A2 \n\t"  /* signed high(in1) * unsigned low(in2)   */ \
  "sbc %D0, r26 \n\t"    /* sign-extend the cross term             */ \
  "add %B0, r0 \n\t" \
  "adc %C0, r1 \n\t" \
  "adc %D0, r26 \n\t" \
  "clr r1 \n\t"          /* restore the compiler's zero register   */ \
  : \
  "=&r" (longRes) \
  : \
  "a" (intIn1), \
  "a" (intIn2) \
  : \
  "r26" \
)

// MultiSU16X16toH16(intRes, intIn1, intIn2)
//   multiplies signed x unsigned int and returns the highest 16 bits of the
//   result: intRes = (intIn1 * intIn2) >> 16, intIn1 signed, intIn2 unsigned.
//
// intRes : 16-bit lvalue that receives bytes 2..3 of the signed 32-bit product.
// intIn1 : signed 16-bit operand   (its high byte %B1 is the `mulsu` signed side).
// intIn2 : unsigned 16-bit operand.
//
// Byte 0 of the product is dropped and byte 1 lives only in a scratch
// register so its carries reach the returned upper half. After the second
// `mulsu` the carry flag holds the sign of that cross term, and
// "sbc %B0, r26" subtracts it from the top byte to sign-extend it.
// uses:
//   r26 to store 0
//   r27 to store the byte 1 of the 32bit result
//   r0/r1 as the multiplier result; r1 is cleared again at the end because
//   avr-gcc expects it to hold zero.
#define MultiSU16X16toH16(intRes, intIn1, intIn2) \
asm volatile ( \
  "clr r26 \n\t"         /* r26 = 0, carry-only operand           */ \
  "mul %A1, %A2 \n\t"    /* low * low (unsigned)                   */ \
  "mov r27, r1 \n\t"     /* keep byte 1, drop byte 0               */ \
  "mulsu %B1, %B2 \n\t"  /* signed high(in1) * unsigned high(in2)  */ \
  "movw %A0, r0 \n\t"    /* -> product bytes 2..3 (the result)     */ \
  "mul %B2, %A1 \n\t"    /* unsigned cross term, weight 2^8        */ \
  "add r27, r0 \n\t" \
  "adc %A0, r1 \n\t" \
  "adc %B0, r26 \n\t" \
  "mulsu %B1, %A2 \n\t"  /* signed high(in1) * unsigned low(in2)   */ \
  "sbc %B0, r26 \n\t"    /* sign-extend the cross term             */ \
  "add r27, r0 \n\t" \
  "adc %A0, r1 \n\t" \
  "adc %B0, r26 \n\t" \
  "clr r1 \n\t"          /* restore the compiler's zero register   */ \
  : \
  "=&r" (intRes) \
  : \
  "a" (intIn1), \
  "a" (intIn2) \
  : \
  "r26", "r27" \
)

// MultiSU16X16toH16Round(intRes, intIn1, intIn2)
//   multiplies signed x unsigned int and returns the highest 16 bits of the
//   result, rounded based on the MSB of the lower 16 bits:
//   intRes = (intIn1 * intIn2 + 0x8000) >> 16, intIn1 signed, intIn2 unsigned.
//
// intRes : 16-bit lvalue that receives the rounded upper half.
// intIn1 : signed 16-bit operand   (its high byte %B1 is the `mulsu` signed side).
// intIn2 : unsigned 16-bit operand.
//
// uses:
//   r26 to store 0
//   r27 to store the byte 1 of the 32bit result
//   r0/r1 as the multiplier result; r1 is cleared again at the end because
//   avr-gcc expects it to hold zero.
// 22 cycles
#define MultiSU16X16toH16Round(intRes, intIn1, intIn2) \
asm volatile ( \
  "clr r26 \n\t"         /* r26 = 0, carry-only operand           */ \
  "mul %A1, %A2 \n\t"    /* low * low (unsigned)                   */ \
  "mov r27, r1 \n\t"     /* keep byte 1, drop byte 0               */ \
  "mulsu %B1, %B2 \n\t"  /* signed high(in1) * unsigned high(in2)  */ \
  "movw %A0, r0 \n\t"    /* -> product bytes 2..3 (the result)     */ \
  "mul %A1, %B2 \n\t"    /* unsigned cross term, weight 2^8        */ \
  "add r27, r0 \n\t" \
  "adc %A0, r1 \n\t" \
  "adc %B0, r26 \n\t" \
  "mulsu %B1, %A2 \n\t"  /* signed high(in1) * unsigned low(in2)   */ \
  "sbc %B0, r26 \n\t"    /* sign-extend the cross term             */ \
  "add r27, r0 \n\t" \
  "adc %A0, r1 \n\t" \
  "adc %B0, r26 \n\t" \
  "lsl r27 \n\t"         /* carry = bit 15 of the product          */ \
  "adc %A0, r26 \n\t"    /* round: add that bit to the upper half  */ \
  "adc %B0, r26 \n\t" \
  "clr r1 \n\t"          /* restore the compiler's zero register   */ \
  : \
  "=&r" (intRes) \
  : \
  "a" (intIn1), \
  "a" (intIn2) \
  : \
  "r26", "r27" \
)