mirror of
https://github.com/signalwire/freeswitch.git
synced 2025-02-05 10:34:54 +00:00
Thanks to Phil Zimmermann for the code and for the license exception we needed to include it. There remains some build system integration work to be done before this code will build properly in the FreeSWITCH tree.
461 lines
14 KiB
C
461 lines
14 KiB
C
/*
|
|
* Copyright (c) 1995 Colin Plumb. All rights reserved.
|
|
* For licensing and other legal details, see the file legal.c.
|
|
*
|
|
* lbn68000.c - 16-bit bignum primitives for the 68000 (or 68010) processors.
|
|
*
|
|
* This was written for Metrowerks C, and while it should be reasonably
|
|
* portable, NOTE that Metrowerks lets a callee trash a0, a1, d0, d1, and d2.
|
|
* Some 680x0 compilers make d2 callee-save, so instructions to save it
|
|
* will have to be added.
|
|
*
|
|
* This code supports 16 or 32-bit ints, based on UINT_MAX.
|
|
* Regardless of UINT_MAX, only bignums up to 64K words (1 million bits)
|
|
* are supported. (68k hackers will recognize this as a consequence of
|
|
* using dbra.)
|
|
*
|
|
* These primitives use little-endian word order.
|
|
* (The order of bytes within words is irrelevant to this issue.)
|
|
*/
|
|
|
|
#include <limits.h>
|
|
|
|
#include "lbn.h" /* Should include lbn68000.h */
|
|
|
|
/*
|
|
* The Metrowerks C compiler (1.2.2) produces bad 68k code for the
|
|
* following input, which happens to be the inner loop of lbnSub1,
|
|
* so a few less than critical routines have been recoded in assembly
|
|
* to avoid the bug. (Optimizer on or off does not matter.)
|
|
*
|
|
* unsigned
|
|
* decrement(unsigned *num, unsigned len)
|
|
* {
|
|
* do {
|
|
* if ((*num++)-- != 0)
|
|
* return 0;
|
|
* } while (--len);
|
|
* return 1;
|
|
* }
|
|
*/
|
|
asm BNWORD16
|
|
lbnSub1_16(BNWORD16 *num, unsigned len, BNWORD16 borrow)
|
|
{
|
|
movea.l 4(sp),a0 /* num */
|
|
#if UINT_MAX == 0xffff
|
|
move.w 10(sp),d0 /* borrow */
|
|
#else
|
|
move.w 12(sp),d0 /* borrow */
|
|
#endif
|
|
sub.w d0,(a0)+
|
|
bcc done
|
|
#if UINT_MAX == 0xffff
|
|
move.w 8(sp),d0 /* len */
|
|
#else
|
|
move.w 10(sp),d0 /* len */
|
|
#endif
|
|
subq.w #2,d0
|
|
bcs done
|
|
loop:
|
|
subq.w #1,(a0)+
|
|
dbcc d0,loop
|
|
done:
|
|
moveq.l #0,d0
|
|
addx.w d0,d0
|
|
rts
|
|
}
|
|
|
|
asm BNWORD16
|
|
lbnAdd1_16(BNWORD16 *num, unsigned len, BNWORD16 carry)
|
|
{
|
|
movea.l 4(sp),a0 /* num */
|
|
#if UINT_MAX == 0xffff
|
|
move.w 10(sp),d0 /* carry */
|
|
#else
|
|
move.w 12(sp),d0 /* carry */
|
|
#endif
|
|
add.w d0,(a0)+
|
|
bcc done
|
|
#if UINT_MAX == 0xffff
|
|
move.w 8(sp),d0 /* len */
|
|
#else
|
|
move.w 10(sp),d0 /* len */
|
|
#endif
|
|
subq.w #2,d0
|
|
bcs done
|
|
loop:
|
|
addq.w #1,(a0)+
|
|
dbcc d0,loop
|
|
done:
|
|
moveq.l #0,d0
|
|
addx.w d0,d0
|
|
rts
|
|
}
|
|
|
|
asm void
|
|
lbnMulN1_16(BNWORD16 *out, BNWORD16 const *in, unsigned len, BNWORD16 k)
|
|
{
|
|
move.w d3,-(sp) /* 2 bytes of stack frame */
|
|
move.l 2+4(sp),a1 /* out */
|
|
move.l 2+8(sp),a0 /* in */
|
|
#if UINT_MAX == 0xffff
|
|
move.w 2+12(sp),d3 /* len */
|
|
move.w 2+14(sp),d2 /* k */
|
|
#else
|
|
move.w 2+14(sp),d3 /* len (low 16 bits) */
|
|
move.w 2+16(sp),d2 /* k */
|
|
#endif
|
|
|
|
move.w (a0)+,d1 /* First multiply */
|
|
mulu.w d2,d1
|
|
move.w d1,(a1)+
|
|
clr.w d1
|
|
swap d1
|
|
|
|
subq.w #1,d3 /* Setup for loop unrolling */
|
|
lsr.w #1,d3
|
|
bcs.s m16_even
|
|
beq.s m16_short
|
|
|
|
subq.w #1,d3 /* Set up software pipeline properly */
|
|
move.l d1,d0
|
|
|
|
m16_loop:
|
|
move.w (a0)+,d1
|
|
mulu.w d2,d1
|
|
add.l d0,d1
|
|
move.w d1,(a1)+
|
|
clr.w d1
|
|
swap d1
|
|
m16_even:
|
|
|
|
move.w (a0)+,d0
|
|
mulu.w d2,d0
|
|
add.l d1,d0
|
|
move.w d0,(a1)+
|
|
clr.w d0
|
|
swap d0
|
|
|
|
dbra d3,m16_loop
|
|
|
|
move.w d0,(a1)
|
|
move.w (sp)+,d3
|
|
rts
|
|
m16_short:
|
|
move.w d1,(a1)
|
|
move.w (sp)+,d3
|
|
rts
|
|
}
|
|
|
|
|
|
asm BNWORD16
|
|
lbnMulAdd1_16(BNWORD16 *out, BNWORD16 const *in, unsigned len, BNWORD16 k)
|
|
{
|
|
move.w d4,-(sp)
|
|
clr.w d4
|
|
move.w d3,-(sp) /* 4 bytes of stack frame */
|
|
move.l 4+4(sp),a1 /* out */
|
|
move.l 4+8(sp),a0 /* in */
|
|
#if UINT_MAX == 0xffff
|
|
move.w 4+12(sp),d3 /* len */
|
|
move.w 4+14(sp),d2 /* k */
|
|
#else
|
|
move.w 4+14(sp),d3 /* len (low 16 bits) */
|
|
move.w 4+16(sp),d2 /* k */
|
|
#endif
|
|
|
|
move.w (a0)+,d1 /* First multiply */
|
|
mulu.w d2,d1
|
|
add.w d1,(a1)+
|
|
clr.w d1
|
|
swap d1
|
|
addx.w d4,d1
|
|
|
|
subq.w #1,d3 /* Setup for loop unrolling */
|
|
lsr.w #1,d3
|
|
bcs.s ma16_even
|
|
beq.s ma16_short
|
|
|
|
subq.w #1,d3 /* Set up software pipeline properly */
|
|
move.l d1,d0
|
|
|
|
ma16_loop:
|
|
move.w (a0)+,d1
|
|
mulu.w d2,d1
|
|
add.l d0,d1
|
|
add.w d1,(a1)+
|
|
clr.w d1
|
|
swap d1
|
|
addx.w d4,d1
|
|
ma16_even:
|
|
|
|
move.w (a0)+,d0
|
|
mulu.w d2,d0
|
|
add.l d1,d0
|
|
add.w d0,(a1)+
|
|
clr.w d0
|
|
swap d0
|
|
addx.w d4,d0
|
|
|
|
dbra d3,ma16_loop
|
|
|
|
move.w (sp)+,d3
|
|
move.w (sp)+,d4
|
|
rts
|
|
ma16_short:
|
|
move.w (sp)+,d3
|
|
move.l d1,d0
|
|
move.w (sp)+,d4
|
|
rts
|
|
}
|
|
|
|
|
|
|
|
asm BNWORD16
|
|
lbnMulSub1_16(BNWORD16 *out, BNWORD16 const *in, unsigned len, BNWORD16 k)
|
|
{
|
|
move.w d4,-(sp)
|
|
clr.w d4
|
|
move.w d3,-(sp) /* 4 bytes of stack frame */
|
|
move.l 4+4(sp),a1 /* out */
|
|
move.l 4+8(sp),a0 /* in */
|
|
#if UINT_MAX == 0xffff
|
|
move.w 4+12(sp),d3 /* len */
|
|
move.w 4+14(sp),d2 /* k */
|
|
#else
|
|
move.w 4+14(sp),d3 /* len (low 16 bits) */
|
|
move.w 4+16(sp),d2 /* k */
|
|
#endif
|
|
|
|
move.w (a0)+,d1 /* First multiply */
|
|
mulu.w d2,d1
|
|
sub.w d1,(a1)+
|
|
clr.w d1
|
|
swap d1
|
|
addx.w d4,d1
|
|
|
|
subq.w #1,d3 /* Setup for loop unrolling */
|
|
lsr.w #1,d3
|
|
bcs.s ms16_even
|
|
beq.s ms16_short
|
|
|
|
subq.w #1,d3 /* Set up software pipeline properly */
|
|
move.l d1,d0
|
|
|
|
ms16_loop:
|
|
move.w (a0)+,d1
|
|
mulu.w d2,d1
|
|
add.l d0,d1
|
|
sub.w d1,(a1)+
|
|
clr.w d1
|
|
swap d1
|
|
addx.w d4,d1
|
|
ms16_even:
|
|
|
|
move.w (a0)+,d0
|
|
mulu.w d2,d0
|
|
add.l d1,d0
|
|
sub.w d0,(a1)+
|
|
clr.w d0
|
|
swap d0
|
|
addx.w d4,d0
|
|
|
|
dbra d3,ms16_loop
|
|
|
|
move.w (sp)+,d3
|
|
move.w (sp)+,d4
|
|
rts
|
|
ms16_short:
|
|
move.w (sp)+,d3
|
|
move.l d1,d0
|
|
move.w (sp)+,d4
|
|
rts
|
|
}
|
|
|
|
/* The generic long/short divide doesn't know that nh < d */
|
|
asm BNWORD16
|
|
lbnDiv21_16(BNWORD16 *q, BNWORD16 nh, BNWORD16 nl, BNWORD16 d)
|
|
{
|
|
move.l 8(sp),d0 /* nh *and* nl */
|
|
divu.w 12(sp),d0
|
|
move.l 4(sp),a0
|
|
move.w d0,(a0)
|
|
clr.w d0
|
|
swap d0
|
|
rts
|
|
}
|
|
|
|
asm unsigned
|
|
lbnModQ_16(BNWORD16 const *n, unsigned len, BNWORD16 d)
|
|
{
|
|
move.l 4(sp),a0 /* n */
|
|
moveq.l #0,d1
|
|
#if UINT_MAX == 0xffff
|
|
move.w 8(sp),d1 /* len */
|
|
move.w 10(sp),d2 /* d */
|
|
#else
|
|
move.w 10(sp),d1 /* len (low 16 bits) */
|
|
move.w 12(sp),d2 /* d */
|
|
#endif
|
|
|
|
add.l d1,a0
|
|
add.l d1,a0 /* n += len */
|
|
moveq.l #0,d0
|
|
subq.w #1,d1
|
|
|
|
mq16_loop:
|
|
move.w -(a0),d0 /* Assemble remainder and new word */
|
|
divu.w d2,d0 /* Put remainder in high half of d0 */
|
|
dbra d1,mq16_loop
|
|
|
|
mq16_done:
|
|
clr.w d0
|
|
swap d0
|
|
rts
|
|
}
|
|
|
|
/*
|
|
* Detect if this is a 32-bit processor (68020+ *or* CPU32).
|
|
* Both the 68020+ and CPU32 processors (which have 32x32->64-bit
|
|
* multiply, what the 32-bit math library wants) support scaled indexed
|
|
* addressing. The 68000 and 68010 ignore the scale selection
|
|
* bits, treating it as *1 all the time. So a 32-bit processor
|
|
* will evaluate -2(a0,a0.w*2) as 1+1*2-2 = 1.
|
|
* A 16-bit processor will compute 1+1-2 = 0.
|
|
*
|
|
* Thus, the return value will indicate whether the chip this is
|
|
* running on supports 32x32->64-bit multiply (mulu.l).
|
|
*/
|
|
asm int
|
|
is68020(void)
|
|
{
|
|
machine 68020
|
|
lea 1,a0
|
|
#if 0
|
|
lea -2(a0,a0.w*2),a0 /* Metrowerks won't assemble this, arrgh */
|
|
#else
|
|
dc.w 0x41f0, 0x82fe
|
|
#endif
|
|
move.l a0,d0
|
|
rts
|
|
}
|
|
/*
|
|
* Since I had to hand-assemble that fancy addressing mode, I had to study
|
|
* up on 680x0 addressing modes.
|
|
* A summary of 680x0 addressing modes.
|
|
* A 68000 effective address specifies an operand on an instruction, which
|
|
* may be a register or in memory. It is made up of a 3-bit mode and a
|
|
* 3-bit register specifier. The meanings of the various modes are:
|
|
*
|
|
* 000 reg - Dn, n specified by "reg"
|
|
* 001 reg - An, n specified by "reg"
|
|
* 010 reg - (An)
|
|
* 011 reg - (An)+
|
|
* 100 reg - -(An)
|
|
* 101 reg - d16(An), one 16-bit displacement word follows, sign-extended
|
|
* 110 reg - Fancy addressing mode off of An, see extension word below
|
|
* 111 000 - abs.W, one 16-bit signed absolute address follows
|
|
* 111 001 - abs.L, one 32-bit absolute address follows
|
|
* 111 010 - d16(PC), one 16-bit displacement word follows, sign-extended
|
|
* 111 011 - Fancy addressing mode off of PC, see extension word below
|
|
* 111 100 - #immediate, followed by 16 or 32 bits of immediate value
|
|
* 111 101 - unused, reserved
|
|
* 111 110 - unused, reserved
|
|
* 111 111 - unused, reserved
|
|
*
|
|
* Memory references are to data space, except that PC-relative references
|
|
* are to program space, and are read-only.
|
|
*
|
|
* Fancy addressing modes are followed by a 16-bit extension word, and come
|
|
* in "brief" and "full" forms.
|
|
* The "brief" form looks like this. Bit 8 is 0 to indicate this form:
|
|
*
|
|
* 1 1 1 1 1 1 1
|
|
* 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
|
|
* +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
|
|
* |A/D| register |L/W| scale | 0 | 8-bit signed displacement |
|
|
* +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
|
|
*
|
|
* The basic effective address specifies a 32-bit base register - A0 through
|
|
* A7 or PC (the address of the following instruction).
|
|
* The A/D and register fields specify an index register. A/D is 1 for
|
|
* address registers, and 0 for data registers. L/W specifies the length
|
|
* of the index register, 1 for 32 bits, and 0 for 16 bits (sign-extended).
|
|
* The scale field is a left shift amount (0 to 3 bits) to apply to the
|
|
* sign-extended index register. The final address is d8(An,Rn.X*SCALE),
|
|
* also written (d8,An,Rn.X*SCALE). X is "W" or "L", SCALE is 1, 2, 4 or 8.
|
|
* "*1" may be omitted, as may a d8 of 0.
|
|
*
|
|
* The 68000 supports this form, but only with a scale field of 0.
|
|
* It does NOT (says the MC68030 User's Manual MC68030UM/AD, section 2.7)
|
|
* decode the scale field and the following format bit. They are treated
|
|
* as 0.
|
|
* I recall (I don't have the data book handy) that the CPU32 processor
|
|
* core used in the 683xx series processors supports variable scales,
|
|
* but only the brief extension word form. I suspect it decodes the
|
|
* format bit and traps if it is not zero, but I don't recall.
|
|
*
|
|
* The "full" form (680x0, x >= 2 processors only) looks like this:
|
|
*
|
|
* 1 1 1 1 1 1 1
|
|
* 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
|
|
* +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
|
|
* |A/D| register |L/W| scale | 1 | BS| IS|BD size| 0 | P |OD size|
|
|
* +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
|
|
*
|
|
* The first 8 bits are interpreted the same way as in the brief form,
|
|
* except that bit 8 is set to 1 to indicate the full form.
|
|
* BS, Base Suppress, if set, causes a value of 0 to be used in place of
|
|
* the base register value. If this is set, the base register
|
|
* specified is irrelevant, except that if it is the PC, the fetch is
|
|
* still done from program space. The specifier "ZPC" can be used in
|
|
* place of "PC" in the effective address mnemonic to represent this
|
|
* case.
|
|
* IS, Index Suppress, if set, causes a value of 0 to be used in place
|
|
* of the scaled index register. In this case, the first 7 bits of the
|
|
* extension word are irrelevant.
|
|
* BD size specifies the base displacement size. A value of 00
|
|
* in this field is illegal, while 01, 10 and 11 indicate that the
|
|
* extension word is followed by 0, 1 or 2 16-bit words of base displacement
|
|
* (zero, sign-extended to 32 bits, and most-significant word first,
|
|
* respectively) to add to the base register value.
|
|
* Bit 3 is unused.
|
|
* The P bit is the pre/post indexing bit, and only applies if an outer
|
|
* displacement is used. This is explained later.
|
|
* OD size specifies the size of an outer displacement. In the simple
|
|
* case, this field is set to 00 and the effective address is
|
|
* (disp,An,Rn.X*SCALE) or (disp,PC,Rn.X*SCALE).
|
|
* In this case the P bit must be 0. Any of those components may be
|
|
* suppressed, with a BD size of 01, the BS bit, or the IS bit.
|
|
* If the OD size is not 00, it encodes an outer displacement in the same
|
|
* manner as the BD size, and 0, 1 or 2 16-bit words of outer displacement
|
|
* follow the base displacement in the instruction stream. In this case,
|
|
* this is a double-indirect addressing mode. The base, base displacement,
|
|
* and possibly the index, specify a 32-bit memory word which holds a value
|
|
* which is fetched, and the outer displacement and possibly the index are
|
|
* added to produce the address of the operand.
|
|
* If the P bit is 0, this is pre-indexed, and the index value is added
|
|
* before the fetch of the indirect word, producing an effective address
|
|
* of ([disp,An,Rn.X*SCALE],disp). If the P bit is 1, the post-indexed case,
|
|
* the memory word is fetched from base+base displacement, then the index
|
|
* and outer displacement are added to compute the address of the operand.
|
|
* This effective address is written ([disp,An],Rn.X*SCALE,disp).
|
|
* (In both cases, "An" may also be "PC" or "ZPC".)
|
|
* Any of the components may be omitted. If the index is omitted (using the
|
|
* IS bit), the P bit is irrelevant, but must be written as 0.
|
|
* Thus, legal combinations of IS, P and OD size are:
|
|
* 0 0 00 - (disp,An,Rn.X*SCALE), also written disp(An,Rn.X*SCALE)
|
|
* 0 0 01 - ([disp,An,Rn.X*SCALE])
|
|
* 0 0 10 - ([disp,An,Rn.X*SCALE],d16)
|
|
* 0 0 11 - ([disp,An,Rn.X*SCALE],d32)
|
|
* 0 1 01 - ([disp,An],Rn.X*SCALE)
|
|
* 0 1 10 - ([disp,An],Rn.X*SCALE,d16)
|
|
* 0 1 11 - ([disp,An],Rn.X*SCALE,d32)
|
|
* 1 0 00 - (disp,An), also written disp(An)
|
|
* 1 0 01 - ([disp,An])
|
|
* 1 0 10 - ([disp,An],d16)
|
|
* 1 0 11 - ([disp,An],d32)
|
|
*/
|
|
|
|
/* 45678901234567890123456789012345678901234567890123456789012345678901234567 */
|