"Spoon" <root@127.0.0.1> wrote in message news:e5kb97$lrh$1@nntp.aioe.org...
> Boudewijn Dijkstra wrote:
>
>> jukka wrote:
>>
>>> Here's C++ code, you get assembler routine if you compile this
>>> (compilers will give assembly output with right compiler switch,
>>> for example Visual C++ that would be /Faout.asm )
>>>
>>> inline int log2ui(uint32 u)
>>> {
>>> float v(u);
>>> return (static_cast<int32>(floatInt(v)) >> 23) - 127;
>>> }
>>
>> Might as well do it compiler-independant, type-safely and in C:
>>
>> inline int log2i(unsigned int i)
>> {
>> if (i == 0) {
>> return -1;
>> }
>> union {
>> float mf;
>> int mi;
>> } u;
>> u.mf = (float) i;
>> return (u.mi >> 23) - 127;
>> }
>
> As far as I understand, your code has implementation-defined behavior,
> therefore it is not compiler-independent ;-)
>
> <quote>
> With one exception, if a member of a union object is accessed after
> a value has been stored in a different member of the object, the
> behavior is implementation-defined. One special guarantee is made
> in order to simplify the use of unions: If a union contains several
> structures that share a common initial sequence, and if the union
> object currently contains one of these structures, it is permitted to
> inspect the common initial part of any of them. Two structures share
> a common initial sequence if corresponding members have compatible
> types for a sequence of one or more initial members.
> </quote>
>
> By the way, the OP said he didn't have an FPU :-)
/*
This glop is obviously C, but compile it with assembly output and it should
give you a simple assembly routine or just code the same algorithms.
Benchmark to see which work best for your system.
*/
const int tab_m37_ff[] = {
-1,0,25,1,22,26,31,2,15,23,29,27,10,-1,12,3,6,16,
-1,24,21,30,14,28,9,11,5,-1,20,13,8,4,19,7,18,17
};
const int tab_8[]={
-1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
/***********************************************/
/* Locate the postion of the highest bit set. */
/* A binary search is used. The result is an */
/* approximation of log2(n) [the integer part] */
/***********************************************/
unsigned long a_log2(unsigned long n)
{
unsigned long i = (-1);
/* Is there a bit on in the high word? */
/* Else, all the high bits are already zero. */
if (n & 0xffff0000) {
i += 16; /* Update our search position */
n >>= 16; /* Shift out lower (irrelevant) bits */
}
/* Is there a bit on in the high byte of the current word? */
/* Else, all the high bits are already zero. */
if (n & 0xff00) {
i += 8; /* Update our search position */
n >>= 8; /* Shift out lower (irrelevant) bits */
}
/* Is there a bit on in the current nybble? */
/* Else, all the high bits are already zero. */
if (n & 0xf0) {
i += 4; /* Update our search position */
n >>= 4; /* Shift out lower (irrelevant) bits */
}
/* Is there a bit on in the high 2 bits of the current nybble? */
/* 0xc is 1100 in binary... */
/* Else, all the high bits are already zero. */
if (n & 0xc) {
i += 2; /* Update our search position */
n >>= 2; /* Shift out lower (irrelevant) bits */
}
/* Is the 2nd bit on? [ 0x2 is 0010 in binary...] */
/* Else, all the 2nd bit is already zero. */
if (n & 0x2) {
i++; /* Update our search position */
n >>= 1; /* Shift out lower (irrelevant) bit */
}
/* Is the lowest bit set? */
if (n)
i++; /* Update our search position */
return i;
}
/*
** From: <bousek@$smtpg.compsys.com$>
** Subject: Re: Is there a faster way to find the highest bit set in a 32
bit integer? [Includes C code listing]
** Newsgroups: comp.graphics.algorithms
** References: <01bc3fac$69b314c0$ca61e426@DCorbit.solutionsiq.com>
** Organization: TDS Telecom - Madison, WI
** Message-ID: <01bc405b$2c1c8740$ca61e426@DCorbit.solutionsiq.com>
** X-Newsreader: Microsoft Internet News 4.70.1155
** MIME-Version: 1.0
** Content-Type: text/plain; charset=ISO-8859-1
** Content-Transfer-Encoding: 7bit
**
** "Dann Corbit" <dcorbit@solutionsiq.com> wrote:
** <snip>
** hey Dann:
** you could modify your routine something like this (although i would tend
to
** put this type of routine in assembler)...
*/
unsigned long b_log2(unsigned long n)
{
register unsigned long i = (n & 0xffff0000) ? 16 : 0;
if ((n >>= i) & 0xff00)
i |= 8, n >>= 8;
if (n & 0xf0)
i |= 4, n >>= 4;
if (n & 0xc)
i |= 2, n >>= 2;
return (i | (n >> 1));
}
unsigned long c_log2(unsigned long n)
{
register unsigned long p = 16,
i = 0;
do {
unsigned long t = n >> p;
if (t)
n = t, i |= p;
} while (p >>= 1);
return i;
}
#include <math.h>
/*
* FROM: steve@tangled-web.compulink.co.uk
* This is the fully portable version, which uses
* the 'frexp' function to get the mantissa and
* exponent of a number.
*/
unsigned long d_log2(unsigned long value)
{
int exponent;
double d_val = (double) value;
(void)frexp(d_val, &exponent);
return exponent - 1;
}
/*
* FROM: steve@tangled-web.compulink.co.uk
* This is the rather less portable version, which
* requires you to know how doubles are stored
* in memory, and the location and bias of the
* exponent for such numbers.
* This example is for doubles ( 64 bits )
* on Intel hardware - YMMV.
*/
/* CHANGED FROM DOUBLE TO FLOAT --
#define MP(x) ((long unsigned long *)&d_val)[x]
unsigned long tlog2( unsigned long value )
{
double d_val = (double)value ;
return (int)( MP(1) >> 20 & 0x7ff ) - 0x3ff ;
}
*/
#define MP(x) ((unsigned long *)&d_val)[x]
unsigned long e_log2(unsigned long value)
{
float d_val = (float) value;
return (int) (MP(0) >> 23 & 0xff) - 0x7f;
}
/*
From: Robert E. Minsk[SMTP:egbert@spimageworks.com]
Sent: Saturday, April 12, 1997 9:44 PM
To: Dann Corbit
Subject: Is there a faster way to find the highest bit set in a 32
bit integer? [Includes C code listing]
I did not see the original post but why don't you build a table of the
highest bit set in a byte and then check each byte. For example.
*/
static unsigned char hiBitSetTab[] = {
0, 1, 2, 2, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7
};
/* On my machine unsigned unsigned long is 32-bits, big ended */
unsigned long f_log2(unsigned long val)
{
unsigned long tmp;
tmp = val >> 24;
if (tmp) {
return hiBitSetTab[tmp] + 23;
}
tmp = (val >> 16) & 0xff;
if (tmp) {
return hiBitSetTab[tmp] + 15;
}
tmp = (val >> 8) & 0xff;
if (tmp) {
return hiBitSetTab[tmp] + 7;
}
return hiBitSetTab[val & 0xff] - 1;
}
/* On my machine unsigned unsigned long is 32-bits, big ended */
unsigned long g_log2(unsigned long val)
{
// unsigned long tmp;
unsigned char *foo = (unsigned char *) &val;
if (foo[3]) {
return hiBitSetTab[foo[3]] + 23;
}
if (foo[2]) {
return hiBitSetTab[foo[2]] + 15;
}
if (foo[1]) {
return hiBitSetTab[foo[1]] + 7;
}
return hiBitSetTab[foo[0]] - 1;
}
/*
or even:
*/
typedef union {
unsigned long i;
unsigned char c[4];
} u32bitType;
/* On my machine unsigned unsigned long is 32-bits, big ended */
unsigned long h_log2(long l)
{
u32bitType val;
val.i = l;
if (val.c[3]) {
return hiBitSetTab[val.c[3]] + 23;
}
if (val.c[2]) {
return hiBitSetTab[val.c[2]] + 15;
}
if (val.c[1]) {
return hiBitSetTab[val.c[1]] + 7;
}
return hiBitSetTab[val.c[0]] - 1;
}
/* exhaustive binary search (you won't get much faster without a lookup
table) */
unsigned long i_log2(unsigned long n)
{
return n & 65535 << 16 ? n & 255 << 24 ? n & 15 << 28 ? n & 12 << 28 ? n
& 8 << 28 ? 31 : 30 : n & 2 << 28 ? 29 : 28
: n & 12 << 24 ? n & 8 << 24 ? 27 : 26 : n & 2 << 24 ? 25 : 24 : n & 15
<< 20 ? n & 12 << 20 ? n & 8 << 20 ? 23 :
22 : n & 2 << 20 ? 21 : 20 : n & 12 << 16 ? n & 8 << 16 ? 19 : 18 : n &
2 << 16 ? 17 : 16 : n & 255 << 8 ? n & 15
<< 12 ? n & 12 << 12 ? n & 8 << 12 ? 15 : 14 : n & 2 << 12 ? 13 : 12 : n
& 12 << 8 ? n & 8 << 8 ? 11 : 10 : n & 2
<< 8 ? 9 : 8 : n & 240 ? n & 192 ? n & 128 ? 7 : 6 : n & 32 ? 5 : 4 : n
& 12 ? n & 8 ? 3 : 2 : n & 2 ? 1 : n & 1 ? 0 : -1;
}
/* bytewise binary search, lookup table */
unsigned long j_log2(unsigned long n)
{
static const unsigned long *t = tab_8;
return n >> 16 ? n >> 24 ? 24 + t[n >> 24] : 16 + t[n >> 16] : n >> 8 ?
8 + t[n >> 8] : t[n];
}
/* frexp */
unsigned long k_log2(unsigned long n)
{
int h;
frexp((double) n, &h);
return h - 1;
}
/* magical :) but slow :( */
unsigned long l_log2(unsigned long n)
{
static const unsigned long *t = tab_m37_ff;
return t[(n |= n >> 1, n |= n >> 2, n |= n >> 4, n |= n >> 8, n |= n >>
16) % 37];
}
/* bytewise linear search, with a small comparison-avoiding twist */
unsigned long m_log2(unsigned long n)
{
static const unsigned long *t = tab_8;
return n >= 1 << 23 ? 24 + t[n >> 24] : n >= 1 << 15 ? 16 + t[n >> 16] :
n >= 1 << 8 ? 8 + t[n >> 8] : t[n];
}
/* bytewise binary search, with the same twist, even though there's no
reason it
should work here (always two comparisons anyway) */
unsigned long n_log2(unsigned long n)
{
static const unsigned long *t = tab_8;
return n >= 1 << 15 ? n >= 1 << 23 ? 24 + t[n >> 24] : 16 + t[n >> 16] :
n >= 1 << 8 ? 8 + t[n >> 8] : t[n];
}
/* log() - found to be non-portable due to vague log() accuracy */
unsigned long o_log2(unsigned long n)
{
return n ? (int) floor(log((double) n) / log(2.)) : -1;
}
/* IEEE trick */
unsigned long p_log2(unsigned long n)
{
if (n) {
float f = n;
return ((*(unsigned long *) &f) >> 23) - 127;
} else {
return -1;
}
}
#ifdef TEST
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
int main(void)
{
unsigned long l;
unsigned long m;
unsigned long limit = 100000000;
for (l = 0; l < limit; l++) {
m = l * rand();
m = a_log2(m), m = b_log2(m), m = c_log2(m), m = /*d_log2(m), m =*/
e_log2(m), m = f_log2(m),
m = g_log2(m), m = h_log2(m), m = i_log2(m), m = j_log2(m), m =
/*k_log2(m), m =*/ l_log2(m),
m = m_log2(m), m = n_log2(m),/* m = o_log2(m), */ m = p_log2(m);
}
for (m = 0; m < limit; m += 10000000) {
printf(
"m = %lu, a_log2 = %lu, b_log2 = %lu, c_log2 = %lu, d_log2 =
%lu, e_log2 = %lu, f_log2 = %lu, g_log2 = %lu, h_log2 = %lu, i_log2 = %lu,
j_log2 = %lu, k_log2 = %lu, l_log2 = %lu, _log2 = %lu, n_log2 = %lu, o_log2
= %lu, p_log2 = %lu",
m, a_log2(m), b_log2(m), c_log2(m), d_log2(m), e_log2(m),
f_log2(m), g_log2(m), h_log2(m), i_log2(m), j_log2(m), k_log2(m), l_log2(m),
m_log2(m), n_log2(m), o_log2(m), p_log2(m));
printf("floating point approximation is %f\n", log((double) m) /
log(2.));
}
return 0;
}
#endif
|