util.strbitop: Add common_prefix_bits() method

This returns the number of leading bits that two strings have in common,
i.e. the length in bits of their shared prefix. It is significantly more
efficient than performing the equivalent calculation in Lua.
This commit is contained in:
Matthew Wild 2024-02-23 12:08:37 +00:00
parent 1606675762
commit 7f748556a2
2 changed files with 81 additions and 0 deletions

View file

@ -38,4 +38,48 @@ describe("util.strbitop", function ()
assert.equal("hello", strbitop.sxor("hello", ""));
end);
end);
describe("common_prefix_bits()", function ()
	-- Build a binary string from its textual bit representation.
	-- `s` consists of ASCII '0'/'1' characters, most significant bit of each
	-- byte first, and must describe a whole number of bytes.
	local function B(s)
		assert(#s%8==0, "Invalid test input: B(s): s should be a multiple of 8 bits in length");
		local byte = 0;
		local out_str = {};
		for i = 1, #s do
			local bit_ascii = s:byte(i);
			if bit_ascii == 49 then -- '1'
				-- Position within the current byte decides the bit's weight (128 down to 1)
				byte = byte + 2^((7-(i-1))%8);
			elseif bit_ascii ~= 48 then -- anything other than '0'
				error("Invalid test input: B(s): s should contain only '0' or '1' characters");
			end
			if (i-1)%8 == 7 then
				-- Eight bits collected: emit the byte and start the next one
				table.insert(out_str, string.char(byte));
				byte = 0;
			end
		end
		return table.concat(out_str);
	end

	local _cpb = strbitop.common_prefix_bits;

	-- Run common_prefix_bits() on the two bit patterns in both argument
	-- orders, insist that the results agree, and return the shared result.
	local function test(a, b)
		local Ba, Bb = B(a), B(b);
		local ret1 = _cpb(Ba, Bb);
		local ret2 = _cpb(Bb, Ba);
		assert(ret1 == ret2, ("parameter order should not make a difference to the result (%s, %s) = %d, reversed = %d"):format(a, b, ret1, ret2));
		return ret1;
	end

	it("works on single bytes", function ()
		assert.equal(0, test("00000000", "11111111"));
		assert.equal(1, test("10000000", "11111111"));
		assert.equal(0, test("01000000", "11111111"));
		assert.equal(2, test("11000000", "11111111"));
		assert.equal(7, test("11111110", "11111111"));
		assert.equal(8, test("11111111", "11111111"));
	end);

	it("works on multiple bytes", function ()
		for i = 0, 16 do
			assert.equal(i, test(string.rep("1", i)..string.rep("0", 16-i), "1111111111111111"));
		end
	end);

	it("only compares up to the length of the shorter string", function ()
		assert.equal(8, test("11111111", "1111111100000000"));
		assert.equal(8, test("00000000", "0000000011111111"));
		assert.equal(0, test("10000000", "0111111111111111"));
	end);

	it("returns zero when either string is empty", function ()
		assert.equal(0, test("", ""));
		assert.equal(0, test("", "11111111"));
	end);
end);
end);

View file

@ -8,6 +8,8 @@
#include <lua.h>
#include <lauxlib.h>
#include <sys/param.h>
#include <limits.h>
/* TODO Deduplicate code somehow */
@ -74,11 +76,46 @@ static int strop_xor(lua_State *L) {
return 1;
}
/*
 * Count the leading zero bits of a single byte.
 *
 * Returns 0 when the most significant bit of `c` is set, 7 when only the
 * least significant bit is set, and 8 when `c` is zero.
 */
unsigned int clz(unsigned char c) {
	/*
	 * Handle zero up front: the result of __builtin_clz(0) is undefined, and
	 * this keeps both implementations below in agreement.
	 */
	if(c == 0) return 8;
#if defined(__GNUC__)
	/*
	 * __builtin_clz() works on a full unsigned int, so discount the zero
	 * bits contributed by the bytes above the one we care about.
	 */
	return (unsigned int) __builtin_clz((unsigned int) c)
		- (unsigned int) ((sizeof(unsigned int) - 1) * CHAR_BIT);
#else
	if(c & 0x80) return 0;
	if(c & 0x40) return 1;
	if(c & 0x20) return 2;
	if(c & 0x10) return 3;
	if(c & 0x08) return 4;
	if(c & 0x04) return 5;
	if(c & 0x02) return 6;
	return 7; /* c is non-zero, so bit 0 is the only one left */
#endif
}
/*
 * common_prefix_bits(a, b)
 *
 * Lua arguments: two strings (checked with luaL_checklstring).
 * Lua result: one integer, the number of leading bits the two strings share.
 * Only the first MIN(#a, #b) bytes are compared, so the result never exceeds
 * eight times the length of the shorter string.
 *
 * Declared static like the other strop_* functions in this file: it is only
 * reached through the exports table, and LUA_API is the Lua core's own
 * export marker rather than something a module-internal function should use.
 */
static int strop_common_prefix_bits(lua_State *L) {
	size_t len_a, len_b, i;
	const char *str_a = luaL_checklstring(L, 1, &len_a);
	const char *str_b = luaL_checklstring(L, 2, &len_b);
	const size_t min_len = MIN(len_a, len_b);

	for(i = 0; i < min_len; i++) {
		/* Bits set in diff are the positions where the two bytes disagree */
		const unsigned char diff = (unsigned char) (str_a[i] ^ str_b[i]);

		if(diff != 0) {
			/* All earlier bytes matched; add the matching high bits of this one */
			lua_pushinteger(L, (lua_Integer) (i * 8 + clz(diff)));
			return 1;
		}
	}

	/* No difference found within the shorter string's length */
	lua_pushinteger(L, (lua_Integer) (min_len * 8));
	return 1;
}
LUA_API int luaopen_prosody_util_strbitop(lua_State *L) {
luaL_Reg exports[] = {
{ "sand", strop_and },
{ "sor", strop_or },
{ "sxor", strop_xor },
{ "common_prefix_bits", strop_common_prefix_bits },
{ NULL, NULL }
};