diff options
-rw-r--r-- | buf.c | 65 | ||||
-rw-r--r-- | lib.c | 10 | ||||
-rw-r--r-- | lib.h | 1 | ||||
-rw-r--r-- | tests/012/buf.tl | 7 | ||||
-rw-r--r-- | txr.1 | 52 |
5 files changed, 134 insertions, 1 deletions
@@ -1385,6 +1385,69 @@ static val buf_decompress(val buf) uw_throwf(error_s, lit("~a: decompression failed"), self, nao); } +static val str_compress(val str, val level_opt) +{ + val self = lit("str-compress"); + val level = default_arg(level_opt, negone); + int lev = c_int(level, self); + size_t sz; + unsigned char *u8 = utf8_dup_to_buf(c_str(str, self), &sz, 0); + uLong bound = compressBound(sz), zsize = bound; + mem_t *zdata = chk_malloc(bound); + + if (convert(uLong, sz) != sz) { + free(zdata); + err_oflow(self); + } + + if (compress2(zdata, &zsize, u8, sz, lev) != Z_OK) { + free(zdata); + uw_throwf(error_s, lit("~a: compression failed"), self, nao); + } + + zdata = chk_realloc(zdata, zsize); + return make_owned_buf(unum(zsize), zdata); +} + +static val str_decompress(val buf) +{ + val self = lit("str-decompress"); + struct buf *b = buf_handle(buf, self); + ucnum zsize = c_unum(b->len, self); + uLong zsz10 = 10 * zsize; + uLong size = if3(zsz10 > zsize, zsz10, convert(uLong, -1)); + mem_t *data = chk_malloc(size); + val ret = nil; + + for (;;) { + switch (uncompress(data, &size, b->data, zsize)) { + case Z_OK: + + data = chk_realloc(data, size); + ret = string_utf8_from_buf(coerce(char *, data), size); + free(data); + return ret; + case Z_BUF_ERROR: + if (size == convert(uLong, -1)) + break; + if (size * 2 > size) + size = size * 2; + else if (size == convert(uLong, -1)) + break; + else + size = convert(uLong, -1); + data = chk_realloc(data, size); + continue; + default: + break; + } + break; + } + + free(data); + uw_throwf(error_s, lit("~a: decompression failed"), self, nao); +} + #endif val buf_ash(val buf, val bits) @@ -2084,6 +2147,8 @@ void buf_init(void) #if HAVE_ZLIB reg_fun(intern(lit("buf-compress"), user_package), func_n2o(buf_compress, 1)); reg_fun(intern(lit("buf-decompress"), user_package), func_n1(buf_decompress)); + reg_fun(intern(lit("str-compress"), user_package), func_n2o(str_compress, 1)); + reg_fun(intern(lit("str-decompress"), user_package), func_n1(str_decompress)); #endif reg_fun(intern(lit("buf-ash"), user_package), func_n2(buf_ash)); @@ -5381,6 +5381,16 @@ val string_utf8(const char *str) return obj; } +val string_utf8_from_buf(const char *str, size_t len) +{ + val obj = make_obj(); + obj->st.type = STR; + obj->st.str = utf8_dup_from_buf(str, len); + obj->st.len = nil; + obj->st.alloc = 0; + return obj; +} + val string_8bit(const unsigned char *str) { size_t l = strlen(coerce(const char *, str)), i; @@ -1087,6 +1087,7 @@ val bitset(val n); val string_own(wchar_t *str); val string(const wchar_t *str); val string_utf8(const char *str); +val string_utf8_from_buf(const char *str, size_t len); val string_8bit(const unsigned char *str); val string_8bit_size(const unsigned char *str, size_t sz); val mkstring(val len, val ch); diff --git a/tests/012/buf.tl b/tests/012/buf.tl index 3dfdabc6..c0f392ee 100644 --- a/tests/012/buf.tl +++ b/tests/012/buf.tl @@ -25,7 +25,12 @@ (mtest (buf-decompress (make-buf 1024)) :error - (buf-decompress (make-buf 1024 255)) :error)) + (buf-decompress (make-buf 1024 255)) :error) + + (mtest + (str-decompress (str-compress "")) "" + (str-decompress (str-compress "abc")) "abc" + (str-decompress (str-compress "a\xdc00;bc\xdcff;d")) "a\xdc00;bc\xdcff;d")) (let ((buf (make-buf 16))) (mtest @@ -30637,6 +30637,58 @@ function throws an error exception if .meta buf doesn't contain a compressed image. +.coNP Functions @ str-compress and @ str-decompress +.synb +.mets (str-compress < str <> [ level ]) +.mets (str-decompress << buf ) +.syne +.desc +The +.code str-compress +and +.code str-decompress +functions provide direct data compression and decompression of the UTF-8 +representation of text. They use the Deflate algorithm, via Zlib, +and are only only available if \*(TX is built with Zlib support. + +The +.code str-compress +function converts the input string argument +.meta str +into a sequence of UTF-8 bytes, and then applies compression, +returning the compressed result as a +.code buf +object. + +The optional +.meta level +argument specifies the compression level as an integer; +more detail is given in the description of +.codn buf-compress . + +The +.code str-decompress +function performs the inverse operation: it decodes the compressed data in +.metn buf , +and then applies UTF-8 decoding to recover a string, which is returned. + +The operation +.mono +.meti (str-decompress (str-compress << s)) +.onom +recovers +.meta s +for any string value. + +The following equivalences hold between string and buffer +compression, except that string compression may elide the +temporary buffer objects that appear on the right-hand side: + +.verb + (str-compress s) <--> (buf-compress (buf-str s)) + (str-decompress b) <--> (str-buf (buf-decompress b)) +.brev + .SS* Structures \*(TX supports user-defined types in the form of structures. Structures |