summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--buf.c65
-rw-r--r--lib.c10
-rw-r--r--lib.h1
-rw-r--r--tests/012/buf.tl7
-rw-r--r--txr.152
5 files changed, 134 insertions, 1 deletions
diff --git a/buf.c b/buf.c
index ffa2af3b..94cccaa7 100644
--- a/buf.c
+++ b/buf.c
@@ -1385,6 +1385,69 @@ static val buf_decompress(val buf)
uw_throwf(error_s, lit("~a: decompression failed"), self, nao);
}
+static val str_compress(val str, val level_opt)
+{
+ val self = lit("str-compress");
+ val level = default_arg(level_opt, negone);
+ int lev = c_int(level, self);
+ size_t sz;
+ unsigned char *u8 = utf8_dup_to_buf(c_str(str, self), &sz, 0);
+ uLong bound = compressBound(sz), zsize = bound;
+ mem_t *zdata = chk_malloc(bound);
+
+ if (convert(uLong, sz) != sz) {
+ free(zdata);
+ err_oflow(self);
+ }
+
+ if (compress2(zdata, &zsize, u8, sz, lev) != Z_OK) {
+ free(zdata);
+ uw_throwf(error_s, lit("~a: compression failed"), self, nao);
+ }
+
+ zdata = chk_realloc(zdata, zsize);
+ return make_owned_buf(unum(zsize), zdata);
+}
+
+static val str_decompress(val buf)
+{
+ val self = lit("str-decompress");
+ struct buf *b = buf_handle(buf, self);
+ ucnum zsize = c_unum(b->len, self);
+ uLong zsz10 = 10 * zsize;
+ uLong size = if3(zsz10 > zsize, zsz10, convert(uLong, -1));
+ mem_t *data = chk_malloc(size);
+ val ret = nil;
+
+ for (;;) {
+ switch (uncompress(data, &size, b->data, zsize)) {
+ case Z_OK:
+
+ data = chk_realloc(data, size);
+ ret = string_utf8_from_buf(coerce(char *, data), size);
+ free(data);
+ return ret;
+ case Z_BUF_ERROR:
+ if (size == convert(uLong, -1))
+ break;
+ if (size * 2 > size)
+ size = size * 2;
+ else if (size == convert(uLong, -1))
+ break;
+ else
+ size = convert(uLong, -1);
+ data = chk_realloc(data, size);
+ continue;
+ default:
+ break;
+ }
+ break;
+ }
+
+ free(data);
+ uw_throwf(error_s, lit("~a: decompression failed"), self, nao);
+}
+
#endif
val buf_ash(val buf, val bits)
@@ -2084,6 +2147,8 @@ void buf_init(void)
#if HAVE_ZLIB
reg_fun(intern(lit("buf-compress"), user_package), func_n2o(buf_compress, 1));
reg_fun(intern(lit("buf-decompress"), user_package), func_n1(buf_decompress));
+ reg_fun(intern(lit("str-compress"), user_package), func_n2o(str_compress, 1));
+ reg_fun(intern(lit("str-decompress"), user_package), func_n1(str_decompress));
#endif
reg_fun(intern(lit("buf-ash"), user_package), func_n2(buf_ash));
diff --git a/lib.c b/lib.c
index 5916e1c6..b23a883a 100644
--- a/lib.c
+++ b/lib.c
@@ -5381,6 +5381,16 @@ val string_utf8(const char *str)
return obj;
}
+val string_utf8_from_buf(const char *str, size_t len)
+{
+ val obj = make_obj();
+ obj->st.type = STR;
+ obj->st.str = utf8_dup_from_buf(str, len);
+ obj->st.len = nil;
+ obj->st.alloc = 0;
+ return obj;
+}
+
val string_8bit(const unsigned char *str)
{
size_t l = strlen(coerce(const char *, str)), i;
diff --git a/lib.h b/lib.h
index 2e416dcd..70753bb2 100644
--- a/lib.h
+++ b/lib.h
@@ -1087,6 +1087,7 @@ val bitset(val n);
val string_own(wchar_t *str);
val string(const wchar_t *str);
val string_utf8(const char *str);
+val string_utf8_from_buf(const char *str, size_t len);
val string_8bit(const unsigned char *str);
val string_8bit_size(const unsigned char *str, size_t sz);
val mkstring(val len, val ch);
diff --git a/tests/012/buf.tl b/tests/012/buf.tl
index 3dfdabc6..c0f392ee 100644
--- a/tests/012/buf.tl
+++ b/tests/012/buf.tl
@@ -25,7 +25,12 @@
(mtest
(buf-decompress (make-buf 1024)) :error
- (buf-decompress (make-buf 1024 255)) :error))
+ (buf-decompress (make-buf 1024 255)) :error)
+
+ (mtest
+ (str-decompress (str-compress "")) ""
+ (str-decompress (str-compress "abc")) "abc"
+ (str-decompress (str-compress "a\xdc00;bc\xdcff;d")) "a\xdc00;bc\xdcff;d"))
(let ((buf (make-buf 16)))
(mtest
diff --git a/txr.1 b/txr.1
index 54c9cee3..d8d99971 100644
--- a/txr.1
+++ b/txr.1
@@ -30637,6 +30637,58 @@ function throws an error exception if
.meta buf
doesn't contain a compressed image.
+.coNP Functions @ str-compress and @ str-decompress
+.synb
+.mets (str-compress < str <> [ level ])
+.mets (str-decompress << buf )
+.syne
+.desc
+The
+.code str-compress
+and
+.code str-decompress
+functions provide direct data compression and decompression of the UTF-8
+representation of text. They use the Deflate algorithm, via Zlib,
+and are only only available if \*(TX is built with Zlib support.
+
+The
+.code str-compress
+function converts the input string argument
+.meta str
+into a sequence of UTF-8 bytes, and then applies compression,
+returning the compressed result as a
+.code buf
+object.
+
+The optional
+.meta level
+argument specifies the compression level as an integer;
+more detail is given in the description of
+.codn buf-compress .
+
+The
+.code str-decompress
+function performs the inverse operation: it decodes the compressed data in
+.metn buf ,
+and then applies UTF-8 decoding to recover a string, which is returned.
+
+The operation
+.mono
+.meti (str-decompress (str-compress << s))
+.onom
+recovers
+.meta s
+for any string value.
+
+The following equivalences hold between string and buffer
+compression, except that string compression may elide the
+temporary buffer objects that appear on the right-hand side:
+
+.verb
+ (str-compress s) <--> (buf-compress (buf-str s))
+ (str-decompress b) <--> (str-buf (buf-decompress b))
+.brev
+
.SS* Structures
\*(TX supports user-defined types in the form of structures. Structures