From 00ae1477295136c8a3e81b5afae1b714de9822f8 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Tue, 1 Jun 2021 11:17:15 -0700 Subject: chr-iscntrl: recognize Unicode C0 and C1. * lib.c (chr_iscntrl): Don't use iswcntrl; it fails to report 0x80-0x9F as control characters. A bit of hand-crafted logic does the job. * txr.1: Redocumented. --- lib.c | 8 +++++++- txr.1 | 9 ++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/lib.c b/lib.c index 11ad76b5..38837e43 100644 --- a/lib.c +++ b/lib.c @@ -5897,7 +5897,13 @@ val chr_isascii(val ch) val chr_iscntrl(val ch) { - return tnil(iswcntrl(c_chr(ch))); + wchar_t c = c_chr(ch); + switch ((c >> 5)) { + case 0: case 4: + return t; + default: + return tnil(c == 0x7F); + } } val chr_isdigit(val ch) diff --git a/txr.1 b/txr.1 index 9de10070..de1e1912 100644 --- a/txr.1 +++ b/txr.1 @@ -24586,11 +24586,14 @@ function returns .code t if the character .meta char -is a character whose code -ranges from 0 to 31, or is 127. In other words, any non-printable ASCII -character. For other characters, it returns +is a control character. For all other characters, it returns .codn nil . +A control character is one which belongs to the Unicode C0 or C1 block. +C0 consists of the characters U+0000 through U+001F, plus the +character U+007F. These are the original ASCII control characters. +Block C1 consists of U+0080 through U+009F. + .coNP Functions @ chr-isdigit and @ chr-digit .synb .mets (chr-isdigit << char ) -- cgit v1.2.3