aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--termkey.c243
1 files changed, 102 insertions, 141 deletions
diff --git a/termkey.c b/termkey.c
index 3a80896..ff290bd 100644
--- a/termkey.c
+++ b/termkey.c
@@ -166,6 +166,97 @@ static inline void eatbytes(termkey_t *tk, size_t count)
tk->buffvalid -= count;
}
+#define UTF8_INVALID 0xFFFD
+
+static int utf8_seqlen(int codepoint)
+{
+ if(codepoint < 0x0000080) return 1;
+ if(codepoint < 0x0000800) return 2;
+ if(codepoint < 0x0010000) return 3;
+ if(codepoint < 0x0200000) return 4;
+ if(codepoint < 0x4000000) return 5;
+ return 6;
+}
+
+static void fill_utf8(termkey_key *key)
+{
+ int codepoint = key->code;
+ int nbytes = utf8_seqlen(codepoint);
+
+ key->utf8[nbytes] = 0;
+
+ // This is easier done backwards
+ int b = nbytes;
+ while(b > 1) {
+ b--;
+ key->utf8[b] = 0x80 | (codepoint & 0x3f);
+ codepoint >>= 6;
+ }
+
+ switch(nbytes) {
+ case 1: key->utf8[0] = (codepoint & 0x7f); break;
+ case 2: key->utf8[0] = 0xc0 | (codepoint & 0x1f); break;
+ case 3: key->utf8[0] = 0xe0 | (codepoint & 0x0f); break;
+ case 4: key->utf8[0] = 0xf0 | (codepoint & 0x07); break;
+ case 5: key->utf8[0] = 0xf8 | (codepoint & 0x03); break;
+ case 6: key->utf8[0] = 0xfc | (codepoint & 0x01); break;
+ }
+}
+
+static inline void do_codepoint(termkey_t *tk, int codepoint, termkey_key *key)
+{
+ if(codepoint < 0x20) {
+ // C0 range
+ key->code = 0;
+
+ if(!(tk->flags & TERMKEY_FLAG_NOINTERPRET) && tk->c0[codepoint] != TERMKEY_SYM_UNKNOWN)
+ key->code = tk->c0[codepoint];
+
+ if(!key->code) {
+ key->code = codepoint + 0x40;
+ key->modifiers = TERMKEY_KEYMOD_CTRL;
+ key->flags = 0;
+ }
+ else {
+ key->modifiers = 0;
+ key->flags = TERMKEY_KEYFLAG_SPECIAL;
+ }
+ }
+ else if(codepoint == 0x20 && !(tk->flags & TERMKEY_FLAG_NOINTERPRET)) {
+ // ASCII space
+ key->code = TERMKEY_SYM_SPACE;
+ key->modifiers = 0;
+ key->flags = TERMKEY_KEYFLAG_SPECIAL;
+ }
+ else if(codepoint == 0x7f && !(tk->flags & TERMKEY_FLAG_NOINTERPRET)) {
+ // ASCII DEL
+ key->code = TERMKEY_SYM_DEL;
+ key->modifiers = 0;
+ key->flags = TERMKEY_KEYFLAG_SPECIAL;
+ }
+ else if(codepoint >= 0x20 && codepoint < 0x80) {
+ // ASCII lowbyte range
+ key->code = codepoint;
+ key->modifiers = 0;
+ key->flags = 0;
+ }
+ else if(codepoint >= 0x80 && codepoint < 0xa0) {
+ // UTF-8 never starts with a C1 byte. So we can be sure of these
+ key->code = codepoint - 0x40;
+ key->modifiers = TERMKEY_KEYMOD_CTRL|TERMKEY_KEYMOD_ALT;
+ key->flags = 0;
+ }
+ else {
+ // UTF-8 codepoint
+ key->code = codepoint;
+ key->flags = 0;
+ key->modifiers = 0;
+ }
+
+ if(!(key->flags & TERMKEY_KEYFLAG_SPECIAL))
+ fill_utf8(key);
+}
+
static termkey_result getkey_csi(termkey_t *tk, size_t introlen, termkey_key *key)
{
size_t csi_end = introlen;
@@ -280,43 +371,6 @@ static termkey_result getkey_ss3(termkey_t *tk, size_t introlen, termkey_key *ke
return TERMKEY_RES_KEY;
}
-#define UTF8_INVALID 0xFFFD
-
-static int utf8_seqlen(int codepoint)
-{
- if(codepoint < 0x0000080) return 1;
- if(codepoint < 0x0000800) return 2;
- if(codepoint < 0x0010000) return 3;
- if(codepoint < 0x0200000) return 4;
- if(codepoint < 0x4000000) return 5;
- return 6;
-}
-
-static void fill_utf8(termkey_key *key)
-{
- int codepoint = key->code;
- int nbytes = utf8_seqlen(codepoint);
-
- key->utf8[nbytes] = 0;
-
- // This is easier done backwards
- int b = nbytes;
- while(b > 1) {
- b--;
- key->utf8[b] = 0x80 | (codepoint & 0x3f);
- codepoint >>= 6;
- }
-
- switch(nbytes) {
- case 1: key->utf8[0] = (codepoint & 0x7f); break;
- case 2: key->utf8[0] = 0xc0 | (codepoint & 0x1f); break;
- case 3: key->utf8[0] = 0xe0 | (codepoint & 0x0f); break;
- case 4: key->utf8[0] = 0xf0 | (codepoint & 0x07); break;
- case 5: key->utf8[0] = 0xf8 | (codepoint & 0x03); break;
- case 6: key->utf8[0] = 0xfc | (codepoint & 0x01); break;
- }
-}
-
termkey_result termkey_getkey(termkey_t *tk, termkey_key *key)
{
if(tk->buffvalid == 0)
@@ -327,12 +381,8 @@ termkey_result termkey_getkey(termkey_t *tk, termkey_key *key)
if(b0 == 0x1b) {
if(tk->buffvalid == 1) {
- key->code = TERMKEY_SYM_ESCAPE;
- key->modifiers = 0;
- key->flags = TERMKEY_KEYFLAG_SPECIAL;
-
+ do_codepoint(tk, b0, key);
eatbytes(tk, 1);
-
return TERMKEY_RES_KEY;
}
@@ -361,79 +411,16 @@ termkey_result termkey_getkey(termkey_t *tk, termkey_key *key)
return metakey_result;
}
- else if(b0 < 0x20) {
- // Control key
-
- key->code = 0;
-
- if(!(tk->flags & TERMKEY_FLAG_NOINTERPRET) && tk->c0[b0] != TERMKEY_SYM_UNKNOWN)
- key->code = tk->c0[b0];
-
- if(!key->code) {
- key->code = b0 + 0x40;
- key->modifiers = TERMKEY_KEYMOD_CTRL;
- key->flags = 0;
-
- key->utf8[0] = key->code;
- key->utf8[1] = 0;
- }
- else {
- key->modifiers = 0;
- key->flags = TERMKEY_KEYFLAG_SPECIAL;
- }
-
- eatbytes(tk, 1);
-
- return TERMKEY_RES_KEY;
- }
- else if(b0 == 0x20 && !(tk->flags & TERMKEY_FLAG_NOINTERPRET)) {
- key->code = TERMKEY_SYM_SPACE;
- key->modifiers = 0;
- key->flags = TERMKEY_KEYFLAG_SPECIAL;
-
- eatbytes(tk, 1);
-
- return TERMKEY_RES_KEY;
- }
- else if(b0 == 0x7f && !(tk->flags & TERMKEY_FLAG_NOINTERPRET)) {
- key->code = TERMKEY_SYM_DEL;
- key->modifiers = 0;
- key->flags = TERMKEY_KEYFLAG_SPECIAL;
-
- eatbytes(tk, 1);
-
- return TERMKEY_RES_KEY;
- }
- else if(b0 >= 0x20 && b0 < 0x80) {
- // ASCII lowbyte range
- key->code = b0;
- key->modifiers = 0;
- key->flags = 0;
-
- key->utf8[0] = key->code;
- key->utf8[1] = 0;
-
- eatbytes(tk, 1);
-
- return TERMKEY_RES_KEY;
- }
else if(b0 == 0x8f) {
return getkey_ss3(tk, 1, key);
}
else if(b0 == 0x9b) {
return getkey_csi(tk, 1, key);
}
- else if(b0 >= 0x80 && b0 < 0xa0) {
- // UTF-8 never starts with a C1 byte. So we can be sure of these
- key->code = b0 - 0x40;
- key->modifiers = TERMKEY_KEYMOD_CTRL|TERMKEY_KEYMOD_ALT;
- key->flags = 0;
-
- key->utf8[0] = key->code;
- key->utf8[1] = 0;
-
+ else if(b0 < 0xa0) {
+ // Single byte C0, G0 or C1 - C1 is never UTF-8 initial byte
+ do_codepoint(tk, b0, key);
eatbytes(tk, 1);
-
return TERMKEY_RES_KEY;
}
else if(tk->flags & TERMKEY_FLAG_UTF8) {
@@ -446,11 +433,8 @@ termkey_result termkey_getkey(termkey_t *tk, termkey_key *key)
if(b0 < 0xc0) {
// Starts with a continuation byte - that's not right
- key->code = UTF8_INVALID;
-
- fill_utf8(key);
+ do_codepoint(tk, UTF8_INVALID, key);
eatbytes(tk, 1);
-
return TERMKEY_RES_KEY;
}
else if(b0 < 0xe0) {
@@ -474,11 +458,8 @@ termkey_result termkey_getkey(termkey_t *tk, termkey_key *key)
codepoint = b0 & 0x01;
}
else {
- key->code = UTF8_INVALID;
-
- fill_utf8(key);
+ do_codepoint(tk, UTF8_INVALID, key);
eatbytes(tk, 1);
-
return TERMKEY_RES_KEY;
}
@@ -488,11 +469,8 @@ termkey_result termkey_getkey(termkey_t *tk, termkey_key *key)
for(int b = 1; b < nbytes; b++) {
unsigned char cb = tk->buffer[b];
if(cb < 0x80 || cb >= 0xc0) {
- key->code = UTF8_INVALID;
-
- fill_utf8(key);
+ do_codepoint(tk, UTF8_INVALID, key);
eatbytes(tk, b - 1);
-
return TERMKEY_RES_KEY;
}
@@ -501,34 +479,17 @@ termkey_result termkey_getkey(termkey_t *tk, termkey_key *key)
}
// Check for overlong sequences
- if(nbytes > utf8_seqlen(codepoint)) {
- key->code = UTF8_INVALID;
-
- fill_utf8(key);
- eatbytes(tk, nbytes);
-
- return TERMKEY_RES_KEY;
- }
+ if(nbytes > utf8_seqlen(codepoint))
+ codepoint = UTF8_INVALID;
// Check for UTF-16 surrogates or invalid codepoints
if((codepoint >= 0xD800 && codepoint <= 0xDFFF) ||
codepoint == 0xFFFE ||
codepoint == 0xFFFF)
- {
- key->code = UTF8_INVALID;
-
- fill_utf8(key);
- eatbytes(tk, nbytes);
-
- return TERMKEY_RES_KEY;
- }
-
- key->code = codepoint;
- memcpy(key->utf8, tk->buffer, nbytes);
- key->utf8[nbytes] = 0;
+ codepoint = UTF8_INVALID;
+ do_codepoint(tk, codepoint, key);
eatbytes(tk, nbytes);
-
return TERMKEY_RES_KEY;
}
else {