diff options
-rw-r--r-- | termkey.c | 243 |
1 files changed, 102 insertions, 141 deletions
@@ -166,6 +166,97 @@ static inline void eatbytes(termkey_t *tk, size_t count) tk->buffvalid -= count; } +#define UTF8_INVALID 0xFFFD + +static int utf8_seqlen(int codepoint) +{ + if(codepoint < 0x0000080) return 1; + if(codepoint < 0x0000800) return 2; + if(codepoint < 0x0010000) return 3; + if(codepoint < 0x0200000) return 4; + if(codepoint < 0x4000000) return 5; + return 6; +} + +static void fill_utf8(termkey_key *key) +{ + int codepoint = key->code; + int nbytes = utf8_seqlen(codepoint); + + key->utf8[nbytes] = 0; + + // This is easier done backwards + int b = nbytes; + while(b > 1) { + b--; + key->utf8[b] = 0x80 | (codepoint & 0x3f); + codepoint >>= 6; + } + + switch(nbytes) { + case 1: key->utf8[0] = (codepoint & 0x7f); break; + case 2: key->utf8[0] = 0xc0 | (codepoint & 0x1f); break; + case 3: key->utf8[0] = 0xe0 | (codepoint & 0x0f); break; + case 4: key->utf8[0] = 0xf0 | (codepoint & 0x07); break; + case 5: key->utf8[0] = 0xf8 | (codepoint & 0x03); break; + case 6: key->utf8[0] = 0xfc | (codepoint & 0x01); break; + } +} + +static inline void do_codepoint(termkey_t *tk, int codepoint, termkey_key *key) +{ + if(codepoint < 0x20) { + // C0 range + key->code = 0; + + if(!(tk->flags & TERMKEY_FLAG_NOINTERPRET) && tk->c0[codepoint] != TERMKEY_SYM_UNKNOWN) + key->code = tk->c0[codepoint]; + + if(!key->code) { + key->code = codepoint + 0x40; + key->modifiers = TERMKEY_KEYMOD_CTRL; + key->flags = 0; + } + else { + key->modifiers = 0; + key->flags = TERMKEY_KEYFLAG_SPECIAL; + } + } + else if(codepoint == 0x20 && !(tk->flags & TERMKEY_FLAG_NOINTERPRET)) { + // ASCII space + key->code = TERMKEY_SYM_SPACE; + key->modifiers = 0; + key->flags = TERMKEY_KEYFLAG_SPECIAL; + } + else if(codepoint == 0x7f && !(tk->flags & TERMKEY_FLAG_NOINTERPRET)) { + // ASCII DEL + key->code = TERMKEY_SYM_DEL; + key->modifiers = 0; + key->flags = TERMKEY_KEYFLAG_SPECIAL; + } + else if(codepoint >= 0x20 && codepoint < 0x80) { + // ASCII lowbyte range + key->code = codepoint; + key->modifiers = 0; + key->flags = 0; + } + else if(codepoint >= 0x80 && codepoint < 0xa0) { + // UTF-8 never starts with a C1 byte. So we can be sure of these + key->code = codepoint - 0x40; + key->modifiers = TERMKEY_KEYMOD_CTRL|TERMKEY_KEYMOD_ALT; + key->flags = 0; + } + else { + // UTF-8 codepoint + key->code = codepoint; + key->flags = 0; + key->modifiers = 0; + } + + if(!(key->flags & TERMKEY_KEYFLAG_SPECIAL)) + fill_utf8(key); +} + static termkey_result getkey_csi(termkey_t *tk, size_t introlen, termkey_key *key) { size_t csi_end = introlen; @@ -280,43 +371,6 @@ static termkey_result getkey_ss3(termkey_t *tk, size_t introlen, termkey_key *ke return TERMKEY_RES_KEY; } -#define UTF8_INVALID 0xFFFD - -static int utf8_seqlen(int codepoint) -{ - if(codepoint < 0x0000080) return 1; - if(codepoint < 0x0000800) return 2; - if(codepoint < 0x0010000) return 3; - if(codepoint < 0x0200000) return 4; - if(codepoint < 0x4000000) return 5; - return 6; -} - -static void fill_utf8(termkey_key *key) -{ - int codepoint = key->code; - int nbytes = utf8_seqlen(codepoint); - - key->utf8[nbytes] = 0; - - // This is easier done backwards - int b = nbytes; - while(b > 1) { - b--; - key->utf8[b] = 0x80 | (codepoint & 0x3f); - codepoint >>= 6; - } - - switch(nbytes) { - case 1: key->utf8[0] = (codepoint & 0x7f); break; - case 2: key->utf8[0] = 0xc0 | (codepoint & 0x1f); break; - case 3: key->utf8[0] = 0xe0 | (codepoint & 0x0f); break; - case 4: key->utf8[0] = 0xf0 | (codepoint & 0x07); break; - case 5: key->utf8[0] = 0xf8 | (codepoint & 0x03); break; - case 6: key->utf8[0] = 0xfc | (codepoint & 0x01); break; - } -} - termkey_result termkey_getkey(termkey_t *tk, termkey_key *key) { if(tk->buffvalid == 0) @@ -327,12 +381,8 @@ termkey_result termkey_getkey(termkey_t *tk, termkey_key *key) if(b0 == 0x1b) { if(tk->buffvalid == 1) { - key->code = TERMKEY_SYM_ESCAPE; - key->modifiers = 0; - key->flags = TERMKEY_KEYFLAG_SPECIAL; - + do_codepoint(tk, b0, key); eatbytes(tk, 1); - return TERMKEY_RES_KEY; } @@ -361,79 +411,16 @@ termkey_result termkey_getkey(termkey_t *tk, termkey_key *key) return metakey_result; } - else if(b0 < 0x20) { - // Control key - - key->code = 0; - - if(!(tk->flags & TERMKEY_FLAG_NOINTERPRET) && tk->c0[b0] != TERMKEY_SYM_UNKNOWN) - key->code = tk->c0[b0]; - - if(!key->code) { - key->code = b0 + 0x40; - key->modifiers = TERMKEY_KEYMOD_CTRL; - key->flags = 0; - - key->utf8[0] = key->code; - key->utf8[1] = 0; - } - else { - key->modifiers = 0; - key->flags = TERMKEY_KEYFLAG_SPECIAL; - } - - eatbytes(tk, 1); - - return TERMKEY_RES_KEY; - } - else if(b0 == 0x20 && !(tk->flags & TERMKEY_FLAG_NOINTERPRET)) { - key->code = TERMKEY_SYM_SPACE; - key->modifiers = 0; - key->flags = TERMKEY_KEYFLAG_SPECIAL; - - eatbytes(tk, 1); - - return TERMKEY_RES_KEY; - } - else if(b0 == 0x7f && !(tk->flags & TERMKEY_FLAG_NOINTERPRET)) { - key->code = TERMKEY_SYM_DEL; - key->modifiers = 0; - key->flags = TERMKEY_KEYFLAG_SPECIAL; - - eatbytes(tk, 1); - - return TERMKEY_RES_KEY; - } - else if(b0 >= 0x20 && b0 < 0x80) { - // ASCII lowbyte range - key->code = b0; - key->modifiers = 0; - key->flags = 0; - - key->utf8[0] = key->code; - key->utf8[1] = 0; - - eatbytes(tk, 1); - - return TERMKEY_RES_KEY; - } else if(b0 == 0x8f) { return getkey_ss3(tk, 1, key); } else if(b0 == 0x9b) { return getkey_csi(tk, 1, key); } - else if(b0 >= 0x80 && b0 < 0xa0) { - // UTF-8 never starts with a C1 byte. So we can be sure of these - key->code = b0 - 0x40; - key->modifiers = TERMKEY_KEYMOD_CTRL|TERMKEY_KEYMOD_ALT; - key->flags = 0; - - key->utf8[0] = key->code; - key->utf8[1] = 0; - + else if(b0 < 0xa0) { + // Single byte C0, G0 or C1 - C1 is never UTF-8 initial byte + do_codepoint(tk, b0, key); eatbytes(tk, 1); - return TERMKEY_RES_KEY; } else if(tk->flags & TERMKEY_FLAG_UTF8) { @@ -446,11 +433,8 @@ termkey_result termkey_getkey(termkey_t *tk, termkey_key *key) if(b0 < 0xc0) { // Starts with a continuation byte - that's not right - key->code = UTF8_INVALID; - - fill_utf8(key); + do_codepoint(tk, UTF8_INVALID, key); eatbytes(tk, 1); - return TERMKEY_RES_KEY; } else if(b0 < 0xe0) { @@ -474,11 +458,8 @@ termkey_result termkey_getkey(termkey_t *tk, termkey_key *key) codepoint = b0 & 0x01; } else { - key->code = UTF8_INVALID; - - fill_utf8(key); + do_codepoint(tk, UTF8_INVALID, key); eatbytes(tk, 1); - return TERMKEY_RES_KEY; } @@ -488,11 +469,8 @@ termkey_result termkey_getkey(termkey_t *tk, termkey_key *key) for(int b = 1; b < nbytes; b++) { unsigned char cb = tk->buffer[b]; if(cb < 0x80 || cb >= 0xc0) { - key->code = UTF8_INVALID; - - fill_utf8(key); + do_codepoint(tk, UTF8_INVALID, key); eatbytes(tk, b - 1); - return TERMKEY_RES_KEY; } @@ -501,34 +479,17 @@ termkey_result termkey_getkey(termkey_t *tk, termkey_key *key) } // Check for overlong sequences - if(nbytes > utf8_seqlen(codepoint)) { - key->code = UTF8_INVALID; - - fill_utf8(key); - eatbytes(tk, nbytes); - - return TERMKEY_RES_KEY; - } + if(nbytes > utf8_seqlen(codepoint)) + codepoint = UTF8_INVALID; // Check for UTF-16 surrogates or invalid codepoints if((codepoint >= 0xD800 && codepoint <= 0xDFFF) || codepoint == 0xFFFE || codepoint == 0xFFFF) - { - key->code = UTF8_INVALID; - - fill_utf8(key); - eatbytes(tk, nbytes); - - return TERMKEY_RES_KEY; - } - - key->code = codepoint; - memcpy(key->utf8, tk->buffer, nbytes); - key->utf8[nbytes] = 0; + codepoint = UTF8_INVALID; + do_codepoint(tk, codepoint, key); eatbytes(tk, nbytes); - return TERMKEY_RES_KEY; } else { |