3 files changed, 114 insertions, 104 deletions
diff --git a/driver-csi.c b/driver-csi.c
index d07c404..0c37265 100644
--- a/driver-csi.c
+++ b/driver-csi.c
@@ -120,8 +120,6 @@ static void free_driver(void *private)
   free(csi);
 }
 
-#define UTF8_INVALID 0xFFFD
-
 #define CHARAT(i) (tk->buffer[tk->buffstart + (i)])
 
 static termkey_result getkey_csi(termkey_t *tk, size_t introlen, termkey_key *key)
@@ -332,96 +330,8 @@ static termkey_result getkey(termkey_t *tk, termkey_key *key)
   else if(b0 == 0x9b) {
     return getkey_csi(tk, 1, key);
   }
-  else if(b0 < 0xa0) {
-    // Single byte C0, G0 or C1 - C1 is never UTF-8 initial byte
-    (*tk->method.emit_codepoint)(tk, b0, key);
-    (*tk->method.eat_bytes)(tk, 1);
-    return TERMKEY_RES_KEY;
-  }
-  else if(tk->flags & TERMKEY_FLAG_UTF8) {
-    // Some UTF-8
-    int nbytes;
-    int codepoint;
-
-    key->type = TERMKEY_TYPE_UNICODE;
-    key->modifiers = 0;
-
-    if(b0 < 0xc0) {
-      // Starts with a continuation byte - that's not right
-      (*tk->method.emit_codepoint)(tk, UTF8_INVALID, key);
-      (*tk->method.eat_bytes)(tk, 1);
-      return TERMKEY_RES_KEY;
-    }
-    else if(b0 < 0xe0) {
-      nbytes = 2;
-      codepoint = b0 & 0x1f;
-    }
-    else if(b0 < 0xf0) {
-      nbytes = 3;
-      codepoint = b0 & 0x0f;
-    }
-    else if(b0 < 0xf8) {
-      nbytes = 4;
-      codepoint = b0 & 0x07;
-    }
-    else if(b0 < 0xfc) {
-      nbytes = 5;
-      codepoint = b0 & 0x03;
-    }
-    else if(b0 < 0xfe) {
-      nbytes = 6;
-      codepoint = b0 & 0x01;
-    }
-    else {
-      (*tk->method.emit_codepoint)(tk, UTF8_INVALID, key);
-      (*tk->method.eat_bytes)(tk, 1);
-      return TERMKEY_RES_KEY;
-    }
-
-    if(tk->buffcount < nbytes)
-      return tk->waittime ? TERMKEY_RES_AGAIN : TERMKEY_RES_NONE;
-
-    for(int b = 1; b < nbytes; b++) {
-      unsigned char cb = CHARAT(b);
-      if(cb < 0x80 || cb >= 0xc0) {
-        (*tk->method.emit_codepoint)(tk, UTF8_INVALID, key);
-        (*tk->method.eat_bytes)(tk, b - 1);
-        return TERMKEY_RES_KEY;
-      }
-
-      codepoint <<= 6;
-      codepoint |= cb & 0x3f;
-    }
-
-    // Check for overlong sequences
-    if(nbytes > utf8_seqlen(codepoint))
-      codepoint = UTF8_INVALID;
-
-    // Check for UTF-16 surrogates or invalid codepoints
-    if((codepoint >= 0xD800 && codepoint <= 0xDFFF) ||
-       codepoint == 0xFFFE ||
-       codepoint == 0xFFFF)
-      codepoint = UTF8_INVALID;
-
-    (*tk->method.emit_codepoint)(tk, codepoint, key);
-    (*tk->method.eat_bytes)(tk, nbytes);
-    return TERMKEY_RES_KEY;
-  }
-  else {
-    // Non UTF-8 case - just report the raw byte
-    key->type = TERMKEY_TYPE_UNICODE;
-    key->code.codepoint = b0;
-    key->modifiers = 0;
-
-    key->utf8[0] = key->code.codepoint;
-    key->utf8[1] = 0;
-
-    (*tk->method.eat_bytes)(tk, 1);
-
-    return TERMKEY_RES_KEY;
-  }
-
-  return TERMKEY_SYM_NONE;
+  else
+    return (*tk->method.getkey_simple)(tk, key);
 }
 
 static termkey_keysym register_csi_ss3(termkey_csi *csi, termkey_type type, termkey_keysym sym, unsigned char cmd, const char *name)
diff --git a/termkey-internal.h b/termkey-internal.h
index 9abc43d..57c74b9 100644
--- a/termkey-internal.h
+++ b/termkey-internal.h
@@ -49,20 +49,10 @@ struct termkey {
   struct {
     void (*eat_bytes)(termkey_t *tk, size_t count);
     void (*emit_codepoint)(termkey_t *tk, int codepoint, termkey_key *key);
+    termkey_result (*getkey_simple)(termkey_t *tk, termkey_key *key);
   } method;
 };
 
 extern struct termkey_driver termkey_driver_csi;
 
-// Keep this here for now since it's tiny
-static inline int utf8_seqlen(int codepoint)
-{
-  if(codepoint < 0x0000080) return 1;
-  if(codepoint < 0x0000800) return 2;
-  if(codepoint < 0x0010000) return 3;
-  if(codepoint < 0x0200000) return 4;
-  if(codepoint < 0x4000000) return 5;
-  return 6;
-}
-
 #endif
diff --git a/termkey.c b/termkey.c
index ac03b2b..98726fd 100644
--- a/termkey.c
+++ b/termkey.c
@@ -16,6 +16,7 @@ static struct termkey_driver *drivers[] = {
 // Forwards for the "protected" methods
 static void eat_bytes(termkey_t *tk, size_t count);
 static void emit_codepoint(termkey_t *tk, int codepoint, termkey_key *key);
+static termkey_result getkey_simple(termkey_t *tk, termkey_key *key);
 
 static termkey_keysym register_c0(termkey_t *tk, termkey_keysym sym, unsigned char ctrl, const char *name);
 static termkey_keysym register_c0_full(termkey_t *tk, termkey_keysym sym, int modifier_set, int modifier_mask, unsigned char ctrl, const char *name);
@@ -74,8 +75,9 @@ termkey_t *termkey_new_full(int fd, int flags, size_t buffsize, int waittime)
   for(i = 0; i < 32; i++)
     tk->c0[i].sym = TERMKEY_SYM_NONE;
 
-  tk->method.eat_bytes = &eat_bytes;
+  tk->method.eat_bytes      = &eat_bytes;
   tk->method.emit_codepoint = &emit_codepoint;
+  tk->method.getkey_simple  = &getkey_simple;
 
   register_c0(tk, TERMKEY_SYM_BACKSPACE, 0x08, "Backspace");
   register_c0(tk, TERMKEY_SYM_TAB,       0x09, "Tab");
@@ -170,6 +172,16 @@ static void eat_bytes(termkey_t *tk, size_t count)
   }
 }
 
+static inline int utf8_seqlen(int codepoint) // length in bytes of the shortest UTF-8 form of codepoint
+{
+  if(codepoint < 0x0000080) return 1; // below 2^7
+  if(codepoint < 0x0000800) return 2; // below 2^11
+  if(codepoint < 0x0010000) return 3; // below 2^16
+  if(codepoint < 0x0200000) return 4; // below 2^21
+  if(codepoint < 0x4000000) return 5; // below 2^26
+  return 6;                           // everything else
+}
+
 static void fill_utf8(termkey_key *key)
 {
   int codepoint = key->code.codepoint;
@@ -251,6 +263,104 @@ static void emit_codepoint(termkey_t *tk, int codepoint, termkey_key *key)
     fill_utf8(key);
 }
 
+#define UTF8_INVALID 0xFFFD // codepoint emitted by getkey_simple in place of malformed UTF-8 input
+
+#define CHARAT(i) (tk->buffer[tk->buffstart + (i)]) // byte i past tk->buffstart; needs a `tk` in scope
+
+static termkey_result getkey_simple(termkey_t *tk, termkey_key *key)
+{
+  // Decode one key from the head of the buffer: a lone byte, or (with TERMKEY_FLAG_UTF8) one UTF-8 sequence
+  unsigned char b0 = CHARAT(0);
+  if(b0 < 0xa0) {
+    // Single byte C0, G0 or C1 - C1 is never UTF-8 initial byte
+    (*tk->method.emit_codepoint)(tk, b0, key);
+    (*tk->method.eat_bytes)(tk, 1);
+    return TERMKEY_RES_KEY;
+  }
+  else if(tk->flags & TERMKEY_FLAG_UTF8) {
+    // Some UTF-8: nbytes is the length announced by the lead byte, codepoint accumulates the payload bits
+    int nbytes;
+    int codepoint;
+
+    key->type = TERMKEY_TYPE_UNICODE;
+    key->modifiers = 0;
+
+    if(b0 < 0xc0) {
+      // Starts with a continuation byte - that's not right; report it as invalid and skip it
+      (*tk->method.emit_codepoint)(tk, UTF8_INVALID, key);
+      (*tk->method.eat_bytes)(tk, 1);
+      return TERMKEY_RES_KEY;
+    }
+    else if(b0 < 0xe0) {
+      nbytes = 2;
+      codepoint = b0 & 0x1f;
+    }
+    else if(b0 < 0xf0) {
+      nbytes = 3;
+      codepoint = b0 & 0x0f;
+    }
+    else if(b0 < 0xf8) {
+      nbytes = 4;
+      codepoint = b0 & 0x07;
+    }
+    else if(b0 < 0xfc) {
+      nbytes = 5;
+      codepoint = b0 & 0x03;
+    }
+    else if(b0 < 0xfe) {
+      nbytes = 6;
+      codepoint = b0 & 0x01;
+    }
+    else {
+      (*tk->method.emit_codepoint)(tk, UTF8_INVALID, key); // 0xfe / 0xff are never lead bytes
+      (*tk->method.eat_bytes)(tk, 1);
+      return TERMKEY_RES_KEY;
+    }
+    // Sequence not fully buffered yet: consume nothing and let the caller retry or give up
+    if(tk->buffcount < nbytes)
+      return tk->waittime ? TERMKEY_RES_AGAIN : TERMKEY_RES_NONE;
+
+    for(int b = 1; b < nbytes; b++) {
+      unsigned char cb = CHARAT(b);
+      if(cb < 0x80 || cb >= 0xc0) { // not a continuation byte: the sequence is cut short at index b
+        (*tk->method.emit_codepoint)(tk, UTF8_INVALID, key);
+        (*tk->method.eat_bytes)(tk, b); // drop the b-byte malformed prefix, keep cb as the next key's start
+        return TERMKEY_RES_KEY;
+      }
+
+      codepoint <<= 6;
+      codepoint |= cb & 0x3f;
+    }
+
+    // Check for overlong sequences
+    if(nbytes > utf8_seqlen(codepoint))
+      codepoint = UTF8_INVALID;
+
+    // Check for UTF-16 surrogates or invalid codepoints
+    if((codepoint >= 0xD800 && codepoint <= 0xDFFF) ||
+       codepoint == 0xFFFE ||
+       codepoint == 0xFFFF)
+      codepoint = UTF8_INVALID;
+
+    (*tk->method.emit_codepoint)(tk, codepoint, key);
+    (*tk->method.eat_bytes)(tk, nbytes);
+    return TERMKEY_RES_KEY;
+  }
+  else {
+    // Non UTF-8 case - just report the raw byte
+    key->type = TERMKEY_TYPE_UNICODE;
+    key->code.codepoint = b0;
+    key->modifiers = 0;
+
+    key->utf8[0] = key->code.codepoint;
+    key->utf8[1] = 0;
+
+    (*tk->method.eat_bytes)(tk, 1);
+
+    return TERMKEY_RES_KEY;
+  }
+}
+
 termkey_result termkey_getkey(termkey_t *tk, termkey_key *key)
 {
   return (*tk->driver.getkey)(tk, key);