#define _XOPEN_SOURCE 500 #include #include #include #include #include #include #include #include #include #include #include #if defined __GNUC__ #define ATTRIBUTE_PRINTF(x, y) __attribute__ ((format (printf, x, y))) #else // ! __GNUC__ #define ATTRIBUTE_PRINTF(x, y) #endif // ! __GNUC__ #define N_ELEMENTS(a) (sizeof (a) / sizeof ((a)[0])) // --- Utilities --------------------------------------------------------------- static char *format (const char *format, ...) ATTRIBUTE_PRINTF (1, 2); static char * strdup_vprintf (const char *format, va_list ap) { va_list aq; va_copy (aq, ap); int size = vsnprintf (NULL, 0, format, aq); va_end (aq); if (size < 0) return NULL; char buf[size + 1]; size = vsnprintf (buf, sizeof buf, format, ap); if (size < 0) return NULL; return strdup (buf); } static char * format (const char *format, ...) { va_list ap; va_start (ap, format); char *result = strdup_vprintf (format, ap); va_end (ap); return result; } // --- Generic buffer ---------------------------------------------------------- struct buffer { char *s; ///< Buffer data size_t alloc; ///< Number of bytes allocated size_t len; ///< Number of bytes used bool memory_failure; ///< Memory allocation failed }; #define BUFFER_INITIALIZER { NULL, 0, 0, false } static bool buffer_append (struct buffer *self, const void *s, size_t n) { if (self->memory_failure) return false; if (!self->s) self->s = malloc (self->alloc = 8); while (self->len + n > self->alloc) self->s = realloc (self->s, self->alloc <<= 1); if (!self->s) { self->memory_failure = true; return false; } memcpy (self->s + self->len, s, n); self->len += n; return true; } inline static bool buffer_append_c (struct buffer *self, char c) { return buffer_append (self, &c, 1); } // --- Data types -------------------------------------------------------------- enum item_type { ITEM_STRING, ITEM_LIST }; struct item { enum item_type type; ///< The type of this object struct item *next; ///< Next item on the list/stack struct item *head; ///< The head of the list size_t len; ///< Length of the string (sans '\0') char value[]; ///< The null-terminated string value }; const char * item_type_to_str (enum item_type type) { switch (type) { case ITEM_STRING: return "string"; case ITEM_LIST: return "list"; } abort (); } // --- Item management --------------------------------------------------------- static void item_free_list (struct item *); static struct item *new_clone_list (const struct item *); static void item_free (struct item *item) { if (item->type == ITEM_LIST) item_free_list (get_list (item)); free (item); } static void item_free_list (struct item *item) { while (item) { struct item *link = item; item = item->next; item_free (link); } } static struct item * new_clone (const struct item *item) { size_t size = sizeof *item; if (item->type == ITEM_STRING) size += item->len + 1; struct item *clone = malloc (size); if (!clone) return NULL; memcpy (clone, item, size); if (item->type == ITEM_LIST) { if (clone->head && !(clone->head = new_clone_list (clone->head))) { free (clone); return NULL; } } clone->next = NULL; return clone; } static struct item * new_clone_list (const struct item *item) { struct item *head = NULL, *clone; for (struct item **out = &head; item; item = item->next) { if (!(clone = *out = new_clone (item))) { item_free_list (head); return NULL; } clone->next = NULL; out = &clone->next; } return head; } static struct item * new_string (const char *s, ssize_t len) { if (len < 0) len = strlen (s); struct item *item = calloc (1, sizeof *item + len + 1); if (!item) return NULL; item->type = ITEM_STRING; item->len = len; memcpy (item->value, s, len); item->value[len] = '\0'; return item; } static struct item * new_list (struct item *head) { struct item *item = calloc (1, sizeof *item); if (!item) return NULL; item->type = ITEM_LIST; item->head = head; return item; } // --- Lexer ------------------------------------------------------------------- enum token { T_ABORT, ///< EOF or error T_LPAREN, ///< Left parenthesis T_RPAREN, ///< Right parenthesis T_LBRACKET, ///< Left bracket T_RBRACKET, ///< Right bracket T_LBRACE, ///< Left curly bracket T_RBRACE, ///< Right curly bracket T_STRING, ///< Everything else that's not space T_NEWLINE, ///< New line T_AT ///< At symbol }; static const char * token_name (enum token token) { switch (token) { case T_ABORT: return "end of input"; case T_LPAREN: return "left parenthesis"; case T_RPAREN: return "right parenthesis"; case T_LBRACKET: return "left bracket"; case T_RBRACKET: return "right bracket"; case T_LBRACE: return "left brace"; case T_RBRACE: return "right brace"; case T_STRING: return "string"; case T_NEWLINE: return "newline"; case T_AT: return "at symbol"; default: abort (); return NULL; } } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - struct lexer { const char *p; ///< Current position in input size_t len; ///< How many bytes of input are left unsigned line; ///< Current line unsigned column; ///< Current column int64_t integer; ///< Parsed boolean or integer value struct str string; ///< Parsed string value }; /// Input has to be null-terminated anyway static void lexer_init (struct lexer *self, const char *p, size_t len) { memset (self, 0, sizeof *self); self->p = p; self->len = len; str_init (&self->string); } static void lexer_free (struct lexer *self) { str_free (&self->string); } static bool lexer_is_word_char (int c) { return !strchr ("()[]{}\n@#'", c); } static int lexer_advance (struct lexer *self) { int c = *self->p++; if (c == '\n') { self->column = 0; self->line++; } else self->column++; self->len--; return c; } static void lexer_error (struct lexer *self, struct error **e, const char *format, ...) ATTRIBUTE_PRINTF (3, 4); static void lexer_error (struct lexer *self, struct error **e, const char *format, ...) { struct str description; str_init (&description); va_list ap; va_start (ap, format); str_append_vprintf (&description, format, ap); va_end (ap); if (self->report_line) error_set (e, "near line %u, column %u: %s", self->line + 1, self->column + 1, description.str); else if (self->len) error_set (e, "near character %u: %s", self->column + 1, description.str); else error_set (e, "near end: %s", description.str); str_free (&description); } static bool lexer_hexa_escape (struct lexer *self, struct str *output) { int i; unsigned char code = 0; for (i = 0; self->len && i < 2; i++) { unsigned char c = tolower (*self->p); if (c >= '0' && c <= '9') code = (code << 4) | (c - '0'); else if (c >= 'a' && c <= 'f') code = (code << 4) | (c - 'a' + 10); else break; lexer_advance (self); } if (!i) return false; str_append_c (output, code); return true; } static bool lexer_escape_sequence (struct lexer *self, struct str *output, struct error **e) { if (!self->len) { lexer_error (self, e, "premature end of escape sequence"); return false; } unsigned char c; switch ((c = *self->p)) { case '"': break; case '\\': break; case 'a': c = '\a'; break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; case 'x': case 'X': lexer_advance (self); if (lexer_hexa_escape (self, output)) return true; lexer_error (self, e, "invalid hexadecimal escape"); return false; default: lexer_error (self, e, "unknown escape sequence"); return false; } str_append_c (output, c); lexer_advance (self); return true; } static bool lexer_string (struct lexer *self, struct str *output, struct error **e) { unsigned char c; while (self->len) { if ((c = lexer_advance (self)) == '\'') return true; if (c != '\\') str_append_c (output, c); else if (!lexer_escape_sequence (self, output, e)) return false; } lexer_error (self, e, "premature end of string"); return false; } static enum token lexer_next (struct lexer *self, struct error **e) { // Skip over any whitespace between tokens while (self->len && isspace_ascii (*self->p) && *self->p != '\n') lexer_advance (self); if (!self->len) return T_ABORT; switch (*self->p) { case '(': lexer_advance (self); return T_LPAREN; case ')': lexer_advance (self); return T_RPAREN; case '[': lexer_advance (self); return T_LBRACKET; case ']': lexer_advance (self); return T_RBRACKET; case '{': lexer_advance (self); return T_LBRACE; case '}': lexer_advance (self); return T_RBRACE; case '\n': lexer_advance (self); return T_NEWLINE; case '@': lexer_advance (self); return T_AT; case '#': // Comments go until newline while (self->len) if (lexer_advance (self) == '\n') return T_NEWLINE; return T_ABORT; case '\'': lexer_advance (self); str_reset (&self->string); if (!lexer_string (self, &self->string, e)) return T_ABORT; return T_STRING; } assert (lexer_is_word_char (*self->p)); str_reset (&self->string); do str_append_c (&self->string, lexer_advance (self)); while (lexer_is_word_char (*self->p)); return T_STRING; } // --- Parsing ----------------------------------------------------------------- #define PARSE_ERROR_TABLE(XX) \ XX( OK, NULL ) \ XX( EOF, "unexpected end of input" ) \ XX( INVALID_HEXA_ESCAPE, "invalid hexadecimal escape sequence" ) \ XX( INVALID_ESCAPE, "unrecognized escape sequence" ) \ XX( MEMORY, "memory allocation failure" ) \ XX( FLOAT_RANGE, "floating point value out of range" ) \ XX( INTEGER_RANGE, "integer out of range" ) \ XX( INVALID_INPUT, "invalid input" ) \ XX( UNEXPECTED_INPUT, "unexpected input" ) enum tokenizer_error { #define XX(x, y) PARSE_ERROR_ ## x, PARSE_ERROR_TABLE (XX) #undef XX PARSE_ERROR_COUNT }; struct tokenizer { const char *cursor; enum tokenizer_error error; }; static bool decode_hexa_escape (struct tokenizer *self, struct buffer *buf) { int i; char c, code = 0; for (i = 0; i < 2; i++) { c = tolower (*self->cursor); if (c >= '0' && c <= '9') code = (code << 4) | (c - '0'); else if (c >= 'a' && c <= 'f') code = (code << 4) | (c - 'a' + 10); else break; self->cursor++; } if (!i) return false; buffer_append_c (buf, code); return true; } static bool decode_escape_sequence (struct tokenizer *self, struct buffer *buf) { // Support some basic escape sequences from the C language char c; switch ((c = *self->cursor)) { case '\0': self->error = PARSE_ERROR_EOF; return false; case 'x': case 'X': self->cursor++; if (decode_hexa_escape (self, buf)) return true; self->error = PARSE_ERROR_INVALID_HEXA_ESCAPE; return false; default: self->cursor++; const char *from = "abfnrtv\"\\", *to = "\a\b\f\n\r\t\v\"\\", *x; if ((x = strchr (from, c))) { buffer_append_c (buf, to[x - from]); return true; } self->error = PARSE_ERROR_INVALID_ESCAPE; return false; } } static struct item * parse_string (struct tokenizer *self) { struct buffer buf = BUFFER_INITIALIZER; struct item *item = NULL; char c; while (true) switch ((c = *self->cursor++)) { case '\0': self->cursor--; self->error = PARSE_ERROR_EOF; goto end; case '"': if (buf.memory_failure || !(item = new_string (buf.s, buf.len))) self->error = PARSE_ERROR_MEMORY; goto end; case '\\': if (decode_escape_sequence (self, &buf)) break; goto end; default: buffer_append_c (&buf, c); } end: free (buf.s); return item; } static struct item * parse_word (struct tokenizer *self) { struct buffer buf = BUFFER_INITIALIZER; struct item *item = NULL; char c; // Here we accept almost anything that doesn't break the grammar while (!strchr (" []\"", (c = *self->cursor++)) && (unsigned char) c > ' ') buffer_append_c (&buf, c); self->cursor--; if (buf.memory_failure) self->error = PARSE_ERROR_MEMORY; else if (!buf.len) self->error = PARSE_ERROR_INVALID_INPUT; else if (!(item = new_word (buf.s, buf.len))) self->error = PARSE_ERROR_MEMORY; free (buf.s); return item; } static struct item *parse_item_list (struct tokenizer *); static struct item * parse_list (struct tokenizer *self) { struct item *list = parse_item_list (self); if (self->error) { assert (list == NULL); return NULL; } if (!*self->cursor) { self->error = PARSE_ERROR_EOF; item_free_list (list); return NULL; } assert (*self->cursor == ']'); self->cursor++; return new_list (list); } static struct item * parse_item (struct tokenizer *self) { char c; switch ((c = *self->cursor++)) { case '[': return parse_list (self); case '"': return parse_string (self); default:; } self->cursor--; return parse_word (self); } static struct item * parse_item_list (struct tokenizer *self) { struct item *head = NULL; struct item **tail = &head; char c; bool expected = true; while ((c = *self->cursor) && c != ']') { if (isspace (c)) { self->cursor++; expected = true; continue; } else if (!expected) { self->error = PARSE_ERROR_UNEXPECTED_INPUT; goto fail; } if (!(*tail = parse_item (self))) goto fail; tail = &(*tail)->next; expected = false; } return head; fail: item_free_list (head); return NULL; } static struct item * parse (const char *s, const char **error) { struct tokenizer self = { .cursor = s, .error = PARSE_ERROR_OK }; struct item *list = parse_item_list (&self); if (!self.error && *self.cursor != '\0') { self.error = PARSE_ERROR_UNEXPECTED_INPUT; item_free_list (list); list = NULL; } #define XX(x, y) y, static const char *strings[PARSE_ERROR_COUNT] = { PARSE_ERROR_TABLE (XX) }; #undef XX static char error_buf[128]; if (self.error && error) { snprintf (error_buf, sizeof error_buf, "at character %d: %s", (int) (self.cursor - s) + 1, strings[self.error]); *error = error_buf; } return list; } // --- Runtime ----------------------------------------------------------------- struct context { struct item *stack; ///< The current top of the stack size_t stack_size; ///< Number of items on the stack char *error; ///< Error information bool error_is_fatal; ///< Whether the error can be catched bool memory_failure; ///< Memory allocation failure void *user_data; ///< User data }; /// Internal handler for a function typedef bool (*handler_fn) (struct context *); struct fn { struct fn *next; ///< The next link in the chain handler_fn handler; ///< Internal C handler, or NULL struct item *script; ///< Alternatively runtime code char name[]; ///< The name of the function }; struct fn *g_functions; ///< Maps words to functions static void context_init (struct context *ctx) { ctx->stack = NULL; ctx->stack_size = 0; ctx->error = NULL; ctx->error_is_fatal = false; ctx->memory_failure = false; ctx->user_data = NULL; } static void context_free (struct context *ctx) { item_free_list (ctx->stack); ctx->stack = NULL; free (ctx->error); ctx->error = NULL; } static bool set_error (struct context *ctx, const char *format, ...) { free (ctx->error); va_list ap; va_start (ap, format); ctx->error = strdup_vprintf (format, ap); va_end (ap); if (!ctx->error) ctx->memory_failure = true; return false; } static bool push (struct context *ctx, struct item *item) { // The `item' is typically a result from new_(), thus when it is null, // that function must have failed. This is a shortcut for convenience. if (!item) { ctx->memory_failure = true; return false; } assert (item->next == NULL); item->next = ctx->stack; ctx->stack = item; ctx->stack_size++; return true; } static bool execute (struct context *, struct item *); static bool call_function (struct context *ctx, const char *name) { struct fn *iter; for (iter = g_functions; iter; iter = iter->next) if (!strcmp (name, iter->name)) goto found; return set_error (ctx, "unknown function: %s", name); found: if (iter->handler ? iter->handler (ctx) : execute (ctx, iter->script)) return true; // In this case, `error' is NULL if (ctx->memory_failure) return false; // This creates some form of a stack trace char *tmp = ctx->error; ctx->error = NULL; set_error (ctx, "%s -> %s", name, tmp); free (tmp); return false; } static void free_function (struct fn *fn) { item_free_list (fn->script); free (fn); } static void unregister_function (const char *name) { for (struct fn **iter = &g_functions; *iter; iter = &(*iter)->next) if (!strcmp ((*iter)->name, name)) { struct fn *tmp = *iter; *iter = tmp->next; free_function (tmp); break; } } static struct fn * prepend_new_fn (const char *name) { struct fn *fn = calloc (1, sizeof *fn + strlen (name) + 1); if (!fn) return NULL; strcpy (fn->name, name); fn->next = g_functions; return g_functions = fn; } static bool register_handler (const char *name, handler_fn handler) { unregister_function (name); struct fn *fn = prepend_new_fn (name); if (!fn) return false; fn->handler = handler; return true; } static bool register_script (const char *name, struct item *script) { unregister_function (name); struct fn *fn = prepend_new_fn (name); if (!fn) return false; fn->script = script; return true; } static bool execute (struct context *ctx, struct item *script) { for (; script; script = script->next) { if (script->type != ITEM_STRING) { if (!push (ctx, new_clone (script))) return false; } else if (!call_function (ctx, get_word (script))) return false; } return true; } // --- Runtime library --------------------------------------------------------- #define defn(name) static bool name (struct context *ctx) static bool init_runtime_library_scripts (void) { bool ok = true; // It's much cheaper (and more fun) to define functions in terms of other // ones. The "unit tests" serve a secondary purpose of showing the usage. struct script { const char *name; ///< Name of the function const char *definition; ///< The defining script } scripts[] = { { "greet", "print (.. 'hello ' (.. @1))" }, }; for (size_t i = 0; i < N_ELEMENTS (scripts); i++) { const char *error = NULL; struct item *script = parse (scripts[i].definition, &error); if (error) { printf ("error parsing internal script `%s': %s\n", scripts[i].definition, error); ok = false; } else ok &= register_script (scripts[i].name, script); } return ok; } defn (fn_print) { check_stack (1); struct item *item = pop (ctx); struct user_info *info = ctx->user_data; struct buffer buf = BUFFER_INITIALIZER; item_to_str (item, &buf); item_free (item); buffer_append_c (&buf, '\0'); if (buf.memory_failure) { ctx->memory_failure = true; return false; } printf ("%s\n", buf.s); free (buf.s); return true; } defn (fn_concatenate) { // TODO: concatenate string arguments, error on list return true; } static bool init_runtime_library (void) { return register_handler ("..", fn_concatenate); && register_handler ("print", fn_print); && init_runtime_library_scripts (); } static void free_runtime_library (void) { struct fn *next, *iter; for (iter = g_functions; iter; iter = next) { next = iter->next; free_function (iter); } } // --- Main -------------------------------------------------------------------- static void process_message (const char *msg) { // Finally parse and execute the macro const char *error = NULL; struct item *script = parse (msg, &error); if (error) { printf ("%s: %s\r\n", "parse error", error); goto end; } struct context ctx; context_init (&ctx); ctx.user_data = &info; execute (&ctx, script); item_free_list (script); const char *failure = NULL; if (ctx.memory_failure) failure = "memory allocation failure"; else if (ctx.error) failure = ctx.error; if (failure) printf ("%s: %s\r\n", "runtime error", failure); context_free (&ctx); end: free (msg_ctx_quote); } int main (int argc, char *argv[]) { freopen (NULL, "rb", stdin); setvbuf (stdin, NULL, _IOLBF, BUFSIZ); freopen (NULL, "wb", stdout); setvbuf (stdout, NULL, _IOLBF, BUFSIZ); if (!init_runtime_library () || !register_handler (".", fn_dot)) printf ("%s\n", "runtime library initialization failed"); // TODO: load the entirety of stdin and execute it process_message ("print 'hello world\n'"); free_runtime_library (); return 0; }