diff options
-rw-r--r-- | Makefile | 7 | ||||
-rwxr-xr-x | ell.c | 913 |
2 files changed, 920 insertions, 0 deletions
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..3810925 --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ +CFLAGS = -std=gnu99 -Wall +all: ell +ell: ell.c + $(CC) $(CFLAGS) $< -o $@ +clean: + rm ell +.PHONY: all clean @@ -0,0 +1,913 @@ +#define _XOPEN_SOURCE 500 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <errno.h> +#include <stdarg.h> +#include <assert.h> +#include <time.h> +#include <stdbool.h> +#include <strings.h> +#include <math.h> + +#if defined __GNUC__ +#define ATTRIBUTE_PRINTF(x, y) __attribute__ ((format (printf, x, y))) +#else // ! __GNUC__ +#define ATTRIBUTE_PRINTF(x, y) +#endif // ! __GNUC__ + +#define N_ELEMENTS(a) (sizeof (a) / sizeof ((a)[0])) + +// --- Utilities --------------------------------------------------------------- + +static char *format (const char *format, ...) ATTRIBUTE_PRINTF (1, 2); + +static char * +strdup_vprintf (const char *format, va_list ap) { + va_list aq; + va_copy (aq, ap); + int size = vsnprintf (NULL, 0, format, aq); + va_end (aq); + if (size < 0) + return NULL; + + char buf[size + 1]; + size = vsnprintf (buf, sizeof buf, format, ap); + if (size < 0) + return NULL; + + return strdup (buf); +} + +static char * +format (const char *format, ...) { + va_list ap; + va_start (ap, format); + char *result = strdup_vprintf (format, ap); + va_end (ap); + return result; +} + +// --- Generic buffer ---------------------------------------------------------- + +struct buffer { + char *s; ///< Buffer data + size_t alloc; ///< Number of bytes allocated + size_t len; ///< Number of bytes used + bool memory_failure; ///< Memory allocation failed +}; + +#define BUFFER_INITIALIZER { NULL, 0, 0, false } + +static bool +buffer_append (struct buffer *self, const void *s, size_t n) { + if (self->memory_failure) + return false; + + if (!self->s) + self->s = malloc (self->alloc = 8); + while (self->len + n > self->alloc) + self->s = realloc (self->s, self->alloc <<= 1); + + if (!self->s) { + self->memory_failure = true; + return false; + } + + memcpy (self->s + self->len, s, n); + self->len += n; + return true; +} + +inline static bool +buffer_append_c (struct buffer *self, char c) { + return buffer_append (self, &c, 1); +} + +// --- Data types -------------------------------------------------------------- + +enum item_type { ITEM_STRING, ITEM_LIST }; + +struct item { + enum item_type type; ///< The type of this object + struct item *next; ///< Next item on the list/stack + + struct item *head; ///< The head of the list + size_t len; ///< Length of the string (sans '\0') + char value[]; ///< The null-terminated string value +}; + +const char * +item_type_to_str (enum item_type type) { + switch (type) { + case ITEM_STRING: return "string"; + case ITEM_LIST: return "list"; + } + abort (); +} + +// --- Item management --------------------------------------------------------- + +static void item_free_list (struct item *); +static struct item *new_clone_list (const struct item *); + +static void +item_free (struct item *item) { + if (item->type == ITEM_LIST) + item_free_list (get_list (item)); + free (item); +} + +static void +item_free_list (struct item *item) { + while (item) { + struct item *link = item; + item = item->next; + item_free (link); + } +} + +static struct item * +new_clone (const struct item *item) { + size_t size = sizeof *item; + if (item->type == ITEM_STRING) + size += item->len + 1; + + struct item *clone = malloc (size); + if (!clone) + return NULL; + + memcpy (clone, item, size); + if (item->type == ITEM_LIST) { + if (clone->head && !(clone->head = new_clone_list (clone->head))) { + free (clone); + return NULL; + } + } + clone->next = NULL; + return clone; +} + +static struct item * +new_clone_list (const struct item *item) { + struct item *head = NULL, *clone; + for (struct item **out = &head; item; item = item->next) { + if (!(clone = *out = new_clone (item))) { + item_free_list (head); + return NULL; + } + clone->next = NULL; + out = &clone->next; + } + return head; +} + +static struct item * +new_string (const char *s, ssize_t len) { + if (len < 0) + len = strlen (s); + + struct item *item = calloc (1, sizeof *item + len + 1); + if (!item) + return NULL; + + item->type = ITEM_STRING; + item->len = len; + memcpy (item->value, s, len); + item->value[len] = '\0'; + return item; +} + +static struct item * +new_list (struct item *head) { + struct item *item = calloc (1, sizeof *item); + if (!item) + return NULL; + + item->type = ITEM_LIST; + item->head = head; + return item; +} + +// --- Lexer ------------------------------------------------------------------- + +enum token { + T_ABORT, ///< EOF or error + + T_LPAREN, ///< Left parenthesis + T_RPAREN, ///< Right parenthesis + T_LBRACKET, ///< Left bracket + T_RBRACKET, ///< Right bracket + T_LBRACE, ///< Left curly bracket + T_RBRACE, ///< Right curly bracket + T_STRING, ///< Everything else that's not space + T_NEWLINE, ///< New line + T_AT ///< At symbol +}; + +static const char * +token_name (enum token token) { + switch (token) { + case T_ABORT: return "end of input"; + + case T_LPAREN: return "left parenthesis"; + case T_RPAREN: return "right parenthesis"; + case T_LBRACKET: return "left bracket"; + case T_RBRACKET: return "right bracket"; + case T_LBRACE: return "left brace"; + case T_RBRACE: return "right brace"; + case T_STRING: return "string"; + case T_NEWLINE: return "newline"; + case T_AT: return "at symbol"; + + default: + abort (); + return NULL; + } +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +struct lexer { + const char *p; ///< Current position in input + size_t len; ///< How many bytes of input are left + + unsigned line; ///< Current line + unsigned column; ///< Current column + + int64_t integer; ///< Parsed boolean or integer value + struct str string; ///< Parsed string value +}; + +/// Input has to be null-terminated anyway +static void +lexer_init (struct lexer *self, const char *p, size_t len) { + memset (self, 0, sizeof *self); + self->p = p; + self->len = len; + str_init (&self->string); +} + +static void +lexer_free (struct lexer *self) { + str_free (&self->string); +} + +static bool lexer_is_word_char (int c) { return !strchr ("()[]{}\n@#'", c); } + +static int +lexer_advance (struct lexer *self) { + int c = *self->p++; + if (c == '\n') { + self->column = 0; + self->line++; + } else + self->column++; + + self->len--; + return c; +} + +static void lexer_error (struct lexer *self, + struct error **e, const char *format, ...) ATTRIBUTE_PRINTF (3, 4); + +static void +lexer_error (struct lexer *self, struct error **e, const char *format, ...) { + struct str description; + str_init (&description); + + va_list ap; + va_start (ap, format); + str_append_vprintf (&description, format, ap); + va_end (ap); + + if (self->report_line) + error_set (e, "near line %u, column %u: %s", + self->line + 1, self->column + 1, description.str); + else if (self->len) + error_set (e, "near character %u: %s", + self->column + 1, description.str); + else + error_set (e, "near end: %s", description.str); + + str_free (&description); +} + +static bool +lexer_hexa_escape (struct lexer *self, struct str *output) { + int i; + unsigned char code = 0; + + for (i = 0; self->len && i < 2; i++) { + unsigned char c = tolower (*self->p); + if (c >= '0' && c <= '9') + code = (code << 4) | (c - '0'); + else if (c >= 'a' && c <= 'f') + code = (code << 4) | (c - 'a' + 10); + else + break; + + lexer_advance (self); + } + + if (!i) + return false; + + str_append_c (output, code); + return true; +} + +static bool +lexer_escape_sequence + (struct lexer *self, struct str *output, struct error **e) { + if (!self->len) { + lexer_error (self, e, "premature end of escape sequence"); + return false; + } + + unsigned char c; + switch ((c = *self->p)) { + case '"': break; + case '\\': break; + case 'a': c = '\a'; break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\v'; break; + + case 'x': + case 'X': + lexer_advance (self); + if (lexer_hexa_escape (self, output)) + return true; + + lexer_error (self, e, "invalid hexadecimal escape"); + return false; + + default: + lexer_error (self, e, "unknown escape sequence"); + return false; + } + + str_append_c (output, c); + lexer_advance (self); + return true; +} + +static bool +lexer_string (struct lexer *self, struct str *output, struct error **e) { + unsigned char c; + while (self->len) { + if ((c = lexer_advance (self)) == '\'') + return true; + if (c != '\\') + str_append_c (output, c); + else if (!lexer_escape_sequence (self, output, e)) + return false; + } + lexer_error (self, e, "premature end of string"); + return false; +} + +static enum token +lexer_next (struct lexer *self, struct error **e) { + // Skip over any whitespace between tokens + while (self->len && isspace_ascii (*self->p) && *self->p != '\n') + lexer_advance (self); + if (!self->len) + return T_ABORT; + + switch (*self->p) { + case '(': lexer_advance (self); return T_LPAREN; + case ')': lexer_advance (self); return T_RPAREN; + case '[': lexer_advance (self); return T_LBRACKET; + case ']': lexer_advance (self); return T_RBRACKET; + case '{': lexer_advance (self); return T_LBRACE; + case '}': lexer_advance (self); return T_RBRACE; + case '\n': lexer_advance (self); return T_NEWLINE; + case '@': lexer_advance (self); return T_AT; + + case '#': + // Comments go until newline + while (self->len) + if (lexer_advance (self) == '\n') + return T_NEWLINE; + return T_ABORT; + + case '\'': + lexer_advance (self); + str_reset (&self->string); + if (!lexer_string (self, &self->string, e)) + return T_ABORT; + return T_STRING; + } + + assert (lexer_is_word_char (*self->p)); + str_reset (&self->string); + do + str_append_c (&self->string, lexer_advance (self)); + while (lexer_is_word_char (*self->p)); + return T_STRING; +} + +// --- Parsing ----------------------------------------------------------------- + +#define PARSE_ERROR_TABLE(XX) \ + XX( OK, NULL ) \ + XX( EOF, "unexpected end of input" ) \ + XX( INVALID_HEXA_ESCAPE, "invalid hexadecimal escape sequence" ) \ + XX( INVALID_ESCAPE, "unrecognized escape sequence" ) \ + XX( MEMORY, "memory allocation failure" ) \ + XX( FLOAT_RANGE, "floating point value out of range" ) \ + XX( INTEGER_RANGE, "integer out of range" ) \ + XX( INVALID_INPUT, "invalid input" ) \ + XX( UNEXPECTED_INPUT, "unexpected input" ) + +enum tokenizer_error { +#define XX(x, y) PARSE_ERROR_ ## x, + PARSE_ERROR_TABLE (XX) +#undef XX + PARSE_ERROR_COUNT +}; + +struct tokenizer { + const char *cursor; + enum tokenizer_error error; +}; + +static bool +decode_hexa_escape (struct tokenizer *self, struct buffer *buf) { + int i; + char c, code = 0; + + for (i = 0; i < 2; i++) { + c = tolower (*self->cursor); + if (c >= '0' && c <= '9') + code = (code << 4) | (c - '0'); + else if (c >= 'a' && c <= 'f') + code = (code << 4) | (c - 'a' + 10); + else + break; + + self->cursor++; + } + + if (!i) + return false; + + buffer_append_c (buf, code); + return true; +} + +static bool +decode_escape_sequence (struct tokenizer *self, struct buffer *buf) { + // Support some basic escape sequences from the C language + char c; + switch ((c = *self->cursor)) { + case '\0': + self->error = PARSE_ERROR_EOF; + return false; + case 'x': + case 'X': + self->cursor++; + if (decode_hexa_escape (self, buf)) + return true; + + self->error = PARSE_ERROR_INVALID_HEXA_ESCAPE; + return false; + default: + self->cursor++; + const char *from = "abfnrtv\"\\", *to = "\a\b\f\n\r\t\v\"\\", *x; + if ((x = strchr (from, c))) { + buffer_append_c (buf, to[x - from]); + return true; + } + self->error = PARSE_ERROR_INVALID_ESCAPE; + return false; + } +} + +static struct item * +parse_string (struct tokenizer *self) { + struct buffer buf = BUFFER_INITIALIZER; + struct item *item = NULL; + char c; + + while (true) + switch ((c = *self->cursor++)) { + case '\0': + self->cursor--; + self->error = PARSE_ERROR_EOF; + goto end; + case '"': + if (buf.memory_failure + || !(item = new_string (buf.s, buf.len))) + self->error = PARSE_ERROR_MEMORY; + goto end; + case '\\': + if (decode_escape_sequence (self, &buf)) + break; + goto end; + default: + buffer_append_c (&buf, c); + } + +end: + free (buf.s); + return item; +} + +static struct item * +parse_word (struct tokenizer *self) { + struct buffer buf = BUFFER_INITIALIZER; + struct item *item = NULL; + char c; + + // Here we accept almost anything that doesn't break the grammar + while (!strchr (" []\"", (c = *self->cursor++)) && (unsigned char) c > ' ') + buffer_append_c (&buf, c); + self->cursor--; + + if (buf.memory_failure) + self->error = PARSE_ERROR_MEMORY; + else if (!buf.len) + self->error = PARSE_ERROR_INVALID_INPUT; + else if (!(item = new_word (buf.s, buf.len))) + self->error = PARSE_ERROR_MEMORY; + + free (buf.s); + return item; +} + +static struct item *parse_item_list (struct tokenizer *); + +static struct item * +parse_list (struct tokenizer *self) { + struct item *list = parse_item_list (self); + if (self->error) { + assert (list == NULL); + return NULL; + } + if (!*self->cursor) { + self->error = PARSE_ERROR_EOF; + item_free_list (list); + return NULL; + } + assert (*self->cursor == ']'); + self->cursor++; + return new_list (list); +} + +static struct item * +parse_item (struct tokenizer *self) { + char c; + switch ((c = *self->cursor++)) { + case '[': return parse_list (self); + case '"': return parse_string (self); + default:; + } + + self->cursor--; + return parse_word (self); +} + +static struct item * +parse_item_list (struct tokenizer *self) { + struct item *head = NULL; + struct item **tail = &head; + + char c; + bool expected = true; + while ((c = *self->cursor) && c != ']') { + if (isspace (c)) { + self->cursor++; + expected = true; + continue; + } else if (!expected) { + self->error = PARSE_ERROR_UNEXPECTED_INPUT; + goto fail; + } + + if (!(*tail = parse_item (self))) + goto fail; + tail = &(*tail)->next; + expected = false; + } + return head; + +fail: + item_free_list (head); + return NULL; +} + +static struct item * +parse (const char *s, const char **error) { + struct tokenizer self = { .cursor = s, .error = PARSE_ERROR_OK }; + struct item *list = parse_item_list (&self); + if (!self.error && *self.cursor != '\0') { + self.error = PARSE_ERROR_UNEXPECTED_INPUT; + item_free_list (list); + list = NULL; + } + +#define XX(x, y) y, + static const char *strings[PARSE_ERROR_COUNT] = + { PARSE_ERROR_TABLE (XX) }; +#undef XX + + static char error_buf[128]; + if (self.error && error) { + snprintf (error_buf, sizeof error_buf, "at character %d: %s", + (int) (self.cursor - s) + 1, strings[self.error]); + *error = error_buf; + } + return list; +} + +// --- Runtime ----------------------------------------------------------------- + +struct context { + struct item *stack; ///< The current top of the stack + size_t stack_size; ///< Number of items on the stack + + char *error; ///< Error information + bool error_is_fatal; ///< Whether the error can be catched + bool memory_failure; ///< Memory allocation failure + + void *user_data; ///< User data +}; + +/// Internal handler for a function +typedef bool (*handler_fn) (struct context *); + +struct fn { + struct fn *next; ///< The next link in the chain + + handler_fn handler; ///< Internal C handler, or NULL + struct item *script; ///< Alternatively runtime code + char name[]; ///< The name of the function +}; + +struct fn *g_functions; ///< Maps words to functions + +static void +context_init (struct context *ctx) { + ctx->stack = NULL; + ctx->stack_size = 0; + + ctx->error = NULL; + ctx->error_is_fatal = false; + ctx->memory_failure = false; + + ctx->user_data = NULL; +} + +static void +context_free (struct context *ctx) { + item_free_list (ctx->stack); + ctx->stack = NULL; + + free (ctx->error); + ctx->error = NULL; +} + +static bool +set_error (struct context *ctx, const char *format, ...) { + free (ctx->error); + + va_list ap; + va_start (ap, format); + ctx->error = strdup_vprintf (format, ap); + va_end (ap); + + if (!ctx->error) + ctx->memory_failure = true; + return false; +} + +static bool +push (struct context *ctx, struct item *item) { + // The `item' is typically a result from new_<type>(), thus when it is null, + // that function must have failed. This is a shortcut for convenience. + if (!item) { + ctx->memory_failure = true; + return false; + } + + assert (item->next == NULL); + item->next = ctx->stack; + ctx->stack = item; + ctx->stack_size++; + return true; +} + +static bool execute (struct context *, struct item *); + +static bool +call_function (struct context *ctx, const char *name) { + struct fn *iter; + for (iter = g_functions; iter; iter = iter->next) + if (!strcmp (name, iter->name)) + goto found; + return set_error (ctx, "unknown function: %s", name); + +found: + if (iter->handler + ? iter->handler (ctx) + : execute (ctx, iter->script)) + return true; + + // In this case, `error' is NULL + if (ctx->memory_failure) + return false; + + // This creates some form of a stack trace + char *tmp = ctx->error; + ctx->error = NULL; + set_error (ctx, "%s -> %s", name, tmp); + free (tmp); + return false; +} + +static void +free_function (struct fn *fn) { + item_free_list (fn->script); + free (fn); +} + +static void +unregister_function (const char *name) { + for (struct fn **iter = &g_functions; *iter; iter = &(*iter)->next) + if (!strcmp ((*iter)->name, name)) { + struct fn *tmp = *iter; + *iter = tmp->next; + free_function (tmp); + break; + } +} + +static struct fn * +prepend_new_fn (const char *name) { + struct fn *fn = calloc (1, sizeof *fn + strlen (name) + 1); + if (!fn) + return NULL; + + strcpy (fn->name, name); + fn->next = g_functions; + return g_functions = fn; +} + +static bool +register_handler (const char *name, handler_fn handler) { + unregister_function (name); + struct fn *fn = prepend_new_fn (name); + if (!fn) + return false; + fn->handler = handler; + return true; +} + +static bool +register_script (const char *name, struct item *script) { + unregister_function (name); + struct fn *fn = prepend_new_fn (name); + if (!fn) + return false; + fn->script = script; + return true; +} + +static bool +execute (struct context *ctx, struct item *script) { + for (; script; script = script->next) { + if (script->type != ITEM_STRING) { + if (!push (ctx, new_clone (script))) + return false; + } + else if (!call_function (ctx, get_word (script))) + return false; + } + return true; +} + +// --- Runtime library --------------------------------------------------------- + +#define defn(name) static bool name (struct context *ctx) + +static bool +init_runtime_library_scripts (void) { + bool ok = true; + + // It's much cheaper (and more fun) to define functions in terms of other + // ones. The "unit tests" serve a secondary purpose of showing the usage. + struct script { + const char *name; ///< Name of the function + const char *definition; ///< The defining script + } scripts[] = { + { "greet", "print (.. 'hello ' (.. @1))" }, + }; + + for (size_t i = 0; i < N_ELEMENTS (scripts); i++) { + const char *error = NULL; + struct item *script = parse (scripts[i].definition, &error); + if (error) { + printf ("error parsing internal script `%s': %s\n", + scripts[i].definition, error); + ok = false; + } else + ok &= register_script (scripts[i].name, script); + } + return ok; +} + +defn (fn_print) { + check_stack (1); + struct item *item = pop (ctx); + struct user_info *info = ctx->user_data; + + struct buffer buf = BUFFER_INITIALIZER; + item_to_str (item, &buf); + item_free (item); + buffer_append_c (&buf, '\0'); + if (buf.memory_failure) { + ctx->memory_failure = true; + return false; + } + + printf ("%s\n", buf.s); + free (buf.s); + return true; +} + +defn (fn_concatenate) { + // TODO: concatenate string arguments, error on list + return true; +} + +static bool +init_runtime_library (void) +{ + return register_handler ("..", fn_concatenate); + && register_handler ("print", fn_print); + && init_runtime_library_scripts (); +} + +static void +free_runtime_library (void) { + struct fn *next, *iter; + for (iter = g_functions; iter; iter = next) { + next = iter->next; + free_function (iter); + } +} + +// --- Main -------------------------------------------------------------------- + +static void +process_message (const char *msg) { + // Finally parse and execute the macro + const char *error = NULL; + struct item *script = parse (msg, &error); + if (error) { + printf ("%s: %s\r\n", "parse error", error); + goto end; + } + + struct context ctx; + context_init (&ctx); + ctx.user_data = &info; + execute (&ctx, script); + item_free_list (script); + + const char *failure = NULL; + if (ctx.memory_failure) + failure = "memory allocation failure"; + else if (ctx.error) + failure = ctx.error; + if (failure) + printf ("%s: %s\r\n", "runtime error", failure); + context_free (&ctx); +end: + free (msg_ctx_quote); +} + +int +main (int argc, char *argv[]) { + freopen (NULL, "rb", stdin); setvbuf (stdin, NULL, _IOLBF, BUFSIZ); + freopen (NULL, "wb", stdout); setvbuf (stdout, NULL, _IOLBF, BUFSIZ); + + if (!init_runtime_library () + || !register_handler (".", fn_dot)) + printf ("%s\n", "runtime library initialization failed"); + + // TODO: load the entirety of stdin and execute it + process_message ("print 'hello world\n'"); + + free_runtime_library (); + return 0; +} + |