From a1939346747b4270520b03e66ba923f4805925c0 Mon Sep 17 00:00:00 2001 From: Přemysl Janouch Date: Thu, 18 May 2017 14:03:47 +0200 Subject: Write the parser Came out much simpler than what it used to parse originally. --- ell.c | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 199 insertions(+), 27 deletions(-) diff --git a/ell.c b/ell.c index 38b88da..4b1ddce 100755 --- a/ell.c +++ b/ell.c @@ -30,6 +30,7 @@ #include #include #include +#include #if defined __GNUC__ #define ATTRIBUTE_PRINTF(x, y) __attribute__ ((format (printf, x, y))) @@ -427,34 +428,192 @@ lexer_next (struct lexer *self, char **e) { // --- Parsing ----------------------------------------------------------------- -// TODO: parse "s" into a tree, including all the syntax sugar +struct parser +{ + struct lexer lexer; ///< Tokenizer + char *error; ///< Tokenizer error + enum token token; ///< Current token in the lexer + bool replace_token; ///< Replace the token +}; + +static void +parser_init (struct parser *self, const char *script, size_t len) { + memset (self, 0, sizeof *self); + lexer_init (&self->lexer, script, len); + + // As reading in tokens may cause exceptions, we wait for the first peek() + // to replace the initial T_ABORT. + self->replace_token = true; +} + +static void +parser_free (struct parser *self) { + lexer_free (&self->lexer); + if (self->error) + free (self->error); +} + +static enum token +parser_peek (struct parser *self, jmp_buf out) { + if (self->replace_token) + { + self->token = lexer_next (&self->lexer, &self->error); + if (self->error) + longjmp (out, 1); + self->replace_token = false; + +#ifndef NDEBUG + if (self->token == T_STRING) { + buffer_append_c (&self->lexer.string, 0); + printf ("'%s'\n", self->lexer.string.s); + } else { + printf ("%s\n", token_name (self->token)); + } +#endif + } + return self->token; +} + +static bool +parser_accept (struct parser *self, enum token token, jmp_buf out) { + return self->replace_token = (parser_peek (self, out) == token); +} + +static void +parser_expect (struct parser *self, enum token token, jmp_buf out) { + if (parser_accept (self, token, out)) + return; + + lexer_error (&self->lexer, &self->error, "unexpected `%s', expected `%s'", + token_name (self->token), + token_name (token)); + longjmp (out, 1); +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +// We don't need no generator, but a few macros will come in handy. +// From time to time C just doesn't have the right features. + +#define PEEK() parser_peek (self, err) +#define ACCEPT(token) parser_accept (self, token, err) +#define EXPECT(token) parser_expect (self, token, err) +#define SKIP_NL() do {} while (ACCEPT (T_NEWLINE)) + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +static struct item * parse_line (struct parser *self, jmp_buf out); + static struct item * -parse (const char *s, const char **error) { - struct lexer lexer; - lexer_init (&lexer, s, strlen (s)); +parse_prefix_list (struct item *list, const char *name) { + struct item *prefix = new_string (name, strlen (name)); + prefix->next = list; + return new_list (prefix); +} - char *e = NULL; - enum token type; - while ((type = lexer_next (&lexer, &e)) != T_ABORT) { - printf ("%s", token_name (type)); - if (type == T_STRING) { - buffer_append_c (&lexer.string, 0); - printf (" '%s'", lexer.string.s); +static struct item * +parse_item (struct parser *self, jmp_buf out) { + struct item *volatile result = NULL, *volatile *tail = &result; + jmp_buf err; + + if (setjmp (err)) { + item_free_list (result); + longjmp (out, 1); + } + + SKIP_NL (); + if (ACCEPT (T_STRING)) + return new_string (self->lexer.string.s, self->lexer.string.len); + if (ACCEPT (T_AT)) { + result = parse_item (self, out); + return parse_prefix_list (result, "set"); + } + if (ACCEPT (T_LPAREN)) { + while (!ACCEPT (T_RPAREN)) { + *tail = parse_item (self, err); + tail = &(*tail)->next; + SKIP_NL (); } - printf ("\n"); + return new_list (result); } - if (e) { - printf ("error: %s\n", e); - free (e); + if (ACCEPT (T_LBRACKET)) { + while (!ACCEPT (T_RBRACKET)) { + *tail = parse_item (self, err); + tail = &(*tail)->next; + SKIP_NL (); + } + return parse_prefix_list (result, "list"); + } + if (ACCEPT (T_LBRACE)) { + while ((*tail = parse_line (self, err))) + tail = &(*tail)->next; + EXPECT (T_RBRACE); + return parse_prefix_list (result, "quote"); + } + + lexer_error (&self->lexer, &self->error, + "unexpected `%s', expected a value", token_name (self->token)); + longjmp (out, 1); +} + +static struct item * +parse_line (struct parser *self, jmp_buf out) { + struct item *volatile result = NULL, *volatile *tail = &result; + jmp_buf err; + + if (setjmp (err)) { + item_free_list (result); + longjmp (out, 1); + } + + while (PEEK () != T_RBRACE && PEEK () != T_ABORT) { + if (ACCEPT (T_NEWLINE)) { + if (result) + return new_list (result); + } else { + *tail = parse_item (self, err); + tail = &(*tail)->next; + } } - lexer_free (&lexer); + if (result) + return new_list (result); return NULL; } +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#undef PEEK +#undef ACCEPT +#undef EXPECT +#undef SKIP_NL + +static struct item * +parse (const char *s, size_t len, char **e) { + struct parser parser; + parser_init (&parser, s, len); + + struct item *volatile result = NULL, *volatile *tail = &result; + jmp_buf err; + + if (setjmp (err)) { + item_free_list (result); + *e = parser.error; + lexer_free (&parser.lexer); + return NULL; + } + + while ((*tail = parse_line (&parser, err))) + tail = &(*tail)->next; + parser_expect (&parser, T_ABORT, err); + + parser_free (&parser); + return new_list (result); +} + // --- Runtime ----------------------------------------------------------------- struct context { - struct item variables; ///< List of variables + struct item *variables; ///< List of variables char *error; ///< Error information bool error_is_fatal; ///< Whether the error can be catched @@ -483,7 +642,7 @@ context_init (struct context *ctx) { static void context_free (struct context *ctx) { - item_free_list (ctx->variables.head); + item_free_list (ctx->variables); free (ctx->error); } @@ -607,11 +766,13 @@ init_runtime_library_scripts (void) { }; for (size_t i = 0; i < N_ELEMENTS (scripts); i++) { - const char *error = NULL; - struct item *script = parse (scripts[i].definition, &error); - if (error) { + char *e = NULL; + struct item *script = parse (scripts[i].definition, + strlen (scripts[i].definition), &e); + if (e) { printf ("error parsing internal script `%s': %s\n", - scripts[i].definition, error); + scripts[i].definition, e); + free (e); ok = false; } else ok &= register_script (scripts[i].name, script); @@ -664,6 +825,16 @@ free_runtime_library (void) { // --- Main -------------------------------------------------------------------- +static void +print_tree (struct item *tree) { + // TODO: first figure out how to just print the tree + // TODO: also re-add syntax sugar + for (; tree; tree = tree->next) { + } +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + int main (int argc, char *argv[]) { if (!init_runtime_library ()) @@ -672,10 +843,11 @@ main (int argc, char *argv[]) { // TODO: load the entirety of stdin and execute it const char *program = "print 'hello world\n'"; - const char *error = NULL; - struct item *script = parse (program, &error); - if (error) { - printf ("%s: %s\r\n", "parse error", error); + char *e = NULL; + struct item *script = parse (program, strlen (program), &e); + if (e) { + printf ("%s: %s\n", "parse error", e); + free (e); return 1; } @@ -691,7 +863,7 @@ main (int argc, char *argv[]) { else if (ctx.error) failure = ctx.error; if (failure) - printf ("%s: %s\r\n", "runtime error", failure); + printf ("%s: %s\n", "runtime error", failure); context_free (&ctx); free_runtime_library (); -- cgit v1.2.3-70-g09d2