From 128fb157b302f0f0881844fc7a2fa5653c74a197 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C5=99emysl=20Janouch?= Date: Thu, 22 Dec 2016 22:58:20 +0100 Subject: Initial commit --- interpreters/bf-jit.c | 327 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 327 insertions(+) create mode 100644 interpreters/bf-jit.c (limited to 'interpreters/bf-jit.c') diff --git a/interpreters/bf-jit.c b/interpreters/bf-jit.c new file mode 100644 index 0000000..0be6d94 --- /dev/null +++ b/interpreters/bf-jit.c @@ -0,0 +1,327 @@ +// This is an exercise in futility more than anything else +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include + +#if (defined __x86_64__ || defined __amd64__) && defined __unix__ + #include +#else + #error Platform not supported +#endif + +#define exit_fatal(...) \ + do { \ + fprintf (stderr, "fatal: " __VA_ARGS__); \ + exit (EXIT_FAILURE); \ + } while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xcalloc (size_t m, size_t n) +{ + void *p = calloc (m, n); + if (!p) + exit_fatal ("calloc: %s\n", strerror (errno)); + return p; +} + +static void * +xrealloc (void *o, size_t n) +{ + void *p = realloc (o, n); + if (!p && n) + exit_fatal ("realloc: %s\n", strerror (errno)); + return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ + char *str; ///< String data, null terminated + size_t alloc; ///< How many bytes are allocated + size_t len; ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ + self->len = 0; + self->str = xcalloc (1, (self->alloc = 16)); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ + // We allocate at least one more byte for the terminating null character + size_t new_alloc = self->alloc; + while (new_alloc <= self->len + n) + new_alloc <<= 1; + if (new_alloc != self->alloc) + self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ + str_ensure_space (self, n); + memcpy (self->str + self->len, data, n); + self->str[self->len += n] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ + str_append_data (self, &c, 1); +} + +// --- Application ------------------------------------------------------------- + +struct str data; ///< Data tape +volatile size_t dataptr; ///< Current location on the tape +FILE *input; ///< User input + +enum command { RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END }; +bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0 }; +struct instruction { enum command cmd; size_t arg; }; + +// - - Callbacks - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +// Some things I just really don't want to write in assembly even though it +// is effectively a big performance hit, eliminating the advantage of JIT + +static void +right (size_t arg) +{ + assert (SIZE_MAX - dataptr > arg); + dataptr += arg; + + while (dataptr >= data.len) + str_append_c (&data, 0); +} + +static void +left (size_t arg) +{ + assert (dataptr >= arg); + dataptr -= arg; +} + +static int +cin (void) +{ + int c = fgetc (input); + assert (c != EOF); + return c; +} + +// - - Main - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +int +main (int argc, char *argv[]) +{ + (void) argc; + (void) argv; + + struct str program; + str_init (&program); + + int c; + while ((c = fgetc (stdin)) != EOF) + str_append_c (&program, c); + if (ferror (stdin)) + exit_fatal ("can't read program\n"); + if (!(input = fopen ("/dev/tty", "rb"))) + exit_fatal ("can't open terminal for reading\n"); + +// - - Decode and group - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + struct instruction *parsed = xcalloc (sizeof *parsed, program.len); + size_t parsed_len = 0; + + for (size_t i = 0; i < program.len; i++) + { + enum command cmd; + switch (program.str[i]) + { + case '>': cmd = RIGHT; break; + case '<': cmd = LEFT; break; + case '+': cmd = INC; break; + case '-': cmd = DEC; break; + case '.': cmd = OUT; break; + case ',': cmd = IN; break; + case '[': cmd = BEGIN; break; + case ']': cmd = END; break; + default: continue; + } + + if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd) + parsed_len++; + + parsed[parsed_len - 1].cmd = cmd; + parsed[parsed_len - 1].arg++; + } + +// - - Simple optimization pass - - - - - - - - - - - - - - - - - - - - - - - - + + size_t in = 0, out = 0; + for (; in < parsed_len; in++, out++) + { + if (in + 2 < parsed_len + && parsed[in ].cmd == BEGIN + && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 + && parsed[in + 2].cmd == END) + { + parsed[out].cmd = SET; + parsed[out].arg = 0; + in += 2; + } + else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC) + parsed[--out].arg += parsed[in].arg; + else if (out != in) + parsed[out] = parsed[in]; + } + + parsed_len = out; + +// - - Loop pairing - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + size_t nesting = 0; + size_t *stack = xcalloc (sizeof *stack, parsed_len); + for (size_t i = 0; i < parsed_len; i++) + { + switch (parsed[i].cmd) + { + case BEGIN: + stack[nesting++] = i; + break; + case END: + assert (nesting > 0); + + --nesting; + parsed[stack[nesting]].arg = i + 1; + parsed[i].arg = stack[nesting] + 1; + default: + break; + } + } + free (stack); + assert (nesting == 0); + +// - - JIT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + // Functions preserve the registers rbx, rsp, rbp, r12, r13, r14, and r15; + // while rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers. + + str_init (&program); + size_t *offsets = xcalloc (sizeof *offsets, parsed_len + 1); + +#define CODE(x) { char t[] = x; str_append_data (&program, t, sizeof t - 1); } +#define WORD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 8); } + + CODE ("\x49\xBD") WORD (&dataptr) // mov r13, qword "&dataptr" + CODE ("\x49\xBF") WORD (&data.str) // mov r15, qword "&data.str" + CODE ("\x4D\x8B\x37") // mov r14, qword [r15] + CODE ("\x30\xDB") // xor bl, bl + + for (size_t i = 0; i < parsed_len; i++) + { + offsets[i] = program.len; + + size_t arg = parsed[i].arg; + switch (parsed[i].cmd) + { + case RIGHT: + CODE ("\x41\x88\x1E") // mov [r14], bl + CODE ("\x48\xBF") WORD (arg) // mov rdi, "arg" + CODE ("\x48\xB8") WORD (right) // mov rax, "right" + CODE ("\xFF\xD0") // call rax + + // The data could get reallocated, so reload the address + CODE ("\x4D\x8B\x37") // mov r14, qword [r15] + CODE ("\x4D\x03\x75\x00") // add r14, [r13] + CODE ("\x41\x8A\x1E") // mov bl, [r14] + break; + case LEFT: + CODE ("\x41\x88\x1E") // mov [r14], bl + CODE ("\x48\xBF") WORD (arg) // mov rdi, "arg" + CODE ("\x49\x29\xFE") // sub r14, rdi -- optimistic + CODE ("\x48\xB8") WORD (left) // mov rax, "left" + CODE ("\xFF\xD0") // call rax + CODE ("\x41\x8A\x1E") // mov bl, [r14] + break; + + case INC: + CODE ("\x80\xC3") // add bl, "arg" + str_append_c (&program, arg); + break; + case DEC: + CODE ("\x80\xEB") // sub bl, "arg" + str_append_c (&program, arg); + break; + case SET: + CODE ("\xB3") // mov bl, "arg" + str_append_c (&program, arg); + break; + + case OUT: + CODE ("\x48\x0F\xB6\xFB") // movzx rdi, bl + CODE ("\x48\xBE") WORD (stdout) // mov rsi, "stdout" + CODE ("\x48\xB8") WORD (fputc) // mov rax, "fputc" + CODE ("\xFF\xD0") // call rax + break; + case IN: + CODE ("\x48\xB8") WORD (cin) // mov rax, "cin" + CODE ("\xFF\xD0") // call rax + CODE ("\x88\xC3") // mov bl, al + break; + + case BEGIN: + CODE ("\x84\xDB") // test bl, bl + CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[i]" + break; + case END: + CODE ("\x84\xDB") // test bl, bl + CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[i]" + break; + } + } + // When there is a loop at the end we need to be able to jump past it + offsets[parsed_len] = program.len; + str_append_c (&program, '\xC3'); // ret + + // Now that we know where each instruction is, fill in relative jumps + for (size_t i = 0; i < parsed_len; i++) + { + if (parsed[i].cmd != BEGIN && parsed[i].cmd != END) + continue; + size_t fixup = offsets[i] + 4; + *(int32_t *)(program.str + fixup) = + ((intptr_t)(offsets[parsed[i].arg]) - (intptr_t)(fixup + 4)); + } + free (offsets); + +// - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + // Some systems may have W^X + void *executable = mmap (NULL, program.len, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (!executable) + exit_fatal ("mmap: %s\n", strerror (errno)); + + memcpy (executable, program.str, program.len); + if (mprotect (executable, program.len, PROT_READ | PROT_EXEC)) + exit_fatal ("mprotect: %s\n", strerror (errno)); + + str_init (&data); + str_append_c (&data, 0); + ((void (*) (void)) executable)(); + return 0; +} -- cgit v1.2.3